FFmpeg
Main Page
Related Pages
Modules
Namespaces
Data Structures
Files
Examples
File List
Globals
All
Data Structures
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Macros
Groups
Pages
libavutil
x86
float_dsp_init.c
Go to the documentation of this file.
/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
18
19
#include "config.h"
20
21
#include "
libavutil/attributes.h
"
22
#include "
libavutil/cpu.h
"
23
#include "
libavutil/float_dsp.h
"
24
#include "
cpu.h
"
25
#include "
asm.h
"
26
27
/*
 * Prototypes of the SIMD routines that ff_float_dsp_init_x86() stores into
 * AVFloatDSPContext.  They are only declared here; their definitions live
 * outside this file (presumably in external assembly -- confirm against the
 * build files).  The name suffix is the instruction set the init function
 * checks for before selecting the routine.
 */

/* Installed as vector_fmul. */
void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1,
                        int len);
void ff_vector_fmul_avx(float *dst, const float *src0, const float *src1,
                        int len);

/* Installed as vector_fmac_scalar. */
void ff_vector_fmac_scalar_sse(float *dst, const float *src, float mul,
                               int len);
void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
                               int len);

/* Installed as vector_fmul_scalar. */
void ff_vector_fmul_scalar_sse(float *dst, const float *src, float mul,
                               int len);

/* Installed as vector_dmul_scalar (double precision). */
void ff_vector_dmul_scalar_sse2(double *dst, const double *src,
                                double mul, int len);
void ff_vector_dmul_scalar_avx(double *dst, const double *src,
                               double mul, int len);

/* Installed as vector_fmul_add. */
void ff_vector_fmul_add_sse(float *dst, const float *src0, const float *src1,
                            const float *src2, int len);
void ff_vector_fmul_add_avx(float *dst, const float *src0, const float *src1,
                            const float *src2, int len);

/* Installed as vector_fmul_reverse. */
void ff_vector_fmul_reverse_sse(float *dst, const float *src0,
                                const float *src1, int len);
void ff_vector_fmul_reverse_avx(float *dst, const float *src0,
                                const float *src1, int len);

/* Installed as scalarproduct_float. */
float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);

/* Installed as butterflies_float. */
void ff_butterflies_float_sse(float *src0, float *src1, int len);
58
59
#if HAVE_6REGS && HAVE_INLINE_ASM
60
static
void
vector_fmul_window_3dnowext(
float
*dst,
const
float
*src0,
61
const
float
*src1,
const
float
*win,
62
int
len
)
63
{
64
x86_reg
i = -len * 4;
65
x86_reg
j = len * 4 - 8;
66
__asm__
volatile
(
67
"1: \n"
68
"pswapd (%5, %1), %%mm1 \n"
69
"movq (%5, %0), %%mm0 \n"
70
"pswapd (%4, %1), %%mm5 \n"
71
"movq (%3, %0), %%mm4 \n"
72
"movq %%mm0, %%mm2 \n"
73
"movq %%mm1, %%mm3 \n"
74
"pfmul %%mm4, %%mm2 \n"
// src0[len + i] * win[len + i]
75
"pfmul %%mm5, %%mm3 \n"
// src1[j] * win[len + j]
76
"pfmul %%mm4, %%mm1 \n"
// src0[len + i] * win[len + j]
77
"pfmul %%mm5, %%mm0 \n"
// src1[j] * win[len + i]
78
"pfadd %%mm3, %%mm2 \n"
79
"pfsub %%mm0, %%mm1 \n"
80
"pswapd %%mm2, %%mm2 \n"
81
"movq %%mm1, (%2, %0) \n"
82
"movq %%mm2, (%2, %1) \n"
83
"sub $8, %1 \n"
84
"add $8, %0 \n"
85
"jl 1b \n"
86
"femms \n"
87
:
"+r"
(i),
"+r"
(j)
88
:
"r"
(dst +
len
),
"r"
(src0 + len),
"r"
(src1),
"r"
(win + len)
89
);
90
}
91
92
static
void
vector_fmul_window_sse(
float
*dst,
const
float
*src0,
93
const
float
*src1,
const
float
*win,
int
len)
94
{
95
x86_reg
i = -len * 4;
96
x86_reg
j = len * 4 - 16;
97
__asm__
volatile
(
98
"1: \n"
99
"movaps (%5, %1), %%xmm1 \n"
100
"movaps (%5, %0), %%xmm0 \n"
101
"movaps (%4, %1), %%xmm5 \n"
102
"movaps (%3, %0), %%xmm4 \n"
103
"shufps $0x1b, %%xmm1, %%xmm1 \n"
104
"shufps $0x1b, %%xmm5, %%xmm5 \n"
105
"movaps %%xmm0, %%xmm2 \n"
106
"movaps %%xmm1, %%xmm3 \n"
107
"mulps %%xmm4, %%xmm2 \n"
// src0[len + i] * win[len + i]
108
"mulps %%xmm5, %%xmm3 \n"
// src1[j] * win[len + j]
109
"mulps %%xmm4, %%xmm1 \n"
// src0[len + i] * win[len + j]
110
"mulps %%xmm5, %%xmm0 \n"
// src1[j] * win[len + i]
111
"addps %%xmm3, %%xmm2 \n"
112
"subps %%xmm0, %%xmm1 \n"
113
"shufps $0x1b, %%xmm2, %%xmm2 \n"
114
"movaps %%xmm1, (%2, %0) \n"
115
"movaps %%xmm2, (%2, %1) \n"
116
"sub $16, %1 \n"
117
"add $16, %0 \n"
118
"jl 1b \n"
119
:
"+r"
(i),
"+r"
(j)
120
:
"r"
(dst +
len
),
"r"
(src0 + len),
"r"
(src1),
"r"
(win + len)
121
);
122
}
123
#endif
/* HAVE_6REGS && HAVE_INLINE_ASM */
124
125
/**
 * Install the x86-specific implementations into an AVFloatDSPContext.
 *
 * The CPU flags are fetched once with av_get_cpu_flags(); every feature
 * check that succeeds overwrites the corresponding function pointers in
 * fdsp.  The checks run in a fixed order and later ones override earlier
 * ones: SSE replaces 3DNow!ext for vector_fmul_window, and AVX replaces the
 * SSE/SSE2 choice for each field it also sets.  Fields not assigned by any
 * successful check are left as they were on entry.
 *
 * @param fdsp context whose function pointers are updated in place
 */
av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
{
    int cpu_flags = av_get_cpu_flags();

    /* The inline-asm window functions only exist under this same guard. */
#if HAVE_6REGS && HAVE_INLINE_ASM
    if (INLINE_AMD3DNOWEXT(cpu_flags)) {
        fdsp->vector_fmul_window  = vector_fmul_window_3dnowext;
    }
    if (INLINE_SSE(cpu_flags)) {
        fdsp->vector_fmul_window  = vector_fmul_window_sse;
    }
#endif
    if (EXTERNAL_SSE(cpu_flags)) {
        fdsp->vector_fmul         = ff_vector_fmul_sse;
        fdsp->vector_fmac_scalar  = ff_vector_fmac_scalar_sse;
        fdsp->vector_fmul_scalar  = ff_vector_fmul_scalar_sse;
        fdsp->vector_fmul_add     = ff_vector_fmul_add_sse;
        fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
        fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
        fdsp->butterflies_float   = ff_butterflies_float_sse;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        fdsp->vector_dmul_scalar  = ff_vector_dmul_scalar_sse2;
    }
    /* Must stay last so the AVX variants win over the SSE/SSE2 ones. */
    if (EXTERNAL_AVX(cpu_flags)) {
        fdsp->vector_fmul         = ff_vector_fmul_avx;
        fdsp->vector_fmac_scalar  = ff_vector_fmac_scalar_avx;
        fdsp->vector_dmul_scalar  = ff_vector_dmul_scalar_avx;
        fdsp->vector_fmul_add     = ff_vector_fmul_add_avx;
        fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
    }
}
Generated on Sun Mar 23 2014 23:50:15 for FFmpeg by
1.8.2