/*
 * Declare the prototype of one chroma loop-filter routine.
 * The function name is token-pasted from the arguments:
 *   ff_hevc_<DIR>_loop_filter_chroma_<DEPTH>_<OPT>
 * Only a declaration is produced; the expansion already ends in ';'.
 */
33 #define LFC_FUNC(DIR, DEPTH, OPT) \
34 void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, const int *tc, const uint8_t *no_p, const uint8_t *no_q);
/*
 * Declare the prototype of one luma loop-filter routine.
 * The function name is token-pasted from the arguments:
 *   ff_hevc_<DIR>_loop_filter_luma_<DEPTH>_<OPT>
 * Compared with the chroma prototype, the luma one takes an extra
 * 'int beta' argument before 'tc'. The expansion already ends in ';'.
 */
36 #define LFL_FUNC(DIR, DEPTH, OPT) \
37 void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int beta, const int *tc, const uint8_t *no_p, const uint8_t *no_q);
/*
 * Declare both chroma loop-filter prototypes (DIR = h and DIR = v) for one
 * depth/opt pair by expanding LFC_FUNC twice.
 * The 'type' parameter is accepted but not referenced in the expansion.
 */
39 #define LFC_FUNCS(type, depth, opt) \
40 LFC_FUNC(h, depth, opt) \
41 LFC_FUNC(v, depth, opt)
/*
 * Declare both luma loop-filter prototypes (DIR = h and DIR = v) for one
 * depth/opt pair by expanding LFL_FUNC twice.
 * The 'type' parameter is accepted but not referenced in the expansion.
 */
43 #define LFL_FUNCS(type, depth, opt) \
44 LFL_FUNC(h, depth, opt) \
45 LFL_FUNC(v, depth, opt)
/*
 * Declare the DC-only IDCT prototypes for block size token W at three
 * depths: ff_hevc_idct_<W>_dc_{8,10,12}_<opt>(int16_t *coeffs).
 * The last declaration has no trailing ';', so the invocation must
 * supply it.
 */
63 #define IDCT_DC_FUNCS(W, opt) \
64 void ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \
65 void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \
66 void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs)
/*
 * Declare the full IDCT prototypes for one 'opt' suffix:
 * sizes 4x4, 8x8, 16x16 and 32x32, each at depth 8 and depth 10.
 * Every prototype takes (int16_t *coeffs, int col_limit).
 * The expansion already ends in ';'.
 */
75 #define IDCT_FUNCS(opt) \
76 void ff_hevc_idct_4x4_8_ ## opt(int16_t *coeffs, int col_limit); \
77 void ff_hevc_idct_4x4_10_ ## opt(int16_t *coeffs, int col_limit); \
78 void ff_hevc_idct_8x8_8_ ## opt(int16_t *coeffs, int col_limit); \
79 void ff_hevc_idct_8x8_10_ ## opt(int16_t *coeffs, int col_limit); \
80 void ff_hevc_idct_16x16_8_ ## opt(int16_t *coeffs, int col_limit); \
81 void ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit); \
82 void ff_hevc_idct_32x32_8_ ## opt(int16_t *coeffs, int col_limit); \
83 void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit);
/*
 * ff_hevc_pel_filters is an alias for ff_hevc_qpel_filters, so that
 * DECL_HV_FILTER(pel) pastes to a name that resolves to an existing table
 * (presumably the coefficients are irrelevant for plain pel copies; confirm
 * against the callee).
 *
 * DECL_HV_FILTER(f) declares two locals at the expansion site:
 *   hf = ff_hevc_<f>_filters[mx]
 *   vf = ff_hevc_<f>_filters[my]
 * 'mx' and 'my' must already be in scope where the macro is expanded.
 */
89 #define ff_hevc_pel_filters ff_hevc_qpel_filters
90 #define DECL_HV_FILTER(f) \
91 const uint8_t *hf = ff_hevc_ ## f ## _filters[mx]; \
92 const uint8_t *vf = ff_hevc_ ## f ## _filters[my];
94 #define FW_PUT(p, a, b, depth, opt) \
95 void ff_hevc_put_hevc_ ## a ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \
96 int height, intptr_t mx, intptr_t my,int width) \
99 ff_h2656_put_ ## b ## _ ## depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); \
102 #define FW_PUT_UNI(p, a, b, depth, opt) \
103 void ff_hevc_put_hevc_uni_ ## a ## _ ## depth ## _##opt(uint8_t *dst, ptrdiff_t dststride, \
104 const uint8_t *src, ptrdiff_t srcstride, \
105 int height, intptr_t mx, intptr_t my, int width) \
108 ff_h2656_put_uni_ ## b ## _ ## depth ## _##opt(dst, dststride, src, srcstride, height, hf, vf, width); \
111 #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
/*
 * Expand both forwarding wrappers, FW_PUT and FW_PUT_UNI, with the same
 * argument list.
 */
113 #define FW_PUT_FUNCS(p, a, b, depth, opt) \
114 FW_PUT(p, a, b, depth, opt) \
115 FW_PUT_UNI(p, a, b, depth, opt)
/*
 * Forwarders for the unfiltered (pel) case at width w: expands FW_PUT_FUNCS
 * with p = pel, a = pel_pixels<w> and b = pixels<w>.
 */
117 #define FW_PEL(w, depth, opt) FW_PUT_FUNCS(pel, pel_pixels##w, pixels##w, depth, opt)
/*
 * Forwarders for the horizontal-only and vertical-only cases of one filter
 * family: expands FW_PUT_FUNCS twice, with
 *   a = <npel>_h<w>, b = <n>tap_h<w>   and
 *   a = <npel>_v<w>, b = <n>tap_v<w>.
 */
119 #define FW_DIR(npel, n, w, depth, opt) \
120 FW_PUT_FUNCS(npel, npel ## _h##w, n ## tap_h##w, depth, opt) \
121 FW_PUT_FUNCS(npel, npel ## _v##w, n ## tap_v##w, depth, opt)
/*
 * Forwarders for the combined horizontal+vertical case of one filter family:
 * expands FW_PUT_FUNCS with a = <npel>_hv<w> and b = <n>tap_hv<w>.
 */
123 #define FW_DIR_HV(npel, n, w, depth, opt) \
124 FW_PUT_FUNCS(npel, npel ## _hv##w, n ## tap_hv##w, depth, opt)
/*
 * epel shorthands: FW_EPEL and FW_EPEL_HV fix npel = epel and n = 4
 * (so the forwarded-to names use the "4tap" infix); FW_EPEL_FUNCS expands
 * both, covering the h, v and hv variants for one width/depth/opt.
 */
138 #define FW_EPEL(w, depth, opt) FW_DIR(epel, 4, w, depth, opt)
139 #define FW_EPEL_HV(w, depth, opt) FW_DIR_HV(epel, 4, w, depth, opt)
140 #define FW_EPEL_FUNCS(w, depth, opt) \
141 FW_EPEL(w, depth, opt) \
142 FW_EPEL_HV(w, depth, opt)
/*
 * Instantiate the epel forwarders with the sse4 suffix:
 * widths 4, 6, 8 and 16 at depth 8; widths 4, 6 and 8 at depths 10 and 12.
 */
146 FW_EPEL_FUNCS(4, 8, sse4)
147 FW_EPEL_FUNCS(6, 8, sse4)
148 FW_EPEL_FUNCS(8, 8, sse4)
149 FW_EPEL_FUNCS(16, 8, sse4)
150 FW_EPEL_FUNCS(4, 10, sse4)
151 FW_EPEL_FUNCS(6, 10, sse4)
152 FW_EPEL_FUNCS(8, 10, sse4)
153 FW_EPEL_FUNCS(4, 12, sse4)
154 FW_EPEL_FUNCS(6, 12, sse4)
155 FW_EPEL_FUNCS(8, 12, sse4)
/*
 * qpel shorthands: FW_QPEL and FW_QPEL_HV fix npel = qpel and n = 8
 * (so the forwarded-to names use the "8tap" infix); FW_QPEL_FUNCS expands
 * both, covering the h, v and hv variants for one width/depth/opt.
 */
157 #define FW_QPEL(w, depth, opt) FW_DIR(qpel, 8, w, depth, opt)
158 #define FW_QPEL_HV(w, depth, opt) FW_DIR_HV(qpel, 8, w, depth, opt)
159 #define FW_QPEL_FUNCS(w, depth, opt) \
160 FW_QPEL(w, depth, opt) \
161 FW_QPEL_HV(w, depth, opt)
/*
 * Instantiate the qpel forwarders with the sse4 suffix:
 * widths 4 and 8 at depths 8, 10 and 12.
 */
166 FW_QPEL_FUNCS(4, 8, sse4)
167 FW_QPEL_FUNCS(8, 8, sse4)
168 FW_QPEL_FUNCS(4, 10, sse4)
169 FW_QPEL_FUNCS(8, 10, sse4)
170 FW_QPEL_FUNCS(4, 12, sse4)
171 FW_QPEL_FUNCS(8, 12, sse4)
173 #if HAVE_AVX2_EXTERNAL
/*
 * Forwarders instantiated with the avx2 suffix (inside the
 * HAVE_AVX2_EXTERNAL conditional opened just above):
 *   - plain FW_PUT (no uni variant) for pel_pixels16 at depth 10
 *   - epel h/v at width 16, depth 10
 *   - epel hv at width 32, depth 8 and at width 16, depth 10
 *   - qpel h/v and hv at width 16, depth 10
 */
176 FW_PUT(pel, pel_pixels16, pixels16, 10, avx2)
179 FW_EPEL(16, 10, avx2)
181 FW_EPEL_HV(32, 8, avx2)
182 FW_EPEL_HV(16, 10, avx2)
185 FW_QPEL(16, 10, avx2)
187 FW_QPEL_HV(16, 10, avx2)
192 #define mc_rep_func(name, bitd, step, W, opt) \
193 void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst, \
194 const uint8_t *_src, ptrdiff_t _srcstride, int height, \
195 intptr_t mx, intptr_t my, int width) \
199 for (i = 0; i < W; i += step) { \
200 const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
202 ff_hevc_put_hevc_##name##step##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \
205 #define mc_rep_uni_func(name, bitd, step, W, opt) \
206 void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, \
207 const uint8_t *_src, ptrdiff_t _srcstride, int height, \
208 intptr_t mx, intptr_t my, int width) \
212 for (i = 0; i < W; i += step) { \
213 const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
214 dst = _dst + (i * ((bitd + 7) / 8)); \
215 ff_hevc_put_hevc_uni_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, \
216 height, mx, my, width); \
219 #define mc_rep_bi_func(name, bitd, step, W, opt) \
220 void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const uint8_t *_src, \
221 ptrdiff_t _srcstride, const int16_t *_src2, \
222 int height, intptr_t mx, intptr_t my, int width) \
226 for (i = 0; i < W ; i += step) { \
227 const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
228 const int16_t *src2 = _src2 + i; \
229 dst = _dst + (i * ((bitd + 7) / 8)); \
230 ff_hevc_put_hevc_bi_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, \
231 height, mx, my, width); \
/*
 * Emit the three looped wrappers (plain, uni and bi) for one
 * name/bitd/step/W/opt combination by expanding mc_rep_func,
 * mc_rep_uni_func and mc_rep_bi_func with identical arguments.
 */
235 #define mc_rep_funcs(name, bitd, step, W, opt) \
236 mc_rep_func(name, bitd, step, W, opt) \
237 mc_rep_uni_func(name, bitd, step, W, opt) \
238 mc_rep_bi_func(name, bitd, step, W, opt)
240 #define mc_rep_func2(name, bitd, step1, step2, W, opt) \
241 void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *dst, \
242 const uint8_t *src, ptrdiff_t _srcstride, int height, \
243 intptr_t mx, intptr_t my, int width) \
245 ff_hevc_put_hevc_##name##step1##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \
246 ff_hevc_put_hevc_##name##step2##_##bitd##_##opt(dst + step1, src + (step1 * ((bitd + 7) / 8)), \
247 _srcstride, height, mx, my, width); \
249 #define mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \
250 void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, \
251 const uint8_t *src, ptrdiff_t _srcstride, int height, \
252 intptr_t mx, intptr_t my, int width) \
254 ff_hevc_put_hevc_uni_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, height, mx, my, width);\
255 ff_hevc_put_hevc_uni_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \
256 src + (step1 * ((bitd + 7) / 8)), _srcstride, \
257 height, mx, my, width); \
259 #define mc_rep_bi_func2(name, bitd, step1, step2, W, opt) \
260 void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
261 ptrdiff_t _srcstride, const int16_t *src2, \
262 int height, intptr_t mx, intptr_t my, int width) \
264 ff_hevc_put_hevc_bi_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, height, mx, my, width);\
265 ff_hevc_put_hevc_bi_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \
266 src + (step1 * ((bitd + 7) / 8)), _srcstride, \
267 src2 + step1, height, mx, my, width); \
/*
 * Emit the three two-part wrappers (plain, uni and bi) for one
 * name/bitd/step1/step2/W/opt combination by expanding mc_rep_func2,
 * mc_rep_uni_func2 and mc_rep_bi_func2 with identical arguments.
 */
270 #define mc_rep_funcs2(name, bitd, step1, step2, W, opt) \
271 mc_rep_func2(name, bitd, step1, step2, W, opt) \
272 mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \
273 mc_rep_bi_func2(name, bitd, step1, step2, W, opt)
275 #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
277 #define mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
278 void ff_hevc_put_hevc_##name##width1##_10_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \
279 int height, intptr_t mx, intptr_t my, int width) \
282 ff_hevc_put_hevc_##name##width2##_10_##opt1(dst, src, _srcstride, height, mx, my, width); \
283 ff_hevc_put_hevc_##name##width3##_10_##opt2(dst+ width2, src+ width4, _srcstride, height, mx, my, width); \
286 #define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
287 void ff_hevc_put_hevc_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
288 ptrdiff_t _srcstride, const int16_t *src2, \
289 int height, intptr_t mx, intptr_t my, int width) \
291 ff_hevc_put_hevc_bi_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, src2, \
292 height, mx, my, width); \
293 ff_hevc_put_hevc_bi_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, src2+width2,\
294 height, mx, my, width); \
297 #define mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
298 void ff_hevc_put_hevc_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, \
299 const uint8_t *src, ptrdiff_t _srcstride, int height, \
300 intptr_t mx, intptr_t my, int width) \
302 ff_hevc_put_hevc_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, \
303 height, mx, my, width); \
304 ff_hevc_put_hevc_uni_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, \
305 height, mx, my, width); \
/*
 * Emit all three 10-bit mixed-suffix wrappers (plain, bi and uni) by
 * expanding mc_rep_mix_10, mc_bi_rep_mix_10 and mc_uni_rep_mix_10 with
 * identical arguments.
 */
308 #define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4) \
309 mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
310 mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
311 mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)
313 #define mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
314 void ff_hevc_put_hevc_##name##width1##_8_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \
315 int height, intptr_t mx, intptr_t my, int width) \
318 ff_hevc_put_hevc_##name##width2##_8_##opt1(dst, src, _srcstride, height, mx, my, width); \
319 ff_hevc_put_hevc_##name##width3##_8_##opt2(dst+ width2, src+ width2, _srcstride, height, mx, my, width); \
322 #define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
323 void ff_hevc_put_hevc_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
324 ptrdiff_t _srcstride, const int16_t *src2, \
325 int height, intptr_t mx, intptr_t my, int width) \
327 ff_hevc_put_hevc_bi_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \
328 src2, height, mx, my, width); \
329 ff_hevc_put_hevc_bi_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \
330 src2+width2, height, mx, my, width); \
333 #define mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
334 void ff_hevc_put_hevc_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, \
335 const uint8_t *src, ptrdiff_t _srcstride, int height, \
336 intptr_t mx, intptr_t my, int width) \
338 ff_hevc_put_hevc_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \
339 height, mx, my, width); \
340 ff_hevc_put_hevc_uni_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \
341 height, mx, my, width); \
/*
 * Emit all three 8-bit mixed-suffix wrappers (plain, bi and uni) by
 * expanding mc_rep_mix_8, mc_bi_rep_mix_8 and mc_uni_rep_mix_8 with
 * identical arguments.
 */
344 #define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2) \
345 mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
346 mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
347 mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2)
349 #if HAVE_AVX2_EXTERNAL
/*
 * Mixed avx2/sse4 wrappers (opt1 = avx2, opt2 = sse4).
 *   8-bit:  width 48 built from a 32 part and a 16 part, for pel_pixels,
 *           epel_hv, epel_h, epel_v, qpel_h and qpel_v.
 *   10-bit: width 24 built from a 16 part and an 8 part, with width4 = 32;
 *           pel_pixels gets only the plain and bi wrappers, while
 *           epel_hv/h/v and qpel_h/v/hv get plain, bi and uni.
 * NOTE(review): width4 is used as a pointer offset in the macro bodies;
 * 32 presumably is 16 samples * 2 bytes at 10-bit. Confirm.
 */
351 mc_rep_mixs_8(pel_pixels, 48, 32, 16, avx2, sse4)
352 mc_rep_mixs_8(epel_hv, 48, 32, 16, avx2, sse4)
353 mc_rep_mixs_8(epel_h , 48, 32, 16, avx2, sse4)
354 mc_rep_mixs_8(epel_v , 48, 32, 16, avx2, sse4)
356 mc_rep_mix_10(pel_pixels, 24, 16, 8, avx2, sse4, 32)
357 mc_bi_rep_mix_10(pel_pixels,24, 16, 8, avx2, sse4, 32)
358 mc_rep_mixs_10(epel_hv, 24, 16, 8, avx2, sse4, 32)
359 mc_rep_mixs_10(epel_h , 24, 16, 8, avx2, sse4, 32)
360 mc_rep_mixs_10(epel_v , 24, 16, 8, avx2, sse4, 32)
363 mc_rep_mixs_10(qpel_h , 24, 16, 8, avx2, sse4, 32)
364 mc_rep_mixs_10(qpel_v , 24, 16, 8, avx2, sse4, 32)
365 mc_rep_mixs_10(qpel_hv, 24, 16, 8, avx2, sse4, 32)
401 mc_rep_mixs_8(qpel_h , 48, 32, 16, avx2, sse4)
404 mc_rep_mixs_8(qpel_v, 48, 32, 16, avx2, sse4)
538 #define mc_rep_uni_w(bitd, step, W, opt) \
539 void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \
540 int height, int denom, int _wx, int _ox) \
544 for (i = 0; i < W; i += step) { \
545 const int16_t *src = _src + i; \
546 dst= _dst + (i * ((bitd + 7) / 8)); \
547 ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(dst, dststride, src, \
548 height, denom, _wx, _ox); \
/*
 * Instantiate the looped uni-weighted wrappers with the sse4 suffix for
 * depths 8, 10 and 12: width 12 in steps of 6, and widths 16, 24, 32, 48
 * and 64 in steps of 8.
 */
552 mc_rep_uni_w(8, 6, 12, sse4)
553 mc_rep_uni_w(8, 8, 16, sse4)
554 mc_rep_uni_w(8, 8, 24, sse4)
555 mc_rep_uni_w(8, 8, 32, sse4)
556 mc_rep_uni_w(8, 8, 48, sse4)
557 mc_rep_uni_w(8, 8, 64, sse4)
559 mc_rep_uni_w(10, 6, 12, sse4)
560 mc_rep_uni_w(10, 8, 16, sse4)
561 mc_rep_uni_w(10, 8, 24, sse4)
562 mc_rep_uni_w(10, 8, 32, sse4)
563 mc_rep_uni_w(10, 8, 48, sse4)
564 mc_rep_uni_w(10, 8, 64, sse4)
566 mc_rep_uni_w(12, 6, 12, sse4)
567 mc_rep_uni_w(12, 8, 16, sse4)
568 mc_rep_uni_w(12, 8, 24, sse4)
569 mc_rep_uni_w(12, 8, 32, sse4)
570 mc_rep_uni_w(12, 8, 48, sse4)
571 mc_rep_uni_w(12, 8, 64, sse4)
573 #define mc_rep_bi_w(bitd, step, W, opt) \
574 void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \
575 const int16_t *_src2, int height, \
576 int denom, int _wx0, int _wx1, int _ox0, int _ox1) \
580 for (i = 0; i < W; i += step) { \
581 const int16_t *src = _src + i; \
582 const int16_t *src2 = _src2 + i; \
583 dst = _dst + (i * ((bitd + 7) / 8)); \
584 ff_hevc_put_hevc_bi_w##step##_##bitd##_##opt(dst, dststride, src, src2, \
585 height, denom, _wx0, _wx1, _ox0, _ox1); \
/*
 * Instantiate the looped bi-weighted wrappers with the sse4 suffix for
 * depths 8, 10 and 12: width 12 in steps of 6, and widths 16, 24, 32, 48
 * and 64 in steps of 8.
 */
589 mc_rep_bi_w(8, 6, 12, sse4)
590 mc_rep_bi_w(8, 8, 16, sse4)
591 mc_rep_bi_w(8, 8, 24, sse4)
592 mc_rep_bi_w(8, 8, 32, sse4)
593 mc_rep_bi_w(8, 8, 48, sse4)
594 mc_rep_bi_w(8, 8, 64, sse4)
596 mc_rep_bi_w(10, 6, 12, sse4)
597 mc_rep_bi_w(10, 8, 16, sse4)
598 mc_rep_bi_w(10, 8, 24, sse4)
599 mc_rep_bi_w(10, 8, 32, sse4)
600 mc_rep_bi_w(10, 8, 48, sse4)
601 mc_rep_bi_w(10, 8, 64, sse4)
603 mc_rep_bi_w(12, 6, 12, sse4)
604 mc_rep_bi_w(12, 8, 16, sse4)
605 mc_rep_bi_w(12, 8, 24, sse4)
606 mc_rep_bi_w(12, 8, 32, sse4)
607 mc_rep_bi_w(12, 8, 48, sse4)
608 mc_rep_bi_w(12, 8, 64, sse4)
610 #define mc_uni_w_func(name, bitd, W, opt) \
611 void ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
612 const uint8_t *_src, ptrdiff_t _srcstride, \
613 int height, int denom, \
615 intptr_t mx, intptr_t my, int width) \
617 LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
618 ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \
619 ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, height, denom, _wx, _ox);\
/*
 * Expand mc_uni_w_func for the eight widths 4, 8, 12, 16, 24, 32, 48 and 64
 * for one name/bitd/opt combination. Width 6 is not included here; callers
 * that need it invoke mc_uni_w_func directly.
 */
622 #define mc_uni_w_funcs(name, bitd, opt) \
623 mc_uni_w_func(name, bitd, 4, opt) \
624 mc_uni_w_func(name, bitd, 8, opt) \
625 mc_uni_w_func(name, bitd, 12, opt) \
626 mc_uni_w_func(name, bitd, 16, opt) \
627 mc_uni_w_func(name, bitd, 24, opt) \
628 mc_uni_w_func(name, bitd, 32, opt) \
629 mc_uni_w_func(name, bitd, 48, opt) \
630 mc_uni_w_func(name, bitd, 64, opt)
/*
 * Instantiate the uni-weighted wrappers with the sse4 suffix for depths 8,
 * 10 and 12. At every depth:
 *   - pel_pixels, epel_h, epel_v and epel_hv get the eight standard widths
 *     plus an extra width-6 wrapper;
 *   - qpel_h, qpel_v and qpel_hv get the eight standard widths only.
 */
632 mc_uni_w_funcs(pel_pixels, 8, sse4)
633 mc_uni_w_func(pel_pixels, 8, 6, sse4)
634 mc_uni_w_funcs(epel_h, 8, sse4)
635 mc_uni_w_func(epel_h, 8, 6, sse4)
636 mc_uni_w_funcs(epel_v, 8, sse4)
637 mc_uni_w_func(epel_v, 8, 6, sse4)
638 mc_uni_w_funcs(epel_hv, 8, sse4)
639 mc_uni_w_func(epel_hv, 8, 6, sse4)
640 mc_uni_w_funcs(qpel_h, 8, sse4)
641 mc_uni_w_funcs(qpel_v, 8, sse4)
642 mc_uni_w_funcs(qpel_hv, 8, sse4)
644 mc_uni_w_funcs(pel_pixels, 10, sse4)
645 mc_uni_w_func(pel_pixels, 10, 6, sse4)
646 mc_uni_w_funcs(epel_h, 10, sse4)
647 mc_uni_w_func(epel_h, 10, 6, sse4)
648 mc_uni_w_funcs(epel_v, 10, sse4)
649 mc_uni_w_func(epel_v, 10, 6, sse4)
650 mc_uni_w_funcs(epel_hv, 10, sse4)
651 mc_uni_w_func(epel_hv, 10, 6, sse4)
652 mc_uni_w_funcs(qpel_h, 10, sse4)
653 mc_uni_w_funcs(qpel_v, 10, sse4)
654 mc_uni_w_funcs(qpel_hv, 10, sse4)
656 mc_uni_w_funcs(pel_pixels, 12, sse4)
657 mc_uni_w_func(pel_pixels, 12, 6, sse4)
658 mc_uni_w_funcs(epel_h, 12, sse4)
659 mc_uni_w_func(epel_h, 12, 6, sse4)
660 mc_uni_w_funcs(epel_v, 12, sse4)
661 mc_uni_w_func(epel_v, 12, 6, sse4)
662 mc_uni_w_funcs(epel_hv, 12, sse4)
663 mc_uni_w_func(epel_hv, 12, 6, sse4)
664 mc_uni_w_funcs(qpel_h, 12, sse4)
665 mc_uni_w_funcs(qpel_v, 12, sse4)
666 mc_uni_w_funcs(qpel_hv, 12, sse4)
668 #define mc_bi_w_func(name, bitd, W, opt) \
669 void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
670 const uint8_t *_src, ptrdiff_t _srcstride, \
671 const int16_t *_src2, \
672 int height, int denom, \
673 int _wx0, int _wx1, int _ox0, int _ox1, \
674 intptr_t mx, intptr_t my, int width) \
676 LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
677 ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \
678 ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, _src2, \
679 height, denom, _wx0, _wx1, _ox0, _ox1); \
/*
 * Expand mc_bi_w_func for the eight widths 4, 8, 12, 16, 24, 32, 48 and 64
 * for one name/bitd/opt combination. Width 6 is not included here; callers
 * that need it invoke mc_bi_w_func directly.
 */
682 #define mc_bi_w_funcs(name, bitd, opt) \
683 mc_bi_w_func(name, bitd, 4, opt) \
684 mc_bi_w_func(name, bitd, 8, opt) \
685 mc_bi_w_func(name, bitd, 12, opt) \
686 mc_bi_w_func(name, bitd, 16, opt) \
687 mc_bi_w_func(name, bitd, 24, opt) \
688 mc_bi_w_func(name, bitd, 32, opt) \
689 mc_bi_w_func(name, bitd, 48, opt) \
690 mc_bi_w_func(name, bitd, 64, opt)
/*
 * Instantiate the bi-weighted wrappers with the sse4 suffix for depths 8,
 * 10 and 12. At every depth:
 *   - pel_pixels, epel_h, epel_v and epel_hv get the eight standard widths
 *     plus an extra width-6 wrapper;
 *   - qpel_h, qpel_v and qpel_hv get the eight standard widths only.
 */
692 mc_bi_w_funcs(pel_pixels, 8, sse4)
693 mc_bi_w_func(pel_pixels, 8, 6, sse4)
694 mc_bi_w_funcs(epel_h, 8, sse4)
695 mc_bi_w_func(epel_h, 8, 6, sse4)
696 mc_bi_w_funcs(epel_v, 8, sse4)
697 mc_bi_w_func(epel_v, 8, 6, sse4)
698 mc_bi_w_funcs(epel_hv, 8, sse4)
699 mc_bi_w_func(epel_hv, 8, 6, sse4)
700 mc_bi_w_funcs(qpel_h, 8, sse4)
701 mc_bi_w_funcs(qpel_v, 8, sse4)
702 mc_bi_w_funcs(qpel_hv, 8, sse4)
704 mc_bi_w_funcs(pel_pixels, 10, sse4)
705 mc_bi_w_func(pel_pixels, 10, 6, sse4)
706 mc_bi_w_funcs(epel_h, 10, sse4)
707 mc_bi_w_func(epel_h, 10, 6, sse4)
708 mc_bi_w_funcs(epel_v, 10, sse4)
709 mc_bi_w_func(epel_v, 10, 6, sse4)
710 mc_bi_w_funcs(epel_hv, 10, sse4)
711 mc_bi_w_func(epel_hv, 10, 6, sse4)
712 mc_bi_w_funcs(qpel_h, 10, sse4)
713 mc_bi_w_funcs(qpel_v, 10, sse4)
714 mc_bi_w_funcs(qpel_hv, 10, sse4)
716 mc_bi_w_funcs(pel_pixels, 12, sse4)
717 mc_bi_w_func(pel_pixels, 12, 6, sse4)
718 mc_bi_w_funcs(epel_h, 12, sse4)
719 mc_bi_w_func(epel_h, 12, 6, sse4)
720 mc_bi_w_funcs(epel_v, 12, sse4)
721 mc_bi_w_func(epel_v, 12, 6, sse4)
722 mc_bi_w_funcs(epel_hv, 12, sse4)
723 mc_bi_w_func(epel_hv, 12, 6, sse4)
724 mc_bi_w_funcs(qpel_h, 12, sse4)
725 mc_bi_w_funcs(qpel_v, 12, sse4)
726 mc_bi_w_funcs(qpel_hv, 12, sse4)
727 #endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL
/*
 * Declare the five SAO band-filter prototypes for one bitd/opt pair:
 *   ff_hevc_sao_band_filter_{8,16,32,48,64}_<bitd>_<opt>
 * All five share the same parameter list (separate destination and source
 * pointers and strides, the offset table, sao_left_class, width, height).
 * The expansion already ends in ';'.
 */
729 #define SAO_BAND_FILTER_FUNCS(bitd, opt) \
730 void ff_hevc_sao_band_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
731 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
732 void ff_hevc_sao_band_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
733 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
734 void ff_hevc_sao_band_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
735 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
736 void ff_hevc_sao_band_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
737 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
738 void ff_hevc_sao_band_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
739 const int16_t *sao_offset_val, int sao_left_class, int width, int height);
751 #define SAO_BAND_INIT(bitd, opt) do { \
752 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_##bitd##_##opt; \
753 c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_##bitd##_##opt; \
754 c->sao_band_filter[2] = ff_hevc_sao_band_filter_32_##bitd##_##opt; \
755 c->sao_band_filter[3] = ff_hevc_sao_band_filter_48_##bitd##_##opt; \
756 c->sao_band_filter[4] = ff_hevc_sao_band_filter_64_##bitd##_##opt; \
/*
 * Declare the five SAO edge-filter prototypes for one bitd/opt pair:
 *   ff_hevc_sao_edge_filter_{8,16,32,48,64}_<bitd>_<opt>
 * Unlike the band-filter prototypes, these take only a destination stride
 * (no source stride) and an 'eo' argument instead of sao_left_class.
 *
 * NOTE(review): the final line of this macro ends with a line-continuation
 * backslash, so whatever line follows is spliced into the macro body.
 * Confirm that a blank line follows in the full file, or drop the backslash.
 */
759 #define SAO_EDGE_FILTER_FUNCS(bitd, opt) \
760 void ff_hevc_sao_edge_filter_8_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
761 const int16_t *sao_offset_val, int eo, int width, int height); \
762 void ff_hevc_sao_edge_filter_16_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
763 const int16_t *sao_offset_val, int eo, int width, int height); \
764 void ff_hevc_sao_edge_filter_32_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
765 const int16_t *sao_offset_val, int eo, int width, int height); \
766 void ff_hevc_sao_edge_filter_48_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
767 const int16_t *sao_offset_val, int eo, int width, int height); \
768 void ff_hevc_sao_edge_filter_64_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
769 const int16_t *sao_offset_val, int eo, int width, int height); \
778 #define SAO_EDGE_INIT(bitd, opt) do { \
779 c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8_##bitd##_##opt; \
780 c->sao_edge_filter[1] = ff_hevc_sao_edge_filter_16_##bitd##_##opt; \
781 c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_##bitd##_##opt; \
782 c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_##bitd##_##opt; \
783 c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_##bitd##_##opt; \
/*
 * Invoke PEL_LINK (defined outside this view) once per epel block width.
 * The second argument runs from 1 to 9 and is paired with the width suffixes
 * 4, 6, 8, 12, 16, 24, 32, 48 and 64 appended to 'fname'.
 * 'pointer', 'my', 'mx', 'bitd' and 'opt' are passed through unchanged.
 * The last PEL_LINK has no trailing ';', so the invocation must supply it.
 */
786 #define EPEL_LINKS(pointer, my, mx, fname, bitd, opt ) \
787 PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \
788 PEL_LINK(pointer, 2, my , mx , fname##6 , bitd, opt ); \
789 PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \
790 PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \
791 PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \
792 PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \
793 PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \
794 PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \
795 PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt )
/*
 * Invoke PEL_LINK (defined outside this view) once per qpel block width.
 * Same pairing as EPEL_LINKS except that second argument 2 (width 6) is
 * skipped: the values 1, 3, 4, 5, 6, 7, 8 and 9 are paired with the width
 * suffixes 4, 8, 12, 16, 24, 32, 48 and 64.
 * The last PEL_LINK has no trailing ';', so the invocation must supply it.
 */
796 #define QPEL_LINKS(pointer, my, mx, fname, bitd, opt) \
797 PEL_LINK(pointer, 1, my , mx , fname##4 , bitd, opt ); \
798 PEL_LINK(pointer, 3, my , mx , fname##8 , bitd, opt ); \
799 PEL_LINK(pointer, 4, my , mx , fname##12, bitd, opt ); \
800 PEL_LINK(pointer, 5, my , mx , fname##16, bitd, opt ); \
801 PEL_LINK(pointer, 6, my , mx , fname##24, bitd, opt ); \
802 PEL_LINK(pointer, 7, my , mx , fname##32, bitd, opt ); \
803 PEL_LINK(pointer, 8, my , mx , fname##48, bitd, opt ); \
804 PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt )
812 c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext;
817 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
818 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
820 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
821 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
823 c->idct[2] = ff_hevc_idct_16x16_8_sse2;
824 c->idct[3] = ff_hevc_idct_32x32_8_sse2;
828 c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2;
829 c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
830 c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;
832 c->idct[0] = ff_hevc_idct_4x4_8_sse2;
833 c->idct[1] = ff_hevc_idct_8x8_8_sse2;
841 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
842 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
848 EPEL_LINKS(
c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4);
849 EPEL_LINKS(
c->put_hevc_epel, 0, 1, epel_h, 8, sse4);
850 EPEL_LINKS(
c->put_hevc_epel, 1, 0, epel_v, 8, sse4);
851 EPEL_LINKS(
c->put_hevc_epel, 1, 1, epel_hv, 8, sse4);
853 QPEL_LINKS(
c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
854 QPEL_LINKS(
c->put_hevc_qpel, 0, 1, qpel_h, 8, sse4);
855 QPEL_LINKS(
c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4);
856 QPEL_LINKS(
c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4);
859 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx;
860 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx;
862 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
863 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
865 c->idct[2] = ff_hevc_idct_16x16_8_avx;
866 c->idct[3] = ff_hevc_idct_32x32_8_avx;
870 c->idct[0] = ff_hevc_idct_4x4_8_avx;
871 c->idct[1] = ff_hevc_idct_8x8_8_avx;
878 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2;
879 c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2;
882 c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2;
883 c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2;
909 c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_avx2;
910 c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_8_avx2;
911 c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_8_avx2;
913 c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_8_avx2;
914 c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_8_avx2;
915 c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_8_avx2;
917 c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_8_avx2;
918 c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_8_avx2;
919 c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_8_avx2;
921 c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_avx2;
922 c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_8_avx2;
923 c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_8_avx2;
925 c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_8_avx2;
926 c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_8_avx2;
927 c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_8_avx2;
929 c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_8_avx2;
930 c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_8_avx2;
931 c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_8_avx2;
933 c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_avx2;
934 c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_8_avx2;
935 c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_8_avx2;
937 c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_8_avx2;
938 c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_8_avx2;
939 c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_8_avx2;
941 c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_8_avx2;
942 c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_8_avx2;
943 c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_8_avx2;
945 c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx2;
946 c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_avx2;
947 c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx2;
949 c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_avx2;
950 c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_avx2;
951 c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_avx2;
953 c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_avx2;
954 c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_avx2;
955 c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_avx2;
957 c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_avx2;
958 c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_avx2;
959 c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_avx2;
961 c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_8_avx2;
962 c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_8_avx2;
963 c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_8_avx2;
965 c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_8_avx2;
966 c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_8_avx2;
967 c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_8_avx2;
971 c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2;
972 c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2;
973 c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_8_avx2;
988 c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext;
991 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
992 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
994 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
995 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
997 c->idct[2] = ff_hevc_idct_16x16_10_sse2;
998 c->idct[3] = ff_hevc_idct_32x32_10_sse2;
1003 c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2;
1004 c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
1005 c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;
1007 c->idct[0] = ff_hevc_idct_4x4_10_sse2;
1008 c->idct[1] = ff_hevc_idct_8x8_10_sse2;
1015 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
1016 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
1019 EPEL_LINKS(
c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
1020 EPEL_LINKS(
c->put_hevc_epel, 0, 1, epel_h, 10, sse4);
1021 EPEL_LINKS(
c->put_hevc_epel, 1, 0, epel_v, 10, sse4);
1022 EPEL_LINKS(
c->put_hevc_epel, 1, 1, epel_hv, 10, sse4);
1024 QPEL_LINKS(
c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
1025 QPEL_LINKS(
c->put_hevc_qpel, 0, 1, qpel_h, 10, sse4);
1026 QPEL_LINKS(
c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4);
1027 QPEL_LINKS(
c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4);
1030 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx;
1031 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx;
1033 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
1034 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
1036 c->idct[2] = ff_hevc_idct_16x16_10_avx;
1037 c->idct[3] = ff_hevc_idct_32x32_10_avx;
1040 c->idct[0] = ff_hevc_idct_4x4_10_avx;
1041 c->idct[1] = ff_hevc_idct_8x8_10_avx;
1046 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2;
1049 c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2;
1050 c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2;
1087 c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_10_avx2;
1088 c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_10_avx2;
1089 c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_10_avx2;
1090 c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_10_avx2;
1091 c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_10_avx2;
1093 c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_hevc_uni_epel_h16_10_avx2;
1094 c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_hevc_uni_epel_h24_10_avx2;
1095 c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_10_avx2;
1096 c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_10_avx2;
1097 c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_10_avx2;
1099 c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_hevc_bi_epel_h16_10_avx2;
1100 c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_hevc_bi_epel_h24_10_avx2;
1101 c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_10_avx2;
1102 c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_10_avx2;
1103 c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_10_avx2;
1105 c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_10_avx2;
1106 c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_10_avx2;
1107 c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_10_avx2;
1108 c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_10_avx2;
1109 c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_10_avx2;
1111 c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_hevc_uni_epel_v16_10_avx2;
1112 c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_hevc_uni_epel_v24_10_avx2;
1113 c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_10_avx2;
1114 c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_10_avx2;
1115 c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_10_avx2;
1117 c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_hevc_bi_epel_v16_10_avx2;
1118 c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_hevc_bi_epel_v24_10_avx2;
1119 c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_10_avx2;
1120 c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_10_avx2;
1121 c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_10_avx2;
1123 c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_10_avx2;
1124 c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_10_avx2;
1125 c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_10_avx2;
1126 c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_10_avx2;
1127 c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_10_avx2;
1129 c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_hevc_uni_epel_hv16_10_avx2;
1130 c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_hevc_uni_epel_hv24_10_avx2;
1131 c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_10_avx2;
1132 c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_10_avx2;
1133 c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_10_avx2;
1135 c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_bi_epel_hv16_10_avx2;
1136 c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_bi_epel_hv24_10_avx2;
1137 c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_10_avx2;
1138 c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_10_avx2;
1139 c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_10_avx2;
1141 c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_10_avx2;
1142 c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_10_avx2;
1143 c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_10_avx2;
1144 c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_10_avx2;
1145 c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_10_avx2;
1147 c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_10_avx2;
1148 c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_10_avx2;
1149 c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_10_avx2;
1150 c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_10_avx2;
1151 c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_10_avx2;
1153 c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_bi_qpel_h16_10_avx2;
1154 c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_bi_qpel_h24_10_avx2;
1155 c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_10_avx2;
1156 c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_10_avx2;
1157 c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_10_avx2;
1159 c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_10_avx2;
1160 c->put_hevc_qpel[6][1][0] = ff_hevc_put_hevc_qpel_v24_10_avx2;
1161 c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_10_avx2;
1162 c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_10_avx2;
1163 c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_10_avx2;
1165 c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_10_avx2;
1166 c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_10_avx2;
1167 c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_10_avx2;
1168 c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_10_avx2;
1169 c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_10_avx2;
1171 c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_hevc_bi_qpel_v16_10_avx2;
1172 c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_hevc_bi_qpel_v24_10_avx2;
1173 c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_10_avx2;
1174 c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_10_avx2;
1175 c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_10_avx2;
1177 c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_10_avx2;
1178 c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_10_avx2;
1179 c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_10_avx2;
1180 c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_10_avx2;
1181 c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_10_avx2;
1183 c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_10_avx2;
1184 c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_10_avx2;
1185 c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_10_avx2;
1186 c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_10_avx2;
1187 c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_10_avx2;
1189 c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_hevc_bi_qpel_hv16_10_avx2;
1190 c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_hevc_bi_qpel_hv24_10_avx2;
1191 c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_hevc_bi_qpel_hv32_10_avx2;
1192 c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_hevc_bi_qpel_hv48_10_avx2;
1193 c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2;
1203 c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_mmxext;
1206 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
1207 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
1209 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2;
1210 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
1215 c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_sse2;
1216 c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2;
1217 c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2;
1220 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
1221 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
1224 EPEL_LINKS(
c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
1225 EPEL_LINKS(
c->put_hevc_epel, 0, 1, epel_h, 12, sse4);
1226 EPEL_LINKS(
c->put_hevc_epel, 1, 0, epel_v, 12, sse4);
1227 EPEL_LINKS(
c->put_hevc_epel, 1, 1, epel_hv, 12, sse4);
1229 QPEL_LINKS(
c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4);
1230 QPEL_LINKS(
c->put_hevc_qpel, 0, 1, qpel_h, 12, sse4);
1231 QPEL_LINKS(
c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4);
1232 QPEL_LINKS(
c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4);
1235 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx;
1236 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx;
1238 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx;
1239 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx;
1244 c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_12_avx2;
1247 c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_avx2;
1248 c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_avx2;