Go to the documentation of this file.
/*
 * Token-pasting helpers used to build symbol names.
 *   bf(fn, bd, opt)  -> fn_<bd>_<opt>       e.g. bf(ff_vvc_avg, 8, avx2) -> ff_vvc_avg_8_avx2
 *   BF(fn, bpc, opt) -> fn_<bpc>bpc_<opt>   e.g. BF(ff_vvc_alf_classify, 16, avx2) -> ff_vvc_alf_classify_16bpc_avx2
 * In BF the trailing "bpc_" is a literal token, not the macro parameter.
 */
36 #define bf(fn, bd, opt) fn##_##bd##_##opt
37 #define BF(fn, bpc, opt) fn##_##bpc##bpc_##opt
/*
 * Declares the four external DMVR routines for one (bd, opt) pair:
 *   ff_vvc_dmvr_<bd>_<opt>, ff_vvc_dmvr_h_<bd>_<opt>,
 *   ff_vvc_dmvr_v_<bd>_<opt>, ff_vvc_dmvr_hv_<bd>_<opt>.
 * All four share the same signature. bd is presumably the bit depth and opt
 * the instruction-set suffix (e.g. avx2) -- confirm against the call sites,
 * which are not shown in this listing.
 */
39 #define DMVR_PROTOTYPES(bd, opt) \
40 void ff_vvc_dmvr_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
41 int height, intptr_t mx, intptr_t my, int width); \
42 void ff_vvc_dmvr_h_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
43 int height, intptr_t mx, intptr_t my, int width); \
44 void ff_vvc_dmvr_v_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
45 int height, intptr_t mx, intptr_t my, int width); \
46 void ff_vvc_dmvr_hv_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
47 int height, intptr_t mx, intptr_t my, int width); \
/*
 * OF_INIT(BD, OPT): declares ff_vvc_apply_bdof_<BD>_<OPT> locally and stores
 * it in c->inter.apply_bdof.
 * NOTE(review): this listing omits source lines 56 and 58-59, so the end of
 * the prototype's parameter list and the close of the do { ... } block are
 * not visible here.
 */
53 #define OF_INIT(BD, OPT) do { \
54 void ff_vvc_apply_bdof_## BD ## _ ## OPT(uint8_t *dst, ptrdiff_t dst_stride, \
55 const int16_t *src0, const int16_t *src1, \
57 c->inter.apply_bdof = ff_vvc_apply_bdof_## BD ##_## OPT; \
/*
 * ALF_BPC_PROTOTYPES(bpc, opt): declares the two external ALF classification
 * routines for one (bpc, opt) pair, named via BF():
 *   ff_vvc_alf_classify_grad_<bpc>bpc_<opt>  - writes into gradient_sum
 *   ff_vvc_alf_classify_<bpc>bpc_<opt>       - reads gradient_sum, writes
 *                                              class_idx and transpose_idx
 * Instantiated below for 8 and 16 with the avx2 suffix.
 */
60 #define ALF_BPC_PROTOTYPES(bpc, opt) \
61 void BF(ff_vvc_alf_classify_grad, bpc, opt)(int *gradient_sum, \
62 const uint8_t *src, ptrdiff_t src_stride, intptr_t width, intptr_t height, intptr_t vb_pos); \
63 void BF(ff_vvc_alf_classify, bpc, opt)(int *class_idx, int *transpose_idx, const int *gradient_sum, \
64 intptr_t width, intptr_t height, intptr_t vb_pos, intptr_t bit_depth); \
66 ALF_BPC_PROTOTYPES(8, avx2)
67 ALF_BPC_PROTOTYPES(16, avx2)
/*
 * FW_PUT(name, depth, opt): defines a static forwarding wrapper
 * vvc_put_<name>_<depth>_<opt>() that calls
 * ff_h2656_put_<name>_<depth>_<opt>() with the same arguments plus
 * 2 * MAX_PB_SIZE inserted as the second argument (presumably the
 * destination stride in bytes -- confirm against the h2656 prototypes).
 * NOTE(review): source lines 73 and 75 are missing from this listing,
 * presumably the braces around the wrapper body.
 */
70 #define FW_PUT(name, depth, opt) \
71 static void vvc_put_ ## name ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \
72 int height, const int8_t *hf, const int8_t *vf, int width) \
74 ff_h2656_put_## name ## _ ## depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); \
/*
 * SSE4 wrapper generators, built on FW_PUT.
 *   FW_PUT_TAP(fname, bitd, opt)  - wrappers for fname with suffixes
 *                                   4, 8, 16, 32, 64 and 128.
 *   FW_PUT_4TAP(fname, bitd, opt) - as FW_PUT_TAP plus the suffix-2 variant.
 * The matching #endif for this #if is not shown in this listing.
 */
77 #if HAVE_SSE4_EXTERNAL
78 #define FW_PUT_TAP(fname, bitd, opt ) \
79 FW_PUT(fname##4, bitd, opt ) \
80 FW_PUT(fname##8, bitd, opt ) \
81 FW_PUT(fname##16, bitd, opt ) \
82 FW_PUT(fname##32, bitd, opt ) \
83 FW_PUT(fname##64, bitd, opt ) \
84 FW_PUT(fname##128, bitd, opt ) \
86 #define FW_PUT_4TAP(fname, bitd, opt) \
87 FW_PUT(fname ## 2, bitd, opt) \
88 FW_PUT_TAP(fname, bitd, opt)
/* 4-tap family for sse4: pixels, 4tap_h, 4tap_v and 4tap_hv (sizes 2..128). */
90 #define FW_PUT_4TAP_SSE4(bitd) \
91 FW_PUT_4TAP(pixels, bitd, sse4) \
92 FW_PUT_4TAP(4tap_h, bitd, sse4) \
93 FW_PUT_4TAP(4tap_v, bitd, sse4) \
94 FW_PUT_4TAP(4tap_hv, bitd, sse4)
/* 8-tap family for sse4: 8tap_h, 8tap_v and 8tap_hv (sizes 4..128). */
96 #define FW_PUT_8TAP_SSE4(bitd) \
97 FW_PUT_TAP(8tap_h, bitd, sse4) \
98 FW_PUT_TAP(8tap_v, bitd, sse4) \
99 FW_PUT_TAP(8tap_hv, bitd, sse4)
/* All sse4 wrappers for one bitd: the 4-tap family followed by the 8-tap family. */
101 #define FW_PUT_SSE4(bitd) \
102 FW_PUT_4TAP_SSE4(bitd) \
103 FW_PUT_8TAP_SSE4(bitd)
/*
 * AVX2 wrapper generators, built on FW_PUT.
 *   FW_PUT_TAP_AVX2(n, bitd)       - <n>tap_h and <n>tap_v wrappers for
 *                                    suffixes 32, 64 and 128.
 *   FW_PUT_AVX2(bitd)              - pixels32/64/128 plus FW_PUT_TAP_AVX2
 *                                    for n = 4 and n = 8.
 *   FW_PUT_TAP_16BPC_AVX2(n, bitd) - <n>tap_h16, <n>tap_v16 and <n>tap_hv
 *                                    for suffixes 16, 32, 64 and 128.
 * The matching #endif for this #if is not shown in this listing.
 */
110 #if HAVE_AVX2_EXTERNAL
111 #define FW_PUT_TAP_AVX2(n, bitd) \
112 FW_PUT(n ## tap_h32, bitd, avx2) \
113 FW_PUT(n ## tap_h64, bitd, avx2) \
114 FW_PUT(n ## tap_h128, bitd, avx2) \
115 FW_PUT(n ## tap_v32, bitd, avx2) \
116 FW_PUT(n ## tap_v64, bitd, avx2) \
117 FW_PUT(n ## tap_v128, bitd, avx2)
119 #define FW_PUT_AVX2(bitd) \
120 FW_PUT(pixels32, bitd, avx2) \
121 FW_PUT(pixels64, bitd, avx2) \
122 FW_PUT(pixels128, bitd, avx2) \
123 FW_PUT_TAP_AVX2(4, bitd) \
124 FW_PUT_TAP_AVX2(8, bitd) \
130 #define FW_PUT_TAP_16BPC_AVX2(n, bitd) \
131 FW_PUT(n ## tap_h16, bitd, avx2) \
132 FW_PUT(n ## tap_v16, bitd, avx2) \
133 FW_PUT(n ## tap_hv16, bitd, avx2) \
134 FW_PUT(n ## tap_hv32, bitd, avx2) \
135 FW_PUT(n ## tap_hv64, bitd, avx2) \
136 FW_PUT(n ## tap_hv128, bitd, avx2)
/* pixels16 plus the 16BPC tap wrappers for n = 4 and n = 8. */
138 #define FW_PUT_16BPC_AVX2(bitd) \
139 FW_PUT(pixels16, bitd, avx2) \
140 FW_PUT_TAP_16BPC_AVX2(4, bitd) \
141 FW_PUT_TAP_16BPC_AVX2(8, bitd)
/* Generate the 16BPC avx2 wrappers for bitd 10 and 12. */
143 FW_PUT_16BPC_AVX2(10)
144 FW_PUT_16BPC_AVX2(12)
/*
 * ALF_FUNCS(bpc, bd, opt): defines the static function
 * vvc_alf_classify_<bd>_<opt>(), which runs classification in two steps:
 *   1. ff_vvc_alf_classify_grad_<bpc>bpc_<opt>() fills gradient_tmp from src;
 *   2. ff_vvc_alf_classify_<bpc>bpc_<opt>() turns gradient_tmp into
 *      class_idx / transpose_idx, with bd passed as the bit_depth argument.
 * Instantiated for (bpc, bd) = (8, 8), (16, 10) and (16, 12), so the 10- and
 * 12-bit variants both call the 16bpc routines.
 * NOTE(review): source lines 149 and 152 are missing from this listing,
 * presumably the braces around the function body.
 */
146 #define ALF_FUNCS(bpc, bd, opt) \
147 static void bf(vvc_alf_classify, bd, opt)(int *class_idx, int *transpose_idx, \
148 const uint8_t *src, ptrdiff_t src_stride, int width, int height, int vb_pos, int *gradient_tmp) \
150 BF(ff_vvc_alf_classify_grad, bpc, opt)(gradient_tmp, src, src_stride, width, height, vb_pos); \
151 BF(ff_vvc_alf_classify, bpc, opt)(class_idx, transpose_idx, gradient_tmp, width, height, vb_pos, bd); \
154 ALF_FUNCS(8, 8, avx2)
155 ALF_FUNCS(16, 10, avx2)
156 ALF_FUNCS(16, 12, avx2)
/*
 * SAO prototypes.
 *   SAO_FILTER_FUNC(wd, bitd, opt) - declares
 *       ff_vvc_sao_band_filter_<wd>_<bitd>_<opt> and
 *       ff_vvc_sao_edge_filter_<wd>_<bitd>_<opt>.
 *       Only the band filter takes a separate source stride.
 *   SAO_FILTER_FUNCS(bitd, opt)    - declares both for
 *       wd = 8, 16, 32, 48, 64, 80, 96, 112 and 128.
 * wd is presumably the block width the routine handles -- confirm against
 * the assembly. Instantiated below for bitd 8, 10 and 12 with avx2.
 */
160 #define SAO_FILTER_FUNC(wd, bitd, opt) \
161 void ff_vvc_sao_band_filter_##wd##_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
162 const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
163 void ff_vvc_sao_edge_filter_##wd##_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
164 const int16_t *sao_offset_val, int eo, int width, int height); \
166 #define SAO_FILTER_FUNCS(bitd, opt) \
167 SAO_FILTER_FUNC(8, bitd, opt) \
168 SAO_FILTER_FUNC(16, bitd, opt) \
169 SAO_FILTER_FUNC(32, bitd, opt) \
170 SAO_FILTER_FUNC(48, bitd, opt) \
171 SAO_FILTER_FUNC(64, bitd, opt) \
172 SAO_FILTER_FUNC(80, bitd, opt) \
173 SAO_FILTER_FUNC(96, bitd, opt) \
174 SAO_FILTER_FUNC(112, bitd, opt) \
175 SAO_FILTER_FUNC(128, bitd, opt) \
177 SAO_FILTER_FUNCS(8, avx2)
178 SAO_FILTER_FUNCS(10, avx2)
179 SAO_FILTER_FUNCS(12, avx2)
/*
 * SAO function-table setup.
 *   SAO_FILTER_INIT(type, bitd, opt) - fills c->sao.<type>_filter[0..8] with
 *       ff_vvc_sao_<type>_filter_<wd>_<bitd>_<opt>, index 0..8 mapping to
 *       wd = 8, 16, 32, 48, 64, 80, 96, 112, 128 in that order.
 *   SAO_INIT(bitd, opt) - runs SAO_FILTER_INIT for type = band and type = edge.
 * NOTE(review): the lines closing each do { ... } block (source lines
 * 191-192 and 196-197) are missing from this listing.
 */
181 #define SAO_FILTER_INIT(type, bitd, opt) do { \
182 c->sao.type##_filter[0] = ff_vvc_sao_##type##_filter_8_##bitd##_##opt; \
183 c->sao.type##_filter[1] = ff_vvc_sao_##type##_filter_16_##bitd##_##opt; \
184 c->sao.type##_filter[2] = ff_vvc_sao_##type##_filter_32_##bitd##_##opt; \
185 c->sao.type##_filter[3] = ff_vvc_sao_##type##_filter_48_##bitd##_##opt; \
186 c->sao.type##_filter[4] = ff_vvc_sao_##type##_filter_64_##bitd##_##opt; \
187 c->sao.type##_filter[5] = ff_vvc_sao_##type##_filter_80_##bitd##_##opt; \
188 c->sao.type##_filter[6] = ff_vvc_sao_##type##_filter_96_##bitd##_##opt; \
189 c->sao.type##_filter[7] = ff_vvc_sao_##type##_filter_112_##bitd##_##opt; \
190 c->sao.type##_filter[8] = ff_vvc_sao_##type##_filter_128_##bitd##_##opt; \
193 #define SAO_INIT(bitd, opt) do { \
194 SAO_FILTER_INIT(band, bitd, opt); \
195 SAO_FILTER_INIT(edge, bitd, opt); \
/*
 * AVG_INIT(bd, opt): declares ff_vvc_avg_<bd>_<opt> and
 * ff_vvc_w_avg_<bd>_<opt> locally, then stores them in c->inter.avg and
 * c->inter.w_avg. The weighted variant takes four extra int arguments
 * (denom, w0, w1, o).
 * NOTE(review): the line closing the do { ... } block is missing from this
 * listing.
 */
198 #define AVG_INIT(bd, opt) do { \
199 void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
200 const int16_t *src0, const int16_t *src1, int width, int height);\
201 void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
202 const int16_t *src0, const int16_t *src1, int width, int height, \
203 int denom, int w0, int w1, int o); \
204 c->inter.avg = bf(ff_vvc_avg, bd, opt); \
205 c->inter.w_avg = bf(ff_vvc_w_avg, bd, opt); \
/*
 * DMVR_INIT(bd): fills the 2x2 table c->inter.dmvr with the avx2 routines
 * for the given bd:
 *   [0][0] plain, [0][1] _h, [1][0] _v, [1][1] _hv.
 * The avx2 suffix is hard-coded in this version of the macro.
 * NOTE(review): the line closing the do { ... } block is missing from this
 * listing.
 */
208 #define DMVR_INIT(bd) do { \
209 c->inter.dmvr[0][0] = ff_vvc_dmvr_##bd##_avx2; \
210 c->inter.dmvr[0][1] = ff_vvc_dmvr_h_##bd##_avx2; \
211 c->inter.dmvr[1][0] = ff_vvc_dmvr_v_##bd##_avx2; \
212 c->inter.dmvr[1][1] = ff_vvc_dmvr_hv_##bd##_avx2; \
/*
 * Motion-compensation table wiring (SSE4).
 *   PEL_LINK(dst, C, W, idx1, idx2, name, D, opt) - sets two entries:
 *       dst[C][W][idx1][idx2]     = vvc_put_<name>_<D>_<opt>        (FW_PUT wrapper)
 *       dst_uni[C][W][idx1][idx2] = ff_h2656_put_uni_<name>_<D>_<opt>
 *   MC_TAP_LINKS(pointer, C, my, mx, fname, bitd, opt) - PEL_LINK for
 *       W = 1..6, paired with fname suffixes 4, 8, 16, 32, 64, 128.
 */
215 #define PEL_LINK(dst, C, W, idx1, idx2, name, D, opt) \
216 dst[C][W][idx1][idx2] = vvc_put_## name ## _ ## D ## _##opt; \
217 dst ## _uni[C][W][idx1][idx2] = ff_h2656_put_uni_ ## name ## _ ## D ## _##opt; \
219 #define MC_TAP_LINKS(pointer, C, my, mx, fname, bitd, opt ) \
220 PEL_LINK(pointer, C, 1, my , mx , fname##4 , bitd, opt ); \
221 PEL_LINK(pointer, C, 2, my , mx , fname##8 , bitd, opt ); \
222 PEL_LINK(pointer, C, 3, my , mx , fname##16, bitd, opt ); \
223 PEL_LINK(pointer, C, 4, my , mx , fname##32, bitd, opt ); \
224 PEL_LINK(pointer, C, 5, my , mx , fname##64, bitd, opt ); \
225 PEL_LINK(pointer, C, 6, my , mx , fname##128, bitd, opt );
/* 8-tap links always target the LUMA plane index. */
227 #define MC_8TAP_LINKS(pointer, my, mx, fname, bitd, opt) \
228 MC_TAP_LINKS(pointer, LUMA, my, mx, fname, bitd, opt)
/* (my, mx) = (0,0) pixels, (0,1) 8tap_h, (1,0) 8tap_v, (1,1) 8tap_hv. */
230 #define MC_8TAP_LINKS_SSE4(bd) \
231 MC_8TAP_LINKS(c->inter.put, 0, 0, pixels, bd, sse4); \
232 MC_8TAP_LINKS(c->inter.put, 0, 1, 8tap_h, bd, sse4); \
233 MC_8TAP_LINKS(c->inter.put, 1, 0, 8tap_v, bd, sse4); \
234 MC_8TAP_LINKS(c->inter.put, 1, 1, 8tap_hv, bd, sse4)
/* 4-tap links target CHROMA and add W = 0 for the suffix-2 variant. */
236 #define MC_4TAP_LINKS(pointer, my, mx, fname, bitd, opt) \
237 PEL_LINK(pointer, CHROMA, 0, my , mx , fname##2 , bitd, opt ); \
238 MC_TAP_LINKS(pointer, CHROMA, my, mx, fname, bitd, opt) \
240 #define MC_4TAP_LINKS_SSE4(bd) \
241 MC_4TAP_LINKS(c->inter.put, 0, 0, pixels, bd, sse4); \
242 MC_4TAP_LINKS(c->inter.put, 0, 1, 4tap_h, bd, sse4); \
243 MC_4TAP_LINKS(c->inter.put, 1, 0, 4tap_v, bd, sse4); \
244 MC_4TAP_LINKS(c->inter.put, 1, 1, 4tap_hv, bd, sse4)
/* Wire every sse4 put/put_uni entry for one bd: 4-tap (CHROMA) then 8-tap (LUMA). */
246 #define MC_LINK_SSE4(bd) \
247 MC_4TAP_LINKS_SSE4(bd) \
248 MC_8TAP_LINKS_SSE4(bd)
/*
 * Motion-compensation table wiring (AVX2), all writing into c->inter.put
 * (and c->inter.put_uni through PEL_LINK).
 *   MC_TAP_LINKS_AVX2(C, tap, bd) - sets W = 4..6 (suffixes 32, 64, 128)
 *       for pixels at [0][0], <tap>tap_h at [0][1] and <tap>tap_v at [1][0].
 *   MC_LINKS_AVX2(bd)             - LUMA with tap = 8, CHROMA with tap = 4.
 * NOTE(review): the lines closing the do { ... } blocks (source lines
 * 260-261 and 274-275) are missing from this listing.
 */
250 #define MC_TAP_LINKS_AVX2(C,tap,bd) do { \
251 PEL_LINK(c->inter.put, C, 4, 0, 0, pixels32, bd, avx2) \
252 PEL_LINK(c->inter.put, C, 5, 0, 0, pixels64, bd, avx2) \
253 PEL_LINK(c->inter.put, C, 6, 0, 0, pixels128, bd, avx2) \
254 PEL_LINK(c->inter.put, C, 4, 0, 1, tap##tap_h32, bd, avx2) \
255 PEL_LINK(c->inter.put, C, 5, 0, 1, tap##tap_h64, bd, avx2) \
256 PEL_LINK(c->inter.put, C, 6, 0, 1, tap##tap_h128, bd, avx2) \
257 PEL_LINK(c->inter.put, C, 4, 1, 0, tap##tap_v32, bd, avx2) \
258 PEL_LINK(c->inter.put, C, 5, 1, 0, tap##tap_v64, bd, avx2) \
259 PEL_LINK(c->inter.put, C, 6, 1, 0, tap##tap_v128, bd, avx2) \
262 #define MC_LINKS_AVX2(bd) \
263 MC_TAP_LINKS_AVX2(LUMA, 8, bd); \
264 MC_TAP_LINKS_AVX2(CHROMA, 4, bd);
/*
 * 16BPC variant: sets W = 3 (suffix 16) for pixels, h, v and hv, and the
 * hv entries at [1][1] for W = 4..6 (suffixes 32, 64, 128).
 */
266 #define MC_TAP_LINKS_16BPC_AVX2(C, tap, bd) do { \
267 PEL_LINK(c->inter.put, C, 3, 0, 0, pixels16, bd, avx2) \
268 PEL_LINK(c->inter.put, C, 3, 0, 1, tap##tap_h16, bd, avx2) \
269 PEL_LINK(c->inter.put, C, 3, 1, 0, tap##tap_v16, bd, avx2) \
270 PEL_LINK(c->inter.put, C, 3, 1, 1, tap##tap_hv16, bd, avx2) \
271 PEL_LINK(c->inter.put, C, 4, 1, 1, tap##tap_hv32, bd, avx2) \
272 PEL_LINK(c->inter.put, C, 5, 1, 1, tap##tap_hv64, bd, avx2) \
273 PEL_LINK(c->inter.put, C, 6, 1, 1, tap##tap_hv128, bd, avx2) \
276 #define MC_LINKS_16BPC_AVX2(bd) \
277 MC_TAP_LINKS_16BPC_AVX2(LUMA, 8, bd); \
278 MC_TAP_LINKS_16BPC_AVX2(CHROMA, 4, bd);
/*
 * Prototype of the external AVX2 SAD routine, plus SAD_INIT(), which stores
 * it in c->inter.sad. The routine takes two int16_t sample pointers, a
 * (dx, dy) pair and the block dimensions, and returns an int.
 * NOTE(review): the meaning of dx/dy (presumably a displacement between the
 * two blocks) is not established by this file -- confirm against the
 * assembly or the C reference.
 */
280 int ff_vvc_sad_avx2(
const int16_t *
src0,
const int16_t *
src1,
int dx,
int dy,
int block_w,
int block_h);
281 #define SAD_INIT() c->inter.sad = ff_vvc_sad_avx2
/*
 * ALF_INIT(bd, opt): declares ff_vvc_alf_filter_luma_<bd>_<opt> and
 * ff_vvc_alf_filter_chroma_<bd>_<opt> locally, then sets
 *   c->alf.filter[LUMA], c->alf.filter[CHROMA] to those routines and
 *   c->alf.classify to the static vvc_alf_classify_<bd>_<opt> wrapper
 *   generated by ALF_FUNCS.
 * NOTE(review): the line closing the do { ... } block is missing from this
 * listing.
 */
283 #define ALF_INIT(bd, opt) do { \
284 void bf(ff_vvc_alf_filter_luma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
285 const uint8_t *src, ptrdiff_t src_stride, int width, int height, \
286 const int16_t *filter, const int16_t *clip, int vb_pos); \
287 void bf(ff_vvc_alf_filter_chroma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
288 const uint8_t *src, ptrdiff_t src_stride, int width, int height, \
289 const int16_t *filter, const int16_t *clip, int vb_pos); \
290 c->alf.filter[LUMA] = bf(ff_vvc_alf_filter_luma, bd, opt); \
291 c->alf.filter[CHROMA] = bf(ff_vvc_alf_filter_chroma, bd, opt); \
292 c->alf.classify = bf(vvc_alf_classify, bd, opt); \
298 #endif // ARCH_X86_64
307 #if HAVE_SSE4_EXTERNAL
312 #if HAVE_AVX2_EXTERNAL
328 #if HAVE_SSE4_EXTERNAL
333 #if HAVE_AVX2_EXTERNAL
339 MC_LINKS_16BPC_AVX2(10);
350 #if HAVE_SSE4_EXTERNAL
355 #if HAVE_AVX2_EXTERNAL
361 MC_LINKS_16BPC_AVX2(12);
#define EXTERNAL_AVX2_FAST(flags)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static atomic_int cpu_flags
#define DMVR_PROTOTYPES(bd, opt)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem like nitpicking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c... [text truncated in the listing]
#define DMVR_INIT(bd, opt)
#define EXTERNAL_SSE4(flags)
av_cold void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)