FFmpeg
dsp.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2024 Zhao Zhili
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef AVCODEC_AARCH64_H26X_DSP_H
22 #define AVCODEC_AARCH64_H26X_DSP_H
23 
24 #include <stddef.h>
25 #include <stdint.h>
26 
/*
 * Prototype of the 8x8 SAO band filter; only the declaration lives here,
 * the body is defined outside this header.
 *
 * Writes through _dst and only reads _src and sao_offset_val (both are
 * const-qualified). Destination and source each have their own stride.
 *
 * NOTE(review): the ff_h26x_ prefix (versus ff_hevc_/ff_vvc_ used by the
 * other prototypes in this header) suggests a routine shared by several
 * decoders - confirm against the callers.
 */
27 void ff_h26x_sao_band_filter_8x8_8_neon(uint8_t *_dst, const uint8_t *_src,
28  ptrdiff_t stride_dst, ptrdiff_t stride_src,
29  const int16_t *sao_offset_val, int sao_left_class,
30  int width, int height);
/*
 * Prototypes of the HEVC SAO edge filters, one per block size named in the
 * symbol (16x16 and 8x8). Both share the same parameter list.
 *
 * Unlike the band filter declared in this header, only a destination stride
 * is passed; there is no source-stride parameter.
 * NOTE(review): the source layout is presumably fixed by the caller -
 * confirm against the implementation.
 */
31 void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst,
32  const int16_t *sao_offset_val, int eo, int width, int height);
33 void ff_hevc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst,
34  const int16_t *sao_offset_val, int eo, int width, int height);
35 
/*
 * Prototypes of the VVC SAO edge filters (16x16 and 8x8). The parameter
 * lists are identical to those of the ff_hevc_sao_edge_filter_* prototypes
 * in this header; only the symbol prefix differs.
 */
36 void ff_vvc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst,
37  const int16_t *sao_offset_val, int eo, int width, int height);
38 void ff_vvc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst,
39  const int16_t *sao_offset_val, int eo, int width, int height);
40 
/*
 * Declare six void prototypes named
 *     ff_hevc_put_hevc_<fn>_h<N>_8_neon<ext>
 * for N = 4, 6, 8, 12, 16 and 32.
 *
 *   fn   - name fragment pasted before "_h<N>"
 *   args - the complete parenthesised parameter list
 *   ext  - suffix pasted after "neon"; may be left empty
 *
 * The expansion already ends with ';', so uses of this form of the macro
 * are written without a trailing semicolon.
 */
41 #define NEON8_FNPROTO_PARTIAL_6(fn, args, ext) \
42  void ff_hevc_put_hevc_##fn##_h4_8_neon##ext args; \
43  void ff_hevc_put_hevc_##fn##_h6_8_neon##ext args; \
44  void ff_hevc_put_hevc_##fn##_h8_8_neon##ext args; \
45  void ff_hevc_put_hevc_##fn##_h12_8_neon##ext args; \
46  void ff_hevc_put_hevc_##fn##_h16_8_neon##ext args; \
47  void ff_hevc_put_hevc_##fn##_h32_8_neon##ext args;
48 
/*
 * Un-suffixed (empty ext) prototypes generated by the "_h<N>" form of
 * NEON8_FNPROTO_PARTIAL_6, for N = 4, 6, 8, 12, 16, 32:
 *     ff_hevc_put_hevc_qpel_h<N>_8_neon
 *     ff_hevc_put_hevc_qpel_uni_h<N>_8_neon
 *     ff_hevc_put_hevc_qpel_bi_h<N>_8_neon
 * No semicolon follows each use because the macro expansion supplies it.
 */
49 NEON8_FNPROTO_PARTIAL_6(qpel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height,
50  intptr_t mx, intptr_t my, int width),)
51 
52 NEON8_FNPROTO_PARTIAL_6(qpel_uni, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
53  ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width),)
54 
55 NEON8_FNPROTO_PARTIAL_6(qpel_bi, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
56  ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t
57  mx, intptr_t my, int width),)
58 
/*
 * Declare nine void prototypes named
 *     ff_hevc_put_hevc_<fn><N>_8_neon<ext>
 * for N = 4, 6, 8, 12, 16, 24, 32, 48 and 64.
 *
 *   fn   - name fragment; N is pasted directly after it, so fn normally
 *          carries its own trailing direction tag (e.g. "epel_h")
 *   args - the complete parenthesised parameter list
 *   ext  - suffix pasted after "neon"; may be left empty
 *
 * The last declaration has no ';', so every use must be followed by one.
 */
59 #define NEON8_FNPROTO(fn, args, ext) \
60  void ff_hevc_put_hevc_##fn##4_8_neon##ext args; \
61  void ff_hevc_put_hevc_##fn##6_8_neon##ext args; \
62  void ff_hevc_put_hevc_##fn##8_8_neon##ext args; \
63  void ff_hevc_put_hevc_##fn##12_8_neon##ext args; \
64  void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \
65  void ff_hevc_put_hevc_##fn##24_8_neon##ext args; \
66  void ff_hevc_put_hevc_##fn##32_8_neon##ext args; \
67  void ff_hevc_put_hevc_##fn##48_8_neon##ext args; \
68  void ff_hevc_put_hevc_##fn##64_8_neon##ext args
69 
/*
 * Reduced form of NEON8_FNPROTO: declares only
 *     ff_hevc_put_hevc_<fn><N>_8_neon<ext>
 * for N = 4, 8, 16 and 64 (note: 32 is not in this set).
 * Same parameters as NEON8_FNPROTO; a ';' must follow each use.
 */
70 #define NEON8_FNPROTO_PARTIAL_4(fn, args, ext) \
71  void ff_hevc_put_hevc_##fn##4_8_neon##ext args; \
72  void ff_hevc_put_hevc_##fn##8_8_neon##ext args; \
73  void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \
74  void ff_hevc_put_hevc_##fn##64_8_neon##ext args
75 
/*
 * Reduced form of NEON8_FNPROTO: declares only
 *     ff_hevc_put_hevc_<fn><N>_8_neon<ext>
 * for N = 4, 8, 16, 32 and 64.
 * Same parameters as NEON8_FNPROTO; a ';' must follow each use.
 */
76 #define NEON8_FNPROTO_PARTIAL_5(fn, args, ext) \
77  void ff_hevc_put_hevc_##fn##4_8_neon##ext args; \
78  void ff_hevc_put_hevc_##fn##8_8_neon##ext args; \
79  void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \
80  void ff_hevc_put_hevc_##fn##32_8_neon##ext args; \
81  void ff_hevc_put_hevc_##fn##64_8_neon##ext args
82 
/*
 * ff_hevc_put_hevc_pel_pixels<N>_8_neon and
 * ff_hevc_put_hevc_pel_bi_pixels<N>_8_neon for all nine NEON8_FNPROTO sizes.
 * The plain variant writes int16_t output and takes no destination stride;
 * the "bi" variant writes uint8_t output and takes an extra const int16_t
 * *src2 input.
 */
83 NEON8_FNPROTO(pel_pixels, (int16_t *dst,
84  const uint8_t *src, ptrdiff_t srcstride,
85  int height, intptr_t mx, intptr_t my, int width),);
86 
87 NEON8_FNPROTO(pel_bi_pixels, (uint8_t *dst, ptrdiff_t dststride,
88  const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
89  int height, intptr_t mx, intptr_t my, int width),);
90 
/*
 * ff_hevc_put_hevc_epel_bi_{h,v,hv}<N>_8_neon for all nine NEON8_FNPROTO
 * sizes. All three share one parameter list (uint8_t destination plus a
 * const int16_t *src2 input).
 */
91 NEON8_FNPROTO(epel_bi_h, (uint8_t *dst, ptrdiff_t dststride,
92  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
93  int height, intptr_t mx, intptr_t my, int width),);
94 
95 NEON8_FNPROTO(epel_bi_v, (uint8_t *dst, ptrdiff_t dststride,
96  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
97  int height, intptr_t mx, intptr_t my, int width),);
98 
99 NEON8_FNPROTO(epel_bi_hv, (uint8_t *dst, ptrdiff_t dststride,
100  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
101  int height, intptr_t mx, intptr_t my, int width),);
102 
/*
 * Same epel_bi_hv signature again, with "_i8mm" appended to each symbol.
 * NOTE(review): presumably the variant using the Arm I8MM extension -
 * confirm against the implementation.
 */
103 NEON8_FNPROTO(epel_bi_hv, (uint8_t *dst, ptrdiff_t dststride,
104  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
105  int height, intptr_t mx, intptr_t my, int width), _i8mm);
106 
/*
 * ff_hevc_put_hevc_epel_v<N>_8_neon, ff_hevc_put_hevc_pel_uni_pixels<N>_8_neon
 * and ff_hevc_put_hevc_pel_uni_w_pixels<N>_8_neon for all nine NEON8_FNPROTO
 * sizes. The "_w_" variant adds the three int parameters denom, wx and ox
 * between height and mx.
 */
107 NEON8_FNPROTO(epel_v, (int16_t *dst,
108  const uint8_t *src, ptrdiff_t srcstride,
109  int height, intptr_t mx, intptr_t my, int width),);
110 
111 NEON8_FNPROTO(pel_uni_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
112  const uint8_t *_src, ptrdiff_t _srcstride,
113  int height, intptr_t mx, intptr_t my, int width),);
114 
115 NEON8_FNPROTO(pel_uni_w_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
116  const uint8_t *_src, ptrdiff_t _srcstride,
117  int height, int denom, int wx, int ox,
118  intptr_t mx, intptr_t my, int width),);
119 
/*
 * Vertical and 2-D "uni" prototypes:
 *     ff_hevc_put_hevc_epel_uni_v<N>_8_neon
 *     ff_hevc_put_hevc_epel_uni_hv<N>_8_neon and ..._neon_i8mm
 *     ff_hevc_put_hevc_epel_uni_w_v<N>_8_neon
 * for all nine NEON8_FNPROTO sizes.
 */
120 NEON8_FNPROTO(epel_uni_v, (uint8_t *dst, ptrdiff_t dststride,
121  const uint8_t *src, ptrdiff_t srcstride,
122  int height, intptr_t mx, intptr_t my, int width),);
123 
124 NEON8_FNPROTO(epel_uni_hv, (uint8_t *dst, ptrdiff_t _dststride,
125  const uint8_t *src, ptrdiff_t srcstride,
126  int height, intptr_t mx, intptr_t my, int width),);
127 
128 NEON8_FNPROTO(epel_uni_hv, (uint8_t *dst, ptrdiff_t _dststride,
129  const uint8_t *src, ptrdiff_t srcstride,
130  int height, intptr_t mx, intptr_t my, int width), _i8mm);
131 
132 NEON8_FNPROTO(epel_uni_w_v, (uint8_t *_dst, ptrdiff_t _dststride,
133  const uint8_t *_src, ptrdiff_t _srcstride,
134  int height, int denom, int wx, int ox,
135  intptr_t mx, intptr_t my, int width),);
136 
/*
 * qpel_uni_w_v goes through the HEVC NEON8_FNPROTO_PARTIAL_4 form, so only
 * ff_hevc_put_hevc_qpel_uni_w_v{4,8,16,64}_8_neon are declared here.
 */
137 NEON8_FNPROTO_PARTIAL_4(qpel_uni_w_v, (uint8_t *_dst, ptrdiff_t _dststride,
138  const uint8_t *_src, ptrdiff_t _srcstride,
139  int height, int denom, int wx, int ox,
140  intptr_t mx, intptr_t my, int width),);
141 
/*
 * ff_hevc_put_hevc_epel_h<N>_8_neon, ff_hevc_put_hevc_epel_hv<N>_8_neon and
 * ff_hevc_put_hevc_epel_uni_w_h<N>_8_neon for all nine NEON8_FNPROTO sizes.
 * Each is declared twice with an identical parameter list: once with an
 * empty suffix and once with "_i8mm".
 */
142 NEON8_FNPROTO(epel_h, (int16_t *dst,
143  const uint8_t *_src, ptrdiff_t _srcstride,
144  int height, intptr_t mx, intptr_t my, int width),);
145 
146 NEON8_FNPROTO(epel_hv, (int16_t *dst,
147  const uint8_t *src, ptrdiff_t srcstride,
148  int height, intptr_t mx, intptr_t my, int width), );
149 
150 NEON8_FNPROTO(epel_h, (int16_t *dst,
151  const uint8_t *_src, ptrdiff_t _srcstride,
152  int height, intptr_t mx, intptr_t my, int width), _i8mm);
153 
154 NEON8_FNPROTO(epel_hv, (int16_t *dst,
155  const uint8_t *src, ptrdiff_t srcstride,
156  int height, intptr_t mx, intptr_t my, int width), _i8mm);
157 
158 NEON8_FNPROTO(epel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
159  const uint8_t *_src, ptrdiff_t _srcstride,
160  int height, int denom, int wx, int ox,
161  intptr_t mx, intptr_t my, int width),);
162 
163 NEON8_FNPROTO(epel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
164  const uint8_t *_src, ptrdiff_t _srcstride,
165  int height, int denom, int wx, int ox,
166  intptr_t mx, intptr_t my, int width), _i8mm);
167 
/*
 * qpel_h is instantiated here only with the "_i8mm" suffix, for all nine
 * NEON8_FNPROTO sizes. The un-suffixed ff_hevc_put_hevc_qpel_h<N>_8_neon
 * symbols come from the earlier NEON8_FNPROTO_PARTIAL_6(qpel, ...) use in
 * this header and therefore exist only for N = 4, 6, 8, 12, 16, 32.
 */
168 NEON8_FNPROTO(qpel_h, (int16_t *dst,
169  const uint8_t *_src, ptrdiff_t _srcstride,
170  int height, intptr_t mx, intptr_t my, int width), _i8mm);
171 
/*
 * ff_hevc_put_hevc_qpel_{v,hv}<N>_8_neon (int16_t output, no destination
 * stride); qpel_hv additionally has an "_i8mm" set.
 */
172 NEON8_FNPROTO(qpel_v, (int16_t *dst,
173  const uint8_t *src, ptrdiff_t srcstride,
174  int height, intptr_t mx, intptr_t my, int width),);
175 
176 NEON8_FNPROTO(qpel_hv, (int16_t *dst,
177  const uint8_t *src, ptrdiff_t srcstride,
178  int height, intptr_t mx, intptr_t my, int width),);
179 
180 NEON8_FNPROTO(qpel_hv, (int16_t *dst,
181  const uint8_t *src, ptrdiff_t srcstride,
182  int height, intptr_t mx, intptr_t my, int width), _i8mm);
183 
/*
 * ff_hevc_put_hevc_qpel_uni_{v,hv}<N>_8_neon (uint8_t output with its own
 * stride); qpel_uni_hv additionally has an "_i8mm" set.
 */
184 NEON8_FNPROTO(qpel_uni_v, (uint8_t *dst, ptrdiff_t dststride,
185  const uint8_t *src, ptrdiff_t srcstride,
186  int height, intptr_t mx, intptr_t my, int width),);
187 
188 NEON8_FNPROTO(qpel_uni_hv, (uint8_t *dst, ptrdiff_t dststride,
189  const uint8_t *src, ptrdiff_t srcstride,
190  int height, intptr_t mx, intptr_t my, int width),);
191 
192 NEON8_FNPROTO(qpel_uni_hv, (uint8_t *dst, ptrdiff_t dststride,
193  const uint8_t *src, ptrdiff_t srcstride,
194  int height, intptr_t mx, intptr_t my, int width), _i8mm);
195 
/*
 * ff_hevc_put_hevc_qpel_uni_w_h<N>_8_neon, un-suffixed and "_i8mm"; the
 * parameter list carries denom, wx and ox.
 */
196 NEON8_FNPROTO(qpel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
197  const uint8_t *_src, ptrdiff_t _srcstride,
198  int height, int denom, int wx, int ox,
199  intptr_t mx, intptr_t my, int width),);
200 
201 NEON8_FNPROTO(qpel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
202  const uint8_t *_src, ptrdiff_t _srcstride,
203  int height, int denom, int wx, int ox,
204  intptr_t mx, intptr_t my, int width), _i8mm);
205 
/*
 * ff_hevc_put_hevc_epel_uni_w_hv<N>_8_neon for all nine NEON8_FNPROTO
 * sizes, un-suffixed and "_i8mm".
 */
206 NEON8_FNPROTO(epel_uni_w_hv, (uint8_t *_dst, ptrdiff_t _dststride,
207  const uint8_t *_src, ptrdiff_t _srcstride,
208  int height, int denom, int wx, int ox,
209  intptr_t mx, intptr_t my, int width),);
210 
211 NEON8_FNPROTO(epel_uni_w_hv, (uint8_t *_dst, ptrdiff_t _dststride,
212  const uint8_t *_src, ptrdiff_t _srcstride,
213  int height, int denom, int wx, int ox,
214  intptr_t mx, intptr_t my, int width), _i8mm);
215 
/*
 * qpel_uni_w_hv uses NEON8_FNPROTO_PARTIAL_5, so only
 * ff_hevc_put_hevc_qpel_uni_w_hv{4,8,16,32,64}_8_neon (and the "_i8mm"
 * counterparts) are declared.
 */
216 NEON8_FNPROTO_PARTIAL_5(qpel_uni_w_hv, (uint8_t *_dst, ptrdiff_t _dststride,
217  const uint8_t *_src, ptrdiff_t _srcstride,
218  int height, int denom, int wx, int ox,
219  intptr_t mx, intptr_t my, int width),);
220 
221 NEON8_FNPROTO_PARTIAL_5(qpel_uni_w_hv, (uint8_t *_dst, ptrdiff_t _dststride,
222  const uint8_t *_src, ptrdiff_t _srcstride,
223  int height, int denom, int wx, int ox,
224  intptr_t mx, intptr_t my, int width), _i8mm);
225 
/*
 * ff_hevc_put_hevc_qpel_bi_{v,hv}<N>_8_neon for all nine NEON8_FNPROTO
 * sizes; qpel_bi_hv additionally has an "_i8mm" set. The matching
 * qpel_bi_h<N> prototypes are produced by the "_h<N>" form of
 * NEON8_FNPROTO_PARTIAL_6 earlier in this header.
 */
226 NEON8_FNPROTO(qpel_bi_v, (uint8_t *dst, ptrdiff_t dststride,
227  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
228  int height, intptr_t mx, intptr_t my, int width),);
229 
230 NEON8_FNPROTO(qpel_bi_hv, (uint8_t *dst, ptrdiff_t dststride,
231  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
232  int height, intptr_t mx, intptr_t my, int width),);
233 
234 NEON8_FNPROTO(qpel_bi_hv, (uint8_t *dst, ptrdiff_t dststride,
235  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
236  int height, intptr_t mx, intptr_t my, int width), _i8mm);
237 
/*
 * From here on NEON8_FNPROTO_PARTIAL_4 is repurposed for VVC. The new form
 * declares
 *     ff_vvc_put_<fn>_h<N>_8_neon<ext>
 * for N = 4, 8, 16 and 32.
 *
 * Differences from the HEVC form it replaces: the symbol prefix, the "_h"
 * infix, the size set (32 instead of 64), and the expansion now ends with
 * ';' so uses are written without a trailing semicolon.
 */
238 #undef NEON8_FNPROTO_PARTIAL_4
239 #define NEON8_FNPROTO_PARTIAL_4(fn, args, ext) \
240  void ff_vvc_put_##fn##_h4_8_neon##ext args; \
241  void ff_vvc_put_##fn##_h8_8_neon##ext args; \
242  void ff_vvc_put_##fn##_h16_8_neon##ext args; \
243  void ff_vvc_put_##fn##_h32_8_neon##ext args;
244 
/*
 * Un-suffixed VVC prototypes for N = 4, 8, 16, 32:
 *     ff_vvc_put_qpel_h<N>_8_neon
 *     ff_vvc_put_qpel_uni_h<N>_8_neon
 *     ff_vvc_put_epel_h<N>_8_neon
 * In place of the intptr_t mx/my pair used by the HEVC prototypes in this
 * header, these take two const int8_t pointers, hf and vf.
 */
245 NEON8_FNPROTO_PARTIAL_4(qpel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height,
246  const int8_t *hf, const int8_t *vf, int width),)
247 
248 NEON8_FNPROTO_PARTIAL_4(qpel_uni, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
249  ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width),)
250 
251 NEON8_FNPROTO_PARTIAL_4(epel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height,
252  const int8_t *hf, const int8_t *vf, int width),)
253 
/*
 * From here on NEON8_FNPROTO_PARTIAL_6 is repurposed for VVC. The new form
 * declares
 *     ff_vvc_put_<fn><N>_8_neon<ext>
 * for N = 4, 8, 16, 32, 64 and 128.
 *
 * Differences from the HEVC form it replaces: the symbol prefix, no "_h"
 * infix (fn must carry its own direction tag), a different size set, and
 * the last declaration has no ';' so every use must be followed by one.
 */
254 #undef NEON8_FNPROTO_PARTIAL_6
255 #define NEON8_FNPROTO_PARTIAL_6(fn, args, ext) \
256  void ff_vvc_put_##fn##4_8_neon##ext args; \
257  void ff_vvc_put_##fn##8_8_neon##ext args; \
258  void ff_vvc_put_##fn##16_8_neon##ext args; \
259  void ff_vvc_put_##fn##32_8_neon##ext args; \
260  void ff_vvc_put_##fn##64_8_neon##ext args; \
261  void ff_vvc_put_##fn##128_8_neon##ext args
262 
/*
 * VVC prototypes for N = 4, 8, 16, 32, 64, 128:
 *     ff_vvc_put_pel_pixels<N>_8_neon
 *     ff_vvc_put_pel_uni_pixels<N>_8_neon
 *     ff_vvc_put_pel_uni_w_pixels<N>_8_neon   (adds denom, wx, ox)
 */
263 NEON8_FNPROTO_PARTIAL_6(pel_pixels, (int16_t *dst,
264  const uint8_t *src, ptrdiff_t srcstride, int height,
265  const int8_t *hf, const int8_t *vf, int width),);
266 
267 NEON8_FNPROTO_PARTIAL_6(pel_uni_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
268  const uint8_t *_src, ptrdiff_t _srcstride, int height,
269  const int8_t *hf, const int8_t *vf, int width),);
270 
271 NEON8_FNPROTO_PARTIAL_6(pel_uni_w_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
272  const uint8_t *_src, ptrdiff_t _srcstride,
273  int height, int denom, int wx, int ox,
274  const int8_t *hf, const int8_t *vf, int width),);
275 
/*
 * "_i8mm" sets ff_vvc_put_{qpel,epel}_h<N>_8_neon_i8mm for all six sizes.
 * The un-suffixed qpel_h/epel_h symbols come from the VVC
 * NEON8_FNPROTO_PARTIAL_4 uses in this header and cover only N = 4, 8, 16, 32.
 */
276 NEON8_FNPROTO_PARTIAL_6(qpel_h, (int16_t *dst,
277  const uint8_t *_src, ptrdiff_t _srcstride, int height,
278  const int8_t *hf, const int8_t *vf, int width), _i8mm);
279 
280 NEON8_FNPROTO_PARTIAL_6(epel_h, (int16_t *dst,
281  const uint8_t *_src, ptrdiff_t _srcstride, int height,
282  const int8_t *hf, const int8_t *vf, int width), _i8mm);
283 
/*
 * ff_vvc_put_qpel_v4_8_neon and ff_vvc_put_qpel_v8_8_neon are written out by
 * hand rather than through a macro: only the 4 and 8 sizes of qpel_v are
 * declared in this header, which matches none of the macro size sets.
 * The parameter list is the same as for the other int16_t-output VVC
 * prototypes here.
 */
284 void ff_vvc_put_qpel_v4_8_neon(int16_t *dst, const uint8_t *_src,
285  ptrdiff_t _srcstride, int height,
286  const int8_t *hf, const int8_t *vf, int width);
287 
288 void ff_vvc_put_qpel_v8_8_neon(int16_t *dst, const uint8_t *_src,
289  ptrdiff_t _srcstride, int height,
290  const int8_t *hf, const int8_t *vf, int width);
291 
/*
 * ff_vvc_put_{qpel,epel}_hv<N>_8_neon for N = 4, 8, 16, 32, 64, 128, each
 * declared with an empty suffix and again with "_i8mm". All four uses share
 * one parameter list.
 */
292 NEON8_FNPROTO_PARTIAL_6(qpel_hv, (int16_t *dst,
293  const uint8_t *src, ptrdiff_t srcstride, int height,
294  const int8_t *hf, const int8_t *vf, int width),);
295 
296 NEON8_FNPROTO_PARTIAL_6(qpel_hv, (int16_t *dst,
297  const uint8_t *src, ptrdiff_t srcstride, int height,
298  const int8_t *hf, const int8_t *vf, int width), _i8mm);
299 
300 NEON8_FNPROTO_PARTIAL_6(epel_hv, (int16_t *dst,
301  const uint8_t *src, ptrdiff_t srcstride, int height,
302  const int8_t *hf, const int8_t *vf, int width),);
303 
304 NEON8_FNPROTO_PARTIAL_6(epel_hv, (int16_t *dst,
305  const uint8_t *src, ptrdiff_t srcstride, int height,
306  const int8_t *hf, const int8_t *vf, int width), _i8mm);
307 
308 #endif
_dst
uint8_t * _dst
Definition: dsp.h:52
ff_hevc_sao_edge_filter_16x16_8_neon
void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
src
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t const uint8_t * src
Definition: dsp.h:84
ff_vvc_put_qpel_v8_8_neon
void ff_vvc_put_qpel_v8_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width)
height
uint8_t ptrdiff_t const uint8_t ptrdiff_t int height
Definition: dsp.h:53
ff_hevc_sao_edge_filter_8x8_8_neon
void ff_hevc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
_src
uint8_t ptrdiff_t const uint8_t * _src
Definition: dsp.h:52
mx
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t mx
Definition: dsp.h:53
_srcstride
uint8_t ptrdiff_t const uint8_t ptrdiff_t _srcstride
Definition: dsp.h:53
ff_vvc_put_qpel_v4_8_neon
void ff_vvc_put_qpel_v4_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width)
my
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t my
Definition: dsp.h:53
srcstride
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t const uint8_t ptrdiff_t srcstride
Definition: dsp.h:84
NEON8_FNPROTO
#define NEON8_FNPROTO(fn, args, ext)
width
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int width
Definition: dsp.h:53
_dststride
uint8_t ptrdiff_t _dststride
Definition: dsp.h:52
ff_vvc_sao_edge_filter_8x8_8_neon
void ff_vvc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
ff_h26x_sao_band_filter_8x8_8_neon
void ff_h26x_sao_band_filter_8x8_8_neon(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, const int16_t *sao_offset_val, int sao_left_class, int width, int height)
NEON8_FNPROTO_PARTIAL_5
#define NEON8_FNPROTO_PARTIAL_5(fn, args, ext)
NEON8_FNPROTO_PARTIAL_4
#define NEON8_FNPROTO_PARTIAL_4(fn, args, ext)
Definition: dsp.h:239
src2
const pixel * src2
Definition: h264pred_template.c:422
hf
uint8_t ptrdiff_t const uint8_t ptrdiff_t int const int8_t * hf
Definition: dsp.h:249
NEON8_FNPROTO_PARTIAL_6
#define NEON8_FNPROTO_PARTIAL_6(fn, args, ext)
Definition: dsp.h:41
ff_vvc_sao_edge_filter_16x16_8_neon
void ff_vvc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
vf
uint8_t ptrdiff_t const uint8_t ptrdiff_t int const int8_t const int8_t * vf
Definition: dsp.h:249