FFmpeg
ops_tmpl_common.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "ops_backend.h"
22 
23 #ifndef BIT_DEPTH
24 # error Should only be included from ops_tmpl_*.c!
25 #endif
26 
/*
 * WRAP_CONVERT_UINT(N): defines the `convert_uint<N>` pattern.
 *
 * For every lane of the block, each component whose X/Y/Z/W flag is set is
 * assigned (plain C conversion) into a locally declared u<N>block_t; the
 * resulting blocks are then handed on with CONTINUE(). Components whose flag
 * is clear are left unwritten in the local blocks.
 *
 * WRAP_COMMON_PATTERNS is then invoked for the pattern with
 * .op = SWS_OP_CONVERT and .convert.to = SWS_PIXEL_U<N>.
 */
#define WRAP_CONVERT_UINT(N)                                                    \
DECL_PATTERN(convert_uint##N)                                                   \
{                                                                               \
    u##N##block_t dst_x;                                                        \
    u##N##block_t dst_y;                                                        \
    u##N##block_t dst_z;                                                        \
    u##N##block_t dst_w;                                                        \
                                                                                \
    SWS_LOOP                                                                    \
    for (int lane = 0; lane < SWS_BLOCK_SIZE; lane++) {                         \
        if (X) {                                                                \
            dst_x[lane] = x[lane];                                              \
        }                                                                       \
        if (Y) {                                                                \
            dst_y[lane] = y[lane];                                              \
        }                                                                       \
        if (Z) {                                                                \
            dst_z[lane] = z[lane];                                              \
        }                                                                       \
        if (W) {                                                                \
            dst_w[lane] = w[lane];                                              \
        }                                                                       \
    }                                                                           \
                                                                                \
    CONTINUE(dst_x, dst_y, dst_z, dst_w);                                       \
}                                                                               \
                                                                                \
WRAP_COMMON_PATTERNS(convert_uint##N,                                           \
    .op = SWS_OP_CONVERT,                                                       \
    .convert.to = SWS_PIXEL_U##N,                                               \
);
51 
52 #if BIT_DEPTH != 8
54 #endif
55 
56 #if BIT_DEPTH != 16
58 #endif
59 
60 #if BIT_DEPTH != 32 || defined(IS_FLOAT)
62 #endif
63 
65 {
66  SWS_LOOP
67  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
68  if (X)
69  x[i] = impl->priv.px[0];
70  if (Y)
71  y[i] = impl->priv.px[1];
72  if (Z)
73  z[i] = impl->priv.px[2];
74  if (W)
75  w[i] = impl->priv.px[3];
76  }
77 
78  CONTINUE(x, y, z, w);
79 }
80 
/*
 * WRAP_CLEAR(X, Y, Z, W): instantiates one variant of the `clear` kernel for
 * a fixed set of component flags. The variant is named clear_<X><Y><Z><W>
 * (flags pasted as digits) and is produced by DECL_IMPL; DECL_ENTRY then
 * declares the matching entry with:
 *   - .setup      = ff_sws_setup_clear
 *   - .op         = SWS_OP_CLEAR
 *   - .clear.mask = SWS_COMP_MASK(X, Y, Z, W)
 *
 * NOTE(review): a flag of 1 presumably selects the component that the clear
 * kernel overwrites with its constant - confirm against the kernel body.
 *
 * The trailing comment on each instantiation below names the pixel layout /
 * conversion that the mask is intended for.
 */
81 #define WRAP_CLEAR(X, Y, Z, W) \
82 DECL_IMPL(clear, clear##_##X##Y##Z##W, X, Y, Z, W) \
83  \
84 DECL_ENTRY(clear##_##X##Y##Z##W, SWS_COMP_ALL, \
85  .setup = ff_sws_setup_clear, \
86  .op = SWS_OP_CLEAR, \
87  .clear.mask = SWS_COMP_MASK(X, Y, Z, W), \
88 );
89 
/* Single-component masks */
90 WRAP_CLEAR(0, 0, 0, 1) /* rgba alpha */
91 WRAP_CLEAR(1, 0, 0, 0) /* argb alpha */
92 WRAP_CLEAR(0, 1, 0, 0) /* ya alpha */
93 
/* Two-component masks */
94 WRAP_CLEAR(1, 1, 0, 0) /* vuya chroma */
95 WRAP_CLEAR(0, 1, 1, 0) /* yuva chroma */
96 WRAP_CLEAR(0, 0, 1, 1) /* ayuv chroma */
97 WRAP_CLEAR(1, 0, 1, 0) /* uyva chroma */
98 WRAP_CLEAR(0, 1, 0, 1) /* xvyu chroma */
99 
/* Three-component masks */
100 WRAP_CLEAR(0, 1, 1, 1) /* gray -> yuva */
101 WRAP_CLEAR(1, 0, 1, 1) /* gray -> ayuv */
102 WRAP_CLEAR(1, 1, 0, 1) /* gray -> vuya */
103 
105 {
106  SWS_LOOP
107  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
108  if (X)
109  x[i] = FFMIN(x[i], impl->priv.px[0]);
110  if (Y)
111  y[i] = FFMIN(y[i], impl->priv.px[1]);
112  if (Z)
113  z[i] = FFMIN(z[i], impl->priv.px[2]);
114  if (W)
115  w[i] = FFMIN(w[i], impl->priv.px[3]);
116  }
117 
118  CONTINUE(x, y, z, w);
119 }
120 
122 {
123  SWS_LOOP
124  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
125  if (X)
126  x[i] = FFMAX(x[i], impl->priv.px[0]);
127  if (Y)
128  y[i] = FFMAX(y[i], impl->priv.px[1]);
129  if (Z)
130  z[i] = FFMAX(z[i], impl->priv.px[2]);
131  if (W)
132  w[i] = FFMAX(w[i], impl->priv.px[3]);
133  }
134 
135  CONTINUE(x, y, z, w);
136 }
137 
139  .op = SWS_OP_MIN,
140  .setup = ff_sws_setup_clamp,
141 );
142 
144  .op = SWS_OP_MAX,
145  .setup = ff_sws_setup_clamp,
146 );
147 
149 {
150  const pixel_t scale = impl->priv.px[0];
151 
152  SWS_LOOP
153  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
154  if (X)
155  x[i] *= scale;
156  if (Y)
157  y[i] *= scale;
158  if (Z)
159  z[i] *= scale;
160  if (W)
161  w[i] *= scale;
162  }
163 
164  CONTINUE(x, y, z, w);
165 }
166 
168  .op = SWS_OP_SCALE,
169  .setup = ff_sws_setup_scale,
170  .flexible = true,
171 );
172 
174 {
175  const SwsFilterWeights *filter = params->op->rw.kernel;
176  static_assert(sizeof(out->priv.ptr) <= sizeof(int32_t[2]),
177  ">8 byte pointers not supported");
178 
179  /* Pre-convert weights to float */
180  float *weights = av_calloc(filter->num_weights, sizeof(float));
181  if (!weights)
182  return AVERROR(ENOMEM);
183 
184  for (int i = 0; i < filter->num_weights; i++)
185  weights[i] = (float) filter->weights[i] / SWS_FILTER_SCALE;
186 
187  out->priv.ptr = weights;
188  out->priv.i32[2] = filter->filter_size;
189  out->free = ff_op_priv_free;
190  return 0;
191 }
192 
193 /* Fully general vertical planar filter case */
194 DECL_READ(filter_v, const int elems)
195 {
196  const SwsOpExec *exec = iter->exec;
197  const float *restrict weights = impl->priv.ptr;
198  const int filter_size = impl->priv.i32[2];
199  weights += filter_size * iter->y;
200 
201  f32block_t xs, ys, zs, ws;
202  memset(xs, 0, sizeof(xs));
203  if (elems > 1)
204  memset(ys, 0, sizeof(ys));
205  if (elems > 2)
206  memset(zs, 0, sizeof(zs));
207  if (elems > 3)
208  memset(ws, 0, sizeof(ws));
209 
210  for (int j = 0; j < filter_size; j++) {
211  const float weight = weights[j];
212 
213  SWS_LOOP
214  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
215  xs[i] += weight * in0[i];
216  if (elems > 1)
217  ys[i] += weight * in1[i];
218  if (elems > 2)
219  zs[i] += weight * in2[i];
220  if (elems > 3)
221  ws[i] += weight * in3[i];
222  }
223 
224  in0 = bump_ptr(in0, exec->in_stride[0]);
225  if (elems > 1)
226  in1 = bump_ptr(in1, exec->in_stride[1]);
227  if (elems > 2)
228  in2 = bump_ptr(in2, exec->in_stride[2]);
229  if (elems > 3)
230  in3 = bump_ptr(in3, exec->in_stride[3]);
231  }
232 
233  for (int i = 0; i < elems; i++)
234  iter->in[i] += sizeof(block_t);
235 
236  CONTINUE(xs, ys, zs, ws);
237 }
238 
240 {
241  SwsFilterWeights *filter = params->op->rw.kernel;
242  out->priv.ptr = av_refstruct_ref(filter->weights);
243  out->priv.i32[2] = filter->filter_size;
244  out->free = ff_op_priv_unref;
245  return 0;
246 }
247 
248 /* Fully general horizontal planar filter case */
249 DECL_READ(filter_h, const int elems)
250 {
251  const SwsOpExec *exec = iter->exec;
252  const int *restrict weights = impl->priv.ptr;
253  const int filter_size = impl->priv.i32[2];
254  const float scale = 1.0f / SWS_FILTER_SCALE;
255  const int xpos = iter->x;
256  weights += filter_size * iter->x;
257 
258  f32block_t xs, ys, zs, ws;
259  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
260  const int offset = exec->in_offset_x[xpos + i];
261  pixel_t *start0 = bump_ptr(in0, offset);
262  pixel_t *start1 = bump_ptr(in1, offset);
263  pixel_t *start2 = bump_ptr(in2, offset);
264  pixel_t *start3 = bump_ptr(in3, offset);
265 
266  inter_t sx = 0, sy = 0, sz = 0, sw = 0;
267  for (int j = 0; j < filter_size; j++) {
268  const int weight = weights[j];
269  sx += weight * start0[j];
270  if (elems > 1)
271  sy += weight * start1[j];
272  if (elems > 2)
273  sz += weight * start2[j];
274  if (elems > 3)
275  sw += weight * start3[j];
276  }
277 
278  xs[i] = (float) sx * scale;
279  if (elems > 1)
280  ys[i] = (float) sy * scale;
281  if (elems > 2)
282  zs[i] = (float) sz * scale;
283  if (elems > 3)
284  ws[i] = (float) sw * scale;
285 
286  weights += filter_size;
287  }
288 
289  CONTINUE(xs, ys, zs, ws);
290 }
291 
/*
 * WRAP_FILTER(FUNC, DIR, ELEMS, SUFFIX): defines one kernel function and its
 * entry for a filtered read.
 *
 * The function is named through fn(FUNC##ELEMS##SUFFIX) (e.g. filter1_v),
 * takes the usual (iter, impl, x, y, z, w) arguments and consists solely of
 * CALL_READ(FUNC##SUFFIX, ELEMS), i.e. it forwards to the shared
 * filter_v / filter_h reader with ELEMS as the element count.
 *
 * DECL_ENTRY then declares the matching entry with:
 *   - .op        = SWS_OP_READ
 *   - .setup     = fn(setup_filter##SUFFIX)
 *   - .rw.elems  = ELEMS
 *   - .rw.filter = SWS_OP_FILTER_##DIR
 */
292 #define WRAP_FILTER(FUNC, DIR, ELEMS, SUFFIX) \
293 static av_flatten void fn(FUNC##ELEMS##SUFFIX)(SwsOpIter *restrict iter, \
294  const SwsOpImpl *restrict impl, \
295  void *restrict x, void *restrict y,\
296  void *restrict z, void *restrict w)\
297 { \
298  CALL_READ(FUNC##SUFFIX, ELEMS); \
299 } \
300  \
301 DECL_ENTRY(FUNC##ELEMS##SUFFIX, SWS_COMP_ELEMS(ELEMS), \
302  .op = SWS_OP_READ, \
303  .setup = fn(setup_filter##SUFFIX), \
304  .rw.elems = ELEMS, \
305  .rw.filter = SWS_OP_FILTER_##DIR, \
306 );
307 
/* Vertical filter readers for 1 to 4 elements */
308 WRAP_FILTER(filter, V, 1, _v)
309 WRAP_FILTER(filter, V, 2, _v)
310 WRAP_FILTER(filter, V, 3, _v)
311 WRAP_FILTER(filter, V, 4, _v)
312 
/* Horizontal filter readers for 1 to 4 elements */
313 WRAP_FILTER(filter, H, 1, _h)
314 WRAP_FILTER(filter, H, 2, _h)
315 WRAP_FILTER(filter, H, 3, _h)
316 WRAP_FILTER(filter, H, 4, _h)
317 
318 static void fn(process)(const SwsOpExec *exec, const void *priv,
319  const int bx_start, const int y_start,
320  int bx_end, int y_end)
321 {
322  const SwsOpChain *chain = priv;
323  const SwsOpImpl *impl = chain->impl;
324  u32block_t x, y, z, w; /* allocate enough space for any intermediate */
325 
326  SwsOpIter iterdata;
327  SwsOpIter *iter = &iterdata; /* for CONTINUE() macro to work */
328  iter->exec = exec;
329  for (int i = 0; i < 4; i++) {
330  iter->in[i] = (uintptr_t) exec->in[i];
331  iter->out[i] = (uintptr_t) exec->out[i];
332  }
333 
334  for (iter->y = y_start; iter->y < y_end; iter->y++) {
335  for (int block = bx_start; block < bx_end; block++) {
336  iter->x = block * SWS_BLOCK_SIZE;
337  CONTINUE(x, y, z, w);
338  }
339 
340  const int y_bump = exec->in_bump_y ? exec->in_bump_y[iter->y] : 0;
341  for (int i = 0; i < 4; i++) {
342  iter->in[i] += exec->in_bump[i] + y_bump * exec->in_stride[i];
343  iter->out[i] += exec->out_bump[i];
344  }
345  }
346 }
WRAP_CLEAR
#define WRAP_CLEAR(X, Y, Z, W)
Definition: ops_tmpl_common.c:81
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
ops_backend.h
f32block_t
float f32block_t[SWS_BLOCK_SIZE]
Definition: ops_backend.c:48
out
static FILE * out
Definition: movenc.c:55
SwsOpIter::exec
const SwsOpExec * exec
Definition: ops_backend.h:52
block_t
#define block_t
Definition: ops_tmpl_float.c:34
ff_sws_setup_scale
int ff_sws_setup_scale(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:248
SwsFilterWeights
Represents a computed filter kernel.
Definition: filters.h:64
CONTINUE
#define CONTINUE(X, Y, Z, W)
Definition: ops_backend.h:115
filter
void(* filter)(uint8_t *src, int stride, int qscale)
Definition: h263dsp.c:29
SwsOpIter
Copyright (C) 2025 Niklas Haas.
Definition: ops_backend.h:46
max
#define max(a, b)
Definition: cuda_runtime.h:33
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
SwsOpExec::in_stride
ptrdiff_t in_stride[4]
Definition: ops_dispatch.h:41
ff_op_priv_unref
static void ff_op_priv_unref(SwsOpPriv *priv)
Definition: ops_chain.h:154
SwsOpIter::x
int x
Definition: ops_backend.h:49
WRAP_CONVERT_UINT
#define WRAP_CONVERT_UINT(N)
Copyright (C) 2025 Niklas Haas.
Definition: ops_tmpl_common.c:27
DECL_PATTERN
DECL_PATTERN(clear)
Definition: ops_tmpl_common.c:64
weight
const h264_weight_func weight
Definition: h264dsp_init.c:33
fn
Definition: ops_tmpl_float.c:123
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:66
float
float
Definition: af_crystalizer.c:122
W
#define W(a, i, v)
Definition: jpegls.h:119
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SWS_LOOP
#define SWS_LOOP
Definition: ops_backend.h:58
SwsOpImpl
Definition: ops_chain.h:71
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:64
DECL_READ
DECL_READ(filter_v, const int elems)
Definition: ops_tmpl_common.c:194
u32block_t
uint32_t u32block_t[SWS_BLOCK_SIZE]
Definition: ops_backend.c:47
SwsOpChain::impl
SwsOpImpl impl[SWS_MAX_OPS+1]
Definition: ops_chain.h:86
SWS_BLOCK_SIZE
#define SWS_BLOCK_SIZE
Copyright (C) 2025 Niklas Haas.
Definition: ops_backend.c:42
SwsOpIter::out
uintptr_t out[4]
Definition: ops_backend.h:48
SwsOpExec
Copyright (C) 2026 Niklas Haas.
Definition: ops_dispatch.h:35
bump_ptr
#define bump_ptr(ptr, bump)
Definition: ops_backend.h:71
xs
#define xs(width, name, var, subs,...)
Definition: cbs_vp9.c:305
SwsOpChain
Compiled "chain" of operations, which can be dispatched efficiently.
Definition: ops_chain.h:84
V
#define V
Definition: avdct.c:32
SWS_FILTER_SCALE
@ SWS_FILTER_SCALE
14-bit coefficients are picked to fit comfortably within int16_t for efficient SIMD processing (e....
Definition: filters.h:40
ff_sws_setup_clamp
int ff_sws_setup_clamp(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:263
SwsOpIter::in
uintptr_t in[4]
Definition: ops_backend.h:47
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
WRAP_FILTER
#define WRAP_FILTER(FUNC, DIR, ELEMS, SUFFIX)
Definition: ops_tmpl_common.c:292
process
static void fn() process(const SwsOpExec *exec, const void *priv, const int bx_start, const int y_start, int bx_end, int y_end)
Definition: ops_tmpl_common.c:318
pixel_t
#define pixel_t
Definition: ops_tmpl_float.c:32
av_refstruct_ref
void * av_refstruct_ref(void *obj)
Create a new reference to an object managed via this API, i.e.
Definition: refstruct.c:140
H
#define H
Definition: pixlet.c:39
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
Y
#define Y
Definition: boxblur.h:37
DECL_SETUP
DECL_SETUP(setup_filter_v, params, out)
Definition: ops_tmpl_common.c:173
SwsOpExec::in_offset_x
int32_t * in_offset_x
Pixel offset map; for horizontal scaling, in bytes.
Definition: ops_dispatch.h:80
weights
static const int weights[]
Definition: hevc_pel.c:32
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Definition: mem.c:264
ff_op_priv_free
static void ff_op_priv_free(SwsOpPriv *priv)
Definition: ops_chain.h:149
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:65
inter_t
#define inter_t
Definition: ops_tmpl_float.c:33
w
uint8_t w
Definition: llvidencdsp.c:39
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:278
setup_filter_v
static int setup_filter_v(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:316
X
@ X
Definition: vf_addroi.c:27
int32_t
int32_t
Definition: audioconvert.c:56
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
WRAP_COMMON_PATTERNS
WRAP_COMMON_PATTERNS(min,.op=SWS_OP_MIN,.setup=ff_sws_setup_clamp,)
SwsOpIter::y
int y
Definition: ops_backend.h:49
min
float min
Definition: vorbis_enc_data.h:429
setup_filter_h
static int setup_filter_h(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:346