FFmpeg
uops_backend.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2026 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
22 
23 #include "uops_tmpl.h"
24 
25 /**
26  * We want to disable FP contraction because this is a reference backend that
27  * establishes a bit-exact reference result.
28  */
29 #ifdef __clang__
30 #pragma STDC FP_CONTRACT OFF
31 #elif AV_GCC_VERSION_AT_LEAST(4, 8)
32 #pragma GCC optimize ("fp-contract=off")
33 #elif defined(_MSC_VER)
34 #pragma fp_contract (off)
35 #endif
36 
/**
 * On GCC 4.4 and newer, compile this file with GCC's "finite-math-only"
 * optimization option.
 * NOTE(review): the rationale is not stated here; confirm that it does not
 * conflict with the bit-exact reference goal described above.
 */
37 #if AV_GCC_VERSION_AT_LEAST(4, 4)
38 #pragma GCC optimize ("finite-math-only")
39 #endif
40 
41 /* Integer types */
42 #define IS_FLOAT 0
43 # define BIT_DEPTH 8
44 # include "uops_tmpl.c"
45 # undef BIT_DEPTH
46 # define BIT_DEPTH 16
47 # include "uops_tmpl.c"
48 # undef BIT_DEPTH
49 # define BIT_DEPTH 32
50 # include "uops_tmpl.c"
51 # undef BIT_DEPTH
52 #undef IS_FLOAT
53 
54 /* Floating point types */
55 #define IS_FLOAT 1
56 # define BIT_DEPTH 32
57 # include "uops_tmpl.c"
58 # undef BIT_DEPTH
59 #undef IS_FLOAT
60 
/**
 * Per-type list of the uops handled by this backend: for the given TYPE,
 * invokes SWS_FOR(TYPE, <uop>, REF_ENTRY) once for every uop kind below.
 * SWS_FOR and REF_ENTRY are not defined in this file, so the exact expansion
 * cannot be confirmed from here (presumably one table entry per generated
 * kernel -- verify against the template headers).
 * The trailing comment line terminates the backslash continuation, so that
 * every real entry line can end in a backslash.
 */
61 /* Expanded as new uop types are implemented in the C/template backend */
62 #define REF_ALL_UOPS(TYPE) \
63  SWS_FOR(TYPE, READ_PLANAR, REF_ENTRY) \
64  SWS_FOR(TYPE, READ_PLANAR_FV, REF_ENTRY) \
65  SWS_FOR(TYPE, READ_PLANAR_FH, REF_ENTRY) \
66  SWS_FOR(TYPE, READ_PACKED, REF_ENTRY) \
67  SWS_FOR(TYPE, READ_NIBBLE, REF_ENTRY) \
68  SWS_FOR(TYPE, READ_BIT, REF_ENTRY) \
69  SWS_FOR(TYPE, PERMUTE, REF_ENTRY) \
70  SWS_FOR(TYPE, COPY, REF_ENTRY) \
71  SWS_FOR(TYPE, WRITE_PLANAR, REF_ENTRY) \
72  SWS_FOR(TYPE, WRITE_PACKED, REF_ENTRY) \
73  SWS_FOR(TYPE, WRITE_NIBBLE, REF_ENTRY) \
74  SWS_FOR(TYPE, WRITE_BIT, REF_ENTRY) \
75  SWS_FOR(TYPE, SWAP_BYTES, REF_ENTRY) \
76  SWS_FOR(TYPE, EXPAND_BIT, REF_ENTRY) \
77  SWS_FOR(TYPE, EXPAND_PAIR, REF_ENTRY) \
78  SWS_FOR(TYPE, EXPAND_QUAD, REF_ENTRY) \
79  SWS_FOR(TYPE, TO_U8, REF_ENTRY) \
80  SWS_FOR(TYPE, TO_U16, REF_ENTRY) \
81  SWS_FOR(TYPE, TO_U32, REF_ENTRY) \
82  SWS_FOR(TYPE, TO_F32, REF_ENTRY) \
83  SWS_FOR(TYPE, SCALE, REF_ENTRY) \
84  SWS_FOR(TYPE, ADD, REF_ENTRY) \
85  SWS_FOR(TYPE, MIN, REF_ENTRY) \
86  SWS_FOR(TYPE, MAX, REF_ENTRY) \
87  SWS_FOR(TYPE, UNPACK, REF_ENTRY) \
88  SWS_FOR(TYPE, PACK, REF_ENTRY) \
89  SWS_FOR(TYPE, LSHIFT, REF_ENTRY) \
90  SWS_FOR(TYPE, RSHIFT, REF_ENTRY) \
91  SWS_FOR(TYPE, CLEAR, REF_ENTRY) \
92  SWS_FOR(TYPE, LINEAR, REF_ENTRY) \
93  SWS_FOR(TYPE, DITHER, REF_ENTRY) \
94  /* end of macro */
95 
/**
 * The single uop table of this backend; compile() passes its address to
 * ff_sws_uop_lookup(). The entry list ends with a NULL sentinel.
 * NOTE(review): source lines 97 and 99-102 are missing from this listing;
 * presumably they set the remaining table fields and expand REF_ALL_UOPS()
 * into .entries -- confirm against the real file.
 */
96 static const SwsOpTable op_table = {
98  .entries = {
103  NULL
104  },
105 };
106 
107 static void process(const SwsOpExec *exec, const void *priv,
108  const int bx_start, const int y_start,
109  int bx_end, int y_end)
110 {
111  const SwsOpChain *chain = priv;
112  const SwsOpImpl *impl = chain->impl;
113  block_t x, y, z, w; /* allocate enough space for any intermediate */
114 
115  SwsOpIter iterdata;
116  SwsOpIter *iter = &iterdata; /* for CONTINUE() macro to work */
117  iter->exec = exec;
118  for (int i = 0; i < 4; i++) {
119  iter->in[i] = (uintptr_t) exec->in[i];
120  iter->out[i] = (uintptr_t) exec->out[i];
121  }
122 
123  for (iter->y = y_start; iter->y < y_end; iter->y++) {
124  for (int block = bx_start; block < bx_end; block++) {
125  iter->x = block * SWS_BLOCK_SIZE;
126  CONTINUE(&x, &y, &z, &w);
127  }
128 
129  const int y_bump = exec->in_bump_y ? exec->in_bump_y[iter->y] : 0;
130  for (int i = 0; i < 4; i++) {
131  iter->in[i] += exec->in_bump[i] + y_bump * exec->in_stride[i];
132  iter->out[i] += exec->out_bump[i];
133  }
134  }
135 }
136 
/**
 * Compile an operation list for the C reference backend.
 *
 * Translates `ops` into micro-ops with ff_sws_ops_translate(), looks every
 * micro-op up in op_table through ff_sws_uop_lookup() (which receives
 * `chain`), and on success fills `*out` with the chain and the process()
 * entry point.
 *
 * @param ctx  context handed to the translate/lookup helpers and to av_log()
 * @param ops  operation list to translate
 * @param out  written on success only
 * @return 0 on success; on failure AVERROR(ENOMEM) for the two allocation
 *         checks, otherwise the negative value returned by the failing helper
 *
 * NOTE(review): the statements that declare and allocate `chain` and `uops`
 * (source lines 141 and 145) are missing from this listing; presumably they
 * call ff_sws_op_chain_alloc() and ff_sws_uop_list_alloc() -- confirm.
 */
137 static int compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
138 {
139  int ret;
140 
/* Returns directly on this check; every later failure goes through `fail` */
142  if (!chain)
143  return AVERROR(ENOMEM);
144 
146  if (!uops) {
147  ret = AVERROR(ENOMEM);
148  goto fail;
149  }
150 
/* Lower the op list to micro-ops; the third argument (flags) is 0 */
151  ret = ff_sws_ops_translate(ctx, ops, 0, uops);
152  if (ret < 0)
153  goto fail;
154 
/* A successful translation must yield at least one micro-op */
155  av_assert0(uops->num_ops > 0);
156  for (int i = 0; i < uops->num_ops; i++) {
/* Only one table is searched, hence the table count of 1 */
157  const SwsOpTable *table = &op_table;
158  ret = ff_sws_uop_lookup(ctx, &table, 1, &uops->ops[i],
159  SWS_BLOCK_SIZE, chain);
160  if (ret < 0)
161  goto fail;
162  }
163 
/* On success `chain` is stored in out->priv and is not freed here.
 * NOTE(review): source line 171 (between .priv and .func) is missing from
 * this listing; presumably it sets the callback that frees priv -- confirm. */
164  *out = (SwsCompiledOp) {
165  .slice_align = 1,
166  .block_size = SWS_BLOCK_SIZE,
167  .cpu_flags = chain->cpu_flags,
168  .over_read = chain->over_read,
169  .over_write = chain->over_write,
170  .priv = chain,
172  .func = process,
173  };
174 
/* Debug aid: log the name of every micro-op that was compiled */
175  av_log(ctx, AV_LOG_DEBUG, "Compiled micro-ops:\n");
176  for (int i = 0; i < uops->num_ops; i++) {
177  char name[SWS_UOP_NAME_MAX];
178  ff_sws_uop_name(&uops->ops[i], name);
179  av_log(ctx, AV_LOG_DEBUG, " %s\n", name);
180  }
181 
/* The micro-op list is freed on the success path as well */
182  ff_sws_uop_list_free(&uops);
183  return 0;
184 
/* Error path: release both the micro-op list and the chain */
185 fail:
186  ff_sws_uop_list_free(&uops);
187  ff_sws_op_chain_free(chain);
188  return ret;
189 }
190 
/* Short name of this backend */
192  .name = "c",
/* SWS_BACKEND_C: the template-based C reference implementation */
193  .flags = SWS_BACKEND_C,
/* Entry point that builds a SwsCompiledOp from an op list (compile() above) */
194  .compile = compile,
/* No hardware pixel format; presumably marks this as a software backend -- confirm */
195  .hw_format = AV_PIX_FMT_NONE,
196 };
SwsOpTable
Copyright (C) 2025 Niklas Haas.
Definition: ops_chain.h:154
name
it's the only field you need to keep assuming you have a context There is some magic you don't need to care about around this just let it vf default minimum maximum flags name is the option name
Definition: writing_filters.txt:88
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
out
static FILE * out
Definition: movenc.c:55
REF_ALL_UOPS
#define REF_ALL_UOPS(TYPE)
Definition: uops_backend.c:62
SwsOpExec::in_bump
ptrdiff_t in_bump[4]
Pointer bump, difference between stride and processed line size.
Definition: ops_dispatch.h:51
SwsOpIter::exec
const SwsOpExec * exec
Definition: uops_tmpl.h:62
SwsOpExec::in
const uint8_t * in[4]
Definition: ops_dispatch.h:37
U32
@ U32
Definition: sw_ops.c:43
ff_sws_uop_lookup
int ff_sws_uop_lookup(SwsContext *ctx, const SwsOpTable *const tables[], int num_tables, const SwsUOp *uop, const int block_size, SwsOpChain *chain)
"Compile" a single uop by looking it up in a list of fixed size uop tables, in decreasing order of pr...
Definition: ops_chain.c:116
table
static const uint16_t table[]
Definition: prosumer.c:203
SwsOpIter
Internal context holding per-iter execution data.
Definition: uops_tmpl.h:56
SwsOpExec::in_stride
ptrdiff_t in_stride[4]
Definition: ops_dispatch.h:41
SwsOpBackend::name
const char * name
Definition: ops_dispatch.h:134
SWS_UOP_NAME_MAX
#define SWS_UOP_NAME_MAX
Generate a unique name for a SwsUOp.
Definition: uops.h:232
SwsOpChain::cpu_flags
int cpu_flags
Definition: ops_chain.h:89
SwsOpIter::x
int x
Definition: uops_tmpl.h:59
SwsOpTable::block_size
int block_size
Definition: ops_chain.h:156
SwsOpChain::over_read
int over_read
Definition: ops_chain.h:90
SwsOpChain::free
void(* free[SWS_MAX_OPS+1])(SwsOpPriv *)
Definition: ops_chain.h:87
avassert.h
ff_sws_op_chain_alloc
SwsOpChain * ff_sws_op_chain_alloc(void)
Definition: ops_chain.c:29
backend_c
const SwsOpBackend backend_c
Copyright (C) 2025 Niklas Haas.
Definition: uops_backend.c:191
SwsOpExec::in_bump_y
int32_t * in_bump_y
Line bump; determines how many additional lines to advance (after incrementing normally to the next l...
Definition: ops_dispatch.h:72
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:42
SwsOpImpl
Definition: ops_chain.h:71
AV_LOG_DEBUG
#define AV_LOG_DEBUG
Stuff which is only useful for libav* developers.
Definition: log.h:231
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
SwsOpChain::impl
SwsOpImpl impl[SWS_MAX_OPS+1]
Definition: ops_chain.h:86
SwsOpBackend
Definition: ops_dispatch.h:133
SwsOpIter::out
uintptr_t out[4]
Definition: uops_tmpl.h:58
SwsOpExec
Copyright (C) 2026 Niklas Haas.
Definition: ops_dispatch.h:35
fail
#define fail
Definition: test.h:478
SwsOpChain
Compiled "chain" of operations, which can be dispatched efficiently.
Definition: ops_chain.h:84
NULL
#define NULL
Definition: coverity.c:32
compile
static int compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
Definition: uops_backend.c:137
block_t
Definition: uops_tmpl.h:43
SWS_BLOCK_SIZE
#define SWS_BLOCK_SIZE
Copyright (C) 2026 Niklas Haas.
Definition: uops_tmpl.h:40
process
static void process(const SwsOpExec *exec, const void *priv, const int bx_start, const int y_start, int bx_end, int y_end)
Definition: uops_backend.c:107
SwsOpIter::in
uintptr_t in[4]
Definition: uops_tmpl.h:57
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
op_table
static const SwsOpTable op_table
Definition: uops_backend.c:96
ff_sws_uop_list_alloc
SwsUOpList * ff_sws_uop_list_alloc(void)
Definition: uops.c:382
ff_sws_op_chain_free_cb
void ff_sws_op_chain_free_cb(void *ptr)
Definition: ops_chain.c:34
SwsOpExec::out
uint8_t * out[4]
Definition: ops_dispatch.h:38
U8
@ U8
Definition: sw_ops.c:41
CONTINUE
#define CONTINUE(...)
Definition: uops_tmpl.h:107
ff_sws_op_chain_free
static void ff_sws_op_chain_free(SwsOpChain *chain)
Definition: ops_chain.h:96
ff_sws_ops_translate
int ff_sws_ops_translate(SwsContext *ctx, const SwsOpList *ops, SwsUOpFlags flags, SwsUOpList *uops)
Translate a list of operations down to micro-ops, which can be further optimized and then directly ex...
Definition: uops.c:850
ret
ret
Definition: filter_design.txt:187
F32
@ F32
Definition: sw_ops.c:44
SwsUOpList::num_ops
int num_ops
Definition: uops.h:237
SwsCompiledOp
Definition: ops_dispatch.h:100
ff_sws_uop_list_free
void ff_sws_uop_list_free(SwsUOpList **p_ops)
Definition: uops.c:368
SWS_BACKEND_C
@ SWS_BACKEND_C
Template-based C reference implementation.
Definition: swscale.h:116
uops_tmpl.c
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
ff_sws_uop_name
void ff_sws_uop_name(const SwsUOp *op, char buf[SWS_UOP_NAME_MAX])
Definition: uops.c:129
SwsUOpList
Definition: uops.h:235
w
uint8_t w
Definition: llvidencdsp.c:39
uops_tmpl.h
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
U16
@ U16
Definition: sw_ops.c:42
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:258
SwsContext
Main external API structure.
Definition: swscale.h:229
SwsUOpList::ops
SwsUOp * ops
Definition: uops.h:236
SwsOpExec::out_bump
ptrdiff_t out_bump[4]
Definition: ops_dispatch.h:52
SwsOpIter::y
int y
Definition: uops_tmpl.h:59
SwsOpChain::over_write
int over_write
Definition: ops_chain.h:91