FFmpeg
uops.h
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef SWSCALE_UOPS_H
22 #define SWSCALE_UOPS_H
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdint.h>
27 
28 /***************************************************************************
29  * Note: This header must be usable at build time, to generate asm sources *
30  ***************************************************************************/
31 
32 #include "libavutil/attributes.h"
33 
34 typedef struct SwsContext SwsContext;
35 typedef struct SwsFilterWeights SwsFilterWeights;
36 typedef struct SwsOpList SwsOpList;
37 
38 typedef enum SwsPixelType {
45 } SwsPixelType;
46 
50 
/*
 * A single constant value that can be viewed either as raw bytes or as one of
 * the scalar representations below (presumably one view per SwsPixelType).
 * Member order matters: `data` comes first so that (SwsPixel) {0} initializes
 * the byte array, and thereby every byte of the union.
 */
51 typedef union SwsPixel {
52  char data[4]; /* raw byte view; the member initialized by (SwsPixel) {0} */
53 
54  uint8_t u8; /* 8-bit unsigned view */
55  uint16_t u16; /* 16-bit unsigned view */
56  uint32_t u32; /* 32-bit unsigned view */
57  float f32; /* single-precision float view */
58 } SwsPixel;
59 
60 /* Ensures (SwsPixel) {0} is properly initialized to all zeros: the union must
   not be larger than its first member `data`, otherwise bytes past data[3]
   would not be covered by that member's initializer. */
61 static_assert(sizeof(SwsPixel) == sizeof(char[4]), "SwsPixel size mismatch");
62 
63 /**
64  * Bit-mask of components. Exact meaning depends on the usage context.
65  */
66 typedef uint8_t SwsCompMask;
67 enum {
69  SWS_COMP_ALL = 0xF,
70 #define SWS_COMP(X) (1 << (X))
71 #define SWS_COMP_TEST(mask, X) (!!((mask) & SWS_COMP(X)))
72 #define SWS_COMP_INV(mask) ((mask) ^ SWS_COMP_ALL)
73 #define SWS_COMP_ELEMS(N) ((1 << (N)) - 1)
74 #define SWS_COMP_MASK(X, Y, Z, W) \
75  (((X) ? SWS_COMP(0) : 0) | \
76  ((Y) ? SWS_COMP(1) : 0) | \
77  ((Z) ? SWS_COMP(2) : 0) | \
78  ((W) ? SWS_COMP(3) : 0))
79 };
80 
81 
82 #define ff_sws_comp_mask_str(mask) ff_sws_comp_mask_print(mask, (char[5]){0})
83 static inline char *ff_sws_comp_mask_print(SwsCompMask mask, char buf[5])
84 {
85  char *ptr = buf;
86  for (int c = 0; c < 4; c++) {
87  if (SWS_COMP_TEST(mask, c))
88  *ptr++ = "xyzw"[c];
89  }
90  *ptr = '\0';
91  return buf;
92 }
93 
94 typedef uint32_t SwsUOpFlags;
95 typedef enum SwsUOpFlagBits {
97  SWS_UOP_FLAG_FMA = (1 << 0), /* platform supports FMA ops */
98  SWS_UOP_FLAG_MOVE = (1 << 1), /* platform supports SWS_UOP_MOVE */
100 
101 typedef enum SwsUOpType {
103 
104  /* Read/write uops; mask = components to read/write */
105  SWS_UOP_READ_PLANAR, /* simple planar byte-aligned read */
106  SWS_UOP_READ_PLANAR_FH, /* planar read with horizontal filter */
107  SWS_UOP_READ_PLANAR_FV, /* planar read with vertical filter */
109  SWS_UOP_READ_PACKED, /* simple packed byte-aligned read */
110  SWS_UOP_READ_NIBBLE, /* fractional read (4 bits) from single plane */
111  SWS_UOP_READ_BIT, /* fractional read (1 bit) from single plane */
112  SWS_UOP_READ_PALETTE, /* indexed read from palette in plane 1 */
113 
114  SWS_UOP_WRITE_PLANAR, /* simple planar byte-aligned write */
115  SWS_UOP_WRITE_PACKED, /* simple packed byte-aligned write */
116  SWS_UOP_WRITE_NIBBLE, /* fractional write (4 bits) to single plane */
117  SWS_UOP_WRITE_BIT, /* fractional write (1 bit) to single plane */
118 
119  /* Data rearrangement uops; mask = non-trivial and needed components */
120  SWS_UOP_PERMUTE, /* rearrange components (no duplicates) */
121  SWS_UOP_COPY, /* copy/duplicate components */
122  SWS_UOP_MOVE, /* series of register-register assignments */
123 
124  /* Data conversion / manipulation uops; mask = affected components */
125  SWS_UOP_SWAP_BYTES, /* swap byte order in components */
126  SWS_UOP_EXPAND_BIT, /* expand low-order bit to all bits in type */
127  SWS_UOP_EXPAND_PAIR, /* expand bytes in pairs (16 bit) */
128  SWS_UOP_EXPAND_QUAD, /* expand bytes in quads (32 bit) */
129  SWS_UOP_TO_U8, /* cast pixel values to SWS_PIXEL_U8 */
130  SWS_UOP_TO_U16, /* cast pixel values to SWS_PIXEL_U16 */
131  SWS_UOP_TO_U32, /* cast pixel values to SWS_PIXEL_U32 */
132  SWS_UOP_TO_F32, /* cast pixel values to SWS_PIXEL_F32 */
133 
134  /* Arithmetic uops */
135  SWS_UOP_SCALE, /* multiply masked components by scalar */
136  SWS_UOP_ADD, /* add vec4 to masked components */
137  SWS_UOP_MIN, /* min(x, vec4) on masked components */
138  SWS_UOP_MAX, /* max(x, vec4) on masked components */
139 
140  /* Identical to corresponding SwsOpType */
141  SWS_UOP_UNPACK, /* mask = nonzero components in pack pattern */
142  SWS_UOP_PACK, /* mask = nonzero components in pack pattern */
143  SWS_UOP_LSHIFT, /* mask = components to shift */
144  SWS_UOP_RSHIFT, /* mask = components to shift */
145  SWS_UOP_CLEAR, /* mask = components to clear */
146  SWS_UOP_LINEAR, /* mask = non-trivial output rows */
147  SWS_UOP_LINEAR_FMA, /* with SWS_UOP_FLAG_FMA */
148  SWS_UOP_DITHER, /* mask = components to dither */
149 
150  /* Platform-specific uops would go here */
152 } SwsUOpType;
153 
/* Parameters of the filtered read uops (SWS_UOP_READ_*_FV/FH). */
154 typedef struct SwsFilterUOp {
155  SwsPixelType type; /* pixel type to store result as */
156 } SwsFilterUOp;
157 
/* Parameters of a shift uop (presumably SWS_UOP_LSHIFT / SWS_UOP_RSHIFT). */
158 typedef struct SwsShiftUOp {
159  uint8_t amount; /* shift distance; presumably in bits - TODO confirm */
160 } SwsShiftUOp;
161 
/*
 * Component mapping for the data rearrangement uops (presumably
 * SWS_UOP_PERMUTE / SWS_UOP_COPY - TODO confirm against the translator).
 */
162 typedef struct SwsSwizzleUOp {
163  uint8_t in[4]; /* input component for each output component, i.e. in[i] is the source of output i */
164 } SwsSwizzleUOp;
165 
166 typedef struct SwsMoveUOp {
167  /* The worst case number of moves (for two independent cycles) */
168  #define SWS_UOP_MOVE_MAX 6
170 
171  /* This may involve a temporary register (index -1) */
172  int8_t dst[SWS_UOP_MOVE_MAX]; /* destination register index */
173  int8_t src[SWS_UOP_MOVE_MAX]; /* source register index */
174 } SwsMoveUOp;
175 
/* Pack pattern shared by the pack/unpack uops (presumably SWS_UOP_PACK / SWS_UOP_UNPACK). */
176 typedef struct SwsPackUOp {
177  uint8_t pattern[4]; /* bit depth pattern, from MSB to LSB; one entry per component */
178 } SwsPackUOp;
179 
/*
 * Parameters of a clear uop (presumably SWS_UOP_CLEAR). Both fields are
 * component bit-masks (SwsCompMask) flagging special-case clear values.
 */
180 typedef struct SwsClearUOp {
181  SwsCompMask one; /* mask of coefficients equal to all 1s */
182  SwsCompMask zero; /* mask of coefficients equal to all 0s */
183 } SwsClearUOp;
184 
/*
 * Classification of the coefficients of a linear uop. Each field is a bit
 * mask over matrix coefficients; the bit assigned to a coefficient is
 * presumably given by SWS_MASK(row, column) - TODO confirm.
 */
185 typedef struct SwsLinearUOp {
186  uint32_t one; /* mask of coefficients equal to one */
187  uint32_t zero; /* mask of coefficients equal to zero */
188 
189  /* for SWS_UOP_LINEAR_FMA only */
190  uint32_t exact; /* mask of coefficients whose product is exact */
191 } SwsLinearUOp;
192 
/*
 * Bit masks over the coefficients of a matrix with 5 columns per row:
 * the coefficient at row I, column J is represented by bit (5 * I + J).
 * Presumably used for the `one` / `zero` / `exact` fields of SwsLinearUOp.
 *
 *   SWS_MASK(I, J)   single coefficient at row I, column J
 *   SWS_MASK_OFF(I)  column 4 of row I (presumably the offset column)
 *   SWS_MASK_ROW(I)  all five coefficients of row I
 *   SWS_MASK_COL(J)  coefficient J of each of the four rows
 *   SWS_MASK_DIAG4   the diagonal (0,0), (1,1), (2,2), (3,3)
 *
 * Every macro argument is parenthesized so that arbitrary expressions
 * (e.g. `i & 1` or `cond ? a : b`) may be passed safely.
 */
#define SWS_MASK(I, J)  (1 << (5 * (I) + (J)))
#define SWS_MASK_OFF(I) SWS_MASK(I, 4)
#define SWS_MASK_ROW(I) (0x1F << (5 * (I)))
#define SWS_MASK_COL(J) (0x8421 << (J))
#define SWS_MASK_DIAG4  (0x41041)
198 
/*
 * Parameters of a dither uop (presumably SWS_UOP_DITHER). Together these
 * determine the padded dither matrix height, (1 << size_log2) + MAX(y_offset).
 */
199 typedef struct SwsDitherUOp {
200  uint8_t y_offset[4]; /* one entry per component; presumably a line offset into the dither matrix - TODO confirm */
201  uint8_t size_log2; /* log2 of the dither matrix size, i.e. size is (1 << size_log2) */
202 } SwsDitherUOp;
203 
204 /**
205  * Computes (1 << size_log2) + MAX(y_offset). The dither matrix attached to
206  * the SwsUOp is always pre-padded to this number of lines.
207  */
209 
210 typedef union SwsUOpParams {
211  SwsFilterUOp filter; /* for SWS_UOP_READ_*_FV/FH */
219 } SwsUOpParams;
220 
221 typedef struct SwsUOp {
222  /* These fields uniquely identify the uop implementation */
227 
228  /* Constant data for this uop; not part of the unique identifier */
229  union {
230  SwsFilterWeights *kernel; /* refstruct */
231  SwsPixel *ptr; /* refstruct */
234  SwsPixel mat4[4][5]; /* row major */
235  void *opaque; /* reserved for internal use */
236  } data;
237 } SwsUOp;
238 
239 /**
240  * Compare two SwsUOps for equality (excluding constant data).
241  */
242 int ff_sws_uop_cmp(const SwsUOp *a, const SwsUOp *b);
243 
244 static inline int ff_sws_uop_cmp_v(const void *a, const void *b)
245 {
246  return ff_sws_uop_cmp(a, b);
247 }
248 
249 /**
250  * Generate a unique name for a SwsUOp.
251  */
252 #define SWS_UOP_NAME_MAX 64
253 void ff_sws_uop_name(const SwsUOp *op, char buf[SWS_UOP_NAME_MAX]);
254 
255 typedef struct SwsUOpList {
257  int num_ops;
258 } SwsUOpList;
259 
261 void ff_sws_uop_list_free(SwsUOpList **ops);
262 
263 /* Takes over ownership of `uop` and sets it to {0}, even on failure. */
264 int ff_sws_uop_list_append(SwsUOpList *uops, SwsUOp *uop);
265 
266 /**
267  * Translate a list of operations down to micro-ops, which can be further
268  * optimized and then directly executed by backends.
269  *
270  * Return 0 or a negative error code.
271  */
273  SwsUOpFlags flags, SwsUOpList *uops);
274 
275 #endif
flags
const SwsFlags flags[]
Definition: swscale.c:85
SWS_UOP_SCALE
@ SWS_UOP_SCALE
Definition: uops.h:135
SwsUOpParams::move
SwsMoveUOp move
Definition: uops.h:214
ff_sws_uop_cmp_v
static int ff_sws_uop_cmp_v(const void *a, const void *b)
Definition: uops.h:244
SWS_UOP_RSHIFT
@ SWS_UOP_RSHIFT
Definition: uops.h:144
SWS_PIXEL_NONE
@ SWS_PIXEL_NONE
Definition: uops.h:39
SwsClearUOp::zero
SwsCompMask zero
Definition: uops.h:182
SwsUOp::data
union SwsUOp::@589 data
SwsSwizzleUOp
Definition: uops.h:162
mask
int mask
Definition: mediacodecdec_common.c:154
ff_sws_uop_cmp
int ff_sws_uop_cmp(const SwsUOp *a, const SwsUOp *b)
Compare two SwsUOps for equality (excluding constant data).
Definition: uops.c:31
SwsFilterWeights
Represents a computed filter kernel.
Definition: filters.h:85
ff_sws_uop_list_free
void ff_sws_uop_list_free(SwsUOpList **ops)
Definition: uops.c:190
b
#define b
Definition: input.c:43
SWS_UOP_MOVE_MAX
#define SWS_UOP_MOVE_MAX
Definition: uops.h:168
SWS_UOP_LINEAR_FMA
@ SWS_UOP_LINEAR_FMA
Definition: uops.h:147
SWS_UOP_MAX
@ SWS_UOP_MAX
Definition: uops.h:138
ff_sws_pixel_type_name
const char * ff_sws_pixel_type_name(SwsPixelType type)
Definition: ops.c:62
SwsUOpParams::swizzle
SwsSwizzleUOp swizzle
Definition: uops.h:213
SWS_UOP_LSHIFT
@ SWS_UOP_LSHIFT
Definition: uops.h:143
SwsLinearUOp::one
uint32_t one
Definition: uops.h:186
SwsFilterUOp
Definition: uops.h:154
SWS_UOP_TYPE_NB
@ SWS_UOP_TYPE_NB
Definition: uops.h:151
SWS_COMP_ALL
@ SWS_COMP_ALL
Definition: uops.h:69
SWS_UOP_NAME_MAX
#define SWS_UOP_NAME_MAX
Generate a unique name for a SwsUOp.
Definition: uops.h:252
SwsMoveUOp::num_moves
int num_moves
Definition: uops.h:169
SwsMoveUOp
Definition: uops.h:166
SWS_COMP_TEST
#define SWS_COMP_TEST(mask, X)
Definition: uops.h:71
SWS_UOP_TO_U16
@ SWS_UOP_TO_U16
Definition: uops.h:130
SWS_UOP_PACK
@ SWS_UOP_PACK
Definition: uops.h:142
ff_sws_uop_list_alloc
SwsUOpList * ff_sws_uop_list_alloc(void)
Definition: uops.c:204
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type) av_const
Definition: ops.c:77
SwsShiftUOp::amount
uint8_t amount
Definition: uops.h:159
SWS_UOP_PERMUTE
@ SWS_UOP_PERMUTE
Definition: uops.h:120
SwsUOpParams::pack
SwsPackUOp pack
Definition: uops.h:215
SWS_UOP_EXPAND_BIT
@ SWS_UOP_EXPAND_BIT
Definition: uops.h:126
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
SwsUOpParams
Definition: uops.h:210
SwsFilterUOp::type
SwsPixelType type
Definition: uops.h:155
SWS_UOP_COPY
@ SWS_UOP_COPY
Definition: uops.h:121
SWS_UOP_INVALID
@ SWS_UOP_INVALID
Definition: uops.h:102
ff_sws_pixel_type_is_int
bool ff_sws_pixel_type_is_int(SwsPixelType type) av_const
Definition: ops.c:92
SWS_UOP_WRITE_NIBBLE
@ SWS_UOP_WRITE_NIBBLE
Definition: uops.h:116
SWS_UOP_READ_PALETTE
@ SWS_UOP_READ_PALETTE
Definition: uops.h:112
av_const
#define av_const
Definition: attributes.h:113
SwsUOp::kernel
SwsFilterWeights * kernel
Definition: uops.h:230
SWS_UOP_MOVE
@ SWS_UOP_MOVE
Definition: uops.h:122
SwsPackUOp
Definition: uops.h:176
dither
static const uint16_t dither[8][8]
Definition: vf_gradfun.c:46
SwsUOp::uop
SwsUOpType uop
Definition: uops.h:224
SWS_UOP_WRITE_PLANAR
@ SWS_UOP_WRITE_PLANAR
Definition: uops.h:114
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SWS_UOP_TO_F32
@ SWS_UOP_TO_F32
Definition: uops.h:132
SWS_UOP_MIN
@ SWS_UOP_MIN
Definition: uops.h:137
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
SwsCompMask
uint8_t SwsCompMask
Bit-mask of components.
Definition: uops.h:61
SWS_UOP_READ_PACKED
@ SWS_UOP_READ_PACKED
Definition: uops.h:109
SwsPixel::f32
float f32
Definition: uops.h:57
SwsPixel::u8
uint8_t u8
Definition: uops.h:54
SwsUOp::mat4
SwsPixel mat4[4][5]
Definition: uops.h:234
SWS_PIXEL_TYPE_NB
@ SWS_PIXEL_TYPE_NB
Definition: uops.h:44
SwsUOpParams::shift
SwsShiftUOp shift
Definition: uops.h:212
SwsLinearUOp
Definition: uops.h:185
SwsMoveUOp::dst
int8_t dst[SWS_UOP_MOVE_MAX]
Definition: uops.h:172
SwsClearUOp::one
SwsCompMask one
Definition: uops.h:181
SWS_UOP_FLAG_MOVE
@ SWS_UOP_FLAG_MOVE
Definition: uops.h:98
SwsClearUOp
Definition: uops.h:180
SWS_UOP_READ_NIBBLE
@ SWS_UOP_READ_NIBBLE
Definition: uops.h:110
attributes.h
SWS_UOP_ADD
@ SWS_UOP_ADD
Definition: uops.h:136
SwsShiftUOp
Definition: uops.h:158
SwsPixelType
SwsPixelType
Definition: uops.h:38
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
SwsUOp::par
SwsUOpParams par
Definition: uops.h:226
SWS_COMP_NONE
@ SWS_COMP_NONE
Definition: uops.h:68
SWS_UOP_TO_U32
@ SWS_UOP_TO_U32
Definition: uops.h:131
SwsPixel::u16
uint16_t u16
Definition: uops.h:55
SwsUOp
Definition: uops.h:221
SWS_UOP_WRITE_BIT
@ SWS_UOP_WRITE_BIT
Definition: uops.h:117
SwsUOp::opaque
void * opaque
Definition: uops.h:235
SWS_UOP_READ_PLANAR_FV_FMA
@ SWS_UOP_READ_PLANAR_FV_FMA
Definition: uops.h:108
SwsLinearUOp::zero
uint32_t zero
Definition: uops.h:187
SwsUOp::mask
SwsCompMask mask
Definition: uops.h:225
SwsDitherUOp::size_log2
uint8_t size_log2
Definition: uops.h:201
SWS_UOP_UNPACK
@ SWS_UOP_UNPACK
Definition: uops.h:141
SWS_PIXEL_U32
@ SWS_PIXEL_U32
Definition: uops.h:42
SWS_UOP_FLAG_NONE
@ SWS_UOP_FLAG_NONE
Definition: uops.h:96
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
SwsPixel
Definition: uops.h:51
SwsUOpFlagBits
SwsUOpFlagBits
Definition: uops.h:95
SWS_UOP_TO_U8
@ SWS_UOP_TO_U8
Definition: uops.h:129
SWS_UOP_READ_PLANAR
@ SWS_UOP_READ_PLANAR
Definition: uops.h:105
SWS_PIXEL_U8
@ SWS_PIXEL_U8
Definition: uops.h:40
SwsUOpType
SwsUOpType
Definition: uops.h:101
SWS_UOP_SWAP_BYTES
@ SWS_UOP_SWAP_BYTES
Definition: uops.h:125
SwsUOp::scalar
SwsPixel scalar
Definition: uops.h:232
SWS_UOP_LINEAR
@ SWS_UOP_LINEAR
Definition: uops.h:146
SwsUOpParams::lin
SwsLinearUOp lin
Definition: uops.h:217
SwsPackUOp::pattern
uint8_t pattern[4]
Definition: uops.h:177
SwsUOp::type
SwsPixelType type
Definition: uops.h:223
SwsUOpList::num_ops
int num_ops
Definition: uops.h:257
ff_sws_uop_name
void ff_sws_uop_name(const SwsUOp *op, char buf[SWS_UOP_NAME_MAX])
Definition: uops.c:81
SwsUOp::ptr
SwsPixel * ptr
Definition: uops.h:231
ff_sws_uop_list_append
int ff_sws_uop_list_append(SwsUOpList *uops, SwsUOp *uop)
Definition: uops.c:209
ff_sws_ops_translate
int ff_sws_ops_translate(SwsContext *ctx, const SwsOpList *ops, SwsUOpFlags flags, SwsUOpList *uops)
Translate a list of operations down to micro-ops, which can be further optimized and then directly ex...
Definition: uops.c:685
ff_sws_comp_mask_print
static char * ff_sws_comp_mask_print(SwsCompMask mask, char buf[5])
Definition: uops.h:83
SwsLinearUOp::exact
uint32_t exact
Definition: uops.h:190
SwsDitherUOp::y_offset
uint8_t y_offset[4]
Definition: uops.h:200
SwsUOpList
Definition: uops.h:255
SwsUOp::vec4
SwsPixel vec4[4]
Definition: uops.h:233
SWS_UOP_DITHER
@ SWS_UOP_DITHER
Definition: uops.h:148
SWS_UOP_WRITE_PACKED
@ SWS_UOP_WRITE_PACKED
Definition: uops.h:115
SwsDitherUOp
Definition: uops.h:199
SwsUOpParams::dither
SwsDitherUOp dither
Definition: uops.h:218
SWS_PIXEL_F32
@ SWS_PIXEL_F32
Definition: uops.h:43
SWS_UOP_READ_PLANAR_FV
@ SWS_UOP_READ_PLANAR_FV
Definition: uops.h:107
SWS_UOP_EXPAND_QUAD
@ SWS_UOP_EXPAND_QUAD
Definition: uops.h:128
SwsUOpFlags
uint32_t SwsUOpFlags
Definition: uops.h:94
SWS_UOP_READ_PLANAR_FH
@ SWS_UOP_READ_PLANAR_FH
Definition: uops.h:106
SwsMoveUOp::src
int8_t src[SWS_UOP_MOVE_MAX]
Definition: uops.h:173
SwsUOpParams::filter
SwsFilterUOp filter
Definition: uops.h:211
SWS_UOP_FLAG_FMA
@ SWS_UOP_FLAG_FMA
Definition: uops.h:97
SWS_UOP_READ_BIT
@ SWS_UOP_READ_BIT
Definition: uops.h:111
SWS_UOP_CLEAR
@ SWS_UOP_CLEAR
Definition: uops.h:145
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:265
SwsContext
Main external API structure.
Definition: swscale.h:229
SWS_PIXEL_U16
@ SWS_PIXEL_U16
Definition: uops.h:41
SwsPixel::u32
uint32_t u32
Definition: uops.h:56
SwsSwizzleUOp::in
uint8_t in[4]
Definition: uops.h:163
SwsUOpParams::clear
SwsClearUOp clear
Definition: uops.h:216
SwsPixel::data
char data[4]
Definition: uops.h:52
SwsUOpList::ops
SwsUOp * ops
Definition: uops.h:256
ff_sws_dither_height
int ff_sws_dither_height(const SwsDitherUOp *dither)
Computes (1 << size_log2) + MAX(y_offset).
Definition: uops.c:222
SWS_UOP_EXPAND_PAIR
@ SWS_UOP_EXPAND_PAIR
Definition: uops.h:127