FFmpeg
ops_chain.h
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef SWSCALE_OPS_CHAIN_H
22 #define SWSCALE_OPS_CHAIN_H
23 
24 #include "libavutil/cpu.h"
25 
26 #include "ops_internal.h"
27 
28 /**
29  * Helpers for SIMD implementations based on chained kernels, using a
30  * continuation passing style to link them together.
31  *
32  * The basic idea here is to "link" together a series of different operation
33  * kernels by constructing a list of kernel addresses into an SwsOpChain. Each
34  * kernel will load the address of the next kernel (the "continuation") from
35  * this struct, and jump directly into it; using an internal function signature
36  * that is an implementation detail of the specific backend.
37  */
38 
39 /**
40  * Private data for each kernel.
41  */
42 typedef union SwsOpPriv { /* one 16-byte slot of per-kernel data, viewable under several types */
43  DECLARE_ALIGNED_16(char, data)[16]; /* raw byte view; declared through the 16-byte alignment macro */
44 
45  /* Common types: alternative typed views overlaying the same 16 bytes */
46  void *ptr; /* pointer view */
47  uint8_t u8[16]; /* 16 x 8-bit unsigned */
48  uint16_t u16[8]; /* 8 x 16-bit unsigned */
49  uint32_t u32[4]; /* 4 x 32-bit unsigned */
50  float f32[4]; /* 4 x float */
51 } SwsOpPriv;
52 
53 static_assert(sizeof(SwsOpPriv) == 16, "SwsOpPriv size mismatch"); /* SwsOpImpl embeds this union and asserts its own fixed layout */
54 
55 /* Setup helpers: each has the same signature as the optional SwsOpEntry.setup hook (reads `op`, writes `out`, returns int) */
56 int ff_sws_setup_u(const SwsOp *op, SwsOpPriv *out); /* NOTE(review): the suffix presumably names the SwsOpPriv form that is written - confirm in ops_chain.c */
57 int ff_sws_setup_u8(const SwsOp *op, SwsOpPriv *out);
58 int ff_sws_setup_q(const SwsOp *op, SwsOpPriv *out);
59 int ff_sws_setup_q4(const SwsOp *op, SwsOpPriv *out);
60 
61 /**
62  * Per-kernel execution context (this describes struct SwsOpImpl, declared right after the SwsFuncPtr typedef).
63  *
64  * Note: This struct is hard-coded in assembly, so do not change the layout.
65  */
66 typedef void (*SwsFuncPtr)(void); /* generic kernel address; the signature kernels really use is internal to each backend */
67 typedef struct SwsOpImpl {
68  SwsFuncPtr cont; /* [offset = 0] Continuation for this operation. */
69  SwsOpPriv priv; /* [offset = 16] Private data for this operation. */
70 } SwsOpImpl;
71 
72 static_assert(sizeof(SwsOpImpl) == 32, "SwsOpImpl layout mismatch"); /* compile-time check of the total size */
73 static_assert(offsetof(SwsOpImpl, priv) == 16, "SwsOpImpl layout mismatch"); /* compile-time check that priv starts at byte 16 */
74 
75 /**
76  * Compiled "chain" of operations, which can be dispatched efficiently.
77  * Effectively just a list of function pointers, alongside a small amount of
78  * private data for each operation.
79  */
80 typedef struct SwsOpChain {
81 #define SWS_MAX_OPS 16 /* sizes both arrays below (each holds SWS_MAX_OPS + 1 slots) */
82  SwsOpImpl impl[SWS_MAX_OPS + 1]; /* reserve extra space for the entrypoint */
83  void (*free[SWS_MAX_OPS + 1])(void *); /* one callback slot per impl[] slot; presumably releases that slot's private data - confirm in ops_chain.c */
84  int num_impl; /* presumably the number of impl[] slots in use - confirm in ops_chain.c */
85  int cpu_flags; /* set of all used CPU flags */
86 } SwsOpChain;
87 
88 SwsOpChain *ff_sws_op_chain_alloc(void); /* returns a new chain; presumably released with ff_sws_op_chain_free() - confirm in ops_chain.c */
89 void ff_sws_op_chain_free(SwsOpChain *chain);
90 
91 /* Returns 0 on success, or a negative error code. */
92 int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, /* chain to extend and the kernel address to add */
93  void (*free)(void *), const SwsOpPriv *priv); /* callback and private data stored alongside the kernel */
94 
95 typedef struct SwsOpEntry { /* one kernel: the keys it is matched on plus its implementation hooks */
96  /* Kernel metadata; reduced size subset of SwsOp */
97  SwsOpType op; /* always part of the match (see `flexible`) */
98  SwsPixelType type; /* always part of the match (see `flexible`) */
99  bool flexible; /* if true, only the type and op are matched */
100  bool unused[4]; /* for kernels which operate on a subset of components */
101 
102  union { /* extra data defining the operation, unless `flexible` is true */
103  SwsReadWriteOp rw;
104  SwsPackOp pack;
105  SwsSwizzleOp swizzle;
106  SwsConvertOp convert;
107  uint32_t linear_mask; /* subset of SwsLinearOp */
108  int dither_size; /* subset of SwsDitherOp */
109  int clear_value; /* clear value for integer clears */
110  };
111 
112  /* Kernel implementation */
113  SwsFuncPtr func; /* kernel address */
114  int (*setup)(const SwsOp *op, SwsOpPriv *out); /* optional */
115  void (*free)(void *priv); /* NOTE(review): presumably releases what `setup` stored - confirm in ops_chain.c */
116 } SwsOpEntry;
117 
118 typedef struct SwsOpTable { /* a list of kernel entries sharing one CPU-flag requirement and one block size */
119  unsigned cpu_flags; /* required CPU flags for this table */
120  int block_size; /* fixed block size of this table */
121  const SwsOpEntry *entries[]; /* flexible array of entry pointers, terminated by NULL */
122 } SwsOpTable;
123 
124 /**
125  * "Compile" a single op by looking it up in a list of fixed size op tables.
126  * See `op_match` in `ops.c` for details on how the matching works.
127  *
128  * Returns 0, AVERROR(EAGAIN), or a negative error code.
129  */
130 int ff_sws_op_compile_tables(const SwsOpTable *const tables[], int num_tables, /* candidate tables and how many there are */
131  SwsOpList *ops, const int block_size, /* `ops` is taken non-const; NOTE(review): presumably modified by the call - confirm in ops_chain.c */
132  SwsOpChain *chain); /* chain passed non-const; NOTE(review): the meaning of AVERROR(EAGAIN) is not stated in this header - confirm in ops_chain.c */
133 
134 #endif
SwsOpTable
Definition: ops_chain.h:118
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:68
SWS_MAX_OPS
#define SWS_MAX_OPS
Definition: ops_chain.h:81
out
FILE * out
Definition: movenc.c:55
SwsOpImpl::cont
SwsFuncPtr cont
Definition: ops_chain.h:68
ff_sws_op_chain_alloc
SwsOpChain * ff_sws_op_chain_alloc(void)
Definition: ops_chain.c:29
data
const char data[16]
Definition: mxf.c:149
ff_sws_op_chain_free
void ff_sws_op_chain_free(SwsOpChain *chain)
Definition: ops_chain.c:34
SwsOpEntry::type
SwsPixelType type
Definition: ops_chain.h:98
SwsOpEntry::op
SwsOpType op
Definition: ops_chain.h:97
SwsOpChain::cpu_flags
int cpu_flags
Definition: ops_chain.h:85
SwsOpEntry::setup
int(* setup)(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.h:114
SwsPixelType
SwsPixelType
Copyright (C) 2025 Niklas Haas.
Definition: ops.h:30
SwsOpTable::block_size
int block_size
Definition: ops_chain.h:120
SwsOpPriv::u32
uint32_t u32[4]
Definition: ops_chain.h:49
SwsFuncPtr
void(* SwsFuncPtr)(void)
Per-kernel execution context.
Definition: ops_chain.h:66
ff_sws_setup_u8
int ff_sws_setup_u8(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.c:248
SwsOpPriv::DECLARE_ALIGNED_16
DECLARE_ALIGNED_16(char, data)[16]
tables
Writing a table generator. This documentation is preliminary. Parts of the API are not good and should be changed. Basic concepts: A table generator consists of two files, *_tablegen.c and *_tablegen.h. The .h file will provide the variable declarations and initialization code for the tables.
Definition: tablegen.txt:10
SwsReadWriteOp
Definition: ops.h:96
SwsSwizzleOp
Definition: ops.h:114
ff_sws_setup_q
int ff_sws_setup_q(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.c:265
SwsOpEntry::swizzle
SwsSwizzleOp swizzle
Definition: ops_chain.h:105
SwsOpEntry::free
void(* free)(void *priv)
Definition: ops_chain.h:115
SwsOpEntry::convert
SwsConvertOp convert
Definition: ops_chain.h:106
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SwsOpImpl
Definition: ops_chain.h:67
SwsOpChain::impl
SwsOpImpl impl[SWS_MAX_OPS+1]
Definition: ops_chain.h:82
SwsOpTable::entries
const SwsOpEntry * entries[]
Definition: ops_chain.h:121
SwsOpPriv::f32
float f32[4]
Definition: ops_chain.h:50
SwsOpEntry::dither_size
int dither_size
Definition: ops_chain.h:108
SwsOpPriv::ptr
void * ptr
Definition: ops_chain.h:46
SwsOpChain
Compiled "chain" of operations, which can be dispatched efficiently.
Definition: ops_chain.h:80
SwsOpEntry::flexible
bool flexible
Definition: ops_chain.h:99
SwsOpEntry::clear_value
int clear_value
Definition: ops_chain.h:109
SwsOpType
SwsOpType
Definition: ops.h:44
SwsOpEntry::func
SwsFuncPtr func
Definition: ops_chain.h:113
cpu.h
SwsOpPriv::u8
uint8_t u8[16]
Definition: ops_chain.h:47
ff_sws_op_compile_tables
int ff_sws_op_compile_tables(const SwsOpTable *const tables[], int num_tables, SwsOpList *ops, const int block_size, SwsOpChain *chain)
"Compile" a single op by looking it up in a list of fixed size op tables.
Definition: ops_chain.c:195
SwsOpChain::num_impl
int num_impl
Definition: ops_chain.h:84
SwsOpEntry
Definition: ops_chain.h:95
SwsOpPriv::u16
uint16_t u16[8]
Definition: ops_chain.h:48
SwsOpChain::free
void(* free[SWS_MAX_OPS+1])(void *)
Definition: ops_chain.h:83
SwsOpTable::cpu_flags
unsigned cpu_flags
Definition: ops_chain.h:119
SwsPackOp
Definition: ops.h:110
ops_internal.h
ff_sws_setup_u
int ff_sws_setup_u(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.c:254
SwsOp
Definition: ops.h:179
ff_sws_op_chain_append
int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, void(*free)(void *), const SwsOpPriv *priv)
Definition: ops_chain.c:47
SwsOpEntry::rw
SwsReadWriteOp rw
Definition: ops_chain.h:103
SwsOpEntry::unused
bool unused[4]
Definition: ops_chain.h:100
SwsOpImpl::priv
SwsOpPriv priv
Definition: ops_chain.h:69
SwsConvertOp
Definition: ops.h:128
ff_sws_setup_q4
int ff_sws_setup_q4(const SwsOp *op, SwsOpPriv *out)
Definition: ops_chain.c:278
SwsOpEntry::pack
SwsPackOp pack
Definition: ops_chain.h:104
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:209
SwsOpPriv
Copyright (C) 2025 Niklas Haas.
Definition: ops_chain.h:42
SwsOpEntry::linear_mask
uint32_t linear_mask
Definition: ops_chain.h:107