FFmpeg
ops_impl.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2026 Ramiro Polla
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef SWSCALE_AARCH64_OPS_IMPL_H
22 #define SWSCALE_AARCH64_OPS_IMPL_H
23 
24 #include <assert.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 
28 /* Similar to SwsPixelType */
29 typedef enum SwsAArch64PixelType {
36 
37 /* Similar to SwsOpType */
38 typedef enum SwsAArch64OpType {
66 
/*
 * Each nibble in the mask corresponds to one component.
 * MASK_GET()/MASK_SET() address component idx at bits
 * [4 * idx, 4 * idx + 3], so the 16 bits hold four components.
 */
typedef uint16_t SwsAArch64OpMask;
69 
/**
 * Affine coefficient mask for linear op. Packs a 4x5 matrix in execution
 * order, where the offset is the first element, with 2 bits per element:
 *  00: m[i][j] == 0
 *  01: m[i][j] == 1
 *  11: m[i][j] is any other coefficient
 *
 * These encodings are named LINEAR_MASK_0, LINEAR_MASK_1 and LINEAR_MASK_X;
 * the bit pattern 10 is not assigned. LINEAR_MASK_GET()/LINEAR_MASK_SET()
 * place element (i, j) at bit offset 2 * (5 * i + j), so a 4x5 matrix
 * occupies the low 40 bits of the 64-bit value.
 */
typedef uint64_t SwsAArch64LinearOpMask;
78 
79 typedef struct SwsAArch64LinearOp {
81  uint8_t fmla;
83 
84 typedef struct SwsAArch64DitherOp {
85  uint16_t y_offset;
86  uint8_t size_log2;
88 
89 /**
90  * SwsAArch64OpImplParams describes the parameters for an SwsAArch64OpType
91  * operation. It consists of simplified parameters from the SwsOp structure,
92  * with the purpose of being straight-forward to implement and execute.
93  */
94 typedef struct SwsAArch64OpImplParams {
98  uint8_t block_size;
99  union {
100  uint8_t shift;
106  };
108 
/* SwsAArch64OpMask-related helpers. */

/* Read the nibble that belongs to component idx (0..3) of mask. */
#define MASK_GET(mask, idx)      (((mask) >> ((idx) << 2)) & 0xf)
/* OR the low nibble of val into component idx of mask. Bits that are
 * already set are kept, so start from a zeroed mask. */
#define MASK_SET(mask, idx, val) do { (mask) |= (((val) & 0xf) << ((idx) << 2)); } while (0)

/*
 * Execute the statement that follows once per component whose nibble in
 * mask is non-zero; idx is declared by the macro and holds the component
 * index. LOOP counts 0..3, LOOP_BWD counts 3..0.
 *
 * The "if (!...) {} else" tail makes the macro own its else branch, so an
 * else written by the caller after the loop body cannot attach to the
 * hidden if.
 */
#define LOOP(mask, idx)                                                     \
    for (int idx = 0; idx < 4; idx++)                                       \
        if (!MASK_GET(mask, idx)) {} else
#define LOOP_BWD(mask, idx)                                                 \
    for (int idx = 3; idx >= 0; idx--)                                      \
        if (!MASK_GET(mask, idx)) {} else

/* Same as LOOP/LOOP_BWD, reading the mask from p->mask. p may be any
 * pointer expression. */
#define LOOP_MASK(p, idx)     LOOP((p)->mask, idx)
#define LOOP_MASK_BWD(p, idx) LOOP_BWD((p)->mask, idx)

/*
 * SwsAArch64LinearOpMask accessors. Element (idx, jdx) of the 4x5 matrix
 * uses the two bits starting at bit 2 * (5 * idx + jdx). The mask is
 * widened to SwsAArch64LinearOpMask before shifting because the shift
 * count reaches 38, which would be undefined on a narrower operand.
 * LINEAR_MASK_SET only ORs bits in; it does not clear a previous value.
 */
#define LINEAR_MASK_GET(mask, idx, jdx)                                     \
    ((((SwsAArch64LinearOpMask) (mask)) >> (2 * (5 * (idx) + (jdx)))) & 3)
#define LINEAR_MASK_SET(mask, idx, jdx, val) do {                           \
    (mask) |= ((((SwsAArch64LinearOpMask) (val)) & 3) << (2 * (5 * (idx) + (jdx)))); \
} while (0)
#define LINEAR_MASK_0 0 /* coefficient is 0 */
#define LINEAR_MASK_1 1 /* coefficient is 1 */
#define LINEAR_MASK_X 3 /* any other coefficient */

/*
 * Execute the statement that follows for every (idx, jdx) where component
 * idx is enabled in p->mask and the element (idx, jdx) of p->linear.mask
 * is not LINEAR_MASK_0. Rows are visited in ascending order, columns 0..4
 * within each row.
 */
#define LOOP_LINEAR_MASK(p, idx, jdx)                                       \
    LOOP_MASK(p, idx)                                                       \
        for (int jdx = 0; jdx < 5; jdx++)                                   \
            if (!LINEAR_MASK_GET((p)->linear.mask, idx, jdx)) {} else
136 
137 /* Compute number of vector registers needed to store all coefficients. */
138 static inline int linear_num_vregs(const SwsAArch64OpImplParams *params)
139 {
140  int count = 0;
141  LOOP_LINEAR_MASK(params, i, j)
142  count++;
143  return (count + 3) / 4;
144 }
145 
/**
 * Translate a column index of the linear mask (execution order, offset
 * first) into the column index used on the SwsOp side.
 *
 * Index 0 maps to 4 and indices 1..4 map to 0..3.
 * NOTE(review): presumably the SwsOp matrix keeps the offset in its last
 * column - confirm against the SwsOp definition.
 *
 * @param idx column index in execution order, must be in 0..4
 * @return    the reordered column index
 */
static inline int linear_index_to_sws_op(int idx)
{
    /* static const: the table is immutable and need not be rebuilt on
     * every call. */
    static const int reorder_col[5] = { 4, 0, 1, 2, 3 };

    /* An index outside the table would read out of bounds. */
    assert(idx >= 0 && idx < 5);
    return reorder_col[idx];
}
151 
/* Return 1 when idx is the offset column (index 0 in execution order),
 * 0 for every other index. */
static inline int linear_index_is_offset(int idx)
{
    return !idx;
}
156 
/* Map a column index in execution order to its vx index: columns 1..4
 * become 0..3. */
static inline int linear_index_to_vx(int idx)
{
    /* The offset column has no vx of its own; it is mapped to 0 only so
     * that UBSan has nothing to complain about. */
    return linear_index_is_offset(idx) ? 0 : idx - 1;
}
165 
/**
 * These values will be used by ops_asmgen to access fields inside of
 * SwsOpExec and SwsOpImpl. The offsets and the size are checked below
 * when compiling for AArch64 to make sure there is no mismatch.
 */
#define offsetof_exec_in        0
#define offsetof_exec_out       32
#define offsetof_exec_in_bump   128
#define offsetof_exec_out_bump  160
#define offsetof_impl_cont      0
#define offsetof_impl_priv      16
#define sizeof_impl             32

#if ARCH_AARCH64 && HAVE_NEON
static_assert(offsetof_exec_in       == offsetof(SwsOpExec, in),       "SwsOpExec layout mismatch");
static_assert(offsetof_exec_out      == offsetof(SwsOpExec, out),      "SwsOpExec layout mismatch");
static_assert(offsetof_exec_in_bump  == offsetof(SwsOpExec, in_bump),  "SwsOpExec layout mismatch");
static_assert(offsetof_exec_out_bump == offsetof(SwsOpExec, out_bump), "SwsOpExec layout mismatch");
static_assert(offsetof_impl_cont     == offsetof(SwsOpImpl, cont),     "SwsOpImpl layout mismatch");
static_assert(offsetof_impl_priv     == offsetof(SwsOpImpl, priv),     "SwsOpImpl layout mismatch");
/* sizeof_impl must be verified like the offsets, otherwise a change to
 * SwsOpImpl could go unnoticed. */
static_assert(sizeof_impl            == sizeof(SwsOpImpl),             "SwsOpImpl layout mismatch");
#endif
187 
188 #endif /* SWSCALE_AARCH64_OPS_IMPL_H */
AARCH64_SWS_OP_MIN
@ AARCH64_SWS_OP_MIN
Definition: ops_impl.h:59
linear_index_to_vx
static int linear_index_to_vx(int idx)
Definition: ops_impl.h:157
linear_index_to_sws_op
static int linear_index_to_sws_op(int idx)
Definition: ops_impl.h:146
out
static FILE * out
Definition: movenc.c:55
AARCH64_SWS_OP_SWIZZLE
@ AARCH64_SWS_OP_SWIZZLE
Definition: ops_impl.h:51
AARCH64_PIXEL_TYPE_NB
@ AARCH64_PIXEL_TYPE_NB
Definition: ops_impl.h:34
AARCH64_SWS_OP_CLEAR
@ AARCH64_SWS_OP_CLEAR
Definition: ops_impl.h:56
AARCH64_SWS_OP_NONE
@ AARCH64_SWS_OP_NONE
Definition: ops_impl.h:39
SwsAArch64OpImplParams::swizzle
SwsAArch64OpMask swizzle
Definition: ops_impl.h:101
AARCH64_SWS_OP_READ_NIBBLE
@ AARCH64_SWS_OP_READ_NIBBLE
Definition: ops_impl.h:43
AARCH64_SWS_OP_PACK
@ AARCH64_SWS_OP_PACK
Definition: ops_impl.h:53
AARCH64_SWS_OP_SWAP_BYTES
@ AARCH64_SWS_OP_SWAP_BYTES
Definition: ops_impl.h:50
SwsAArch64LinearOp::fmla
uint8_t fmla
Definition: ops_impl.h:81
AARCH64_SWS_OP_READ_BIT
@ AARCH64_SWS_OP_READ_BIT
Definition: ops_impl.h:42
AARCH64_SWS_OP_MAX
@ AARCH64_SWS_OP_MAX
Definition: ops_impl.h:60
SwsAArch64OpImplParams::to_type
SwsAArch64PixelType to_type
Definition: ops_impl.h:103
SwsAArch64LinearOpMask
uint64_t SwsAArch64LinearOpMask
Affine coefficient mask for linear op.
Definition: ops_impl.h:77
AARCH64_SWS_OP_WRITE_NIBBLE
@ AARCH64_SWS_OP_WRITE_NIBBLE
Definition: ops_impl.h:47
SwsAArch64OpMask
uint16_t SwsAArch64OpMask
Definition: ops_impl.h:68
AARCH64_SWS_OP_DITHER
@ AARCH64_SWS_OP_DITHER
Definition: ops_impl.h:63
AARCH64_SWS_OP_RSHIFT
@ AARCH64_SWS_OP_RSHIFT
Definition: ops_impl.h:55
AARCH64_SWS_OP_LINEAR
@ AARCH64_SWS_OP_LINEAR
Definition: ops_impl.h:62
SwsAArch64OpImplParams::shift
uint8_t shift
Definition: ops_impl.h:100
offsetof_impl_cont
#define offsetof_impl_cont
Definition: ops_impl.h:175
SwsOpImpl
Definition: ops_chain.h:71
AARCH64_SWS_OP_CONVERT
@ AARCH64_SWS_OP_CONVERT
Definition: ops_impl.h:57
AARCH64_SWS_OP_PROCESS
@ AARCH64_SWS_OP_PROCESS
Definition: ops_impl.h:40
AARCH64_PIXEL_F32
@ AARCH64_PIXEL_F32
Definition: ops_impl.h:33
SwsOpExec
Copyright (C) 2026 Niklas Haas.
Definition: ops_dispatch.h:35
AARCH64_SWS_OP_SCALE
@ AARCH64_SWS_OP_SCALE
Definition: ops_impl.h:61
offsetof_exec_out_bump
#define offsetof_exec_out_bump
Definition: ops_impl.h:174
SwsAArch64OpImplParams::dither
SwsAArch64DitherOp dither
Definition: ops_impl.h:105
AARCH64_SWS_OP_READ_PACKED
@ AARCH64_SWS_OP_READ_PACKED
Definition: ops_impl.h:44
offsetof_exec_out
#define offsetof_exec_out
Definition: ops_impl.h:172
SwsAArch64OpImplParams::pack
SwsAArch64OpMask pack
Definition: ops_impl.h:102
AARCH64_SWS_OP_WRITE_PLANAR
@ AARCH64_SWS_OP_WRITE_PLANAR
Definition: ops_impl.h:49
SwsAArch64OpImplParams::mask
SwsAArch64OpMask mask
Definition: ops_impl.h:96
AARCH64_SWS_OP_LSHIFT
@ AARCH64_SWS_OP_LSHIFT
Definition: ops_impl.h:54
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
AARCH64_SWS_OP_TYPE_NB
@ AARCH64_SWS_OP_TYPE_NB
Definition: ops_impl.h:64
SwsAArch64OpImplParams::op
SwsAArch64OpType op
Definition: ops_impl.h:95
SwsAArch64DitherOp
Definition: ops_impl.h:84
AARCH64_SWS_OP_WRITE_BIT
@ AARCH64_SWS_OP_WRITE_BIT
Definition: ops_impl.h:46
AARCH64_SWS_OP_READ_PLANAR
@ AARCH64_SWS_OP_READ_PLANAR
Definition: ops_impl.h:45
AARCH64_SWS_OP_EXPAND
@ AARCH64_SWS_OP_EXPAND
Definition: ops_impl.h:58
AARCH64_SWS_OP_UNPACK
@ AARCH64_SWS_OP_UNPACK
Definition: ops_impl.h:52
SwsAArch64OpImplParams::linear
SwsAArch64LinearOp linear
Definition: ops_impl.h:104
SwsAArch64DitherOp::size_log2
uint8_t size_log2
Definition: ops_impl.h:86
offsetof_impl_priv
#define offsetof_impl_priv
Definition: ops_impl.h:176
offsetof_exec_in
#define offsetof_exec_in
These values will be used by ops_asmgen to access fields inside of SwsOpExec and SwsOpImpl.
Definition: ops_impl.h:171
AARCH64_PIXEL_U8
@ AARCH64_PIXEL_U8
Definition: ops_impl.h:30
AARCH64_PIXEL_U32
@ AARCH64_PIXEL_U32
Definition: ops_impl.h:32
SwsAArch64OpType
SwsAArch64OpType
Definition: ops_impl.h:38
SwsAArch64LinearOp::mask
SwsAArch64LinearOpMask mask
Definition: ops_impl.h:80
LOOP_LINEAR_MASK
#define LOOP_LINEAR_MASK(p, idx, jdx)
Definition: ops_impl.h:132
AARCH64_SWS_OP_WRITE_PACKED
@ AARCH64_SWS_OP_WRITE_PACKED
Definition: ops_impl.h:48
SwsAArch64OpImplParams::block_size
uint8_t block_size
Definition: ops_impl.h:98
SwsAArch64OpImplParams
SwsAArch64OpImplParams describes the parameters for an SwsAArch64OpType operation.
Definition: ops_impl.h:94
SwsAArch64OpImplParams::type
SwsAArch64PixelType type
Definition: ops_impl.h:97
AARCH64_SWS_OP_PROCESS_RETURN
@ AARCH64_SWS_OP_PROCESS_RETURN
Definition: ops_impl.h:41
AARCH64_PIXEL_U16
@ AARCH64_PIXEL_U16
Definition: ops_impl.h:31
offsetof_exec_in_bump
#define offsetof_exec_in_bump
Definition: ops_impl.h:173
linear_index_is_offset
static int linear_index_is_offset(int idx)
Definition: ops_impl.h:152
linear_num_vregs
static int linear_num_vregs(const SwsAArch64OpImplParams *params)
Definition: ops_impl.h:138
SwsAArch64PixelType
SwsAArch64PixelType
Definition: ops_impl.h:29
SwsAArch64DitherOp::y_offset
uint16_t y_offset
Definition: ops_impl.h:85
SwsAArch64LinearOp
Definition: ops_impl.h:79