Go to the documentation of this file.
26 #include "../ops_chain.h"
28 #define DECL_ENTRY(TYPE, MASK, NAME, ...) \
29 static const SwsOpEntry op_##NAME = { \
30 .type = SWS_PIXEL_##TYPE, \
35 #define DECL_ASM(TYPE, MASK, NAME, ...) \
36 void ff_##NAME(void); \
37 DECL_ENTRY(TYPE, MASK, NAME, \
41 #define DECL_PATTERN(TYPE, NAME, X, Y, Z, W, ...) \
42 DECL_ASM(TYPE, SWS_COMP_MASK(X, Y, Z, W), p##X##Y##Z##W##_##NAME, \
/**
 * Address of the op entry declared for one component pattern.
 *
 * Pastes the four pattern digits and the operation name into the identifier
 * op_p<C0><C1><C2><C3>_<OP> and takes its address, so the result can be used
 * directly as an element of an entry-pointer list.
 */
#define REF_PATTERN(OP, C0, C1, C2, C3) \
    &op_p##C0##C1##C2##C3##_##OP
/**
 * Declare one operation for each of the four common component patterns.
 *
 * Expands to four DECL_PATTERN() invocations, for the patterns (1,0,0,0),
 * (1,0,0,1), (1,1,1,0) and (1,1,1,1), the same four patterns that
 * REF_COMMON_PATTERNS() refers to. TYPE, NAME and the trailing arguments are
 * forwarded unchanged to every invocation.
 *
 * The first three invocations are terminated with ';'. The last one is left
 * unterminated, so the invoking site supplies the final ';'.
 *
 * The final line deliberately carries no line continuation: a trailing
 * backslash would splice whatever line follows the macro into its body.
 */
#define DECL_COMMON_PATTERNS(TYPE, NAME, ...)                                   \
    DECL_PATTERN(TYPE, NAME, 1, 0, 0, 0, __VA_ARGS__);                          \
    DECL_PATTERN(TYPE, NAME, 1, 0, 0, 1, __VA_ARGS__);                          \
    DECL_PATTERN(TYPE, NAME, 1, 1, 1, 0, __VA_ARGS__);                          \
    DECL_PATTERN(TYPE, NAME, 1, 1, 1, 1, __VA_ARGS__)
/**
 * Comma-separated references to the four common pattern variants of OP.
 *
 * Expands to REF_PATTERN() for the patterns (1,0,0,0), (1,0,0,1), (1,1,1,0)
 * and (1,1,1,1), in that order. No trailing comma is emitted.
 */
#define REF_COMMON_PATTERNS(OP)         \
    REF_PATTERN(OP, 1, 0, 0, 0),        \
    REF_PATTERN(OP, 1, 0, 0, 1),        \
    REF_PATTERN(OP, 1, 1, 1, 0),        \
    REF_PATTERN(OP, 1, 1, 1, 1)
66 if (
op->rw.packed &&
op->rw.elems == 3) {
76 #define DECL_RW(EXT, TYPE, NAME, OP, ELEMS, PACKED, FRAC) \
77 DECL_ASM(TYPE, SWS_COMP_ELEMS(ELEMS), NAME##ELEMS##EXT, \
79 .rw = { .elems = ELEMS, .packed = PACKED, .frac = FRAC }, \
/**
 * Declare the packed read and write entries for one bit depth.
 *
 * Expands to six DECL_RW() invocations: read<DEPTH>_packed and
 * write<DEPTH>_packed, each for 2, 3 and 4 elements, with pixel type
 * U<DEPTH>, the PACKED argument set to true and the FRAC argument set to 0.
 * EXT is forwarded unchanged as the name suffix.
 *
 * The invocations are not separated by ';'; any terminator comes from
 * DECL_RW() itself.
 *
 * The final line deliberately carries no line continuation: a trailing
 * backslash would splice whatever line follows the macro into its body.
 */
#define DECL_PACKED_RW(EXT, DEPTH)                                              \
    DECL_RW(EXT, U##DEPTH, read##DEPTH##_packed,  READ,  2, true,  0)           \
    DECL_RW(EXT, U##DEPTH, read##DEPTH##_packed,  READ,  3, true,  0)           \
    DECL_RW(EXT, U##DEPTH, read##DEPTH##_packed,  READ,  4, true,  0)           \
    DECL_RW(EXT, U##DEPTH, write##DEPTH##_packed, WRITE, 2, true,  0)           \
    DECL_RW(EXT, U##DEPTH, write##DEPTH##_packed, WRITE, 3, true,  0)           \
    DECL_RW(EXT, U##DEPTH, write##DEPTH##_packed, WRITE, 4, true,  0)
91 #define DECL_PACK_UNPACK(EXT, TYPE, X, Y, Z, W) \
92 DECL_ASM(TYPE, SWS_COMP(0), pack_##X##Y##Z##W##EXT, \
94 .pack.pattern = {X, Y, Z, W}, \
97 DECL_ASM(TYPE, SWS_COMP_MASK(X, Y, Z, W), unpack_##X##Y##Z##W##EXT, \
98 .op = SWS_OP_UNPACK, \
99 .pack.pattern = {X, Y, Z, W}, \
105 for (
int i = 0;
i < 16;
i++)
110 #define DECL_SWAP_BYTES(EXT, TYPE, X, Y, Z, W) \
111 DECL_ENTRY(TYPE, SWS_COMP_MASK(X, Y, Z, W), \
112 p##X##Y##Z##W##_swap_bytes_##TYPE##EXT, \
113 .op = SWS_OP_SWAP_BYTES, \
114 .func = ff_p##X##Y##Z##W##_shuffle##EXT, \
115 .setup = setup_swap_bytes, \
118 #define DECL_CLEAR_ALPHA(EXT, IDX) \
119 DECL_ASM(U8, SWS_COMP_ALL, clear_alpha##IDX##EXT, \
120 .op = SWS_OP_CLEAR, \
121 .clear.mask = SWS_COMP(IDX), \
122 .clear.value[IDX] = { -1, 1 }, \
125 #define DECL_CLEAR_ZERO(EXT, IDX) \
126 DECL_ASM(U8, SWS_COMP_ALL, clear_zero##IDX##EXT, \
127 .op = SWS_OP_CLEAR, \
128 .clear.mask = SWS_COMP(IDX), \
129 .clear.value[IDX] = { 0, 1 }, \
135 for (
int i = 0;
i < 4;
i++)
136 out->priv.u32[
i] = (uint32_t)
op->clear.value[
i].num;
140 #define DECL_CLEAR(EXT, X, Y, Z, W) \
141 DECL_ASM(U8, SWS_COMP_ALL, p##X##Y##Z##W##_clear##EXT, \
142 .op = SWS_OP_CLEAR, \
143 .setup = setup_clear, \
144 .clear.mask = SWS_COMP_MASK(X, Y, Z, W), \
147 #define DECL_SWIZZLE(EXT, X, Y, Z, W) \
148 DECL_ASM(U8, SWS_COMP_ALL, swizzle_##X##Y##Z##W##EXT, \
149 .op = SWS_OP_SWIZZLE, \
150 .swizzle.in = {X, Y, Z, W}, \
153 #define DECL_CONVERT(EXT, FROM, TO) \
154 DECL_COMMON_PATTERNS(FROM, convert_##FROM##_##TO##EXT, \
155 .op = SWS_OP_CONVERT, \
156 .convert.to = SWS_PIXEL_##TO, \
159 #define DECL_EXPAND(EXT, FROM, TO) \
160 DECL_COMMON_PATTERNS(FROM, expand_##FROM##_##TO##EXT, \
161 .op = SWS_OP_CONVERT, \
162 .convert.to = SWS_PIXEL_##TO, \
163 .convert.expand = true, \
172 #define DECL_SHIFT16(EXT) \
173 DECL_COMMON_PATTERNS(U16, lshift16##EXT, \
174 .op = SWS_OP_LSHIFT, \
175 .setup = setup_shift, \
179 DECL_COMMON_PATTERNS(U16, rshift16##EXT, \
180 .op = SWS_OP_RSHIFT, \
181 .setup = setup_shift, \
185 #define DECL_MIN_MAX(EXT) \
186 DECL_COMMON_PATTERNS(F32, min##EXT, \
188 .setup = ff_sws_setup_clamp, \
192 DECL_COMMON_PATTERNS(F32, max##EXT, \
194 .setup = ff_sws_setup_clamp, \
198 #define DECL_SCALE(EXT) \
199 DECL_COMMON_PATTERNS(F32, scale##EXT, \
200 .op = SWS_OP_SCALE, \
201 .setup = ff_sws_setup_scale, \
205 #define DECL_EXPAND_BITS(EXT, BITS) \
206 DECL_ASM(U##BITS, SWS_COMP(0), expand_bits##BITS##EXT, \
207 .op = SWS_OP_SCALE, \
208 .scale = { .num = ((1 << (BITS)) - 1), .den = 1 }, \
215 if (!
op->dither.size_log2) {
221 const int size = 1 <<
op->dither.size_log2;
222 const int8_t *off =
op->dither.y_offset;
224 for (
int i = 0;
i < 4;
i++) {
226 max_offset =
FFMAX(max_offset, off[
i] & (
size - 1));
234 const int num_rows =
size + max_offset;
241 matrix[
i] = (
float)
op->dither.matrix[
i].num /
op->dither.matrix[
i].den;
246 static_assert(
sizeof(
out->priv.ptr) <=
sizeof(int16_t[4]),
247 ">8 byte pointers not supported");
248 assert(max_offset *
stride <= INT16_MAX);
249 int16_t *off_out = &
out->priv.i16[4];
250 for (
int i = 0;
i < 4;
i++)
251 off_out[
i] = off[
i] >= 0 ? (off[
i] & (
size - 1)) *
stride : -1;
256 #define DECL_DITHER0(EXT) \
257 DECL_COMMON_PATTERNS(F32, dither0##EXT, \
258 .op = SWS_OP_DITHER, \
259 .setup = setup_dither, \
262 #define DECL_DITHER(EXT, SIZE) \
263 DECL_ASM(F32, SWS_COMP_ALL, dither##SIZE##EXT, \
264 .op = SWS_OP_DITHER, \
265 .setup = setup_dither, \
266 .dither_size = SIZE, \
278 for (
int y = 0; y < 4; y++) {
279 for (
int x = 0; x < 5; x++)
280 matrix[y * 5 + x] = (
float)
op->lin.m[y][x].num /
op->lin.m[y][x].den;
286 #define DECL_LINEAR(EXT, NAME, MASK) \
287 DECL_ASM(F32, SWS_COMP_ALL, NAME##EXT, \
288 .op = SWS_OP_LINEAR, \
289 .setup = setup_linear, \
290 .linear_mask = (MASK), \
308 for (
int i = 0;
i <
op->rw.elems;
i++) {
321 static_assert(
sizeof(
out->priv.ptr) <=
sizeof(
int32_t[2]),
322 ">8 byte pointers not supported");
329 for (
int i = 0;
i <
filter->num_weights;
i++)
359 const int taps_align =
sizeof(
int32_t) / pixel_size;
360 const int filter_size =
filter->filter_size;
362 const size_t aligned_size =
FFALIGN(filter_size, taps_align);
363 const size_t line_size =
FFALIGN(
filter->dst_size, block_size);
365 if (aligned_size > INT_MAX)
382 const int mmsize = block_size * 2;
383 const int gather_size = mmsize /
sizeof(
int32_t);
384 for (
size_t x = 0; x < line_size; x += block_size) {
385 const int elems =
FFMIN(block_size,
filter->dst_size - x);
386 for (
int j = 0; j < filter_size; j++) {
387 const int jb = j & ~(taps_align - 1);
388 const int ji = j - jb;
389 const size_t idx_base = x * aligned_size + jb * block_size + ji;
390 for (
int i = 0;
i < elems;
i++) {
391 const int w =
filter->weights[(x +
i) * filter_size + j];
392 size_t idx = idx_base;
404 const int gather_base =
i & ~(gather_size - 1);
405 const int gather_pos =
i - gather_base;
406 const int lane_idx = gather_pos >> 2;
407 const int pos_in_lane = gather_pos & 3;
408 idx += gather_base * 4
409 + (pos_in_lane >> 1) * (mmsize / 2)
411 + (pos_in_lane & 1) * 4;
413 idx +=
i * taps_align;
426 out->priv.uptr[1] = aligned_size;
428 out->over_read = (aligned_size - filter_size) * pixel_size;
463 const int taps_align = 16 / sizeof_weights;
464 const int pixels_align = 4;
465 const int filter_size =
filter->filter_size;
466 const size_t aligned_size =
FFALIGN(filter_size, taps_align);
492 for (
int x = 0; x <
filter->dst_size; x++) {
493 for (
int j = 0; j < filter_size; j++) {
494 const int xb = x & ~(pixels_align - 1);
495 const int jb = j & ~(taps_align - 1);
496 const int xi = x - xb, ji = j - jb;
497 const int w =
filter->weights[x * filter_size + j];
498 const int idx = xb * aligned_size + jb * pixels_align +
xi * taps_align + ji;
509 out->priv.uptr[1] = aligned_size * sizeof_weights;
511 out->over_read = (aligned_size - filter_size) * pixel_size;
515 #define DECL_FILTER(EXT, TYPE, DIR, NAME, ELEMS, ...) \
516 DECL_ASM(TYPE, SWS_COMP_ELEMS(ELEMS), NAME##ELEMS##_##TYPE##EXT, \
519 .rw.filter = SWS_OP_FILTER_##DIR, \
/**
 * Declare one filter kernel for every element count from 1 to 4.
 *
 * Expands to four DECL_FILTER() invocations that differ only in the element
 * count; all other arguments, including the trailing initializer arguments,
 * are forwarded unchanged. The invocations are not separated by ';'.
 */
#define DECL_FILTERS(SUFFIX, PIXEL, AXIS, KERNEL, ...)          \
    DECL_FILTER(SUFFIX, PIXEL, AXIS, KERNEL, 1, __VA_ARGS__)    \
    DECL_FILTER(SUFFIX, PIXEL, AXIS, KERNEL, 2, __VA_ARGS__)    \
    DECL_FILTER(SUFFIX, PIXEL, AXIS, KERNEL, 3, __VA_ARGS__)    \
    DECL_FILTER(SUFFIX, PIXEL, AXIS, KERNEL, 4, __VA_ARGS__)
/**
 * Declare the full set of filter kernels for one pixel type.
 *
 * Four kernel families are declared through DECL_FILTERS(), in this order:
 *  - filter_v:     direction V, setup_filter_v
 *  - filter_fma_v: direction V, setup_filter_v, gated by check_filter_fma
 *  - filter_h:     direction H, setup_filter_h
 *  - filter_4x4_h: direction H, setup_filter_4x4_h, gated by
 *                  check_filter_4x4_h
 */
#define DECL_FILTERS_GENERIC(SUFFIX, PIXEL)                     \
    DECL_FILTERS(SUFFIX, PIXEL, V, filter_v,                    \
                 .setup = setup_filter_v)                       \
    DECL_FILTERS(SUFFIX, PIXEL, V, filter_fma_v,                \
                 .setup = setup_filter_v,                       \
                 .check = check_filter_fma)                     \
    DECL_FILTERS(SUFFIX, PIXEL, H, filter_h,                    \
                 .setup = setup_filter_h)                       \
    DECL_FILTERS(SUFFIX, PIXEL, H, filter_4x4_h,                \
                 .setup = setup_filter_4x4_h,                   \
                 .check = check_filter_4x4_h)
/**
 * Comma-separated references to the 1-, 2-, 3- and 4-element variants of a
 * filter kernel.
 *
 * Each element is the address of op_<KERNEL><n><TAIL> for n = 1..4, in
 * ascending order. No trailing comma is emitted.
 */
#define REF_FILTERS(KERNEL, TAIL)   \
    &op_##KERNEL##1##TAIL,          \
    &op_##KERNEL##2##TAIL,          \
    &op_##KERNEL##3##TAIL,          \
    &op_##KERNEL##4##TAIL
543 #define DECL_FUNCS_8(SIZE, EXT, FLAG) \
544 DECL_RW(EXT, U8, read_planar, READ, 1, false, 0) \
545 DECL_RW(EXT, U8, read_planar, READ, 2, false, 0) \
546 DECL_RW(EXT, U8, read_planar, READ, 3, false, 0) \
547 DECL_RW(EXT, U8, read_planar, READ, 4, false, 0) \
548 DECL_RW(EXT, U8, write_planar, WRITE, 1, false, 0) \
549 DECL_RW(EXT, U8, write_planar, WRITE, 2, false, 0) \
550 DECL_RW(EXT, U8, write_planar, WRITE, 3, false, 0) \
551 DECL_RW(EXT, U8, write_planar, WRITE, 4, false, 0) \
552 DECL_RW(EXT, U8, read_nibbles, READ, 1, false, 1) \
553 DECL_RW(EXT, U8, read_bits, READ, 1, false, 3) \
554 DECL_RW(EXT, U8, write_bits, WRITE, 1, false, 3) \
555 DECL_EXPAND_BITS(EXT, 8) \
556 DECL_PACKED_RW(EXT, 8) \
557 DECL_PACK_UNPACK(EXT, U8, 1, 2, 1, 0) \
558 DECL_PACK_UNPACK(EXT, U8, 3, 3, 2, 0) \
559 DECL_PACK_UNPACK(EXT, U8, 2, 3, 3, 0) \
560 void ff_p1000_shuffle##EXT(void); \
561 void ff_p1001_shuffle##EXT(void); \
562 void ff_p1110_shuffle##EXT(void); \
563 void ff_p1111_shuffle##EXT(void); \
564 DECL_SWIZZLE(EXT, 3, 0, 1, 2) \
565 DECL_SWIZZLE(EXT, 3, 0, 2, 1) \
566 DECL_SWIZZLE(EXT, 2, 1, 0, 3) \
567 DECL_SWIZZLE(EXT, 3, 2, 1, 0) \
568 DECL_SWIZZLE(EXT, 3, 1, 0, 2) \
569 DECL_SWIZZLE(EXT, 3, 2, 0, 1) \
570 DECL_SWIZZLE(EXT, 1, 2, 0, 3) \
571 DECL_SWIZZLE(EXT, 1, 0, 2, 3) \
572 DECL_SWIZZLE(EXT, 2, 0, 1, 3) \
573 DECL_SWIZZLE(EXT, 2, 3, 1, 0) \
574 DECL_SWIZZLE(EXT, 2, 1, 3, 0) \
575 DECL_SWIZZLE(EXT, 1, 2, 3, 0) \
576 DECL_SWIZZLE(EXT, 1, 3, 2, 0) \
577 DECL_SWIZZLE(EXT, 0, 2, 1, 3) \
578 DECL_SWIZZLE(EXT, 0, 2, 3, 1) \
579 DECL_SWIZZLE(EXT, 0, 3, 1, 2) \
580 DECL_SWIZZLE(EXT, 3, 1, 2, 0) \
581 DECL_SWIZZLE(EXT, 0, 3, 2, 1) \
582 DECL_SWIZZLE(EXT, 0, 0, 0, 3) \
583 DECL_SWIZZLE(EXT, 3, 0, 0, 0) \
584 DECL_SWIZZLE(EXT, 0, 0, 0, 1) \
585 DECL_SWIZZLE(EXT, 1, 0, 0, 0) \
586 DECL_CLEAR_ALPHA(EXT, 0) \
587 DECL_CLEAR_ALPHA(EXT, 1) \
588 DECL_CLEAR_ALPHA(EXT, 3) \
589 DECL_CLEAR_ZERO(EXT, 0) \
590 DECL_CLEAR_ZERO(EXT, 1) \
591 DECL_CLEAR_ZERO(EXT, 3) \
592 DECL_CLEAR(EXT, 0, 0, 0, 1) \
593 DECL_CLEAR(EXT, 1, 0, 0, 0) \
594 DECL_CLEAR(EXT, 1, 1, 0, 0) \
595 DECL_CLEAR(EXT, 0, 1, 1, 0) \
596 DECL_CLEAR(EXT, 0, 0, 1, 1) \
597 DECL_CLEAR(EXT, 1, 0, 1, 0) \
598 DECL_CLEAR(EXT, 0, 1, 0, 1) \
599 DECL_CLEAR(EXT, 0, 1, 1, 1) \
600 DECL_CLEAR(EXT, 1, 0, 1, 1) \
601 DECL_CLEAR(EXT, 1, 1, 0, 1) \
603 static const SwsOpTable ops8##EXT = { \
604 .cpu_flags = AV_CPU_FLAG_##FLAG, \
605 .block_size = SIZE, \
607 &op_read_planar1##EXT, \
608 &op_read_planar2##EXT, \
609 &op_read_planar3##EXT, \
610 &op_read_planar4##EXT, \
611 &op_write_planar1##EXT, \
612 &op_write_planar2##EXT, \
613 &op_write_planar3##EXT, \
614 &op_write_planar4##EXT, \
615 &op_read8_packed2##EXT, \
616 &op_read8_packed3##EXT, \
617 &op_read8_packed4##EXT, \
618 &op_write8_packed2##EXT, \
619 &op_write8_packed3##EXT, \
620 &op_write8_packed4##EXT, \
621 &op_read_nibbles1##EXT, \
622 &op_read_bits1##EXT, \
623 &op_write_bits1##EXT, \
624 &op_expand_bits8##EXT, \
625 &op_pack_1210##EXT, \
626 &op_pack_3320##EXT, \
627 &op_pack_2330##EXT, \
628 &op_unpack_1210##EXT, \
629 &op_unpack_3320##EXT, \
630 &op_unpack_2330##EXT, \
631 &op_swizzle_3012##EXT, \
632 &op_swizzle_3021##EXT, \
633 &op_swizzle_2103##EXT, \
634 &op_swizzle_3210##EXT, \
635 &op_swizzle_3102##EXT, \
636 &op_swizzle_3201##EXT, \
637 &op_swizzle_1203##EXT, \
638 &op_swizzle_1023##EXT, \
639 &op_swizzle_2013##EXT, \
640 &op_swizzle_2310##EXT, \
641 &op_swizzle_2130##EXT, \
642 &op_swizzle_1230##EXT, \
643 &op_swizzle_1320##EXT, \
644 &op_swizzle_0213##EXT, \
645 &op_swizzle_0231##EXT, \
646 &op_swizzle_0312##EXT, \
647 &op_swizzle_3120##EXT, \
648 &op_swizzle_0321##EXT, \
649 &op_swizzle_0003##EXT, \
650 &op_swizzle_0001##EXT, \
651 &op_swizzle_3000##EXT, \
652 &op_swizzle_1000##EXT, \
653 &op_clear_alpha0##EXT, \
654 &op_clear_alpha1##EXT, \
655 &op_clear_alpha3##EXT, \
656 &op_clear_zero0##EXT, \
657 &op_clear_zero1##EXT, \
658 &op_clear_zero3##EXT, \
659 REF_PATTERN(clear##EXT, 0, 0, 0, 1), \
660 REF_PATTERN(clear##EXT, 1, 0, 0, 0), \
661 REF_PATTERN(clear##EXT, 1, 1, 0, 0), \
662 REF_PATTERN(clear##EXT, 0, 1, 1, 0), \
663 REF_PATTERN(clear##EXT, 0, 0, 1, 1), \
664 REF_PATTERN(clear##EXT, 1, 0, 1, 0), \
665 REF_PATTERN(clear##EXT, 0, 1, 0, 1), \
666 REF_PATTERN(clear##EXT, 0, 1, 1, 1), \
667 REF_PATTERN(clear##EXT, 1, 0, 1, 1), \
668 REF_PATTERN(clear##EXT, 1, 1, 0, 1), \
673 #define DECL_FUNCS_16(SIZE, EXT, FLAG) \
674 DECL_PACKED_RW(EXT, 16) \
675 DECL_EXPAND_BITS(EXT, 16) \
676 DECL_PACK_UNPACK(EXT, U16, 4, 4, 4, 0) \
677 DECL_PACK_UNPACK(EXT, U16, 5, 5, 5, 0) \
678 DECL_PACK_UNPACK(EXT, U16, 5, 6, 5, 0) \
679 DECL_SWAP_BYTES(EXT, U16, 1, 0, 0, 0) \
680 DECL_SWAP_BYTES(EXT, U16, 1, 0, 0, 1) \
681 DECL_SWAP_BYTES(EXT, U16, 1, 1, 1, 0) \
682 DECL_SWAP_BYTES(EXT, U16, 1, 1, 1, 1) \
684 DECL_CONVERT(EXT, U8, U16) \
685 DECL_CONVERT(EXT, U16, U8) \
686 DECL_EXPAND(EXT, U8, U16) \
688 static const SwsOpTable ops16##EXT = { \
689 .cpu_flags = AV_CPU_FLAG_##FLAG, \
690 .block_size = SIZE, \
692 &op_read16_packed2##EXT, \
693 &op_read16_packed3##EXT, \
694 &op_read16_packed4##EXT, \
695 &op_write16_packed2##EXT, \
696 &op_write16_packed3##EXT, \
697 &op_write16_packed4##EXT, \
698 &op_pack_4440##EXT, \
699 &op_pack_5550##EXT, \
700 &op_pack_5650##EXT, \
701 &op_unpack_4440##EXT, \
702 &op_unpack_5550##EXT, \
703 &op_unpack_5650##EXT, \
704 &op_expand_bits16##EXT, \
705 REF_COMMON_PATTERNS(swap_bytes_U16##EXT), \
706 REF_COMMON_PATTERNS(convert_U8_U16##EXT), \
707 REF_COMMON_PATTERNS(convert_U16_U8##EXT), \
708 REF_COMMON_PATTERNS(expand_U8_U16##EXT), \
709 REF_COMMON_PATTERNS(lshift16##EXT), \
710 REF_COMMON_PATTERNS(rshift16##EXT), \
715 #define DECL_FUNCS_32(SIZE, EXT, FLAG) \
716 DECL_PACKED_RW(_m2##EXT, 32) \
717 DECL_PACK_UNPACK(_m2##EXT, U32, 10, 10, 10, 2) \
718 DECL_PACK_UNPACK(_m2##EXT, U32, 2, 10, 10, 10) \
719 DECL_SWAP_BYTES(_m2##EXT, U32, 1, 0, 0, 0) \
720 DECL_SWAP_BYTES(_m2##EXT, U32, 1, 0, 0, 1) \
721 DECL_SWAP_BYTES(_m2##EXT, U32, 1, 1, 1, 0) \
722 DECL_SWAP_BYTES(_m2##EXT, U32, 1, 1, 1, 1) \
723 DECL_CONVERT(EXT, U8, U32) \
724 DECL_CONVERT(EXT, U32, U8) \
725 DECL_CONVERT(EXT, U16, U32) \
726 DECL_CONVERT(EXT, U32, U16) \
727 DECL_CONVERT(EXT, U8, F32) \
728 DECL_CONVERT(EXT, F32, U8) \
729 DECL_CONVERT(EXT, U16, F32) \
730 DECL_CONVERT(EXT, F32, U16) \
731 DECL_EXPAND(EXT, U8, U32) \
735 DECL_DITHER(EXT, 1) \
736 DECL_DITHER(EXT, 2) \
737 DECL_DITHER(EXT, 3) \
738 DECL_DITHER(EXT, 4) \
739 DECL_DITHER(EXT, 5) \
740 DECL_DITHER(EXT, 6) \
741 DECL_DITHER(EXT, 7) \
742 DECL_DITHER(EXT, 8) \
743 DECL_LINEAR(EXT, luma, SWS_MASK_LUMA) \
744 DECL_LINEAR(EXT, alpha, SWS_MASK_ALPHA) \
745 DECL_LINEAR(EXT, lumalpha, SWS_MASK_LUMA | SWS_MASK_ALPHA) \
746 DECL_LINEAR(EXT, yalpha, SWS_MASK(1, 1)) \
747 DECL_LINEAR(EXT, dot3, 0x7) \
748 DECL_LINEAR(EXT, dot3a, 0x7 | SWS_MASK_ALPHA) \
749 DECL_LINEAR(EXT, row0, SWS_MASK_ROW(0) ^ SWS_MASK(0, 3)) \
750 DECL_LINEAR(EXT, diag3, SWS_MASK_DIAG3) \
751 DECL_LINEAR(EXT, diag4, SWS_MASK_DIAG4) \
752 DECL_LINEAR(EXT, diagoff3, SWS_MASK_DIAG3 | SWS_MASK_OFF3) \
753 DECL_LINEAR(EXT, affine3, SWS_MASK_MAT3 | SWS_MASK_OFF3) \
754 DECL_LINEAR(EXT, affine3uv, \
755 SWS_MASK_MAT3 | SWS_MASK_OFF(1) | SWS_MASK_OFF(2)) \
756 DECL_LINEAR(EXT, affine3x, \
757 SWS_MASK_MAT3 ^ SWS_MASK(0, 1) | SWS_MASK_OFF3) \
758 DECL_LINEAR(EXT, affine3xa, \
759 SWS_MASK_MAT3 ^ SWS_MASK(0, 1) | SWS_MASK_OFF3 | SWS_MASK_ALPHA) \
760 DECL_LINEAR(EXT, affine3xy, \
761 SWS_MASK_MAT3 ^ SWS_MASK(0, 0) ^ SWS_MASK(0, 1) | SWS_MASK_OFF3) \
762 DECL_LINEAR(EXT, affine3a, \
763 SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA) \
764 DECL_FILTERS_GENERIC(EXT, U8) \
765 DECL_FILTERS_GENERIC(EXT, U16) \
766 DECL_FILTERS_GENERIC(EXT, F32) \
768 static const SwsOpTable ops32##EXT = { \
769 .cpu_flags = AV_CPU_FLAG_##FLAG, \
770 .block_size = SIZE, \
772 &op_read32_packed2_m2##EXT, \
773 &op_read32_packed3_m2##EXT, \
774 &op_read32_packed4_m2##EXT, \
775 &op_write32_packed2_m2##EXT, \
776 &op_write32_packed3_m2##EXT, \
777 &op_write32_packed4_m2##EXT, \
778 &op_pack_1010102_m2##EXT, \
779 &op_pack_2101010_m2##EXT, \
780 &op_unpack_1010102_m2##EXT, \
781 &op_unpack_2101010_m2##EXT, \
782 REF_COMMON_PATTERNS(swap_bytes_U32_m2##EXT), \
783 REF_COMMON_PATTERNS(convert_U8_U32##EXT), \
784 REF_COMMON_PATTERNS(convert_U32_U8##EXT), \
785 REF_COMMON_PATTERNS(convert_U16_U32##EXT), \
786 REF_COMMON_PATTERNS(convert_U32_U16##EXT), \
787 REF_COMMON_PATTERNS(convert_U8_F32##EXT), \
788 REF_COMMON_PATTERNS(convert_F32_U8##EXT), \
789 REF_COMMON_PATTERNS(convert_U16_F32##EXT), \
790 REF_COMMON_PATTERNS(convert_F32_U16##EXT), \
791 REF_COMMON_PATTERNS(expand_U8_U32##EXT), \
792 REF_COMMON_PATTERNS(min##EXT), \
793 REF_COMMON_PATTERNS(max##EXT), \
794 REF_COMMON_PATTERNS(scale##EXT), \
795 REF_COMMON_PATTERNS(dither0##EXT), \
815 &op_affine3uv##EXT, \
817 &op_affine3xa##EXT, \
818 &op_affine3xy##EXT, \
820 REF_FILTERS(filter_fma_v, _U8##EXT), \
821 REF_FILTERS(filter_fma_v, _U16##EXT), \
822 REF_FILTERS(filter_fma_v, _F32##EXT), \
823 REF_FILTERS(filter_4x4_h, _U8##EXT), \
824 REF_FILTERS(filter_4x4_h, _U16##EXT), \
825 REF_FILTERS(filter_4x4_h, _F32##EXT), \
826 REF_FILTERS(filter_v, _U8##EXT), \
827 REF_FILTERS(filter_v, _U16##EXT), \
828 REF_FILTERS(filter_v, _F32##EXT), \
829 REF_FILTERS(filter_h, _U8##EXT), \
830 REF_FILTERS(filter_h, _U16##EXT), \
831 REF_FILTERS(filter_h, _F32##EXT), \
877 return !(
op->rw.elems > 1 &&
op->rw.packed) && !
op->rw.frac && !
op->rw.filter;
886 static int movsize(
const int bytes,
const int mmsize)
888 return bytes <= 4 ? 4 :
909 const int num_lanes = mmsize / 16;
917 .block_size = pixels * num_lanes,
918 .over_read =
movsize(in_total, mmsize) - in_total,
919 .over_write =
movsize(out_total, mmsize) - out_total,
928 #define ASSIGN_SHUFFLE_FUNC(IN, OUT, EXT) \
930 SWS_DECL_FUNC(ff_packed_shuffle##IN##_##OUT##_##EXT); \
931 if (in_total == IN && out_total == OUT) \
932 out->func = ff_packed_shuffle##IN##_##OUT##_##EXT; \
960 static_assert(
sizeof(uint32_t) ==
sizeof(
int),
"int size mismatch");
969 for (
int i = 0;
i < 4;
i++) {
973 case 1:
c.u32 = 0x1010101
U * res.
priv.
u8[
i];
break;
978 op->clear.value[
i].num =
c.i;
979 op->clear.value[
i].den = 1;
1010 int op_block_size =
out->block_size;
1021 ops,
i, op_block_size, chain);
1029 #define ASSIGN_PROCESS_FUNC(NAME) \
1031 SWS_DECL_FUNC(NAME); \
1037 const int read_planes =
read ? (
read->rw.packed ? 1 :
read->rw.elems) : 0;
1039 switch (
FFMAX(read_planes, write_planes)) {
Copyright (C) 2025 Niklas Haas.
static bool check_filter_fma(const SwsImplParams *params)
Filter design: in this document, the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format negotiation: the query_formats method should set, for each input and each output link, the list of supported formats. For video links, that means pixel format. For audio links, that means channel layout, sample format and sample rate. The lists are not just lists, they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining list, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
int ff_sws_setup_clear(const SwsImplParams *params, SwsImplResult *out)
#define ASSIGN_PROCESS_FUNC(NAME)
static av_const int get_mmsize(const int cpu_flags)
const SwsOp * ff_sws_op_list_input(const SwsOpList *ops)
Returns the input operation for a given op list, or NULL if there is none (e.g.
int ff_sws_op_list_max_size(const SwsOpList *ops)
Returns the size of the largest pixel type used in ops.
const SwsOpBackend backend_x86
static void normalize_clear(SwsOp *op)
Represents a computed filter kernel.
static void read_bytes(const uint8_t *src, float *dst, int src_stride, int dst_stride, int width, int height, float scale)
#define DECL_FUNCS_32(SIZE, EXT, FLAG)
void(* filter)(uint8_t *src, int stride, int qscale)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static atomic_int cpu_flags
static int setup_linear(const SwsImplParams *params, SwsImplResult *out)
int ff_sws_pixel_type_size(SwsPixelType type)
void * av_memdup(const void *p, size_t size)
Duplicate a buffer with av_malloc().
static int setup_dither(const SwsImplParams *params, SwsImplResult *out)
#define SWS_COMP_TEST(mask, X)
bool ff_sws_pixel_type_is_int(SwsPixelType type)
#define AV_CPU_FLAG_SLOW_GATHER
CPU has slow gathers.
#define AV_CPU_FLAG_AVX512
AVX-512 functions: requires OS support even if YMM/ZMM registers aren't used.
void(* free[SWS_MAX_OPS+1])(SwsOpPriv *)
#define AV_LOG_TRACE
Extremely verbose debugging, useful for libav* development.
#define FF_ARRAY_ELEMS(a)
SwsOpChain * ff_sws_op_chain_alloc(void)
int flags
Flags modifying the (de)muxer behaviour.
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
static int setup_clear(const SwsImplParams *params, SwsImplResult *out)
static AVFormatContext * ctx
#define AV_CPU_FLAG_SSE4
Penryn SSE4.1 functions.
const SwsOp * ff_sws_op_list_output(const SwsOpList *ops)
Returns the output operation for a given op list, or NULL if there is none.
SwsFilterWeights * kernel
Compiled "chain" of operations, which can be dispatched efficiently.
Rational number (pair of numerator and denominator).
static const SwsOpTable *const tables[]
static bool check_filter_4x4_h(const SwsImplParams *params)
static int setup_rw(const SwsImplParams *params, SwsImplResult *out)
static int solve_shuffle(const SwsOpList *ops, int mmsize, SwsCompiledOp *out)
static int setup_filter_4x4_h(const SwsImplParams *params, SwsImplResult *out)
Undefined Behavior: in the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, or accessing outside allocated space. Undefined behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c… (excerpt truncated)
@ SWS_FILTER_SCALE
14-bit coefficients are picked to fit comfortably within int16_t for efficient SIMD processing (e....
#define AV_CPU_FLAG_AVX2
AVX2 functions: requires OS support even if YMM registers aren't used.
#define i(width, name, range_min, range_max)
static int movsize(const int bytes, const int mmsize)
static int setup_swap_bytes(const SwsImplParams *params, SwsImplResult *out)
int ff_sws_op_compile_tables(SwsContext *ctx, const SwsOpTable *const tables[], int num_tables, SwsOpList *ops, int ops_index, const int block_size, SwsOpChain *chain)
"Compile" a single op by looking it up in a list of fixed size op tables.
void ff_sws_op_chain_free_cb(void *ptr)
static int compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
static void ff_sws_op_chain_free(SwsOpChain *chain)
static const int weights[]
static bool op_is_type_invariant(const SwsOp *op)
Returns true if the operation's implementation only depends on the block size, and not the underlying...
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
static int hscale_sizeof_weight(const SwsOp *op)
static void write_bytes(const float *src, uint8_t *dst, int src_stride, int dst_stride, int width, int height, int depth, float scale)
void * av_calloc(size_t nmemb, size_t size)
static void ff_op_priv_free(SwsOpPriv *priv)
static int av_cmp_q(AVRational a, AVRational b)
Compare two rationals.
static int setup_shift(const SwsImplParams *params, SwsImplResult *out)
#define ASSIGN_SHUFFLE_FUNC(IN, OUT, EXT)
int ff_sws_solve_shuffle(const SwsOpList *ops, uint8_t shuffle[], int size, uint8_t clear_val, int *read_bytes, int *write_bytes)
"Solve" an op list into a fixed shuffle mask, with an optional ability to also directly clear the out...
AVRational av_mul_q(AVRational b, AVRational c)
Multiply two rationals.
uint8_t elems
Examples: rgba = 4x u8 packed yuv444p = 3x u8 rgb565 = 1x u16 <- use SWS_OP_UNPACK to unpack monow = ...
static void scale(int *out, const int *in, const int w, const int h, const int shift)
static int setup_filter_v(const SwsImplParams *params, SwsImplResult *out)
#define DECL_FUNCS_16(SIZE, EXT, FLAG)
#define xi(width, name, var, range_min, range_max, subs,...)
Helper struct for representing a list of operations.
#define DECL_FUNCS_8(SIZE, EXT, FLAG)
Main external API structure.
static uint64_t shuffle(uint64_t in, const uint8_t *shuffle, int shuffle_len)
static uint32_t BS_FUNC() read(BSCTX *bc, unsigned int n)
Return n bits from the buffer, n has to be in the 0-32 range.
static int setup_filter_h(const SwsImplParams *params, SwsImplResult *out)