FFmpeg
vp3dsp.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17  */
18 
19 #include <stddef.h>
20 #include <string.h>
21 
22 #include "checkasm.h"
23 #include "libavutil/intreadwrite.h"
24 #include "libavutil/macros.h"
25 #include "libavutil/mem_internal.h"
26 #include "libavcodec/vp3dsp.h"
27 
/**
 * Fill two buffers with identical pseudo-random content.
 *
 * The first (size & ~3) bytes are written four at a time with AV_WN32A();
 * any remaining tail bytes are written one by one. Both buffers receive
 * exactly the same bytes, so they compare equal afterwards.
 *
 * Every macro argument is parenthesized and `size` is evaluated exactly
 * once, so arbitrary expressions (e.g. `cond ? a : b`) are safe to pass.
 *
 * @param buf0 first buffer  (presumably must satisfy AV_WN32A()'s
 *             alignment requirement -- confirm against intreadwrite.h)
 * @param buf1 second buffer (same requirement as buf0)
 * @param size number of bytes to fill in each buffer
 */
#define randomize_buffers(buf0, buf1, size)                       \
    do {                                                          \
        uint8_t *const rb_b0 = (uint8_t *)(buf0);                 \
        uint8_t *const rb_b1 = (uint8_t *)(buf1);                 \
        const size_t rb_size    = (size);                         \
        const size_t rb_aligned = rb_size & ~(size_t)3;           \
        for (size_t k = 0; k < rb_aligned; k += 4) {              \
            uint32_t r = rnd();                                   \
            AV_WN32A(rb_b0 + k, r);                               \
            AV_WN32A(rb_b1 + k, r);                               \
        }                                                         \
        for (size_t k = rb_aligned; k < rb_size; ++k)             \
            rb_b0[k] = rb_b1[k] = rnd();                          \
    } while (0)
39 
40 static void vp3_check_put_no_rnd_pixels_l2(const VP3DSPContext *const vp3dsp)
41 {
42  enum {
43  MAX_STRIDE = 64,
44  HEIGHT = 8, ///< only used height, so only tested height
45  WIDTH = 8,
46  BUF_SIZE = MAX_STRIDE * (HEIGHT - 1) + WIDTH,
47  SRC_BUF_SIZE = BUF_SIZE + (WIDTH - 1), ///< WIDTH-1 to use misaligned input
48  };
49  declare_func(void, uint8_t *dst,
50  const uint8_t *a, const uint8_t *b,
51  ptrdiff_t stride, int h);
52 
53  if (!check_func(vp3dsp->put_no_rnd_pixels_l2, "put_no_rnd_pixels_l2"))
54  return;
55 
56  DECLARE_ALIGNED(8, uint8_t, dstbuf_new)[BUF_SIZE];
57  DECLARE_ALIGNED(8, uint8_t, dstbuf_ref)[BUF_SIZE];
58  DECLARE_ALIGNED(4, uint8_t, src0_buf)[SRC_BUF_SIZE];
59  DECLARE_ALIGNED(4, uint8_t, src1_buf)[SRC_BUF_SIZE];
60 
61  size_t src0_offset = rnd() % WIDTH, src1_offset = rnd() % WIDTH;
62  ptrdiff_t stride = (rnd() % (MAX_STRIDE / WIDTH) + 1) * WIDTH;
63  const uint8_t *src0 = src0_buf + src0_offset, *src1 = src1_buf + src1_offset;
64  uint8_t *dst_new = dstbuf_new, *dst_ref = dstbuf_ref;
65  const int h = HEIGHT;
66 
67  if (rnd() & 1) {
68  // Flip stride.
69  dst_new += (h - 1) * stride;
70  dst_ref += (h - 1) * stride;
71  src0 += (h - 1) * stride;
72  src1 += (h - 1) * stride;
73  stride = -stride;
74  }
75 
76  randomize_buffers(src0_buf, src1_buf, sizeof(src0_buf));
77  randomize_buffers(dstbuf_new, dstbuf_ref, sizeof(dstbuf_new));
78  call_ref(dst_ref, src0, src1, stride, h);
79  call_new(dst_new, src0, src1, stride, h);
80  if (memcmp(dstbuf_new, dstbuf_ref, sizeof(dstbuf_new)))
81  fail();
82  bench_new(dst_new, src1, src1, stride, h);
83 }
84 
85 static void vp3_check_idct(int nb_bits)
86 {
87  enum {
88  MAX_STRIDE = 64,
89  MIN_STRIDE = 16,
90  NB_LINES = 8,
91  WIDTH = 8,
92  BUF_SIZE = MAX_STRIDE * (NB_LINES - 1) + WIDTH,
93  };
94 
95  declare_func(void, uint8_t *dest, ptrdiff_t stride, int16_t *block);
96 
97  DECLARE_ALIGNED(16, int16_t, block_new)[64];
98  DECLARE_ALIGNED(16, int16_t, block_ref)[64];
99  DECLARE_ALIGNED(8, uint8_t, dstbuf_new)[BUF_SIZE];
100  DECLARE_ALIGNED(8, uint8_t, dstbuf_ref)[BUF_SIZE];
101 
102  ptrdiff_t stride = (rnd() % (MAX_STRIDE / MIN_STRIDE) + 1) * MIN_STRIDE;
103  uint8_t *dst_new = dstbuf_new, *dst_ref = dstbuf_ref;
104 
105  if (rnd() & 1) {
106  // Flip stride.
107  dst_new += (NB_LINES - 1) * stride;
108  dst_ref += (NB_LINES - 1) * stride;
109  stride = -stride;
110  }
111 
112  randomize_buffers(dstbuf_new, dstbuf_ref, sizeof(dstbuf_ref));
113  for (size_t k = 0; k < FF_ARRAY_ELEMS(block_new); ++k) {
114  int32_t r = (int32_t)rnd() >> (32 - nb_bits);
115  block_new[k] = block_ref[k] = r;
116  }
117 
118  call_ref(dst_ref, stride, block_ref);
119  call_new(dst_new, stride, block_new);
120  if (memcmp(dstbuf_new, dstbuf_ref, sizeof(dstbuf_new)) ||
121  memcmp(block_new, block_ref, sizeof(block_new)))
122  fail();
123  bench_new(dst_new, stride, block_new);
124 }
125 
126 static void vp3_check_loop_filter(const VP3DSPContext *const vp3dsp)
127 {
128  enum {
129  MAX_STRIDE = 64,
130  MIN_STRIDE = 8,
131  /// Horizontal tests operate on 4x8 blocks
132  HORIZONTAL_BUF_SIZE = ((8 /* lines */ - 1) * MAX_STRIDE + 4 /* width */ + 7 /* misalignment */),
133  /// Vertical tests operate on 8x4 blocks
134  VERTICAL_BUF_SIZE = ((4 /* lines */ - 1) * MAX_STRIDE + 8 /* width */ + 7 /* misalignment */),
135  };
136  DECLARE_ALIGNED(8, uint8_t, hor_buf0)[HORIZONTAL_BUF_SIZE];
137  DECLARE_ALIGNED(8, uint8_t, hor_buf1)[HORIZONTAL_BUF_SIZE];
138  DECLARE_ALIGNED(8, uint8_t, ver_buf0)[VERTICAL_BUF_SIZE];
139  DECLARE_ALIGNED(8, uint8_t, ver_buf1)[VERTICAL_BUF_SIZE];
140  DECLARE_ALIGNED(16, int, bounding_values_array)[256 + 4];
141  int *const bounding_values = bounding_values_array + 127;
142  static const struct {
143  const char *name;
144  size_t offset;
145  int lines_above, lines_below;
146  int pixels_left, pixels_right;
147  unsigned alignment;
148  int horizontal;
149  } tests[] = {
150 #define TEST(NAME) .name = #NAME, .offset = offsetof(VP3DSPContext, NAME)
151  { TEST(v_loop_filter_unaligned), 2, 1, 0, 7, 1, 0 },
152  { TEST(h_loop_filter_unaligned), 0, 7, 2, 1, 1, 1 },
153  { TEST(v_loop_filter), 2, 1, 0, 7, VP3_LOOP_FILTER_NO_UNALIGNED_SUPPORT ? 8 : 1, 0 },
154  { TEST(h_loop_filter), 0, 7, 2, 1, VP3_LOOP_FILTER_NO_UNALIGNED_SUPPORT ? 8 : 1, 1 },
155  };
156  declare_func(void, uint8_t *src, ptrdiff_t stride, int *bounding_values);
157 
158  int filter_limit = rnd() % 128;
159 
160  ff_vp3dsp_set_bounding_values(bounding_values_array, filter_limit);
161 
162  for (size_t i = 0; i < FF_ARRAY_ELEMS(tests); ++i) {
163  void (*loop_filter)(uint8_t *, ptrdiff_t, int*) = *(void(**)(uint8_t *, ptrdiff_t, int*))((const char*)vp3dsp + tests[i].offset);
164 
165  if (check_func(loop_filter, "%s", tests[i].name)) {
166  uint8_t *buf0 = tests[i].horizontal ? hor_buf0 : ver_buf0;
167  uint8_t *buf1 = tests[i].horizontal ? hor_buf1 : ver_buf1;
168  size_t bufsize = tests[i].horizontal ? HORIZONTAL_BUF_SIZE : VERTICAL_BUF_SIZE;
169  ptrdiff_t stride = (rnd() % (MAX_STRIDE / MIN_STRIDE) + 1) * MIN_STRIDE;
170  // Don't always use pointers that are aligned to 8.
171  size_t offset = FFALIGN(tests[i].pixels_left, tests[i].alignment) +
172  (rnd() % (MIN_STRIDE / tests[i].alignment)) * tests[i].alignment
173  + stride * tests[i].lines_above;
174  uint8_t *dst0 = buf0 + offset, *dst1 = buf1 + offset;
175 
176  if (rnd() & 1) {
177  // Flip stride.
178  dst1 += (tests[i].lines_below - tests[i].lines_above) * stride;
179  dst0 += (tests[i].lines_below - tests[i].lines_above) * stride;
180  stride = -stride;
181  }
182 
183  randomize_buffers(buf0, buf1, bufsize);
184  call_ref(dst0, stride, bounding_values);
185  call_new(dst1, stride, bounding_values);
186  if (memcmp(buf0, buf1, bufsize))
187  fail();
188  bench_new(dst0, stride, bounding_values);
189  }
190  }
191 }
192 
194 {
195  VP3DSPContext vp3dsp;
196 
197  ff_vp3dsp_init(&vp3dsp);
198 
200  report("put_no_rnd_pixels_l2");
201 
202 #define IDCT_TEST(func, mask) \
203  if (check_func(vp3dsp.func, #func)) \
204  vp3_check_idct(mask); \
205  report(#func)
206  IDCT_TEST(idct_dc_add, 16);
207  // FIXME: The Theora specification actually requires using unsaturated
208  // 16-bit arithmetic for its idct. Yet the SSE2 version uses saturated
209  // arithmetic and even the C version seems to forget truncating
210  // intermediate values to 16 bit. For the time being, use a range
211  // that does not trigger overflow.
212  IDCT_TEST(idct_put, 8);
213  IDCT_TEST(idct_add, 8);
214 
215  vp3_check_loop_filter(&vp3dsp);
216  report("loop_filter");
217 }
SRC_BUF_SIZE
#define SRC_BUF_SIZE
Definition: hash.c:26
vp3_check_loop_filter
static void vp3_check_loop_filter(const VP3DSPContext *const vp3dsp)
Definition: vp3dsp.c:126
TEST
#define TEST(NAME)
name
it's the only field you need to keep assuming you have a context There is some magic you don't need to care about around this just let it vf default minimum maximum flags name is the option name
Definition: writing_filters.txt:88
VP3DSPContext
Definition: vp3dsp.h:29
r
const char * r
Definition: vf_curves.c:127
BUF_SIZE
#define BUF_SIZE
Definition: setpts.c:159
mem_internal.h
src1
const pixel * src1
Definition: h264pred_template.c:420
vp3dsp.h
check_func
#define check_func(func,...)
Definition: checkasm.h:214
b
#define b
Definition: input.c:42
call_ref
#define call_ref(...)
Definition: checkasm.h:230
macros.h
fail
#define fail()
Definition: checkasm.h:224
checkasm.h
checkasm_check_vp3dsp
void checkasm_check_vp3dsp(void)
Definition: vp3dsp.c:193
rnd
#define rnd()
Definition: checkasm.h:207
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
IDCT_TEST
#define IDCT_TEST(func, mask)
VP3DSPContext::put_no_rnd_pixels_l2
void(* put_no_rnd_pixels_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, ptrdiff_t stride, int h)
Copy 8xH pixels from source to destination buffer using a bilinear filter with no rounding (i....
Definition: vp3dsp.h:40
intreadwrite.h
vp3_check_idct
static void vp3_check_idct(int nb_bits)
Definition: vp3dsp.c:85
call_new
#define call_new(...)
Definition: checkasm.h:238
idct_put
static void idct_put(FourXContext *f, int x, int y)
Definition: 4xm.c:559
ff_vp3dsp_set_bounding_values
void ff_vp3dsp_set_bounding_values(int *bounding_values_array, int filter_limit)
Definition: vp3dsp.c:477
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem_internal.h:104
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
WIDTH
#define WIDTH
Definition: c93.c:44
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
offset
it's the only field you need to keep assuming you have a context There is some magic you don't need to care about around this just let it vf offset
Definition: writing_filters.txt:86
loop_filter
static void loop_filter(const H264Context *h, H264SliceContext *sl, int start_x, int end_x)
Definition: h264_slice.c:2438
tests
const TestCase tests[]
Definition: fifo_muxer.c:363
report
#define report
Definition: checkasm.h:227
HEIGHT
#define HEIGHT
Definition: c93.c:45
bench_new
#define bench_new(...)
Definition: checkasm.h:429
VP3_LOOP_FILTER_NO_UNALIGNED_SUPPORT
#define VP3_LOOP_FILTER_NO_UNALIGNED_SUPPORT
Definition: vp3dsp.h:27
randomize_buffers
#define randomize_buffers(buf0, buf1, size)
Definition: vp3dsp.c:28
MAX_STRIDE
#define MAX_STRIDE
Definition: hpeldsp.c:31
vp3_check_put_no_rnd_pixels_l2
static void vp3_check_put_no_rnd_pixels_l2(const VP3DSPContext *const vp3dsp)
Definition: vp3dsp.c:40
src0
const pixel *const src0
Definition: h264pred_template.c:419
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:219
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
int32_t
int32_t
Definition: audioconvert.c:56
idct_add
static void idct_add(uint8_t *dst, int stride, const uint8_t *src, int in_linesize, int *block)
Definition: mv30.c:170
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
h
h
Definition: vp9dsp_template.c:2070
stride
#define stride
Definition: h264pred_template.c:536
ff_vp3dsp_init
av_cold void ff_vp3dsp_init(VP3DSPContext *c)
Definition: vp3dsp.c:448
src
#define src
Definition: vp8dsp.c:248