FFmpeg
sw_scale.c
Go to the documentation of this file.
1 /*
2  *
3  * This file is part of FFmpeg.
4  *
5  * FFmpeg is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * FFmpeg is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18  */
19 
#include <string.h>

#include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem.h"
#include "libavutil/mem_internal.h"

#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"

#include "checkasm.h"
31 
/*
 * Fill `buf` with values from rnd(), one AV_WN32() store every 4 bytes, for
 * byte offsets 0, 4, 8, ... below `size`.
 *
 * `buf` must be a byte pointer (the offset is added to it directly).  Each
 * store is issued at a 4-byte step regardless of how many bytes remain, so
 * when `size` is not a multiple of 4 the caller has to provide padding after
 * the nominal end of the buffer.
 *
 * Both arguments are parenthesized in the expansion so that expressions with
 * low-precedence operators (e.g. `a & b`, `c ? x : y`) behave as expected,
 * and the loop index has a macro-private name so it cannot capture a
 * variable named `j` used in the arguments.
 */
#define randomize_buffers(buf, size)                              \
    do {                                                          \
        int rnd_buf_idx_;                                         \
        for (rnd_buf_idx_ = 0; rnd_buf_idx_ < (size);             \
             rnd_buf_idx_ += 4)                                   \
            AV_WN32((buf) + rnd_buf_idx_, rnd());                 \
    } while (0)
38 
/**
 * Reference implementation used to validate the SIMD vertical scaler.
 *
 * This follows the same approximate algorithm as the SIMD functions, not the
 * scalar C path, so its rounding differs from the scalar code on purpose.
 *
 * @param filter     one 16-bit coefficient per filter tap
 * @param filterSize number of taps, i.e. number of entries in filter and src
 * @param src        filterSize pointers to 16-bit input rows
 * @param dest       output buffer, receives dstW bytes
 * @param dstW       number of output samples to produce
 * @param dither     dither table; only dither[0] is read here
 * @param offset     index of the first input sample read from every row
 */
static void ref_function(const int16_t *filter, int filterSize,
                         const int16_t **src, uint8_t *dest, int dstW,
                         const uint8_t *dither, int offset)
{
    int i;
    /* Constant bias added to every output sample before accumulation. */
    const int bias = ((filterSize - 1) * 8 + dither[0]) >> 4;

    for (i = 0; i < dstW; i++) {
        /* The accumulator is deliberately only 16 bits wide: every addition
         * is truncated back to int16_t, as in the original code. */
        int16_t val = bias;
        int j;

        for (j = 0; j < filterSize; j++) {
            const int32_t prod = (int32_t)src[j][i + offset] * (int32_t)filter[j];

            /* Keep only the upper 16 bits of the 32-bit product.  This is
             * written as a shift instead of a union pun so the result does
             * not depend on host byte order or on sizeof(int).  It relies on
             * arithmetic right shift of negative values, which the final
             * `val >> 3` below depends on as well. */
            val += (int16_t)(prod >> 16);
        }
        dest[i] = av_clip_uint8(val >> 3);
    }
}
61 
62 static void check_yuv2yuvX(void)
63 {
64  struct SwsContext *ctx;
65  int fsi, osi, isi, i, j;
66  int dstW;
67 #define LARGEST_FILTER 16
68 #define FILTER_SIZES 4
69  static const int filter_sizes[FILTER_SIZES] = {1, 4, 8, 16};
70 #define LARGEST_INPUT_SIZE 512
71 #define INPUT_SIZES 6
72  static const int input_sizes[INPUT_SIZES] = {8, 24, 128, 144, 256, 512};
73 
74  declare_func_emms(AV_CPU_FLAG_MMX, void, const int16_t *filter,
75  int filterSize, const int16_t **src, uint8_t *dest,
76  int dstW, const uint8_t *dither, int offset);
77 
78  const int16_t **src;
79  LOCAL_ALIGNED_8(int16_t, src_pixels, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
80  LOCAL_ALIGNED_8(int16_t, filter_coeff, [LARGEST_FILTER]);
84  union VFilterData{
85  const int16_t *src;
86  uint16_t coeff[8];
87  } *vFilterData;
88  uint8_t d_val = rnd();
89  memset(dither, d_val, LARGEST_INPUT_SIZE);
90  randomize_buffers((uint8_t*)src_pixels, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int16_t));
91  randomize_buffers((uint8_t*)filter_coeff, LARGEST_FILTER * sizeof(int16_t));
93  if (sws_init_context(ctx, NULL, NULL) < 0)
94  fail();
95 
97  for(isi = 0; isi < INPUT_SIZES; ++isi){
98  dstW = input_sizes[isi];
99  for(osi = 0; osi < 64; osi += 16){
100  for(fsi = 0; fsi < FILTER_SIZES; ++fsi){
101  src = av_malloc(sizeof(int16_t*) * filter_sizes[fsi]);
102  vFilterData = av_malloc((filter_sizes[fsi] + 2) * sizeof(union VFilterData));
103  memset(vFilterData, 0, (filter_sizes[fsi] + 2) * sizeof(union VFilterData));
104  for(i = 0; i < filter_sizes[fsi]; ++i){
105  src[i] = &src_pixels[i * LARGEST_INPUT_SIZE];
106  vFilterData[i].src = src[i];
107  for(j = 0; j < 4; ++j)
108  vFilterData[i].coeff[j + 4] = filter_coeff[i];
109  }
110  if (check_func(ctx->yuv2planeX, "yuv2yuvX_%d_%d_%d", filter_sizes[fsi], osi, dstW)){
111  memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
112  memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));
113 
114  // The reference function is not the scalar function selected when mmx
115  // is deactivated as the SIMD functions do not give the same result as
116  // the scalar ones due to rounding. The SIMD functions are activated by
117  // the flag SWS_ACCURATE_RND
118  ref_function(&filter_coeff[0], filter_sizes[fsi], src, dst0, dstW - osi, dither, osi);
119  // There's no point in calling new for the reference function
120  if(ctx->use_mmx_vfilter){
121  call_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
122  if (memcmp(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0])))
123  fail();
124  if(dstW == LARGEST_INPUT_SIZE)
125  bench_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
126  }
127  }
128  av_freep(&src);
129  av_freep(&vFilterData);
130  }
131  }
132  }
134 #undef FILTER_SIZES
135 }
136 
137 #undef SRC_PIXELS
138 #define SRC_PIXELS 128
139 
140 static void check_hscale(void)
141 {
142 #define MAX_FILTER_WIDTH 40
143 #define FILTER_SIZES 5
144  static const int filter_sizes[FILTER_SIZES] = { 4, 8, 16, 32, 40 };
145 
146 #define HSCALE_PAIRS 2
147  static const int hscale_pairs[HSCALE_PAIRS][2] = {
148  { 8, 14 },
149  { 8, 18 },
150  };
151 
152  int i, j, fsi, hpi, width;
153  struct SwsContext *ctx;
154 
155  // padded
157  LOCAL_ALIGNED_32(uint32_t, dst0, [SRC_PIXELS]);
158  LOCAL_ALIGNED_32(uint32_t, dst1, [SRC_PIXELS]);
159 
160  // padded
162  LOCAL_ALIGNED_32(int32_t, filterPos, [SRC_PIXELS]);
163 
164  // The dst parameter here is either int16_t or int32_t but we use void* to
165  // just cover both cases.
166  declare_func_emms(AV_CPU_FLAG_MMX, void, void *c, void *dst, int dstW,
167  const uint8_t *src, const int16_t *filter,
168  const int32_t *filterPos, int filterSize);
169 
171  if (sws_init_context(ctx, NULL, NULL) < 0)
172  fail();
173 
175 
176  for (hpi = 0; hpi < HSCALE_PAIRS; hpi++) {
177  for (fsi = 0; fsi < FILTER_SIZES; fsi++) {
178  width = filter_sizes[fsi];
179 
180  ctx->srcBpc = hscale_pairs[hpi][0];
181  ctx->dstBpc = hscale_pairs[hpi][1];
182  ctx->hLumFilterSize = ctx->hChrFilterSize = width;
183 
184  for (i = 0; i < SRC_PIXELS; i++) {
185  filterPos[i] = i;
186 
187  // These filter cofficients are chosen to try break two corner
188  // cases, namely:
189  //
190  // - Negative filter coefficients. The filters output signed
191  // values, and it should be possible to end up with negative
192  // output values.
193  //
194  // - Positive clipping. The hscale filter function has clipping
195  // at (1<<15) - 1
196  //
197  // The coefficients sum to the 1.0 point for the hscale
198  // functions (1 << 14).
199 
200  for (j = 0; j < width; j++) {
201  filter[i * width + j] = -((1 << 14) / (width - 1));
202  }
203  filter[i * width + (rnd() % width)] = ((1 << 15) - 1);
204  }
205 
206  for (i = 0; i < MAX_FILTER_WIDTH; i++) {
207  // These values should be unused in SIMD implementations but
208  // may still be read, random coefficients here should help show
209  // issues where they are used in error.
210 
211  filter[SRC_PIXELS * width + i] = rnd();
212  }
214 
215  if (check_func(ctx->hcScale, "hscale_%d_to_%d_width%d", ctx->srcBpc, ctx->dstBpc + 1, width)) {
216  memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0]));
217  memset(dst1, 0, SRC_PIXELS * sizeof(dst1[0]));
218 
219  call_ref(NULL, dst0, SRC_PIXELS, src, filter, filterPos, width);
220  call_new(NULL, dst1, SRC_PIXELS, src, filter, filterPos, width);
221  if (memcmp(dst0, dst1, SRC_PIXELS * sizeof(dst0[0])))
222  fail();
223  bench_new(NULL, dst0, SRC_PIXELS, src, filter, filterPos, width);
224  }
225  }
226  }
228 }
229 
/**
 * checkasm entry point for this file: runs the horizontal scaler check and
 * the yuv2yuvX check, reporting each group under its own name.
 */
void checkasm_check_sw_scale(void)
{
    check_hscale();
    report("hscale");
    check_yuv2yuvX();
    report("yuv2yuvX");
}
FILTER_SIZES
#define FILTER_SIZES
declare_func_emms
#define declare_func_emms(cpu_flags, ret,...)
Definition: checkasm.h:130
SwsContext::dstW
int dstW
Width of destination luma/alpha planes.
Definition: swscale_internal.h:481
mem_internal.h
check_yuv2yuvX
static void check_yuv2yuvX(void)
Definition: sw_scale.c:62
check_func
#define check_func(func,...)
Definition: checkasm.h:124
filter
filter_frame: For filters that do not use the activate() callback, this method is called when a frame is pushed to the filter's input. It can be called at any time except in a reentrant way. If the input frame is enough to produce output, then the filter should push the output frames on the output link immediately. As an exception to the previous rule, if the input frame is enough to produce several output frames, then the filter needs to output only at least one per link. The additional frames can be left buffered in the filter.
Definition: filter_design.txt:228
call_ref
#define call_ref(...)
Definition: checkasm.h:139
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:31
fail
#define fail()
Definition: checkasm.h:133
checkasm.h
val
static double val(void *priv, double ch)
Definition: aeval.c:76
check_hscale
static void check_hscale(void)
Definition: sw_scale.c:140
rnd
#define rnd()
Definition: checkasm.h:117
width
#define width
intreadwrite.h
LARGEST_FILTER
#define LARGEST_FILTER
ctx
AVFormatContext * ctx
Definition: movenc.c:48
LOCAL_ALIGNED_8
#define LOCAL_ALIGNED_8(t, v,...)
Definition: mem_internal.h:124
int32_t
int32_t
Definition: audio_convert.c:194
HSCALE_PAIRS
#define HSCALE_PAIRS
SRC_PIXELS
#define SRC_PIXELS
Definition: sw_scale.c:138
call_new
#define call_new(...)
Definition: checkasm.h:211
NULL
#define NULL
Definition: coverity.c:32
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:136
src
#define src
Definition: vp8dsp.c:255
sws_alloc_context
struct SwsContext * sws_alloc_context(void)
Allocate an empty SwsContext.
Definition: utils.c:1093
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can, and has in some cases, led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not ... c
Definition: undefined.txt:32
ff_getSwsFunc
SwsFunc ff_getSwsFunc(SwsContext *c)
Return function pointer to fastest main scaler path function depending on architecture and available ...
Definition: swscale.c:584
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
report
#define report
Definition: checkasm.h:136
i
int i
Definition: input.c:407
bench_new
#define bench_new(...)
Definition: checkasm.h:271
common.h
LARGEST_INPUT_SIZE
#define LARGEST_INPUT_SIZE
swscale_internal.h
uint8_t
uint8_t
Definition: audio_convert.c:194
INPUT_SIZES
#define INPUT_SIZES
randomize_buffers
#define randomize_buffers(buf, size)
Definition: sw_scale.c:32
sws_init_context
av_warn_unused_result int sws_init_context(struct SwsContext *sws_context, SwsFilter *srcFilter, SwsFilter *dstFilter)
Initialize the swscaler context sws_context.
Definition: utils.c:1179
AV_CPU_FLAG_MMX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:31
sws_freeContext
void sws_freeContext(struct SwsContext *swsContext)
Free the swscaler context swsContext.
Definition: utils.c:2337
ref_function
static void ref_function(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW, const uint8_t *dither, int offset)
Definition: sw_scale.c:41
av_clip_uint8
#define av_clip_uint8
Definition: common.h:128
mem.h
MAX_FILTER_WIDTH
#define MAX_FILTER_WIDTH
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:48
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
coeff
static const double coeff[2][5]
Definition: vf_owdenoise.c:73
checkasm_check_sw_scale
void checkasm_check_sw_scale(void)
Definition: sw_scale.c:230
SwsContext
Definition: swscale_internal.h:283
int
int
Definition: ffmpeg_filter.c:170
swscale.h
dither
static const uint8_t dither[8][8]
Definition: vf_fspp.c:59