88 #if HAVE_MMXEXT_INLINE
106 #if ARCH_X86_64 && HAVE_MMX && HAVE_YASM
108 int16_t *
block, int16_t *qmat);
110 static void ff_prores_idct_put_10_sse2_wrap(int16_t *dst){
133 #if HAVE_MMXEXT_INLINE
138 #if ARCH_X86_64 && HAVE_YASM
168 #define AANSCALE_BITS 12
171 #define NB_ITS_SPEED 50000
176 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
177 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
178 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
179 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
180 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
181 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
182 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
183 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
193 for (i = 0; i < 64; i++) {
194 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
205 memset(block, 0, 64 *
sizeof(*block));
209 for (i = 0; i < 64; i++)
210 block[i] = (
av_lfg_get(prng) % (2*vals)) -vals;
213 for (i = 0; i < 64; i++)
219 for (i = 0; i < j; i++) {
221 block[idx] =
av_lfg_get(prng) % (2*vals) -vals;
225 block[ 0] =
av_lfg_get(prng) % (16*vals) - (8*vals);
226 block[63] = (block[0] & 1) ^ 1;
236 for (i = 0; i < 64; i++)
237 dst[idct_mmx_perm[i]] = src[i];
239 for (i = 0; i < 64; i++)
240 dst[idct_simple_mmx_perm[i]] = src[i];
242 for (i = 0; i < 64; i++)
243 dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
245 for (i = 0; i < 64; i++)
246 dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
248 for (i = 0; i < 64; i++)
249 dst[(i>>3) | ((i<<3)&0x38)] = src[i];
251 for (i = 0; i < 64; i++)
261 int64_t err2, ti, ti1, it1, err_sum = 0;
262 int64_t sysErr[64], sysErrMax = 0;
264 int blockSumErrMax = 0, blockSumErr;
266 const int vals=1<<
bits;
274 for (i = 0; i < 64; i++)
276 for (it = 0; it <
NB_ITS; it++) {
284 for (i = 0; i < 64; i++) {
293 for (i = 0; i < 64; i++) {
300 sysErr[i] +=
block[i] - block1[i];
302 if (abs(
block[i]) > maxout)
303 maxout = abs(
block[i]);
305 if (blockSumErrMax < blockSumErr)
306 blockSumErrMax = blockSumErr;
308 for (i = 0; i < 64; i++)
309 sysErrMax =
FFMAX(sysErrMax,
FFABS(sysErr[i]));
311 for (i = 0; i < 64; i++) {
314 printf(
"%7d ", (
int) sysErr[i]);
318 omse = (double) err2 / NB_ITS / 64;
319 ome = (double) err_sum / NB_ITS / 64;
321 spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
323 printf(
"%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
324 is_idct ?
"IDCT" :
"DCT", dct->
name, err_inf,
325 omse, ome, (
double) sysErrMax / NB_ITS,
326 maxout, blockSumErrMax);
349 }
while (ti1 < 1000000);
351 printf(
"%s %s: %0.1f kdct/s\n", is_idct ?
"IDCT" :
"DCT", dct->
name,
352 (
double) it1 * 1000.0 / (
double) ti1);
363 static double c8[8][8];
364 static double c4[4][4];
365 double block1[64], block2[64], block3[64];
372 for (i = 0; i < 8; i++) {
374 for (j = 0; j < 8; j++) {
375 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
376 c8[i][j] = s * cos(
M_PI * i * (j + 0.5) / 8.0);
377 sum += c8[i][j] * c8[i][j];
381 for (i = 0; i < 4; i++) {
383 for (j = 0; j < 4; j++) {
384 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
385 c4[i][j] = s * cos(
M_PI * i * (j + 0.5) / 4.0);
386 sum += c4[i][j] * c4[i][j];
393 for (i = 0; i < 4; i++) {
394 for (j = 0; j < 8; j++) {
395 block1[8 * (2 * i) + j] =
396 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) *
s;
397 block1[8 * (2 * i + 1) + j] =
398 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) *
s;
403 for (i = 0; i < 8; i++) {
404 for (j = 0; j < 8; j++) {
406 for (k = 0; k < 8; k++)
407 sum += c8[k][j] * block1[8 * i + k];
408 block2[8 * i + j] = sum;
413 for (i = 0; i < 8; i++) {
414 for (j = 0; j < 4; j++) {
417 for (k = 0; k < 4; k++)
418 sum += c4[k][j] * block2[8 * (2 * k) + i];
419 block3[8 * (2 * j) + i] = sum;
423 for (k = 0; k < 4; k++)
424 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
425 block3[8 * (2 * j + 1) + i] = sum;
430 for (i = 0; i < 8; i++) {
431 for (j = 0; j < 8; j++) {
432 v = block3[8 * i + j];
434 else if (v > 255) v = 255;
435 dest[i * linesize + j] = (int)
rint(v);
441 void (*idct248_put)(
uint8_t *dest,
int line_size,
445 int it, i, it1, ti, ti1, err_max,
v;
453 for (it = 0; it <
NB_ITS; it++) {
455 for (i = 0; i < 64; i++)
459 for (i = 0; i < 64; i++)
463 for (i = 0; i < 64; i++)
467 for (i = 0; i < 64; i++) {
494 printf(
"%s %s: err_inf=%d\n", 1 ?
"IDCT248" :
"DCT248", name, err_max);
503 for (i = 0; i < 64; i++)
510 }
while (ti1 < 1000000);
512 printf(
"%s %s: %0.1f kdct/s\n", 1 ?
"IDCT248" :
"DCT248", name,
513 (
double) it1 * 1000.0 / (
double) ti1);
518 printf(
"dct-test [-i] [<test-number>] [<bits>]\n"
519 "test-number 0 -> test with random matrixes\n"
520 " 1 -> test with random sparse matrixes\n"
521 " 2 -> do 3. test from mpeg4 std\n"
522 "bits Number of time domain bits to use, 8 is default\n"
523 "-i test IDCT implementations\n"
524 "-4 test IDCT248 implementations\n"
532 int main(
int argc,
char **argv)
534 int test_idct = 0, test_248_dct = 0;
547 c =
getopt(argc, argv,
"ih4t");
568 test = atoi(argv[
optind]);
569 if(optind+1 < argc) bits= atoi(argv[optind+1]);
571 printf(
"ffmpeg DCT/IDCT test\n");
576 const struct algo *algos = test_idct ? idct_tab :
fdct_tab;
577 for (i = 0; algos[i].
name; i++)
579 err |=
dct_error(&algos[i], test, test_idct, speed, bits);
584 printf(
"Error: %d.\n", err);