00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include "libavutil/imgutils.h"
00031 #include "avcodec.h"
00032 #include "dsputil.h"
00033 #include "simple_idct.h"
00034 #include "faandct.h"
00035 #include "faanidct.h"
00036 #include "mathops.h"
00037 #include "mpegvideo.h"
00038 #include "config.h"
00039 #include "ac3dec.h"
00040 #include "vorbis.h"
00041 #include "diracdsp.h"
00042
/* Clipping table: ff_cropTbl[MAX_NEG_CROP + x] clamps x to 0..255, with
 * MAX_NEG_CROP guard entries on both sides; filled at runtime by init code. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Square table: ff_squareTbl[256 + x] == x*x for x in [-255,255]; filled at
 * runtime by init code. */
uint32_t ff_squareTbl[512] = {0, };
00045
/* Instantiate the bit-depth-templated DSP functions: each inclusion of
 * dsputil_template.c generates the full function set for that BIT_DEPTH. */
#define BIT_DEPTH 9
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 10
#include "dsputil_template.c"
#undef BIT_DEPTH

/* The 8-bit variant is instantiated last and BIT_DEPTH is deliberately left
 * defined: the rest of this file uses the *_8 helpers. */
#define BIT_DEPTH 8
#include "dsputil_template.c"
00056
00057
/* SWAR constants: every byte of a native word set to 0x7f / 0x80.
 * (~0UL/255 evaluates to 0x0101...01, one per byte of unsigned long.) */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
00060
/* Classic 8x8 zigzag scan order (JPEG/MPEG progressive): entry i gives the
 * raster index of the i-th coefficient in scan order. */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
00071
00072
00073
/* Zigzag scan for interlaced (2-4-8) coded blocks, where vertical frequency
 * pairs are interleaved; same raster-index convention as ff_zigzag_direct. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
00084
00085
00086 DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64];
00087
/* Alternate horizontal scan order (MPEG-2 alternate scan family). */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
00098
/* Alternate vertical scan order (used e.g. for interlaced MPEG-2 blocks). */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
00109
00110
/* Coefficient permutation expected by the simple MMX IDCT (FF_SIMPLE_IDCT_PERM). */
static const uint8_t simple_mmx_permutation[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
00121
00122 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
00123
00124 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
00125 int i;
00126 int end;
00127
00128 st->scantable= src_scantable;
00129
00130 for(i=0; i<64; i++){
00131 int j;
00132 j = src_scantable[i];
00133 st->permutated[i] = permutation[j];
00134 }
00135
00136 end=-1;
00137 for(i=0; i<64; i++){
00138 int j;
00139 j = st->permutated[i];
00140 if(j>end) end=j;
00141 st->raster_end[i]= end;
00142 }
00143 }
00144
00145 void ff_init_scantable_permutation(uint8_t *idct_permutation,
00146 int idct_permutation_type)
00147 {
00148 int i;
00149
00150 switch(idct_permutation_type){
00151 case FF_NO_IDCT_PERM:
00152 for(i=0; i<64; i++)
00153 idct_permutation[i]= i;
00154 break;
00155 case FF_LIBMPEG2_IDCT_PERM:
00156 for(i=0; i<64; i++)
00157 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
00158 break;
00159 case FF_SIMPLE_IDCT_PERM:
00160 for(i=0; i<64; i++)
00161 idct_permutation[i]= simple_mmx_permutation[i];
00162 break;
00163 case FF_TRANSPOSE_IDCT_PERM:
00164 for(i=0; i<64; i++)
00165 idct_permutation[i]= ((i&7)<<3) | (i>>3);
00166 break;
00167 case FF_PARTTRANS_IDCT_PERM:
00168 for(i=0; i<64; i++)
00169 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
00170 break;
00171 case FF_SSE2_IDCT_PERM:
00172 for(i=0; i<64; i++)
00173 idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
00174 break;
00175 default:
00176 av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
00177 }
00178 }
00179
/* Sum of all 256 pixels of a 16x16 block; rows are line_size bytes apart. */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int sum = 0;
    int y, x;

    for (y = 0; y < 16; y++) {
        for (x = 0; x < 16; x++)
            sum += pix[x];
        pix += line_size;
    }
    return sum;
}
00201
/* Sum of squares of the pixels of a 16x16 block (squared L2 norm), using the
 * ff_squareTbl lookup table.  Eight pixels are loaded per iteration, as one
 * 64-bit word or two 32-bit words depending on HAVE_FAST_64BIT.
 * NOTE(review): the word loads type-pun uint8_t* to uint32_t*/uint64_t*,
 * assuming an alignment/aliasing-tolerant target — long-standing practice
 * here, but not strictly conforming C. */
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if 0
            /* Reference byte-by-byte version, kept for documentation. */
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
#else
#if HAVE_FAST_64BIT
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
#endif
#endif
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}
00249
/* Byte-swap w 32-bit words from src into dst (dst may alias src).
 * The original manual 8x unrolling is left to the compiler here. */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int n;

    for (n = 0; n < w; n++)
        dst[n] = av_bswap32(src[n]);
}
00267
/* Byte-swap len 16-bit values from src into dst. */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    int n;

    for (n = 0; n < len; n++)
        dst[n] = av_bswap16(src[n]);
}
00273
00274 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00275 {
00276 int s, i;
00277 uint32_t *sq = ff_squareTbl + 256;
00278
00279 s = 0;
00280 for (i = 0; i < h; i++) {
00281 s += sq[pix1[0] - pix2[0]];
00282 s += sq[pix1[1] - pix2[1]];
00283 s += sq[pix1[2] - pix2[2]];
00284 s += sq[pix1[3] - pix2[3]];
00285 pix1 += line_size;
00286 pix2 += line_size;
00287 }
00288 return s;
00289 }
00290
00291 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00292 {
00293 int s, i;
00294 uint32_t *sq = ff_squareTbl + 256;
00295
00296 s = 0;
00297 for (i = 0; i < h; i++) {
00298 s += sq[pix1[0] - pix2[0]];
00299 s += sq[pix1[1] - pix2[1]];
00300 s += sq[pix1[2] - pix2[2]];
00301 s += sq[pix1[3] - pix2[3]];
00302 s += sq[pix1[4] - pix2[4]];
00303 s += sq[pix1[5] - pix2[5]];
00304 s += sq[pix1[6] - pix2[6]];
00305 s += sq[pix1[7] - pix2[7]];
00306 pix1 += line_size;
00307 pix2 += line_size;
00308 }
00309 return s;
00310 }
00311
00312 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
00313 {
00314 int s, i;
00315 uint32_t *sq = ff_squareTbl + 256;
00316
00317 s = 0;
00318 for (i = 0; i < h; i++) {
00319 s += sq[pix1[ 0] - pix2[ 0]];
00320 s += sq[pix1[ 1] - pix2[ 1]];
00321 s += sq[pix1[ 2] - pix2[ 2]];
00322 s += sq[pix1[ 3] - pix2[ 3]];
00323 s += sq[pix1[ 4] - pix2[ 4]];
00324 s += sq[pix1[ 5] - pix2[ 5]];
00325 s += sq[pix1[ 6] - pix2[ 6]];
00326 s += sq[pix1[ 7] - pix2[ 7]];
00327 s += sq[pix1[ 8] - pix2[ 8]];
00328 s += sq[pix1[ 9] - pix2[ 9]];
00329 s += sq[pix1[10] - pix2[10]];
00330 s += sq[pix1[11] - pix2[11]];
00331 s += sq[pix1[12] - pix2[12]];
00332 s += sq[pix1[13] - pix2[13]];
00333 s += sq[pix1[14] - pix2[14]];
00334 s += sq[pix1[15] - pix2[15]];
00335
00336 pix1 += line_size;
00337 pix2 += line_size;
00338 }
00339 return s;
00340 }
00341
00342 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
00343 const uint8_t *s2, int stride){
00344 int i;
00345
00346
00347 for(i=0;i<8;i++) {
00348 block[0] = s1[0] - s2[0];
00349 block[1] = s1[1] - s2[1];
00350 block[2] = s1[2] - s2[2];
00351 block[3] = s1[3] - s2[3];
00352 block[4] = s1[4] - s2[4];
00353 block[5] = s1[5] - s2[5];
00354 block[6] = s1[6] - s2[6];
00355 block[7] = s1[7] - s2[7];
00356 s1 += stride;
00357 s2 += stride;
00358 block += 8;
00359 }
00360 }
00361
00362
00363 void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00364 int line_size)
00365 {
00366 int i;
00367
00368
00369 for(i=0;i<8;i++) {
00370 pixels[0] = av_clip_uint8(block[0]);
00371 pixels[1] = av_clip_uint8(block[1]);
00372 pixels[2] = av_clip_uint8(block[2]);
00373 pixels[3] = av_clip_uint8(block[3]);
00374 pixels[4] = av_clip_uint8(block[4]);
00375 pixels[5] = av_clip_uint8(block[5]);
00376 pixels[6] = av_clip_uint8(block[6]);
00377 pixels[7] = av_clip_uint8(block[7]);
00378
00379 pixels += line_size;
00380 block += 8;
00381 }
00382 }
00383
00384 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00385 int line_size)
00386 {
00387 int i;
00388
00389
00390 for(i=0;i<4;i++) {
00391 pixels[0] = av_clip_uint8(block[0]);
00392 pixels[1] = av_clip_uint8(block[1]);
00393 pixels[2] = av_clip_uint8(block[2]);
00394 pixels[3] = av_clip_uint8(block[3]);
00395
00396 pixels += line_size;
00397 block += 8;
00398 }
00399 }
00400
00401 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00402 int line_size)
00403 {
00404 int i;
00405
00406
00407 for(i=0;i<2;i++) {
00408 pixels[0] = av_clip_uint8(block[0]);
00409 pixels[1] = av_clip_uint8(block[1]);
00410
00411 pixels += line_size;
00412 block += 8;
00413 }
00414 }
00415
00416 void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
00417 uint8_t *restrict pixels,
00418 int line_size)
00419 {
00420 int i, j;
00421
00422 for (i = 0; i < 8; i++) {
00423 for (j = 0; j < 8; j++) {
00424 if (*block < -128)
00425 *pixels = 0;
00426 else if (*block > 127)
00427 *pixels = 255;
00428 else
00429 *pixels = (uint8_t)(*block + 128);
00430 block++;
00431 pixels++;
00432 }
00433 pixels += (line_size - 8);
00434 }
00435 }
00436
00437 void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00438 int line_size)
00439 {
00440 int i;
00441
00442
00443 for(i=0;i<8;i++) {
00444 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00445 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00446 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
00447 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
00448 pixels[4] = av_clip_uint8(pixels[4] + block[4]);
00449 pixels[5] = av_clip_uint8(pixels[5] + block[5]);
00450 pixels[6] = av_clip_uint8(pixels[6] + block[6]);
00451 pixels[7] = av_clip_uint8(pixels[7] + block[7]);
00452 pixels += line_size;
00453 block += 8;
00454 }
00455 }
00456
00457 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00458 int line_size)
00459 {
00460 int i;
00461
00462
00463 for(i=0;i<4;i++) {
00464 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00465 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00466 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
00467 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
00468 pixels += line_size;
00469 block += 8;
00470 }
00471 }
00472
00473 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00474 int line_size)
00475 {
00476 int i;
00477
00478
00479 for(i=0;i<2;i++) {
00480 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00481 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00482 pixels += line_size;
00483 block += 8;
00484 }
00485 }
00486
00487 static int sum_abs_dctelem_c(DCTELEM *block)
00488 {
00489 int sum=0, i;
00490 for(i=0; i<64; i++)
00491 sum+= FFABS(block[i]);
00492 return sum;
00493 }
00494
/* Paint h rows of 16 bytes each with value; rows are line_size bytes apart. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 16);
        block += line_size;
    }
}
00504
/* Paint h rows of 8 bytes each with value; rows are line_size bytes apart. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 8);
        block += line_size;
    }
}
00514
/* Rounded averages of 2 and 4 values; arguments must be side-effect free
 * (each is evaluated once, but unparenthesized — pass simple expressions). */
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
00517
/* 1/16-pel bilinear interpolation of an 8-pixel-wide strip (one-vector GMC).
 * The four corner weights sum to 256, hence the >>8 normalization. */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B =       x16  * (16 - y16);
    const int C = (16 - x16) *       y16;
    const int D =       x16  *       y16;
    int y, x;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            dst[x] = (A * src[x]          + B * src[x + 1] +
                      C * src[stride + x] + D * src[stride + x + 1] +
                      rounder) >> 8;
        dst += stride;
        src += stride;
    }
}
00540
/* Global motion compensation with a per-pixel affine vector field.
 * (ox,oy) is the vector at the block origin; (dxx,dyx) is the per-column
 * increment and (dxy,dyy) the per-row increment.  Positions are fixed point
 * with 'shift' fractional bits after the >>16; samples outside the
 * width x height source area are clamped to the nearest edge pixel. */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;   /* one full pel in fractional units */

    /* convert to the largest valid coordinate for the clip tests below */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){
            int src_x, src_y, frac_x, frac_y, index;

            /* integer position plus 'shift'-bit fraction */
            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: bilinear interpolation of 4 neighbours */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                          + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    /* off the top/bottom: interpolate horizontally only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* off the left/right: interpolate vertically only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_y)
                                          + src[index+stride  ]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* off a corner: nearest edge pixel, no interpolation */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
00598
/* Full-pel copy: dispatch on block width to the plain copy helpers. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c (dst, src, stride, height);
    else if (width == 2)
        put_pixels2_8_c (dst, src, stride, height);
}
00607
/* Third-pel horizontal interpolation, nearer left pixel:
 * out = round((2*a + b)/3), via the fixed-point factor 683/2048. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + 1] + 1)) >> 11;
        dst += stride;
        src += stride;
    }
}
00618
/* Third-pel horizontal interpolation, nearer right pixel:
 * out = round((a + 2*b)/3), via the fixed-point factor 683/2048. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + 1] + 1)) >> 11;
        dst += stride;
        src += stride;
    }
}
00629
/* Third-pel vertical interpolation, nearer top pixel:
 * out = round((2*top + bottom)/3), via the fixed-point factor 683/2048. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + stride] + 1)) >> 11;
        dst += stride;
        src += stride;
    }
}
00640
/* Third-pel diagonal interpolation (1/3,1/3): weighted 2x2 average with
 * weights 4/3/3/2, normalized by the fixed-point factor 2731/32768 (~1/12). */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (4 * src[x]          + 3 * src[x + 1] +
                              3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15;
        dst += stride;
        src += stride;
    }
}
00651
/* Third-pel diagonal interpolation (1/3,2/3): weighted 2x2 average with
 * weights 3/2/4/3, normalized by the fixed-point factor 2731/32768 (~1/12). */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x]          + 2 * src[x + 1] +
                              4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
        dst += stride;
        src += stride;
    }
}
00662
/* Third-pel vertical interpolation, nearer bottom pixel:
 * out = round((top + 2*bottom)/3), via the fixed-point factor 683/2048. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + stride] + 1)) >> 11;
        dst += stride;
        src += stride;
    }
}
00673
/* Third-pel diagonal interpolation (2/3,1/3): weighted 2x2 average with
 * weights 3/4/2/3, normalized by the fixed-point factor 2731/32768 (~1/12). */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x]          + 4 * src[x + 1] +
                              2 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
        dst += stride;
        src += stride;
    }
}
00684
/* Third-pel diagonal interpolation (2/3,2/3): weighted 2x2 average with
 * weights 2/3/3/4, normalized by the fixed-point factor 2731/32768 (~1/12). */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (2 * src[x]          + 3 * src[x + 1] +
                              3 * src[x + stride] + 4 * src[x + stride + 1] + 6)) >> 15;
        dst += stride;
        src += stride;
    }
}
00695
/* Full-pel averaging copy: dispatch on block width to the averaging helpers. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c (dst, src, stride, height);
    else if (width == 2)
        avg_pixels2_8_c (dst, src, stride, height);
}
00704
/* Third-pel horizontal interpolation (nearer left), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (2 * src[x] + src[x + 1] + 1)) >> 11) + 1) >> 1;
        dst += stride;
        src += stride;
    }
}
00715
/* Third-pel horizontal interpolation (nearer right), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (src[x] + 2 * src[x + 1] + 1)) >> 11) + 1) >> 1;
        dst += stride;
        src += stride;
    }
}
00726
/* Third-pel vertical interpolation (nearer top), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (2 * src[x] + src[x + stride] + 1)) >> 11) + 1) >> 1;
        dst += stride;
        src += stride;
    }
}
00737
/* Third-pel diagonal interpolation (1/3,1/3), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (4 * src[x]          + 3 * src[x + 1] +
                                         3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        dst += stride;
        src += stride;
    }
}
00748
/* Third-pel diagonal interpolation (1/3,2/3), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (3 * src[x]          + 2 * src[x + 1] +
                                         4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        dst += stride;
        src += stride;
    }
}
00759
/* Third-pel vertical interpolation (nearer bottom), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (src[x] + 2 * src[x + stride] + 1)) >> 11) + 1) >> 1;
        dst += stride;
        src += stride;
    }
}
00770
/* Third-pel diagonal interpolation (2/3,1/3), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (3 * src[x]          + 4 * src[x + 1] +
                                         2 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        dst += stride;
        src += stride;
    }
}
00781
/* Third-pel diagonal interpolation (2/3,2/3), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (2 * src[x]          + 3 * src[x + 1] +
                                         3 * src[x + stride] + 4 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        dst += stride;
        src += stride;
    }
}
00792
00793 #define QPEL_MC(r, OPNAME, RND, OP) \
00794 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00795 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00796 int i;\
00797 for(i=0; i<h; i++)\
00798 {\
00799 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
00800 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
00801 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
00802 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
00803 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
00804 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
00805 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
00806 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
00807 dst+=dstStride;\
00808 src+=srcStride;\
00809 }\
00810 }\
00811 \
00812 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00813 const int w=8;\
00814 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00815 int i;\
00816 for(i=0; i<w; i++)\
00817 {\
00818 const int src0= src[0*srcStride];\
00819 const int src1= src[1*srcStride];\
00820 const int src2= src[2*srcStride];\
00821 const int src3= src[3*srcStride];\
00822 const int src4= src[4*srcStride];\
00823 const int src5= src[5*srcStride];\
00824 const int src6= src[6*srcStride];\
00825 const int src7= src[7*srcStride];\
00826 const int src8= src[8*srcStride];\
00827 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
00828 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
00829 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
00830 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
00831 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
00832 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
00833 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
00834 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
00835 dst++;\
00836 src++;\
00837 }\
00838 }\
00839 \
00840 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00841 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00842 int i;\
00843 \
00844 for(i=0; i<h; i++)\
00845 {\
00846 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
00847 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
00848 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
00849 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
00850 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
00851 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
00852 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
00853 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
00854 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
00855 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
00856 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
00857 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
00858 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
00859 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
00860 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
00861 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
00862 dst+=dstStride;\
00863 src+=srcStride;\
00864 }\
00865 }\
00866 \
00867 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00868 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00869 int i;\
00870 const int w=16;\
00871 for(i=0; i<w; i++)\
00872 {\
00873 const int src0= src[0*srcStride];\
00874 const int src1= src[1*srcStride];\
00875 const int src2= src[2*srcStride];\
00876 const int src3= src[3*srcStride];\
00877 const int src4= src[4*srcStride];\
00878 const int src5= src[5*srcStride];\
00879 const int src6= src[6*srcStride];\
00880 const int src7= src[7*srcStride];\
00881 const int src8= src[8*srcStride];\
00882 const int src9= src[9*srcStride];\
00883 const int src10= src[10*srcStride];\
00884 const int src11= src[11*srcStride];\
00885 const int src12= src[12*srcStride];\
00886 const int src13= src[13*srcStride];\
00887 const int src14= src[14*srcStride];\
00888 const int src15= src[15*srcStride];\
00889 const int src16= src[16*srcStride];\
00890 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
00891 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
00892 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
00893 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
00894 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
00895 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
00896 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
00897 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
00898 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
00899 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
00900 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
00901 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
00902 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
00903 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
00904 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
00905 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
00906 dst++;\
00907 src++;\
00908 }\
00909 }\
00910 \
00911 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
00912 uint8_t half[64];\
00913 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00914 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
00915 }\
00916 \
00917 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
00918 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
00919 }\
00920 \
00921 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
00922 uint8_t half[64];\
00923 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00924 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
00925 }\
00926 \
00927 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
00928 uint8_t full[16*9];\
00929 uint8_t half[64];\
00930 copy_block9(full, src, 16, stride, 9);\
00931 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00932 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
00933 }\
00934 \
00935 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
00936 uint8_t full[16*9];\
00937 copy_block9(full, src, 16, stride, 9);\
00938 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
00939 }\
00940 \
00941 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
00942 uint8_t full[16*9];\
00943 uint8_t half[64];\
00944 copy_block9(full, src, 16, stride, 9);\
00945 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00946 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
00947 }\
00948 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
00949 uint8_t full[16*9];\
00950 uint8_t halfH[72];\
00951 uint8_t halfV[64];\
00952 uint8_t halfHV[64];\
00953 copy_block9(full, src, 16, stride, 9);\
00954 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00955 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
00956 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00957 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00958 }\
00959 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
00960 uint8_t full[16*9];\
00961 uint8_t halfH[72];\
00962 uint8_t halfHV[64];\
00963 copy_block9(full, src, 16, stride, 9);\
00964 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00965 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
00966 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00967 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00968 }\
00969 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
00970 uint8_t full[16*9];\
00971 uint8_t halfH[72];\
00972 uint8_t halfV[64];\
00973 uint8_t halfHV[64];\
00974 copy_block9(full, src, 16, stride, 9);\
00975 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00976 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
00977 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00978 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00979 }\
00980 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
00981 uint8_t full[16*9];\
00982 uint8_t halfH[72];\
00983 uint8_t halfHV[64];\
00984 copy_block9(full, src, 16, stride, 9);\
00985 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00986 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
00987 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00988 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00989 }\
00990 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
00991 uint8_t full[16*9];\
00992 uint8_t halfH[72];\
00993 uint8_t halfV[64];\
00994 uint8_t halfHV[64];\
00995 copy_block9(full, src, 16, stride, 9);\
00996 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00997 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
00998 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00999 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01000 }\
01001 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01002 uint8_t full[16*9];\
01003 uint8_t halfH[72];\
01004 uint8_t halfHV[64];\
01005 copy_block9(full, src, 16, stride, 9);\
01006 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01007 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01008 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01009 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01010 }\
01011 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01012 uint8_t full[16*9];\
01013 uint8_t halfH[72];\
01014 uint8_t halfV[64];\
01015 uint8_t halfHV[64];\
01016 copy_block9(full, src, 16, stride, 9);\
01017 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
01018 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01019 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01020 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01021 }\
01022 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01023 uint8_t full[16*9];\
01024 uint8_t halfH[72];\
01025 uint8_t halfHV[64];\
01026 copy_block9(full, src, 16, stride, 9);\
01027 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01028 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01029 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01030 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01031 }\
01032 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01033 uint8_t halfH[72];\
01034 uint8_t halfHV[64];\
01035 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01036 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01037 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
01038 }\
01039 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01040 uint8_t halfH[72];\
01041 uint8_t halfHV[64];\
01042 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01043 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01044 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01045 }\
01046 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01047 uint8_t full[16*9];\
01048 uint8_t halfH[72];\
01049 uint8_t halfV[64];\
01050 uint8_t halfHV[64];\
01051 copy_block9(full, src, 16, stride, 9);\
01052 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01053 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01054 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01055 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01056 }\
01057 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01058 uint8_t full[16*9];\
01059 uint8_t halfH[72];\
01060 copy_block9(full, src, 16, stride, 9);\
01061 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01062 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01063 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01064 }\
01065 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01066 uint8_t full[16*9];\
01067 uint8_t halfH[72];\
01068 uint8_t halfV[64];\
01069 uint8_t halfHV[64];\
01070 copy_block9(full, src, 16, stride, 9);\
01071 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01072 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01073 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01074 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01075 }\
01076 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01077 uint8_t full[16*9];\
01078 uint8_t halfH[72];\
01079 copy_block9(full, src, 16, stride, 9);\
01080 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01081 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01082 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01083 }\
01084 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01085 uint8_t halfH[72];\
01086 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01087 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01088 }\
01089 \
01090 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01091 uint8_t half[256];\
01092 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01093 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
01094 }\
01095 \
01096 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01097 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01098 }\
01099 \
01100 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01101 uint8_t half[256];\
01102 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01103 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
01104 }\
01105 \
01106 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01107 uint8_t full[24*17];\
01108 uint8_t half[256];\
01109 copy_block17(full, src, 24, stride, 17);\
01110 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01111 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
01112 }\
01113 \
01114 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01115 uint8_t full[24*17];\
01116 copy_block17(full, src, 24, stride, 17);\
01117 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
01118 }\
01119 \
01120 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01121 uint8_t full[24*17];\
01122 uint8_t half[256];\
01123 copy_block17(full, src, 24, stride, 17);\
01124 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01125 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
01126 }\
01127 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01128 uint8_t full[24*17];\
01129 uint8_t halfH[272];\
01130 uint8_t halfV[256];\
01131 uint8_t halfHV[256];\
01132 copy_block17(full, src, 24, stride, 17);\
01133 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01134 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01135 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01136 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01137 }\
01138 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01139 uint8_t full[24*17];\
01140 uint8_t halfH[272];\
01141 uint8_t halfHV[256];\
01142 copy_block17(full, src, 24, stride, 17);\
01143 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01144 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01145 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01146 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01147 }\
01148 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01149 uint8_t full[24*17];\
01150 uint8_t halfH[272];\
01151 uint8_t halfV[256];\
01152 uint8_t halfHV[256];\
01153 copy_block17(full, src, 24, stride, 17);\
01154 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01155 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01156 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01157 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01158 }\
01159 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01160 uint8_t full[24*17];\
01161 uint8_t halfH[272];\
01162 uint8_t halfHV[256];\
01163 copy_block17(full, src, 24, stride, 17);\
01164 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01165 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01166 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01167 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01168 }\
01169 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01170 uint8_t full[24*17];\
01171 uint8_t halfH[272];\
01172 uint8_t halfV[256];\
01173 uint8_t halfHV[256];\
01174 copy_block17(full, src, 24, stride, 17);\
01175 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01176 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01177 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01178 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01179 }\
01180 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01181 uint8_t full[24*17];\
01182 uint8_t halfH[272];\
01183 uint8_t halfHV[256];\
01184 copy_block17(full, src, 24, stride, 17);\
01185 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01186 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01187 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01188 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01189 }\
01190 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01191 uint8_t full[24*17];\
01192 uint8_t halfH[272];\
01193 uint8_t halfV[256];\
01194 uint8_t halfHV[256];\
01195 copy_block17(full, src, 24, stride, 17);\
01196 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
01197 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01198 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01199 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01200 }\
01201 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01202 uint8_t full[24*17];\
01203 uint8_t halfH[272];\
01204 uint8_t halfHV[256];\
01205 copy_block17(full, src, 24, stride, 17);\
01206 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01207 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01208 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01209 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01210 }\
01211 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01212 uint8_t halfH[272];\
01213 uint8_t halfHV[256];\
01214 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01215 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01216 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01217 }\
01218 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01219 uint8_t halfH[272];\
01220 uint8_t halfHV[256];\
01221 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01222 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01223 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01224 }\
01225 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01226 uint8_t full[24*17];\
01227 uint8_t halfH[272];\
01228 uint8_t halfV[256];\
01229 uint8_t halfHV[256];\
01230 copy_block17(full, src, 24, stride, 17);\
01231 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01232 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01233 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01234 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01235 }\
01236 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01237 uint8_t full[24*17];\
01238 uint8_t halfH[272];\
01239 copy_block17(full, src, 24, stride, 17);\
01240 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01241 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01242 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01243 }\
01244 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01245 uint8_t full[24*17];\
01246 uint8_t halfH[272];\
01247 uint8_t halfV[256];\
01248 uint8_t halfHV[256];\
01249 copy_block17(full, src, 24, stride, 17);\
01250 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01251 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01252 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01253 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01254 }\
01255 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01256 uint8_t full[24*17];\
01257 uint8_t halfH[272];\
01258 copy_block17(full, src, 24, stride, 17);\
01259 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01260 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01261 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01262 }\
01263 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01264 uint8_t halfH[272];\
01265 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01266 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01267 }
01268
/* Store primitives plugged into the QPEL_MC template above.  'b' is a raw
 * filter sum scaled by 32 (hence the >>5); each op adds a rounding bias
 * (16 for rounded, 15 for the no_rnd variants), shifts down and clips via
 * the cm[] crop table, then either stores the result (op_put) or averages
 * it with the existing destination pixel (op_avg). */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the three qpel MC function families: rounded put,
 * no-rounding put, and rounded avg.  op_avg_no_rnd is defined above for
 * symmetry but no family is instantiated with it here. */
QPEL_MC(0, put_ , _ , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_ , _ , op_avg)

#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
01282
/* Full-pel (mc00) positions need no filtering: alias the qpel mc00 entry
 * points to the plain block-copy helpers.  The no_rnd variants are
 * identical because rounding is irrelevant for a pure copy.
 * Fix: put_no_rnd_qpel16_mc00_c previously mapped to ff_put_pixels16x16_8_c,
 * which is inconsistent with put_qpel16_mc00_c above (same copy operation,
 * mapped to ff_put_pixels16x16_c) and does not match the naming of the
 * other ff_ wrappers in this group. */
#define put_qpel8_mc00_c ff_put_pixels8x8_c
#define avg_qpel8_mc00_c ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
01289
01290 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
01291 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01292 int i;
01293
01294 for(i=0; i<h; i++){
01295 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
01296 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
01297 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
01298 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
01299 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
01300 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
01301 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
01302 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
01303 dst+=dstStride;
01304 src+=srcStride;
01305 }
01306 }
01307
#if CONFIG_RV40_DECODER
/* RV40 (3,3) sub-pel position: forward to the generic 8-bit xy half-pel
 * interpolation helpers from the pixel templates (put and avg flavours,
 * 16x16 and 8x8 block sizes). */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif
01322
#if CONFIG_DIRAC_DECODER
/* Dirac MC wrappers: adapt the generic 8-bit pixel helpers (plain copy,
 * 2-source and 4-source linear averages) to the DiracDSP interface, which
 * passes an array of up to five source pointers.  The 32-pixel-wide
 * variants process the block as two adjacent 16-wide halves.
 * Instantiated below for OPNAME = put and avg. */
#define DIRAC_MC(OPNAME)\
void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst   , src[0]   , stride, h);\
    OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst   , src[0]   , src[1]   , stride, stride, stride, h);\
    OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst   , src[0]   , src[1]   , src[2]   , src[3]   , stride, stride, stride, stride, stride, h);\
    OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
}
DIRAC_MC(put)
DIRAC_MC(avg)
#endif
01367
01368 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
01369 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01370 int i;
01371
01372 for(i=0; i<w; i++){
01373 const int src_1= src[ -srcStride];
01374 const int src0 = src[0 ];
01375 const int src1 = src[ srcStride];
01376 const int src2 = src[2*srcStride];
01377 const int src3 = src[3*srcStride];
01378 const int src4 = src[4*srcStride];
01379 const int src5 = src[5*srcStride];
01380 const int src6 = src[6*srcStride];
01381 const int src7 = src[7*srcStride];
01382 const int src8 = src[8*srcStride];
01383 const int src9 = src[9*srcStride];
01384 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
01385 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
01386 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
01387 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
01388 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
01389 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
01390 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
01391 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
01392 src++;
01393 dst++;
01394 }
01395 }
01396
/* mspel position (1,0): average the source block with its horizontally
 * lowpass-filtered version. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
}
01402
/* mspel position (2,0): pure horizontal lowpass filter, straight to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
01406
/* mspel position (3,0): average the source shifted one pixel right with
 * the horizontally lowpass-filtered block. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
}
01412
/* mspel position (0,2): pure vertical lowpass filter, straight to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
01416
/* mspel position (1,2): average of the vertically filtered source and the
 * h-then-v filtered block.  halfH is computed from src-stride over 11 rows
 * (rows -1..9) so the vertical pass on halfH+8 (row 0) has the extra top
 * and bottom context it reads. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* mspel position (3,2): like mc12 but the vertical-only filter runs on the
 * source shifted one pixel right (src+1). */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* mspel position (2,2): horizontal then vertical lowpass; halfH again
 * carries one extra row above and below (11 rows from src-stride) for the
 * vertical pass starting at halfH+8. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
01440
/* H.263 deblocking across a horizontal block edge: filters the two rows on
 * each side of the edge (src points at the first row below it) for all 8
 * columns.  Filter strength is looked up from the quantizer. */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int x;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(x=0; x<8; x++){
            int d1, d2, ad1;
            int p0= src[x-2*stride];
            int p1= src[x-1*stride];
            int p2= src[x+0*stride];
            int p3= src[x+1*stride];
            /* edge gradient estimate; C division truncates toward zero */
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* piecewise-linear correction: full for small |d|, ramping
               back to zero for large |d| (preserves real edges) */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* clip to 0..255: if bit 8 is set the value over/underflowed;
               ~(p>>31) yields 0 for negative values, 255 for overflow */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[x-1*stride] = p1;
            src[x+0*stride] = p2;

            ad1= FFABS(d1)>>1;

            /* weaker correction for the outer pixels, bounded by |d1|/2 */
            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[x-2*stride] = p0 - d2;
            src[x+ stride] = p3 + d2;
        }
    }
}
01477
/* H.263 deblocking across a vertical block edge: same filter as
 * h263_v_loop_filter_c but applied row-wise to the two columns on each
 * side of the edge (src points at the first column right of it). */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int y;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(y=0; y<8; y++){
            int d1, d2, ad1;
            int p0= src[y*stride-2];
            int p1= src[y*stride-1];
            int p2= src[y*stride+0];
            int p3= src[y*stride+1];
            /* edge gradient estimate; C division truncates toward zero */
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* piecewise-linear correction, tapering off for strong edges */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* clip to 0..255 (see h263_v_loop_filter_c) */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[y*stride-1] = p1;
            src[y*stride+0] = p2;

            ad1= FFABS(d1)>>1;

            /* weaker correction for the outer pixels, bounded by |d1|/2 */
            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[y*stride-2] = p0 - d2;
            src[y*stride+1] = p3 + d2;
        }
    }
}
01514
/**
 * H.261 in-loop filter on an 8x8 block: separable [1 2 1]/4 smoothing.
 * First a vertical pass into temp[] (edge rows are passed through, scaled
 * by 4 to keep a uniform fixed-point scale), then a horizontal pass back
 * into src with rounding (edge columns only undo the x4 scale).
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int temp[64];
    int x, y;

    /* vertical [1 2 1] pass; top/bottom rows copied at 4x scale */
    for (x = 0; x < 8; x++) {
        temp[x]         = 4 * src[x];
        temp[x + 7 * 8] = 4 * src[x + 7 * stride];
    }
    for (y = 1; y < 7; y++)
        for (x = 0; x < 8; x++)
            temp[y * 8 + x] = src[(y - 1) * stride + x]
                            + 2 * src[y * stride + x]
                            + src[(y + 1) * stride + x];

    /* horizontal [1 2 1] pass with rounding; edge columns just rescale */
    for (y = 0; y < 8; y++) {
        src[y * stride]     = (temp[y * 8]     + 2) >> 2;
        src[y * stride + 7] = (temp[y * 8 + 7] + 2) >> 2;
        for (x = 1; x < 7; x++)
            src[y * stride + x] = (temp[y * 8 + x - 1]
                                 + 2 * temp[y * 8 + x]
                                 + temp[y * 8 + x + 1] + 8) >> 4;
    }
}
01541
/**
 * Sum of absolute differences (SAD) between two 16-pixel-wide blocks of
 * height h.  The context pointer v is unused.
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 16; j++)
            sum += abs(pix1[j] - pix2[j]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
01569
/**
 * SAD of pix1 against the horizontal half-pel interpolation of pix2
 * (avg2 of each pixel and its right neighbour), 16 wide, height h.
 * The context pointer v is unused.
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 16; j++)
            sum += abs(pix1[j] - avg2(pix2[j], pix2[j + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
01597
/**
 * SAD of pix1 against the vertical half-pel interpolation of pix2
 * (avg2 of each pixel and the one directly below), 16 wide, height h.
 * The context pointer v is unused.
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size; /* next row of the reference */
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 16; j++)
            sum += abs(pix1[j] - avg2(pix2[j], pix3[j]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
01627
/**
 * SAD of pix1 against the diagonal half-pel interpolation of pix2
 * (avg4 of the 2x2 neighbourhood), 16 wide, height h.  The context
 * pointer v is unused.
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size; /* next row of the reference */
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 16; j++)
            sum += abs(pix1[j] - avg4(pix2[j], pix2[j + 1], pix3[j], pix3[j + 1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
01657
/**
 * Sum of absolute differences (SAD) between two 8-pixel-wide blocks of
 * height h.  The context pointer v is unused.
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 8; j++)
            sum += abs(pix1[j] - pix2[j]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
01677
/**
 * SAD of pix1 against the horizontal half-pel interpolation of pix2
 * (avg2 of each pixel and its right neighbour), 8 wide, height h.
 * The context pointer v is unused.
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 8; j++)
            sum += abs(pix1[j] - avg2(pix2[j], pix2[j + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
01697
/**
 * SAD of pix1 against the vertical half-pel interpolation of pix2
 * (avg2 of each pixel and the one directly below), 8 wide, height h.
 * The context pointer v is unused.
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size; /* next row of the reference */
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 8; j++)
            sum += abs(pix1[j] - avg2(pix2[j], pix3[j]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
01719
/**
 * SAD of pix1 against the diagonal half-pel interpolation of pix2
 * (avg4 of the 2x2 neighbourhood), 8 wide, height h.  The context
 * pointer v is unused.
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size; /* next row of the reference */
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 8; j++)
            sum += abs(pix1[j] - avg4(pix2[j], pix2[j + 1], pix3[j], pix3[j + 1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
01741
/* Noise-aware SSE comparator for 16-wide blocks: score1 is the plain sum
 * of squared differences; score2 accumulates, per 2x2 neighbourhood, the
 * difference between the diagonal cross-gradients of s1 and s2, so blocks
 * whose local texture differs are penalised.  The weight comes from
 * avctx->nsse_weight when a context is available, else 8.  v is an
 * MpegEncContext* (may be NULL). */
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++){
            score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
        }
        if(y+1<h){ /* gradient term needs the row below */
            for(x=0; x<15; x++){
                score2+= FFABS( s1[x ] - s1[x +stride]
                - s1[x+1] + s1[x+1+stride])
                -FFABS( s2[x ] - s2[x +stride]
                - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else return score1 + FFABS(score2)*8;
}
01767
01768 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
01769 MpegEncContext *c = v;
01770 int score1=0;
01771 int score2=0;
01772 int x,y;
01773
01774 for(y=0; y<h; y++){
01775 for(x=0; x<8; x++){
01776 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
01777 }
01778 if(y+1<h){
01779 for(x=0; x<7; x++){
01780 score2+= FFABS( s1[x ] - s1[x +stride]
01781 - s1[x+1] + s1[x+1+stride])
01782 -FFABS( s2[x ] - s2[x +stride]
01783 - s2[x+1] + s2[x+1+stride]);
01784 }
01785 }
01786 s1+= stride;
01787 s2+= stride;
01788 }
01789
01790 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
01791 else return score1 + FFABS(score2)*8;
01792 }
01793
01794 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
01795 int i;
01796 unsigned int sum=0;
01797
01798 for(i=0; i<8*8; i++){
01799 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
01800 int w= weight[i];
01801 b>>= RECON_SHIFT;
01802 assert(-512<b && b<512);
01803
01804 sum += (w*b)*(w*b)>>4;
01805 }
01806 return sum>>2;
01807 }
01808
01809 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
01810 int i;
01811
01812 for(i=0; i<8*8; i++){
01813 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
01814 }
01815 }
01816
01825 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
01826 {
01827 int i;
01828 DCTELEM temp[64];
01829
01830 if(last<=0) return;
01831
01832
01833 for(i=0; i<=last; i++){
01834 const int j= scantable[i];
01835 temp[j]= block[j];
01836 block[j]=0;
01837 }
01838
01839 for(i=0; i<=last; i++){
01840 const int j= scantable[i];
01841 const int perm_j= permutation[j];
01842 block[perm_j]= temp[j];
01843 }
01844 }
01845
01846 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
01847 return 0;
01848 }
01849
/**
 * Fill a 6-entry comparison-function table from the DSPContext according
 * to the comparison type selected in the low byte of @p type.
 * Unknown types leave the entries zeroed and log an error.
 */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    /* clear first so an unknown type yields NULL entries, not garbage */
    memset(cmp, 0, sizeof(void*)*6);

    for(i=0; i<6; i++){
        switch(type&0xFF){   /* only the low byte selects the metric */
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#if CONFIG_DWT
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}
01909
01910 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
01911 long i;
01912 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
01913 long a = *(long*)(src+i);
01914 long b = *(long*)(dst+i);
01915 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
01916 }
01917 for(; i<w; i++)
01918 dst[i+0] += src[i+0];
01919 }
01920
01921 static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w){
01922 long i;
01923 #if !HAVE_FAST_UNALIGNED
01924 if((long)src2 & (sizeof(long)-1)){
01925 for(i=0; i+7<w; i+=8){
01926 dst[i+0] = src1[i+0]-src2[i+0];
01927 dst[i+1] = src1[i+1]-src2[i+1];
01928 dst[i+2] = src1[i+2]-src2[i+2];
01929 dst[i+3] = src1[i+3]-src2[i+3];
01930 dst[i+4] = src1[i+4]-src2[i+4];
01931 dst[i+5] = src1[i+5]-src2[i+5];
01932 dst[i+6] = src1[i+6]-src2[i+6];
01933 dst[i+7] = src1[i+7]-src2[i+7];
01934 }
01935 }else
01936 #endif
01937 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
01938 long a = *(long*)(src1+i);
01939 long b = *(long*)(src2+i);
01940 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
01941 }
01942 for(; i<w; i++)
01943 dst[i+0] = src1[i+0]-src2[i+0];
01944 }
01945
01946 static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
01947 int i;
01948 uint8_t l, lt;
01949
01950 l= *left;
01951 lt= *left_top;
01952
01953 for(i=0; i<w; i++){
01954 l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
01955 lt= src1[i];
01956 dst[i]= l;
01957 }
01958
01959 *left= l;
01960 *left_top= lt;
01961 }
01962
01963 static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
01964 int i;
01965 uint8_t l, lt;
01966
01967 l= *left;
01968 lt= *left_top;
01969
01970 for(i=0; i<w; i++){
01971 const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
01972 lt= src1[i];
01973 l= src2[i];
01974 dst[i]= l - pred;
01975 }
01976
01977 *left= l;
01978 *left_top= lt;
01979 }
01980
01981 static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
01982 int i;
01983
01984 for(i=0; i<w-1; i++){
01985 acc+= src[i];
01986 dst[i]= acc;
01987 i++;
01988 acc+= src[i];
01989 dst[i]= acc;
01990 }
01991
01992 for(; i<w; i++){
01993 acc+= src[i];
01994 dst[i]= acc;
01995 }
01996
01997 return acc;
01998 }
01999
02000 #if HAVE_BIGENDIAN
02001 #define B 3
02002 #define G 2
02003 #define R 1
02004 #define A 0
02005 #else
02006 #define B 0
02007 #define G 1
02008 #define R 2
02009 #define A 3
02010 #endif
02011 static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
02012 int i;
02013 int r,g,b,a;
02014 r= *red;
02015 g= *green;
02016 b= *blue;
02017 a= *alpha;
02018
02019 for(i=0; i<w; i++){
02020 b+= src[4*i+B];
02021 g+= src[4*i+G];
02022 r+= src[4*i+R];
02023 a+= src[4*i+A];
02024
02025 dst[4*i+B]= b;
02026 dst[4*i+G]= g;
02027 dst[4*i+R]= r;
02028 dst[4*i+A]= a;
02029 }
02030
02031 *red= r;
02032 *green= g;
02033 *blue= b;
02034 *alpha= a;
02035 }
02036 #undef B
02037 #undef G
02038 #undef R
02039 #undef A
02040
02041 #define BUTTERFLY2(o1,o2,i1,i2) \
02042 o1= (i1)+(i2);\
02043 o2= (i1)-(i2);
02044
02045 #define BUTTERFLY1(x,y) \
02046 {\
02047 int a,b;\
02048 a= x;\
02049 b= y;\
02050 x= a+b;\
02051 y= a-b;\
02052 }
02053
02054 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
02055
/**
 * 8x8 SATD: apply an 8x8 Hadamard transform to the pixel-wise difference
 * src - dst and return the sum of absolute transform coefficients.
 * The first loop runs three butterfly stages over each row; the second
 * runs the column stages, with BUTTERFLYA folding the final stage into
 * the |.|-accumulation.
 */
static int hadamard8_diff8x8_c( void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        /* row transform: stage 1 on the pixel differences */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        /* row transform: stage 2 */
        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        /* row transform: stage 3 */
        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* column transform: stages 1 and 2 */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        /* final column stage folded into the absolute-value sum */
        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}
02100
/**
 * Intra 8x8 SATD: Hadamard transform of the source pixels themselves
 * (no reference), returning the sum of absolute coefficients minus the
 * DC term so the score is independent of the block's mean brightness.
 * Structure mirrors hadamard8_diff8x8_c.
 */
static int hadamard8_intra8x8_c( void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        /* row transform: stage 1 directly on the source pixels */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        /* row transform: stage 2 */
        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        /* row transform: stage 3 */
        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* column transform: stages 1 and 2 */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        /* final column stage folded into the absolute-value sum */
        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    /* remove the DC contribution: |temp[0]+temp[32]| was the DC part
       added by the first BUTTERFLYA of column 0 */
    sum -= FFABS(temp[8*0] + temp[8*4]);

    return sum;
}
02148
/**
 * Transform-domain SAD: forward-DCT the src1 - src2 difference block and
 * return the sum of absolute DCT coefficients.
 */
static int dct_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
02159
02160 #if CONFIG_GPL
02161 #define DCT8_1D {\
02162 const int s07 = SRC(0) + SRC(7);\
02163 const int s16 = SRC(1) + SRC(6);\
02164 const int s25 = SRC(2) + SRC(5);\
02165 const int s34 = SRC(3) + SRC(4);\
02166 const int a0 = s07 + s34;\
02167 const int a1 = s16 + s25;\
02168 const int a2 = s07 - s34;\
02169 const int a3 = s16 - s25;\
02170 const int d07 = SRC(0) - SRC(7);\
02171 const int d16 = SRC(1) - SRC(6);\
02172 const int d25 = SRC(2) - SRC(5);\
02173 const int d34 = SRC(3) - SRC(4);\
02174 const int a4 = d16 + d25 + (d07 + (d07>>1));\
02175 const int a5 = d07 - d34 - (d25 + (d25>>1));\
02176 const int a6 = d07 + d34 - (d16 + (d16>>1));\
02177 const int a7 = d16 - d25 + (d34 + (d34>>1));\
02178 DST(0, a0 + a1 ) ;\
02179 DST(1, a4 + (a7>>2)) ;\
02180 DST(2, a2 + (a3>>1)) ;\
02181 DST(3, a5 + (a6>>2)) ;\
02182 DST(4, a0 - a1 ) ;\
02183 DST(5, a6 - (a5>>2)) ;\
02184 DST(6, (a2>>1) - a3 ) ;\
02185 DST(7, (a4>>2) - a7 ) ;\
02186 }
02187
/**
 * SAD in the H.264-style 8x8 integer-transform domain (GPL-only path).
 * DCT8_1D is applied in place over the rows, then over the columns where
 * the DST() macro accumulates absolute values instead of storing them.
 */
static int dct264_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

/* row pass: transform each row in place */
#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

/* column pass: accumulate |coefficient| instead of storing */
#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
02211 #endif
02212
02213 static int dct_max8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
02214 MpegEncContext * const s= (MpegEncContext *)c;
02215 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
02216 int sum=0, i;
02217
02218 assert(h==8);
02219
02220 s->dsp.diff_pixels(temp, src1, src2, stride);
02221 s->dsp.fdct(temp);
02222
02223 for(i=0; i<64; i++)
02224 sum= FFMAX(sum, FFABS(temp[i]));
02225
02226 return sum;
02227 }
02228
/**
 * Quantization-error metric: quantize, dequantize and inverse-transform
 * the difference block at the current qscale (inter path), and return
 * the squared error against the saved unquantized difference.
 */
static int quant_psnr8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
    DCTELEM * const bak = temp+64;
    int sum=0, i;

    assert(h==8);
    s->mb_intra=0;   /* force the inter quantization path */

    s->dsp.diff_pixels(temp, src1, src2, stride);

    /* keep an untouched copy of the difference for comparison */
    memcpy(bak, temp, 64*sizeof(DCTELEM));

    /* NOTE(review): fast_dct_quantize is presumed to transform+quantize
     * temp in place — it is paired with dequantize + idct below; confirm
     * against its definition. */
    s->block_last_index[0]= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct_8(temp);

    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
02251
/**
 * Rate-distortion score of an 8x8 block: estimated VLC bit cost of the
 * quantized difference coefficients plus the SSE of the reconstruction,
 * with the rate term weighted by qscale^2 (109/128 factor).
 */
static int rd8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    /* work on local aligned copies so the reconstruction below does not
       touch the caller's buffers */
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    /* pick the VLC length tables for the current coding mode; intra
       additionally pays for the DC coefficient */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* walk the coefficients in scan order, counting (run, level)
           code lengths; the final nonzero one uses the "last" table */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;                    /* bias into 0..127 index range */
                if((level&(~127)) == 0){      /* fits a regular VLC code? */
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;        /* needs the escape code */
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);                   /* last coefficient is nonzero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    /* reconstruct: dequantize and add the IDCT back onto the prediction */
    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(lsrc2, 8, temp);

    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
02327
/**
 * Rate-only metric: estimated VLC bit cost of coding the quantized
 * src1 - src2 difference block (same counting scheme as rd8x8_c but
 * without the reconstruction/distortion term).
 */
static int bit8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    /* pick the VLC length tables for the current coding mode; intra
       additionally pays for the DC coefficient */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* count (run, level) code lengths in scan order; the final
           nonzero coefficient uses the "last" table */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;                    /* bias into 0..127 index range */
                if((level&(~127)) == 0){      /* fits a regular VLC code? */
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;        /* needs the escape code */
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);                   /* last coefficient is nonzero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
02386
/**
 * Intra vertical SAD: sum of absolute differences between vertically
 * adjacent pixels within one block (a vertical-smoothness measure).
 * Instantiated below for widths 8 and 16; inner loop unrolled by 4.
 */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                          \
    int x,y;                                                                              \
                                                                                          \
    for(y=1; y<h; y++){                                                                   \
        for(x=0; x<size; x+=4){                                                           \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])         \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);        \
        }                                                                                 \
        s+= stride;                                                                       \
    }                                                                                     \
                                                                                          \
    return score;                                                                         \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
02404
02405 static int vsad16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
02406 int score=0;
02407 int x,y;
02408
02409 for(y=1; y<h; y++){
02410 for(x=0; x<16; x++){
02411 score+= FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
02412 }
02413 s1+= stride;
02414 s2+= stride;
02415 }
02416
02417 return score;
02418 }
02419
/* squared value helper used by the VSSE metrics below */
#define SQ(a) ((a)*(a))
/**
 * Intra vertical SSE: sum of squared differences between vertically
 * adjacent pixels within one block. Instantiated below for widths 8
 * and 16; inner loop unrolled by 4.
 */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                          \
    int x,y;                                                                              \
                                                                                          \
    for(y=1; y<h; y++){                                                                   \
        for(x=0; x<size; x+=4){                                                           \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])               \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);              \
        }                                                                                 \
        s+= stride;                                                                       \
    }                                                                                     \
                                                                                          \
    return score;                                                                         \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
02438
02439 static int vsse16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
02440 int score=0;
02441 int x,y;
02442
02443 for(y=1; y<h; y++){
02444 for(x=0; x<16; x++){
02445 score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
02446 }
02447 s1+= stride;
02448 s2+= stride;
02449 }
02450
02451 return score;
02452 }
02453
02454 static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
02455 int size){
02456 int score=0;
02457 int i;
02458 for(i=0; i<size; i++)
02459 score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
02460 return score;
02461 }
02462
/* Build 16x16 variants of the 8x8 comparison functions above.
 * NOTE(review): WRAPPER8_16_SQ is defined earlier in this file (not in
 * this hunk); presumably it sums the 8x8 function over the four 8x8
 * quadrants — confirm against the macro definition. */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
02473
/* Element-wise float multiply: dst[i] = src0[i] * src1[i]. */
static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
    int k;

    for (k = 0; k < len; k++)
        dst[k] = src0[k] * src1[k];
}
02479
/* Element-wise multiply with src1 read back-to-front:
 * dst[i] = src0[i] * src1[len-1-i]. */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int k;

    for (k = 0; k < len; k++)
        dst[k] = src0[k] * src1[len - 1 - k];
}
02486
/* Fused element-wise multiply-add: dst[i] = src0[i]*src1[i] + src2[i]. */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    int k;

    for (k = 0; k < len; k++)
        dst[k] = src0[k] * src1[k] + src2[k];
}
02492
/**
 * Overlap-add windowing: combine src0 (read forward from its second
 * half, i.e. src0[len+k]) with src1 (read backward) through a 2*len
 * window, producing 2*len output samples:
 *   dst[k]         = src0[len+k]*win[2*len-1-k] - src1[len-1-k]*win[k]
 *   dst[2*len-1-k] = src0[len+k]*win[k]        + src1[len-1-k]*win[2*len-1-k]
 */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const int   m  = 2 * len - 1 - k;   /* mirrored index */
        const float s0 = src0[len + k];
        const float s1 = src1[len - 1 - k];
        const float wa = win[k];
        const float wb = win[m];

        dst[k] = s0 * wb - s1 * wa;
        dst[m] = s0 * wa + s1 * wb;
    }
}
02509
/* Scale a float vector by a scalar: dst[i] = src[i] * mul. */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k;

    for (k = 0; k < len; k++)
        dst[k] = src[k] * mul;
}
02517
/* Scalar multiply-accumulate: dst[i] += src[i] * mul. */
static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k;

    for (k = 0; k < len; k++)
        dst[k] += src[k] * mul;
}
02525
/* In-place butterfly: (v1[i], v2[i]) <- (v1[i]+v2[i], v1[i]-v2[i]). */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const float diff = v1[k] - v2[k];
        v1[k] = v1[k] + v2[k];
        v2[k] = diff;
    }
}
02536
/* Butterfly with interleaved output: dst[2i] = src0[i]+src1[i],
 * dst[2i+1] = src0[i]-src1[i]. */
static void butterflies_float_interleave_c(float *dst, const float *src0,
                                           const float *src1, int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const float a = src0[k];
        const float b = src1[k];

        dst[2 * k    ] = a + b;
        dst[2 * k + 1] = a - b;
    }
}
02548
/* Dot product of two float vectors. */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    int k;
    float acc = 0.0;

    for (k = 0; k < len; k++)
        acc += v1[k] * v2[k];

    return acc;
}
02559
02560 static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
02561 uint32_t maxi, uint32_t maxisign)
02562 {
02563
02564 if(a > mini) return mini;
02565 else if((a^(1U<<31)) > maxisign) return maxi;
02566 else return a;
02567 }
02568
02569 static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
02570 int i;
02571 uint32_t mini = *(uint32_t*)min;
02572 uint32_t maxi = *(uint32_t*)max;
02573 uint32_t maxisign = maxi ^ (1U<<31);
02574 uint32_t *dsti = (uint32_t*)dst;
02575 const uint32_t *srci = (const uint32_t*)src;
02576 for(i=0; i<len; i+=8) {
02577 dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
02578 dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
02579 dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
02580 dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
02581 dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
02582 dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
02583 dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
02584 dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
02585 }
02586 }
/* Clip each float in src to [min, max]; dispatches to the bit-twiddling
 * path when the bounds straddle zero. len is processed 8 at a time. */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i, j;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
    } else {
        for (i = 0; i < len; i += 8)
            for (j = 0; j < 8; j++)
                dst[i + j] = av_clipf(src[i + j], min, max);
    }
}
02604
02605 static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
02606 {
02607 int res = 0;
02608
02609 while (order--)
02610 res += *v1++ * *v2++;
02611
02612 return res;
02613 }
02614
02615 static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
02616 {
02617 int res = 0;
02618 while (order--) {
02619 res += *v1 * *v2++;
02620 *v1++ += mul * *v3++;
02621 }
02622 return res;
02623 }
02624
/**
 * Apply a symmetric 16-bit window with Q15 rounding: sample i and its
 * mirror len-1-i are both scaled by window[i], result is
 * (x*w + 2^14) >> 15. Assumes window holds at least len/2 coefficients —
 * TODO(review) confirm against callers. MUL16 is the project's
 * 16x16 multiply helper (mathops.h is included above).
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int i;
    int len2 = len >> 1;

    for (i = 0; i < len2; i++) {
        int16_t w = window[i];
        output[i] = (MUL16(input[i], w) + (1 << 14)) >> 15;
        output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
    }
}
02637
/**
 * Clip each int32 in src into [min, max] and store in dst.
 * NOTE(review): the loop is unrolled by 8 and decrements an *unsigned*
 * counter after the first pass, so len must be a non-zero multiple of 8;
 * len == 0 or a non-multiple would underflow and overrun. Presumably
 * callers guarantee this — confirm against the DSPContext contract.
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        len -= 8;
    } while (len > 0);
}
02653
/* 11-bit fixed-point IDCT weights used by the WMV2 IDCT below */
#define W0 2048
#define W1 2841
#define W2 2676
#define W3 2408
#define W4 2048
#define W5 1609
#define W6 1108
#define W7 565

/* One 8-point IDCT over a row of the block, in place, with the result
 * rounded down from the 11-bit weight precision (>> 8 with rounding). */
static void wmv2_idct_row(short * b)
{
    int od1, od3, od5, od7;   /* odd-coefficient partial sums  */
    int ev0, ev1, ev2, ev3;   /* even-coefficient partial sums */
    int m1, m2;

    /* even part */
    ev2 = W2 * b[2] + W6 * b[6];
    ev3 = W6 * b[2] - W2 * b[6];
    ev0 = W0 * b[0] + W0 * b[4];
    ev1 = W0 * b[0] - W0 * b[4];

    /* odd part */
    od1 = W1 * b[1] + W7 * b[7];
    od7 = W7 * b[1] - W1 * b[7];
    od5 = W5 * b[5] + W3 * b[3];
    od3 = W3 * b[5] - W5 * b[3];

    /* 181/256 ~= 1/sqrt(2) rotation of the odd part */
    m1 = (181 * (od1 - od5 + od7 - od3) + 128) >> 8;
    m2 = (181 * (od1 - od5 - od7 + od3) + 128) >> 8;

    b[0] = (ev0 + ev2 + od1 + od5 + (1 << 7)) >> 8;
    b[1] = (ev1 + ev3 + m1        + (1 << 7)) >> 8;
    b[2] = (ev1 - ev3 + m2        + (1 << 7)) >> 8;
    b[3] = (ev0 - ev2 + od7 + od3 + (1 << 7)) >> 8;
    b[4] = (ev0 - ev2 - od7 - od3 + (1 << 7)) >> 8;
    b[5] = (ev1 - ev3 - m2        + (1 << 7)) >> 8;
    b[6] = (ev1 + ev3 - m1        + (1 << 7)) >> 8;
    b[7] = (ev0 + ev2 - od1 - od5 + (1 << 7)) >> 8;
}
02689 static void wmv2_idct_col(short * b)
02690 {
02691 int s1,s2;
02692 int a0,a1,a2,a3,a4,a5,a6,a7;
02693
02694 a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
02695 a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
02696 a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
02697 a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
02698 a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
02699 a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
02700 a0 = (W0*b[8*0]+W0*b[8*4] )>>3;
02701 a4 = (W0*b[8*0]-W0*b[8*4] )>>3;
02702
02703 s1 = (181*(a1-a5+a7-a3)+128)>>8;
02704 s2 = (181*(a1-a5-a7+a3)+128)>>8;
02705
02706 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
02707 b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
02708 b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
02709 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
02710
02711 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
02712 b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
02713 b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
02714 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
02715 }
/* Full 8x8 WMV2 IDCT: rows first, then columns, in place. */
void ff_wmv2_idct_c(short * block){
    int i;

    for (i = 0; i < 8; i++)
        wmv2_idct_row(block + 8 * i);
    for (i = 0; i < 8; i++)
        wmv2_idct_col(block + i);
}
02726
02727
/* WMV2 IDCT followed by clamped store / clamped add to the picture. */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}
/* Reference (jrevdct) IDCT followed by clamped store / clamped add. */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct (block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct (block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}

/* 4x4 reduced-resolution (lowres=1) variants */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}

/* 2x2 reduced-resolution (lowres=2) variants */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}

/* 1x1 (lowres=3) variants: only the DC term survives, (DC+4)>>3 */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    dest[0] = av_clip_uint8((block[0] + 4)>>3);
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
}
02779
/* Callback that intentionally does nothing; all parameters are ignored. */
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
02781
02782
02783 av_cold void ff_dsputil_static_init(void)
02784 {
02785 int i;
02786
02787 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
02788 for(i=0;i<MAX_NEG_CROP;i++) {
02789 ff_cropTbl[i] = 0;
02790 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
02791 }
02792
02793 for(i=0;i<512;i++) {
02794 ff_squareTbl[i] = (i - 256) * (i - 256);
02795 }
02796
02797 for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
02798 }
02799
/**
 * Verify that the compiler honours 16-byte alignment of stack variables;
 * logs a one-time error on MMX/AltiVec builds if it does not.
 * @return 0 if the stack is properly aligned, -1 otherwise.
 */
int ff_check_alignment(void){
    static int did_fail=0;   /* ensures the warning is printed only once */
    LOCAL_ALIGNED_16(int, aligned, [4]);

    if((intptr_t)aligned & 15){
        if(!did_fail){
#if HAVE_MMX || HAVE_ALTIVEC
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to FFmpeg developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
02819
02820 av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
02821 {
02822 int i, j;
02823
02824 ff_check_alignment();
02825
02826 #if CONFIG_ENCODERS
02827 if (avctx->bits_per_raw_sample == 10) {
02828 c->fdct = ff_jpeg_fdct_islow_10;
02829 c->fdct248 = ff_fdct248_islow_10;
02830 } else {
02831 if(avctx->dct_algo==FF_DCT_FASTINT) {
02832 c->fdct = ff_fdct_ifast;
02833 c->fdct248 = ff_fdct_ifast248;
02834 }
02835 else if(avctx->dct_algo==FF_DCT_FAAN) {
02836 c->fdct = ff_faandct;
02837 c->fdct248 = ff_faandct248;
02838 }
02839 else {
02840 c->fdct = ff_jpeg_fdct_islow_8;
02841 c->fdct248 = ff_fdct248_islow_8;
02842 }
02843 }
02844 #endif //CONFIG_ENCODERS
02845
02846 if(avctx->lowres==1){
02847 c->idct_put= ff_jref_idct4_put;
02848 c->idct_add= ff_jref_idct4_add;
02849 c->idct = ff_j_rev_dct4;
02850 c->idct_permutation_type= FF_NO_IDCT_PERM;
02851 }else if(avctx->lowres==2){
02852 c->idct_put= ff_jref_idct2_put;
02853 c->idct_add= ff_jref_idct2_add;
02854 c->idct = ff_j_rev_dct2;
02855 c->idct_permutation_type= FF_NO_IDCT_PERM;
02856 }else if(avctx->lowres==3){
02857 c->idct_put= ff_jref_idct1_put;
02858 c->idct_add= ff_jref_idct1_add;
02859 c->idct = ff_j_rev_dct1;
02860 c->idct_permutation_type= FF_NO_IDCT_PERM;
02861 }else{
02862 if (avctx->bits_per_raw_sample == 10) {
02863 c->idct_put = ff_simple_idct_put_10;
02864 c->idct_add = ff_simple_idct_add_10;
02865 c->idct = ff_simple_idct_10;
02866 c->idct_permutation_type = FF_NO_IDCT_PERM;
02867 } else {
02868 if(avctx->idct_algo==FF_IDCT_INT){
02869 c->idct_put= ff_jref_idct_put;
02870 c->idct_add= ff_jref_idct_add;
02871 c->idct = ff_j_rev_dct;
02872 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
02873 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
02874 avctx->idct_algo==FF_IDCT_VP3){
02875 c->idct_put= ff_vp3_idct_put_c;
02876 c->idct_add= ff_vp3_idct_add_c;
02877 c->idct = ff_vp3_idct_c;
02878 c->idct_permutation_type= FF_NO_IDCT_PERM;
02879 }else if(avctx->idct_algo==FF_IDCT_WMV2){
02880 c->idct_put= ff_wmv2_idct_put_c;
02881 c->idct_add= ff_wmv2_idct_add_c;
02882 c->idct = ff_wmv2_idct_c;
02883 c->idct_permutation_type= FF_NO_IDCT_PERM;
02884 }else if(avctx->idct_algo==FF_IDCT_FAAN){
02885 c->idct_put= ff_faanidct_put;
02886 c->idct_add= ff_faanidct_add;
02887 c->idct = ff_faanidct;
02888 c->idct_permutation_type= FF_NO_IDCT_PERM;
02889 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
02890 c->idct_put= ff_ea_idct_put_c;
02891 c->idct_permutation_type= FF_NO_IDCT_PERM;
02892 }else{
02893 c->idct_put = ff_simple_idct_put_8;
02894 c->idct_add = ff_simple_idct_add_8;
02895 c->idct = ff_simple_idct_8;
02896 c->idct_permutation_type= FF_NO_IDCT_PERM;
02897 }
02898 }
02899 }
02900
02901 c->diff_pixels = diff_pixels_c;
02902 c->put_pixels_clamped = ff_put_pixels_clamped_c;
02903 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
02904 c->add_pixels_clamped = ff_add_pixels_clamped_c;
02905 c->sum_abs_dctelem = sum_abs_dctelem_c;
02906 c->gmc1 = gmc1_c;
02907 c->gmc = ff_gmc_c;
02908 c->pix_sum = pix_sum_c;
02909 c->pix_norm1 = pix_norm1_c;
02910
02911 c->fill_block_tab[0] = fill_block16_c;
02912 c->fill_block_tab[1] = fill_block8_c;
02913
02914
02915 c->pix_abs[0][0] = pix_abs16_c;
02916 c->pix_abs[0][1] = pix_abs16_x2_c;
02917 c->pix_abs[0][2] = pix_abs16_y2_c;
02918 c->pix_abs[0][3] = pix_abs16_xy2_c;
02919 c->pix_abs[1][0] = pix_abs8_c;
02920 c->pix_abs[1][1] = pix_abs8_x2_c;
02921 c->pix_abs[1][2] = pix_abs8_y2_c;
02922 c->pix_abs[1][3] = pix_abs8_xy2_c;
02923
02924 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
02925 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
02926 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
02927 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
02928 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
02929 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
02930 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
02931 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
02932 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
02933
02934 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
02935 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
02936 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
02937 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
02938 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
02939 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
02940 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
02941 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
02942 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
02943
02944 #define dspfunc(PFX, IDX, NUM) \
02945 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
02946 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
02947 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
02948 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
02949 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
02950 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
02951 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
02952 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
02953 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
02954 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
02955 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
02956 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
02957 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
02958 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
02959 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
02960 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
02961
02962 dspfunc(put_qpel, 0, 16);
02963 dspfunc(put_no_rnd_qpel, 0, 16);
02964
02965 dspfunc(avg_qpel, 0, 16);
02966
02967
02968 dspfunc(put_qpel, 1, 8);
02969 dspfunc(put_no_rnd_qpel, 1, 8);
02970
02971 dspfunc(avg_qpel, 1, 8);
02972
02973
02974 #undef dspfunc
02975
02976 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
02977 ff_mlp_init(c, avctx);
02978 #endif
02979 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
02980 ff_intrax8dsp_init(c,avctx);
02981 #endif
02982
02983 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
02984 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
02985 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
02986 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
02987 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
02988 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
02989 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
02990 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
02991
02992 #define SET_CMP_FUNC(name) \
02993 c->name[0]= name ## 16_c;\
02994 c->name[1]= name ## 8x8_c;
02995
02996 SET_CMP_FUNC(hadamard8_diff)
02997 c->hadamard8_diff[4]= hadamard8_intra16_c;
02998 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
02999 SET_CMP_FUNC(dct_sad)
03000 SET_CMP_FUNC(dct_max)
03001 #if CONFIG_GPL
03002 SET_CMP_FUNC(dct264_sad)
03003 #endif
03004 c->sad[0]= pix_abs16_c;
03005 c->sad[1]= pix_abs8_c;
03006 c->sse[0]= sse16_c;
03007 c->sse[1]= sse8_c;
03008 c->sse[2]= sse4_c;
03009 SET_CMP_FUNC(quant_psnr)
03010 SET_CMP_FUNC(rd)
03011 SET_CMP_FUNC(bit)
03012 c->vsad[0]= vsad16_c;
03013 c->vsad[4]= vsad_intra16_c;
03014 c->vsad[5]= vsad_intra8_c;
03015 c->vsse[0]= vsse16_c;
03016 c->vsse[4]= vsse_intra16_c;
03017 c->vsse[5]= vsse_intra8_c;
03018 c->nsse[0]= nsse16_c;
03019 c->nsse[1]= nsse8_c;
03020 #if CONFIG_DWT
03021 ff_dsputil_init_dwt(c);
03022 #endif
03023
03024 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
03025
03026 c->add_bytes= add_bytes_c;
03027 c->diff_bytes= diff_bytes_c;
03028 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
03029 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
03030 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
03031 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
03032 c->bswap_buf= bswap_buf;
03033 c->bswap16_buf = bswap16_buf;
03034
03035 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
03036 c->h263_h_loop_filter= h263_h_loop_filter_c;
03037 c->h263_v_loop_filter= h263_v_loop_filter_c;
03038 }
03039
03040 if (CONFIG_VP3_DECODER) {
03041 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
03042 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
03043 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
03044 }
03045
03046 c->h261_loop_filter= h261_loop_filter_c;
03047
03048 c->try_8x8basis= try_8x8basis_c;
03049 c->add_8x8basis= add_8x8basis_c;
03050
03051 #if CONFIG_VORBIS_DECODER
03052 c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling;
03053 #endif
03054 #if CONFIG_AC3_DECODER
03055 c->ac3_downmix = ff_ac3_downmix_c;
03056 #endif
03057 c->vector_fmul = vector_fmul_c;
03058 c->vector_fmul_reverse = vector_fmul_reverse_c;
03059 c->vector_fmul_add = vector_fmul_add_c;
03060 c->vector_fmul_window = vector_fmul_window_c;
03061 c->vector_clipf = vector_clipf_c;
03062 c->scalarproduct_int16 = scalarproduct_int16_c;
03063 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
03064 c->apply_window_int16 = apply_window_int16_c;
03065 c->vector_clip_int32 = vector_clip_int32_c;
03066 c->scalarproduct_float = scalarproduct_float_c;
03067 c->butterflies_float = butterflies_float_c;
03068 c->butterflies_float_interleave = butterflies_float_interleave_c;
03069 c->vector_fmul_scalar = vector_fmul_scalar_c;
03070 c->vector_fmac_scalar = vector_fmac_scalar_c;
03071
03072 c->shrink[0]= av_image_copy_plane;
03073 c->shrink[1]= ff_shrink22;
03074 c->shrink[2]= ff_shrink44;
03075 c->shrink[3]= ff_shrink88;
03076
03077 c->prefetch= just_return;
03078
03079 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
03080 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
03081
03082 #undef FUNC
03083 #undef FUNCC
03084 #define FUNC(f, depth) f ## _ ## depth
03085 #define FUNCC(f, depth) f ## _ ## depth ## _c
03086
03087 #define dspfunc1(PFX, IDX, NUM, depth)\
03088 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
03089 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
03090 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
03091 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
03092
03093 #define dspfunc2(PFX, IDX, NUM, depth)\
03094 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
03095 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
03096 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
03097 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
03098 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
03099 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
03100 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
03101 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
03102 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
03103 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
03104 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
03105 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
03106 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
03107 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
03108 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
03109 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
03110
03111
03112 #define BIT_DEPTH_FUNCS(depth, dct)\
03113 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
03114 c->draw_edges = FUNCC(draw_edges , depth);\
03115 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
03116 c->clear_block = FUNCC(clear_block ## dct , depth);\
03117 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
03118 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
03119 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
03120 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
03121 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
03122 \
03123 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
03124 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
03125 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
03126 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
03127 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
03128 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
03129 \
03130 dspfunc1(put , 0, 16, depth);\
03131 dspfunc1(put , 1, 8, depth);\
03132 dspfunc1(put , 2, 4, depth);\
03133 dspfunc1(put , 3, 2, depth);\
03134 dspfunc1(put_no_rnd, 0, 16, depth);\
03135 dspfunc1(put_no_rnd, 1, 8, depth);\
03136 dspfunc1(avg , 0, 16, depth);\
03137 dspfunc1(avg , 1, 8, depth);\
03138 dspfunc1(avg , 2, 4, depth);\
03139 dspfunc1(avg , 3, 2, depth);\
03140 dspfunc1(avg_no_rnd, 0, 16, depth);\
03141 dspfunc1(avg_no_rnd, 1, 8, depth);\
03142 \
03143 dspfunc2(put_h264_qpel, 0, 16, depth);\
03144 dspfunc2(put_h264_qpel, 1, 8, depth);\
03145 dspfunc2(put_h264_qpel, 2, 4, depth);\
03146 dspfunc2(put_h264_qpel, 3, 2, depth);\
03147 dspfunc2(avg_h264_qpel, 0, 16, depth);\
03148 dspfunc2(avg_h264_qpel, 1, 8, depth);\
03149 dspfunc2(avg_h264_qpel, 2, 4, depth);
03150
03151 switch (avctx->bits_per_raw_sample) {
03152 case 9:
03153 if (c->dct_bits == 32) {
03154 BIT_DEPTH_FUNCS(9, _32);
03155 } else {
03156 BIT_DEPTH_FUNCS(9, _16);
03157 }
03158 break;
03159 case 10:
03160 if (c->dct_bits == 32) {
03161 BIT_DEPTH_FUNCS(10, _32);
03162 } else {
03163 BIT_DEPTH_FUNCS(10, _16);
03164 }
03165 break;
03166 default:
03167 BIT_DEPTH_FUNCS(8, _16);
03168 break;
03169 }
03170
03171
03172 if (HAVE_MMX) ff_dsputil_init_mmx (c, avctx);
03173 if (ARCH_ARM) ff_dsputil_init_arm (c, avctx);
03174 if (HAVE_VIS) ff_dsputil_init_vis (c, avctx);
03175 if (ARCH_ALPHA) ff_dsputil_init_alpha (c, avctx);
03176 if (ARCH_PPC) ff_dsputil_init_ppc (c, avctx);
03177 if (HAVE_MMI) ff_dsputil_init_mmi (c, avctx);
03178 if (ARCH_SH4) ff_dsputil_init_sh4 (c, avctx);
03179 if (ARCH_BFIN) ff_dsputil_init_bfin (c, avctx);
03180
03181 for (i = 0; i < 4; i++) {
03182 for (j = 0; j < 16; j++) {
03183 if(!c->put_2tap_qpel_pixels_tab[i][j])
03184 c->put_2tap_qpel_pixels_tab[i][j] =
03185 c->put_h264_qpel_pixels_tab[i][j];
03186 if(!c->avg_2tap_qpel_pixels_tab[i][j])
03187 c->avg_2tap_qpel_pixels_tab[i][j] =
03188 c->avg_h264_qpel_pixels_tab[i][j];
03189 }
03190 }
03191
03192 ff_init_scantable_permutation(c->idct_permutation,
03193 c->idct_permutation_type);
03194 }
03195
/**
 * Public initialization entry point for DSPContext.
 * Thin wrapper that forwards directly to ff_dsputil_init();
 * NOTE(review): presumably retained as the older public name for
 * API/ABI compatibility — confirm against the header's deprecation notes.
 *
 * @param c     DSPContext to fill with function pointers
 * @param avctx codec context whose settings (idct_algo, lowres,
 *              bits_per_raw_sample, ...) select the implementations
 */
av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    ff_dsputil_init(c, avctx);
}