00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include "libavutil/imgutils.h"
00031 #include "avcodec.h"
00032 #include "dsputil.h"
00033 #include "simple_idct.h"
00034 #include "faandct.h"
00035 #include "faanidct.h"
00036 #include "mathops.h"
00037 #include "mpegvideo.h"
00038 #include "config.h"
00039 #include "ac3dec.h"
00040 #include "vorbis.h"
00041 #include "diracdsp.h"
00042
/* Clipping table: ff_cropTbl[MAX_NEG_CROP + x] clamps x to 0..255, with
 * MAX_NEG_CROP guard entries on both sides; filled at runtime by init code. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Square table: ff_squareTbl[256 + x] == x*x for x in [-255,255]; filled at
 * runtime by init code. */
uint32_t ff_squareTbl[512] = {0, };
00045
/* Instantiate the bit-depth-templated DSP functions: each inclusion of
 * dsputil_template.c generates the full function set for that BIT_DEPTH. */
#define BIT_DEPTH 9
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 10
#include "dsputil_template.c"
#undef BIT_DEPTH

/* The 8-bit variant is instantiated last and BIT_DEPTH is deliberately left
 * defined: the rest of this file uses the *_8 helpers. */
#define BIT_DEPTH 8
#include "dsputil_template.c"
00056
00057
/* SWAR constants: every byte of a native word set to 0x7f / 0x80.
 * (~0UL/255 evaluates to 0x0101...01, one per byte of unsigned long.) */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
00060
/* Classic 8x8 zigzag scan order (JPEG/MPEG progressive): entry i gives the
 * raster index of the i-th coefficient in scan order. */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
00071
00072
00073
/* Zigzag scan for interlaced (2-4-8) coded blocks, where vertical frequency
 * pairs are interleaved; same raster-index convention as ff_zigzag_direct. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
00084
00085
00086 DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64];
00087
/* Alternate horizontal scan order (MPEG-2 alternate scan family). */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
00098
/* Alternate vertical scan order (used e.g. for interlaced MPEG-2 blocks). */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
00109
00110
/* Coefficient permutation expected by the simple MMX IDCT (FF_SIMPLE_IDCT_PERM). */
static const uint8_t simple_mmx_permutation[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
00121
00122 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
00123
00124 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
00125 int i;
00126 int end;
00127
00128 st->scantable= src_scantable;
00129
00130 for(i=0; i<64; i++){
00131 int j;
00132 j = src_scantable[i];
00133 st->permutated[i] = permutation[j];
00134 }
00135
00136 end=-1;
00137 for(i=0; i<64; i++){
00138 int j;
00139 j = st->permutated[i];
00140 if(j>end) end=j;
00141 st->raster_end[i]= end;
00142 }
00143 }
00144
00145 void ff_init_scantable_permutation(uint8_t *idct_permutation,
00146 int idct_permutation_type)
00147 {
00148 int i;
00149
00150 switch(idct_permutation_type){
00151 case FF_NO_IDCT_PERM:
00152 for(i=0; i<64; i++)
00153 idct_permutation[i]= i;
00154 break;
00155 case FF_LIBMPEG2_IDCT_PERM:
00156 for(i=0; i<64; i++)
00157 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
00158 break;
00159 case FF_SIMPLE_IDCT_PERM:
00160 for(i=0; i<64; i++)
00161 idct_permutation[i]= simple_mmx_permutation[i];
00162 break;
00163 case FF_TRANSPOSE_IDCT_PERM:
00164 for(i=0; i<64; i++)
00165 idct_permutation[i]= ((i&7)<<3) | (i>>3);
00166 break;
00167 case FF_PARTTRANS_IDCT_PERM:
00168 for(i=0; i<64; i++)
00169 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
00170 break;
00171 case FF_SSE2_IDCT_PERM:
00172 for(i=0; i<64; i++)
00173 idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
00174 break;
00175 default:
00176 av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
00177 }
00178 }
00179
/* Sum of all 256 pixels of a 16x16 block; rows are line_size bytes apart. */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int sum = 0;
    int y, x;

    for (y = 0; y < 16; y++) {
        for (x = 0; x < 16; x++)
            sum += pix[x];
        pix += line_size;
    }
    return sum;
}
00201
/* Sum of squares of the pixels of a 16x16 block (squared L2 norm), using the
 * ff_squareTbl lookup table.  Eight pixels are loaded per iteration, as one
 * 64-bit word or two 32-bit words depending on HAVE_FAST_64BIT.
 * NOTE(review): the word loads type-pun uint8_t* to uint32_t*/uint64_t*,
 * assuming an alignment/aliasing-tolerant target — long-standing practice
 * here, but not strictly conforming C. */
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if 0
            /* Reference byte-by-byte version, kept for documentation. */
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
#else
#if HAVE_FAST_64BIT
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
#endif
#endif
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}
00249
/* Byte-swap w 32-bit words from src into dst (dst may alias src).
 * The original manual 8x unrolling is left to the compiler here. */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int n;

    for (n = 0; n < w; n++)
        dst[n] = av_bswap32(src[n]);
}
00267
/* Byte-swap len 16-bit values from src into dst. */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    int n;

    for (n = 0; n < len; n++)
        dst[n] = av_bswap16(src[n]);
}
00273
00274 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00275 {
00276 int s, i;
00277 uint32_t *sq = ff_squareTbl + 256;
00278
00279 s = 0;
00280 for (i = 0; i < h; i++) {
00281 s += sq[pix1[0] - pix2[0]];
00282 s += sq[pix1[1] - pix2[1]];
00283 s += sq[pix1[2] - pix2[2]];
00284 s += sq[pix1[3] - pix2[3]];
00285 pix1 += line_size;
00286 pix2 += line_size;
00287 }
00288 return s;
00289 }
00290
00291 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00292 {
00293 int s, i;
00294 uint32_t *sq = ff_squareTbl + 256;
00295
00296 s = 0;
00297 for (i = 0; i < h; i++) {
00298 s += sq[pix1[0] - pix2[0]];
00299 s += sq[pix1[1] - pix2[1]];
00300 s += sq[pix1[2] - pix2[2]];
00301 s += sq[pix1[3] - pix2[3]];
00302 s += sq[pix1[4] - pix2[4]];
00303 s += sq[pix1[5] - pix2[5]];
00304 s += sq[pix1[6] - pix2[6]];
00305 s += sq[pix1[7] - pix2[7]];
00306 pix1 += line_size;
00307 pix2 += line_size;
00308 }
00309 return s;
00310 }
00311
00312 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
00313 {
00314 int s, i;
00315 uint32_t *sq = ff_squareTbl + 256;
00316
00317 s = 0;
00318 for (i = 0; i < h; i++) {
00319 s += sq[pix1[ 0] - pix2[ 0]];
00320 s += sq[pix1[ 1] - pix2[ 1]];
00321 s += sq[pix1[ 2] - pix2[ 2]];
00322 s += sq[pix1[ 3] - pix2[ 3]];
00323 s += sq[pix1[ 4] - pix2[ 4]];
00324 s += sq[pix1[ 5] - pix2[ 5]];
00325 s += sq[pix1[ 6] - pix2[ 6]];
00326 s += sq[pix1[ 7] - pix2[ 7]];
00327 s += sq[pix1[ 8] - pix2[ 8]];
00328 s += sq[pix1[ 9] - pix2[ 9]];
00329 s += sq[pix1[10] - pix2[10]];
00330 s += sq[pix1[11] - pix2[11]];
00331 s += sq[pix1[12] - pix2[12]];
00332 s += sq[pix1[13] - pix2[13]];
00333 s += sq[pix1[14] - pix2[14]];
00334 s += sq[pix1[15] - pix2[15]];
00335
00336 pix1 += line_size;
00337 pix2 += line_size;
00338 }
00339 return s;
00340 }
00341
00342 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
00343 const uint8_t *s2, int stride){
00344 int i;
00345
00346
00347 for(i=0;i<8;i++) {
00348 block[0] = s1[0] - s2[0];
00349 block[1] = s1[1] - s2[1];
00350 block[2] = s1[2] - s2[2];
00351 block[3] = s1[3] - s2[3];
00352 block[4] = s1[4] - s2[4];
00353 block[5] = s1[5] - s2[5];
00354 block[6] = s1[6] - s2[6];
00355 block[7] = s1[7] - s2[7];
00356 s1 += stride;
00357 s2 += stride;
00358 block += 8;
00359 }
00360 }
00361
00362
00363 void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00364 int line_size)
00365 {
00366 int i;
00367
00368
00369 for(i=0;i<8;i++) {
00370 pixels[0] = av_clip_uint8(block[0]);
00371 pixels[1] = av_clip_uint8(block[1]);
00372 pixels[2] = av_clip_uint8(block[2]);
00373 pixels[3] = av_clip_uint8(block[3]);
00374 pixels[4] = av_clip_uint8(block[4]);
00375 pixels[5] = av_clip_uint8(block[5]);
00376 pixels[6] = av_clip_uint8(block[6]);
00377 pixels[7] = av_clip_uint8(block[7]);
00378
00379 pixels += line_size;
00380 block += 8;
00381 }
00382 }
00383
00384 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00385 int line_size)
00386 {
00387 int i;
00388
00389
00390 for(i=0;i<4;i++) {
00391 pixels[0] = av_clip_uint8(block[0]);
00392 pixels[1] = av_clip_uint8(block[1]);
00393 pixels[2] = av_clip_uint8(block[2]);
00394 pixels[3] = av_clip_uint8(block[3]);
00395
00396 pixels += line_size;
00397 block += 8;
00398 }
00399 }
00400
00401 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00402 int line_size)
00403 {
00404 int i;
00405
00406
00407 for(i=0;i<2;i++) {
00408 pixels[0] = av_clip_uint8(block[0]);
00409 pixels[1] = av_clip_uint8(block[1]);
00410
00411 pixels += line_size;
00412 block += 8;
00413 }
00414 }
00415
00416 void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
00417 uint8_t *restrict pixels,
00418 int line_size)
00419 {
00420 int i, j;
00421
00422 for (i = 0; i < 8; i++) {
00423 for (j = 0; j < 8; j++) {
00424 if (*block < -128)
00425 *pixels = 0;
00426 else if (*block > 127)
00427 *pixels = 255;
00428 else
00429 *pixels = (uint8_t)(*block + 128);
00430 block++;
00431 pixels++;
00432 }
00433 pixels += (line_size - 8);
00434 }
00435 }
00436
00437 void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00438 int line_size)
00439 {
00440 int i;
00441
00442
00443 for(i=0;i<8;i++) {
00444 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00445 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00446 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
00447 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
00448 pixels[4] = av_clip_uint8(pixels[4] + block[4]);
00449 pixels[5] = av_clip_uint8(pixels[5] + block[5]);
00450 pixels[6] = av_clip_uint8(pixels[6] + block[6]);
00451 pixels[7] = av_clip_uint8(pixels[7] + block[7]);
00452 pixels += line_size;
00453 block += 8;
00454 }
00455 }
00456
00457 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00458 int line_size)
00459 {
00460 int i;
00461
00462
00463 for(i=0;i<4;i++) {
00464 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00465 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00466 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
00467 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
00468 pixels += line_size;
00469 block += 8;
00470 }
00471 }
00472
00473 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00474 int line_size)
00475 {
00476 int i;
00477
00478
00479 for(i=0;i<2;i++) {
00480 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00481 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00482 pixels += line_size;
00483 block += 8;
00484 }
00485 }
00486
00487 static int sum_abs_dctelem_c(DCTELEM *block)
00488 {
00489 int sum=0, i;
00490 for(i=0; i<64; i++)
00491 sum+= FFABS(block[i]);
00492 return sum;
00493 }
00494
/* Paint h rows of 16 bytes each with value; rows are line_size bytes apart. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 16);
        block += line_size;
    }
}
00504
/* Paint h rows of 8 bytes each with value; rows are line_size bytes apart. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 8);
        block += line_size;
    }
}
00514
/* Rounded averages of 2 and 4 values; arguments must be side-effect free
 * (each is evaluated once, but unparenthesized — pass simple expressions). */
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
00517
/* 1/16-pel bilinear interpolation of an 8-pixel-wide strip (one-vector GMC).
 * The four corner weights sum to 256, hence the >>8 normalization. */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B =       x16  * (16 - y16);
    const int C = (16 - x16) *       y16;
    const int D =       x16  *       y16;
    int y, x;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            dst[x] = (A * src[x]          + B * src[x + 1] +
                      C * src[stride + x] + D * src[stride + x + 1] +
                      rounder) >> 8;
        dst += stride;
        src += stride;
    }
}
00540
/* Global motion compensation with a per-pixel affine vector field.
 * (ox,oy) is the vector at the block origin; (dxx,dyx) is the per-column
 * increment and (dxy,dyy) the per-row increment.  Positions are fixed point
 * with 'shift' fractional bits after the >>16; samples outside the
 * width x height source area are clamped to the nearest edge pixel. */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;   /* one full pel in fractional units */

    /* convert to the largest valid coordinate for the clip tests below */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){
            int src_x, src_y, frac_x, frac_y, index;

            /* integer position plus 'shift'-bit fraction */
            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: bilinear interpolation of 4 neighbours */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                          + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    /* off the top/bottom: interpolate horizontally only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* off the left/right: interpolate vertically only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_y)
                                          + src[index+stride  ]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* off a corner: nearest edge pixel, no interpolation */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
00598
/* Full-pel copy: dispatch on block width to the plain copy helpers. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c (dst, src, stride, height);
    else if (width == 2)
        put_pixels2_8_c (dst, src, stride, height);
}
00607
/* Third-pel horizontal interpolation, nearer left pixel:
 * out = round((2*a + b)/3), via the fixed-point factor 683/2048. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + 1] + 1)) >> 11;
        dst += stride;
        src += stride;
    }
}
00618
/* Third-pel horizontal interpolation, nearer right pixel:
 * out = round((a + 2*b)/3), via the fixed-point factor 683/2048. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + 1] + 1)) >> 11;
        dst += stride;
        src += stride;
    }
}
00629
/* Third-pel vertical interpolation, nearer top pixel:
 * out = round((2*top + bottom)/3), via the fixed-point factor 683/2048. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + stride] + 1)) >> 11;
        dst += stride;
        src += stride;
    }
}
00640
/* Third-pel diagonal interpolation (1/3,1/3): weighted 2x2 average with
 * weights 4/3/3/2, normalized by the fixed-point factor 2731/32768 (~1/12). */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (4 * src[x]          + 3 * src[x + 1] +
                              3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15;
        dst += stride;
        src += stride;
    }
}
00651
/* Third-pel diagonal interpolation (1/3,2/3): weighted 2x2 average with
 * weights 3/2/4/3, normalized by the fixed-point factor 2731/32768 (~1/12). */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x]          + 2 * src[x + 1] +
                              4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
        dst += stride;
        src += stride;
    }
}
00662
/* Third-pel vertical interpolation, nearer bottom pixel:
 * out = round((top + 2*bottom)/3), via the fixed-point factor 683/2048. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + stride] + 1)) >> 11;
        dst += stride;
        src += stride;
    }
}
00673
/* Third-pel diagonal interpolation (2/3,1/3): weighted 2x2 average with
 * weights 3/4/2/3, normalized by the fixed-point factor 2731/32768 (~1/12). */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x]          + 4 * src[x + 1] +
                              2 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
        dst += stride;
        src += stride;
    }
}
00684
/* Third-pel diagonal interpolation (2/3,2/3): weighted 2x2 average with
 * weights 2/3/3/4, normalized by the fixed-point factor 2731/32768 (~1/12). */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (2 * src[x]          + 3 * src[x + 1] +
                              3 * src[x + stride] + 4 * src[x + stride + 1] + 6)) >> 15;
        dst += stride;
        src += stride;
    }
}
00695
/* Full-pel averaging copy: dispatch on block width to the averaging helpers. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c (dst, src, stride, height);
    else if (width == 2)
        avg_pixels2_8_c (dst, src, stride, height);
}
00704
/* Third-pel horizontal interpolation (nearer left), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (2 * src[x] + src[x + 1] + 1)) >> 11) + 1) >> 1;
        dst += stride;
        src += stride;
    }
}
00715
/* Third-pel horizontal interpolation (nearer right), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (src[x] + 2 * src[x + 1] + 1)) >> 11) + 1) >> 1;
        dst += stride;
        src += stride;
    }
}
00726
/* Third-pel vertical interpolation (nearer top), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (2 * src[x] + src[x + stride] + 1)) >> 11) + 1) >> 1;
        dst += stride;
        src += stride;
    }
}
00737
/* Third-pel diagonal interpolation (1/3,1/3), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (4 * src[x]          + 3 * src[x + 1] +
                                         3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        dst += stride;
        src += stride;
    }
}
00748
/* Third-pel diagonal interpolation (1/3,2/3), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (3 * src[x]          + 2 * src[x + 1] +
                                         4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        dst += stride;
        src += stride;
    }
}
00759
/* Third-pel vertical interpolation (nearer bottom), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (src[x] + 2 * src[x + stride] + 1)) >> 11) + 1) >> 1;
        dst += stride;
        src += stride;
    }
}
00770
/* Third-pel diagonal interpolation (2/3,1/3), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (3 * src[x]          + 4 * src[x + 1] +
                                         2 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        dst += stride;
        src += stride;
    }
}
00781
/* Third-pel diagonal interpolation (2/3,2/3), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (2 * src[x]          + 3 * src[x + 1] +
                                         3 * src[x + stride] + 4 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        dst += stride;
        src += stride;
    }
}
00792
00793 #define QPEL_MC(r, OPNAME, RND, OP) \
00794 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00795 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00796 int i;\
00797 for(i=0; i<h; i++)\
00798 {\
00799 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
00800 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
00801 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
00802 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
00803 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
00804 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
00805 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
00806 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
00807 dst+=dstStride;\
00808 src+=srcStride;\
00809 }\
00810 }\
00811 \
00812 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00813 const int w=8;\
00814 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00815 int i;\
00816 for(i=0; i<w; i++)\
00817 {\
00818 const int src0= src[0*srcStride];\
00819 const int src1= src[1*srcStride];\
00820 const int src2= src[2*srcStride];\
00821 const int src3= src[3*srcStride];\
00822 const int src4= src[4*srcStride];\
00823 const int src5= src[5*srcStride];\
00824 const int src6= src[6*srcStride];\
00825 const int src7= src[7*srcStride];\
00826 const int src8= src[8*srcStride];\
00827 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
00828 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
00829 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
00830 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
00831 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
00832 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
00833 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
00834 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
00835 dst++;\
00836 src++;\
00837 }\
00838 }\
00839 \
00840 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00841 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00842 int i;\
00843 \
00844 for(i=0; i<h; i++)\
00845 {\
00846 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
00847 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
00848 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
00849 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
00850 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
00851 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
00852 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
00853 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
00854 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
00855 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
00856 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
00857 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
00858 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
00859 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
00860 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
00861 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
00862 dst+=dstStride;\
00863 src+=srcStride;\
00864 }\
00865 }\
00866 \
00867 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00868 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00869 int i;\
00870 const int w=16;\
00871 for(i=0; i<w; i++)\
00872 {\
00873 const int src0= src[0*srcStride];\
00874 const int src1= src[1*srcStride];\
00875 const int src2= src[2*srcStride];\
00876 const int src3= src[3*srcStride];\
00877 const int src4= src[4*srcStride];\
00878 const int src5= src[5*srcStride];\
00879 const int src6= src[6*srcStride];\
00880 const int src7= src[7*srcStride];\
00881 const int src8= src[8*srcStride];\
00882 const int src9= src[9*srcStride];\
00883 const int src10= src[10*srcStride];\
00884 const int src11= src[11*srcStride];\
00885 const int src12= src[12*srcStride];\
00886 const int src13= src[13*srcStride];\
00887 const int src14= src[14*srcStride];\
00888 const int src15= src[15*srcStride];\
00889 const int src16= src[16*srcStride];\
00890 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
00891 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
00892 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
00893 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
00894 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
00895 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
00896 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
00897 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
00898 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
00899 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
00900 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
00901 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
00902 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
00903 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
00904 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
00905 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
00906 dst++;\
00907 src++;\
00908 }\
00909 }\
00910 \
00911 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
00912 uint8_t half[64];\
00913 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00914 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
00915 }\
00916 \
00917 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
00918 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
00919 }\
00920 \
00921 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
00922 uint8_t half[64];\
00923 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00924 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
00925 }\
00926 \
00927 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
00928 uint8_t full[16*9];\
00929 uint8_t half[64];\
00930 copy_block9(full, src, 16, stride, 9);\
00931 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00932 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
00933 }\
00934 \
00935 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
00936 uint8_t full[16*9];\
00937 copy_block9(full, src, 16, stride, 9);\
00938 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
00939 }\
00940 \
00941 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
00942 uint8_t full[16*9];\
00943 uint8_t half[64];\
00944 copy_block9(full, src, 16, stride, 9);\
00945 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00946 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
00947 }\
00948 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
00949 uint8_t full[16*9];\
00950 uint8_t halfH[72];\
00951 uint8_t halfV[64];\
00952 uint8_t halfHV[64];\
00953 copy_block9(full, src, 16, stride, 9);\
00954 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00955 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
00956 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00957 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00958 }\
00959 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
00960 uint8_t full[16*9];\
00961 uint8_t halfH[72];\
00962 uint8_t halfHV[64];\
00963 copy_block9(full, src, 16, stride, 9);\
00964 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00965 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
00966 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00967 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00968 }\
00969 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
00970 uint8_t full[16*9];\
00971 uint8_t halfH[72];\
00972 uint8_t halfV[64];\
00973 uint8_t halfHV[64];\
00974 copy_block9(full, src, 16, stride, 9);\
00975 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00976 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
00977 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00978 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00979 }\
00980 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
00981 uint8_t full[16*9];\
00982 uint8_t halfH[72];\
00983 uint8_t halfHV[64];\
00984 copy_block9(full, src, 16, stride, 9);\
00985 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00986 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
00987 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00988 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00989 }\
00990 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
00991 uint8_t full[16*9];\
00992 uint8_t halfH[72];\
00993 uint8_t halfV[64];\
00994 uint8_t halfHV[64];\
00995 copy_block9(full, src, 16, stride, 9);\
00996 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00997 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
00998 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00999 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01000 }\
01001 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01002 uint8_t full[16*9];\
01003 uint8_t halfH[72];\
01004 uint8_t halfHV[64];\
01005 copy_block9(full, src, 16, stride, 9);\
01006 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01007 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01008 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01009 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01010 }\
01011 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01012 uint8_t full[16*9];\
01013 uint8_t halfH[72];\
01014 uint8_t halfV[64];\
01015 uint8_t halfHV[64];\
01016 copy_block9(full, src, 16, stride, 9);\
01017 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
01018 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01019 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01020 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01021 }\
01022 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01023 uint8_t full[16*9];\
01024 uint8_t halfH[72];\
01025 uint8_t halfHV[64];\
01026 copy_block9(full, src, 16, stride, 9);\
01027 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01028 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01029 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01030 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01031 }\
01032 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01033 uint8_t halfH[72];\
01034 uint8_t halfHV[64];\
01035 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01036 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01037 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
01038 }\
01039 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01040 uint8_t halfH[72];\
01041 uint8_t halfHV[64];\
01042 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01043 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01044 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01045 }\
01046 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01047 uint8_t full[16*9];\
01048 uint8_t halfH[72];\
01049 uint8_t halfV[64];\
01050 uint8_t halfHV[64];\
01051 copy_block9(full, src, 16, stride, 9);\
01052 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01053 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01054 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01055 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01056 }\
01057 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01058 uint8_t full[16*9];\
01059 uint8_t halfH[72];\
01060 copy_block9(full, src, 16, stride, 9);\
01061 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01062 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01063 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01064 }\
01065 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01066 uint8_t full[16*9];\
01067 uint8_t halfH[72];\
01068 uint8_t halfV[64];\
01069 uint8_t halfHV[64];\
01070 copy_block9(full, src, 16, stride, 9);\
01071 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01072 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01073 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01074 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01075 }\
01076 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01077 uint8_t full[16*9];\
01078 uint8_t halfH[72];\
01079 copy_block9(full, src, 16, stride, 9);\
01080 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01081 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01082 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01083 }\
01084 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01085 uint8_t halfH[72];\
01086 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01087 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01088 }\
01089 \
01090 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01091 uint8_t half[256];\
01092 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01093 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
01094 }\
01095 \
01096 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01097 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01098 }\
01099 \
01100 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01101 uint8_t half[256];\
01102 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01103 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
01104 }\
01105 \
01106 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01107 uint8_t full[24*17];\
01108 uint8_t half[256];\
01109 copy_block17(full, src, 24, stride, 17);\
01110 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01111 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
01112 }\
01113 \
01114 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01115 uint8_t full[24*17];\
01116 copy_block17(full, src, 24, stride, 17);\
01117 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
01118 }\
01119 \
01120 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01121 uint8_t full[24*17];\
01122 uint8_t half[256];\
01123 copy_block17(full, src, 24, stride, 17);\
01124 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01125 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
01126 }\
01127 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01128 uint8_t full[24*17];\
01129 uint8_t halfH[272];\
01130 uint8_t halfV[256];\
01131 uint8_t halfHV[256];\
01132 copy_block17(full, src, 24, stride, 17);\
01133 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01134 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01135 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01136 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01137 }\
01138 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01139 uint8_t full[24*17];\
01140 uint8_t halfH[272];\
01141 uint8_t halfHV[256];\
01142 copy_block17(full, src, 24, stride, 17);\
01143 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01144 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01145 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01146 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01147 }\
01148 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01149 uint8_t full[24*17];\
01150 uint8_t halfH[272];\
01151 uint8_t halfV[256];\
01152 uint8_t halfHV[256];\
01153 copy_block17(full, src, 24, stride, 17);\
01154 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01155 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01156 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01157 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01158 }\
01159 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01160 uint8_t full[24*17];\
01161 uint8_t halfH[272];\
01162 uint8_t halfHV[256];\
01163 copy_block17(full, src, 24, stride, 17);\
01164 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01165 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01166 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01167 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01168 }\
01169 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01170 uint8_t full[24*17];\
01171 uint8_t halfH[272];\
01172 uint8_t halfV[256];\
01173 uint8_t halfHV[256];\
01174 copy_block17(full, src, 24, stride, 17);\
01175 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01176 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01177 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01178 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01179 }\
01180 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01181 uint8_t full[24*17];\
01182 uint8_t halfH[272];\
01183 uint8_t halfHV[256];\
01184 copy_block17(full, src, 24, stride, 17);\
01185 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01186 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01187 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01188 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01189 }\
01190 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01191 uint8_t full[24*17];\
01192 uint8_t halfH[272];\
01193 uint8_t halfV[256];\
01194 uint8_t halfHV[256];\
01195 copy_block17(full, src, 24, stride, 17);\
01196 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
01197 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01198 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01199 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01200 }\
01201 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01202 uint8_t full[24*17];\
01203 uint8_t halfH[272];\
01204 uint8_t halfHV[256];\
01205 copy_block17(full, src, 24, stride, 17);\
01206 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01207 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01208 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01209 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01210 }\
01211 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01212 uint8_t halfH[272];\
01213 uint8_t halfHV[256];\
01214 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01215 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01216 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01217 }\
01218 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01219 uint8_t halfH[272];\
01220 uint8_t halfHV[256];\
01221 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01222 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01223 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01224 }\
01225 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01226 uint8_t full[24*17];\
01227 uint8_t halfH[272];\
01228 uint8_t halfV[256];\
01229 uint8_t halfHV[256];\
01230 copy_block17(full, src, 24, stride, 17);\
01231 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01232 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01233 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01234 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01235 }\
01236 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01237 uint8_t full[24*17];\
01238 uint8_t halfH[272];\
01239 copy_block17(full, src, 24, stride, 17);\
01240 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01241 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01242 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01243 }\
01244 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01245 uint8_t full[24*17];\
01246 uint8_t halfH[272];\
01247 uint8_t halfV[256];\
01248 uint8_t halfHV[256];\
01249 copy_block17(full, src, 24, stride, 17);\
01250 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01251 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01252 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01253 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01254 }\
01255 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01256 uint8_t full[24*17];\
01257 uint8_t halfH[272];\
01258 copy_block17(full, src, 24, stride, 17);\
01259 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01260 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01261 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01262 }\
01263 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01264 uint8_t halfH[272];\
01265 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01266 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01267 }
01268
/* Store primitives plugged into the QPEL_MC template above.  'b' is a raw
 * filter sum scaled by 32 (hence the >>5); each op adds a rounding bias
 * (16 for rounded, 15 for the no_rnd variants), shifts down and clips via
 * the cm[] crop table, then either stores the result (op_put) or averages
 * it with the existing destination pixel (op_avg). */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the three qpel MC function families: rounded put,
 * no-rounding put, and rounded avg.  op_avg_no_rnd is defined above for
 * symmetry but no family is instantiated with it here. */
QPEL_MC(0, put_ , _ , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_ , _ , op_avg)

#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
01282
/* Full-pel (mc00) positions need no filtering: alias the qpel mc00 entry
 * points to the plain block-copy helpers.  The no_rnd variants are
 * identical because rounding is irrelevant for a pure copy.
 * Fix: put_no_rnd_qpel16_mc00_c previously mapped to ff_put_pixels16x16_8_c,
 * which is inconsistent with put_qpel16_mc00_c above (same copy operation,
 * mapped to ff_put_pixels16x16_c) and does not match the naming of the
 * other ff_ wrappers in this group. */
#define put_qpel8_mc00_c ff_put_pixels8x8_c
#define avg_qpel8_mc00_c ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
01289
01290 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
01291 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01292 int i;
01293
01294 for(i=0; i<h; i++){
01295 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
01296 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
01297 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
01298 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
01299 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
01300 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
01301 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
01302 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
01303 dst+=dstStride;
01304 src+=srcStride;
01305 }
01306 }
01307
#if CONFIG_RV40_DECODER
/* RV40 (3,3) sub-pel position: forward to the generic 8-bit xy half-pel
 * interpolation helpers from the pixel templates (put and avg flavours,
 * 16x16 and 8x8 block sizes). */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif
01322
#if CONFIG_DIRAC_DECODER
/* Dirac MC wrappers: adapt the generic 8-bit pixel helpers (plain copy,
 * 2-source and 4-source linear averages) to the DiracDSP interface, which
 * passes an array of up to five source pointers.  The 32-pixel-wide
 * variants process the block as two adjacent 16-wide halves.
 * Instantiated below for OPNAME = put and avg. */
#define DIRAC_MC(OPNAME)\
void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst   , src[0]   , stride, h);\
    OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst   , src[0]   , src[1]   , stride, stride, stride, h);\
    OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst   , src[0]   , src[1]   , src[2]   , src[3]   , stride, stride, stride, stride, stride, h);\
    OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
}
DIRAC_MC(put)
DIRAC_MC(avg)
#endif
01367
01368 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
01369 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01370 int i;
01371
01372 for(i=0; i<w; i++){
01373 const int src_1= src[ -srcStride];
01374 const int src0 = src[0 ];
01375 const int src1 = src[ srcStride];
01376 const int src2 = src[2*srcStride];
01377 const int src3 = src[3*srcStride];
01378 const int src4 = src[4*srcStride];
01379 const int src5 = src[5*srcStride];
01380 const int src6 = src[6*srcStride];
01381 const int src7 = src[7*srcStride];
01382 const int src8 = src[8*srcStride];
01383 const int src9 = src[9*srcStride];
01384 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
01385 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
01386 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
01387 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
01388 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
01389 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
01390 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
01391 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
01392 src++;
01393 dst++;
01394 }
01395 }
01396
/* mspel position (1,0): average the source block with its horizontally
 * lowpass-filtered version. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
}
01402
/* mspel position (2,0): pure horizontal lowpass filter, straight to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
01406
/* mspel position (3,0): average the source shifted one pixel right with
 * the horizontally lowpass-filtered block. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
}
01412
/* mspel position (0,2): pure vertical lowpass filter, straight to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
01416
/* mspel position (1,2): average of the vertically filtered source and the
 * h-then-v filtered block.  halfH is computed from src-stride over 11 rows
 * (rows -1..9) so the vertical pass on halfH+8 (row 0) has the extra top
 * and bottom context it reads. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* mspel position (3,2): like mc12 but the vertical-only filter runs on the
 * source shifted one pixel right (src+1). */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* mspel position (2,2): horizontal then vertical lowpass; halfH again
 * carries one extra row above and below (11 rows from src-stride) for the
 * vertical pass starting at halfH+8. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
01440
/* H.263 deblocking across a horizontal block edge: filters the two rows on
 * each side of the edge (src points at the first row below it) for all 8
 * columns.  Filter strength is looked up from the quantizer. */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int x;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(x=0; x<8; x++){
            int d1, d2, ad1;
            int p0= src[x-2*stride];
            int p1= src[x-1*stride];
            int p2= src[x+0*stride];
            int p3= src[x+1*stride];
            /* edge gradient estimate; C division truncates toward zero */
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* piecewise-linear correction: full for small |d|, ramping
               back to zero for large |d| (preserves real edges) */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* clip to 0..255: if bit 8 is set the value over/underflowed;
               ~(p>>31) yields 0 for negative values, 255 for overflow */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[x-1*stride] = p1;
            src[x+0*stride] = p2;

            ad1= FFABS(d1)>>1;

            /* weaker correction for the outer pixels, bounded by |d1|/2 */
            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[x-2*stride] = p0 - d2;
            src[x+ stride] = p3 + d2;
        }
    }
}
01477
/* H.263 deblocking across a vertical block edge: same filter as
 * h263_v_loop_filter_c but applied row-wise to the two columns on each
 * side of the edge (src points at the first column right of it). */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int y;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(y=0; y<8; y++){
            int d1, d2, ad1;
            int p0= src[y*stride-2];
            int p1= src[y*stride-1];
            int p2= src[y*stride+0];
            int p3= src[y*stride+1];
            /* edge gradient estimate; C division truncates toward zero */
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* piecewise-linear correction, tapering off for strong edges */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* clip to 0..255 (see h263_v_loop_filter_c) */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[y*stride-1] = p1;
            src[y*stride+0] = p2;

            ad1= FFABS(d1)>>1;

            /* weaker correction for the outer pixels, bounded by |d1|/2 */
            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[y*stride-2] = p0 - d2;
            src[y*stride+1] = p3 + d2;
        }
    }
}
01514
/**
 * H.261 in-loop filter on an 8x8 block: separable [1 2 1]/4 smoothing.
 * First a vertical pass into temp[] (edge rows are passed through, scaled
 * by 4 to keep a uniform fixed-point scale), then a horizontal pass back
 * into src with rounding (edge columns only undo the x4 scale).
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int temp[64];
    int x, y;

    /* vertical [1 2 1] pass; top/bottom rows copied at 4x scale */
    for (x = 0; x < 8; x++) {
        temp[x]         = 4 * src[x];
        temp[x + 7 * 8] = 4 * src[x + 7 * stride];
    }
    for (y = 1; y < 7; y++)
        for (x = 0; x < 8; x++)
            temp[y * 8 + x] = src[(y - 1) * stride + x]
                            + 2 * src[y * stride + x]
                            + src[(y + 1) * stride + x];

    /* horizontal [1 2 1] pass with rounding; edge columns just rescale */
    for (y = 0; y < 8; y++) {
        src[y * stride]     = (temp[y * 8]     + 2) >> 2;
        src[y * stride + 7] = (temp[y * 8 + 7] + 2) >> 2;
        for (x = 1; x < 7; x++)
            src[y * stride + x] = (temp[y * 8 + x - 1]
                                 + 2 * temp[y * 8 + x]
                                 + temp[y * 8 + x + 1] + 8) >> 4;
    }
}
01541
/**
 * Sum of absolute differences (SAD) between two 16-pixel-wide blocks of
 * height h.  The context pointer v is unused.
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 16; j++)
            sum += abs(pix1[j] - pix2[j]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
01569
/**
 * SAD of pix1 against the horizontal half-pel interpolation of pix2
 * (avg2 of each pixel and its right neighbour), 16 wide, height h.
 * The context pointer v is unused.
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 16; j++)
            sum += abs(pix1[j] - avg2(pix2[j], pix2[j + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
01597
/**
 * SAD of pix1 against the vertical half-pel interpolation of pix2
 * (avg2 of each pixel and the one directly below), 16 wide, height h.
 * The context pointer v is unused.
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size; /* next row of the reference */
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 16; j++)
            sum += abs(pix1[j] - avg2(pix2[j], pix3[j]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
01627
/**
 * SAD of pix1 against the diagonal half-pel interpolation of pix2
 * (avg4 of the 2x2 neighbourhood), 16 wide, height h.  The context
 * pointer v is unused.
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size; /* next row of the reference */
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 16; j++)
            sum += abs(pix1[j] - avg4(pix2[j], pix2[j + 1], pix3[j], pix3[j + 1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
01657
/**
 * Sum of absolute differences (SAD) between two 8-pixel-wide blocks of
 * height h.  The context pointer v is unused.
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 8; j++)
            sum += abs(pix1[j] - pix2[j]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
01677
/**
 * SAD of pix1 against the horizontal half-pel interpolation of pix2
 * (avg2 of each pixel and its right neighbour), 8 wide, height h.
 * The context pointer v is unused.
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 8; j++)
            sum += abs(pix1[j] - avg2(pix2[j], pix2[j + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
01697
/**
 * SAD of pix1 against the vertical half-pel interpolation of pix2
 * (avg2 of each pixel and the one directly below), 8 wide, height h.
 * The context pointer v is unused.
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size; /* next row of the reference */
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 8; j++)
            sum += abs(pix1[j] - avg2(pix2[j], pix3[j]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
01719
/**
 * SAD of pix1 against the diagonal half-pel interpolation of pix2
 * (avg4 of the 2x2 neighbourhood), 8 wide, height h.  The context
 * pointer v is unused.
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size; /* next row of the reference */
    int sum = 0;
    int row, j;

    for (row = 0; row < h; row++) {
        for (j = 0; j < 8; j++)
            sum += abs(pix1[j] - avg4(pix2[j], pix2[j + 1], pix3[j], pix3[j + 1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
01741
/* Noise-aware SSE comparator for 16-wide blocks: score1 is the plain sum
 * of squared differences; score2 accumulates, per 2x2 neighbourhood, the
 * difference between the diagonal cross-gradients of s1 and s2, so blocks
 * whose local texture differs are penalised.  The weight comes from
 * avctx->nsse_weight when a context is available, else 8.  v is an
 * MpegEncContext* (may be NULL). */
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++){
            score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
        }
        if(y+1<h){ /* gradient term needs the row below */
            for(x=0; x<15; x++){
                score2+= FFABS( s1[x ] - s1[x +stride]
                - s1[x+1] + s1[x+1+stride])
                -FFABS( s2[x ] - s2[x +stride]
                - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else return score1 + FFABS(score2)*8;
}
01767
01768 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
01769 MpegEncContext *c = v;
01770 int score1=0;
01771 int score2=0;
01772 int x,y;
01773
01774 for(y=0; y<h; y++){
01775 for(x=0; x<8; x++){
01776 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
01777 }
01778 if(y+1<h){
01779 for(x=0; x<7; x++){
01780 score2+= FFABS( s1[x ] - s1[x +stride]
01781 - s1[x+1] + s1[x+1+stride])
01782 -FFABS( s2[x ] - s2[x +stride]
01783 - s2[x+1] + s2[x+1+stride]);
01784 }
01785 }
01786 s1+= stride;
01787 s2+= stride;
01788 }
01789
01790 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
01791 else return score1 + FFABS(score2)*8;
01792 }
01793
01794 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
01795 int i;
01796 unsigned int sum=0;
01797
01798 for(i=0; i<8*8; i++){
01799 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
01800 int w= weight[i];
01801 b>>= RECON_SHIFT;
01802 assert(-512<b && b<512);
01803
01804 sum += (w*b)*(w*b)>>4;
01805 }
01806 return sum>>2;
01807 }
01808
01809 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
01810 int i;
01811
01812 for(i=0; i<8*8; i++){
01813 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
01814 }
01815 }
01816
01825 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
01826 {
01827 int i;
01828 DCTELEM temp[64];
01829
01830 if(last<=0) return;
01831
01832
01833 for(i=0; i<=last; i++){
01834 const int j= scantable[i];
01835 temp[j]= block[j];
01836 block[j]=0;
01837 }
01838
01839 for(i=0; i<=last; i++){
01840 const int j= scantable[i];
01841 const int perm_j= permutation[j];
01842 block[perm_j]= temp[j];
01843 }
01844 }
01845
01846 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
01847 return 0;
01848 }
01849
/**
 * Fill a 6-entry comparison-function table from the DSPContext according
 * to the comparison type selected in the low byte of @p type.
 * Unknown types leave the entries zeroed and log an error.
 */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    /* clear first so an unknown type yields NULL entries, not garbage */
    memset(cmp, 0, sizeof(void*)*6);

    for(i=0; i<6; i++){
        switch(type&0xFF){   /* only the low byte selects the metric */
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#if CONFIG_DWT
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}
01909
01910 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
01911 long i;
01912 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
01913 long a = *(long*)(src+i);
01914 long b = *(long*)(dst+i);
01915 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
01916 }
01917 for(; i<w; i++)
01918 dst[i+0] += src[i+0];
01919 }
01920
01921 static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w){
01922 long i;
01923 #if !HAVE_FAST_UNALIGNED
01924 if((long)src2 & (sizeof(long)-1)){
01925 for(i=0; i+7<w; i+=8){
01926 dst[i+0] = src1[i+0]-src2[i+0];
01927 dst[i+1] = src1[i+1]-src2[i+1];
01928 dst[i+2] = src1[i+2]-src2[i+2];
01929 dst[i+3] = src1[i+3]-src2[i+3];
01930 dst[i+4] = src1[i+4]-src2[i+4];
01931 dst[i+5] = src1[i+5]-src2[i+5];
01932 dst[i+6] = src1[i+6]-src2[i+6];
01933 dst[i+7] = src1[i+7]-src2[i+7];
01934 }
01935 }else
01936 #endif
01937 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
01938 long a = *(long*)(src1+i);
01939 long b = *(long*)(src2+i);
01940 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
01941 }
01942 for(; i<w; i++)
01943 dst[i+0] = src1[i+0]-src2[i+0];
01944 }
01945
01946 static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
01947 int i;
01948 uint8_t l, lt;
01949
01950 l= *left;
01951 lt= *left_top;
01952
01953 for(i=0; i<w; i++){
01954 l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
01955 lt= src1[i];
01956 dst[i]= l;
01957 }
01958
01959 *left= l;
01960 *left_top= lt;
01961 }
01962
01963 static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
01964 int i;
01965 uint8_t l, lt;
01966
01967 l= *left;
01968 lt= *left_top;
01969
01970 for(i=0; i<w; i++){
01971 const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
01972 lt= src1[i];
01973 l= src2[i];
01974 dst[i]= l - pred;
01975 }
01976
01977 *left= l;
01978 *left_top= lt;
01979 }
01980
01981 static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
01982 int i;
01983
01984 for(i=0; i<w-1; i++){
01985 acc+= src[i];
01986 dst[i]= acc;
01987 i++;
01988 acc+= src[i];
01989 dst[i]= acc;
01990 }
01991
01992 for(; i<w; i++){
01993 acc+= src[i];
01994 dst[i]= acc;
01995 }
01996
01997 return acc;
01998 }
01999
02000 #if HAVE_BIGENDIAN
02001 #define B 3
02002 #define G 2
02003 #define R 1
02004 #define A 0
02005 #else
02006 #define B 0
02007 #define G 1
02008 #define R 2
02009 #define A 3
02010 #endif
02011 static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
02012 int i;
02013 int r,g,b,a;
02014 r= *red;
02015 g= *green;
02016 b= *blue;
02017 a= *alpha;
02018
02019 for(i=0; i<w; i++){
02020 b+= src[4*i+B];
02021 g+= src[4*i+G];
02022 r+= src[4*i+R];
02023 a+= src[4*i+A];
02024
02025 dst[4*i+B]= b;
02026 dst[4*i+G]= g;
02027 dst[4*i+R]= r;
02028 dst[4*i+A]= a;
02029 }
02030
02031 *red= r;
02032 *green= g;
02033 *blue= b;
02034 *alpha= a;
02035 }
02036 #undef B
02037 #undef G
02038 #undef R
02039 #undef A
02040
02041 #define BUTTERFLY2(o1,o2,i1,i2) \
02042 o1= (i1)+(i2);\
02043 o2= (i1)-(i2);
02044
02045 #define BUTTERFLY1(x,y) \
02046 {\
02047 int a,b;\
02048 a= x;\
02049 b= y;\
02050 x= a+b;\
02051 y= a-b;\
02052 }
02053
02054 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
02055
/**
 * 8x8 SATD: apply an 8x8 Hadamard transform to the pixel-wise difference
 * src - dst and return the sum of absolute transform coefficients.
 * The first loop runs three butterfly stages over each row; the second
 * runs the column stages, with BUTTERFLYA folding the final stage into
 * the |.|-accumulation.
 */
static int hadamard8_diff8x8_c( void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        /* row transform: stage 1 on the pixel differences */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        /* row transform: stage 2 */
        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        /* row transform: stage 3 */
        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* column transform: stages 1 and 2 */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        /* final column stage folded into the absolute-value sum */
        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}
02100
/**
 * Intra 8x8 SATD: Hadamard transform of the source pixels themselves
 * (no reference), returning the sum of absolute coefficients minus the
 * DC term so the score is independent of the block's mean brightness.
 * Structure mirrors hadamard8_diff8x8_c.
 */
static int hadamard8_intra8x8_c( void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        /* row transform: stage 1 directly on the source pixels */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        /* row transform: stage 2 */
        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        /* row transform: stage 3 */
        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* column transform: stages 1 and 2 */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        /* final column stage folded into the absolute-value sum */
        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    /* remove the DC contribution: |temp[0]+temp[32]| was the DC part
       added by the first BUTTERFLYA of column 0 */
    sum -= FFABS(temp[8*0] + temp[8*4]);

    return sum;
}
02148
/**
 * Transform-domain SAD: forward-DCT the src1 - src2 difference block and
 * return the sum of absolute DCT coefficients.
 */
static int dct_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
02159
02160 #if CONFIG_GPL
02161 #define DCT8_1D {\
02162 const int s07 = SRC(0) + SRC(7);\
02163 const int s16 = SRC(1) + SRC(6);\
02164 const int s25 = SRC(2) + SRC(5);\
02165 const int s34 = SRC(3) + SRC(4);\
02166 const int a0 = s07 + s34;\
02167 const int a1 = s16 + s25;\
02168 const int a2 = s07 - s34;\
02169 const int a3 = s16 - s25;\
02170 const int d07 = SRC(0) - SRC(7);\
02171 const int d16 = SRC(1) - SRC(6);\
02172 const int d25 = SRC(2) - SRC(5);\
02173 const int d34 = SRC(3) - SRC(4);\
02174 const int a4 = d16 + d25 + (d07 + (d07>>1));\
02175 const int a5 = d07 - d34 - (d25 + (d25>>1));\
02176 const int a6 = d07 + d34 - (d16 + (d16>>1));\
02177 const int a7 = d16 - d25 + (d34 + (d34>>1));\
02178 DST(0, a0 + a1 ) ;\
02179 DST(1, a4 + (a7>>2)) ;\
02180 DST(2, a2 + (a3>>1)) ;\
02181 DST(3, a5 + (a6>>2)) ;\
02182 DST(4, a0 - a1 ) ;\
02183 DST(5, a6 - (a5>>2)) ;\
02184 DST(6, (a2>>1) - a3 ) ;\
02185 DST(7, (a4>>2) - a7 ) ;\
02186 }
02187
/**
 * SAD in the H.264-style 8x8 integer-transform domain (GPL-only path).
 * DCT8_1D is applied in place over the rows, then over the columns where
 * the DST() macro accumulates absolute values instead of storing them.
 */
static int dct264_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

/* row pass: transform each row in place */
#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

/* column pass: accumulate |coefficient| instead of storing */
#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
02211 #endif
02212
02213 static int dct_max8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
02214 MpegEncContext * const s= (MpegEncContext *)c;
02215 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
02216 int sum=0, i;
02217
02218 assert(h==8);
02219
02220 s->dsp.diff_pixels(temp, src1, src2, stride);
02221 s->dsp.fdct(temp);
02222
02223 for(i=0; i<64; i++)
02224 sum= FFMAX(sum, FFABS(temp[i]));
02225
02226 return sum;
02227 }
02228
/**
 * Quantization-error metric: quantize, dequantize and inverse-transform
 * the difference block at the current qscale (inter path), and return
 * the squared error against the saved unquantized difference.
 */
static int quant_psnr8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
    DCTELEM * const bak = temp+64;
    int sum=0, i;

    assert(h==8);
    s->mb_intra=0;   /* force the inter quantization path */

    s->dsp.diff_pixels(temp, src1, src2, stride);

    /* keep an untouched copy of the difference for comparison */
    memcpy(bak, temp, 64*sizeof(DCTELEM));

    /* NOTE(review): fast_dct_quantize is presumed to transform+quantize
     * temp in place — it is paired with dequantize + idct below; confirm
     * against its definition. */
    s->block_last_index[0]= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct_8(temp);

    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
02251
/**
 * Rate-distortion score of an 8x8 block: estimated VLC bit cost of the
 * quantized difference coefficients plus the SSE of the reconstruction,
 * with the rate term weighted by qscale^2 (109/128 factor).
 */
static int rd8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    /* work on local aligned copies so the reconstruction below does not
       touch the caller's buffers */
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    /* pick the VLC length tables for the current coding mode; intra
       additionally pays for the DC coefficient */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* walk the coefficients in scan order, counting (run, level)
           code lengths; the final nonzero one uses the "last" table */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;                    /* bias into 0..127 index range */
                if((level&(~127)) == 0){      /* fits a regular VLC code? */
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;        /* needs the escape code */
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);                   /* last coefficient is nonzero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    /* reconstruct: dequantize and add the IDCT back onto the prediction */
    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(lsrc2, 8, temp);

    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
02327
/**
 * Rate-only metric: estimated VLC bit cost of coding the quantized
 * src1 - src2 difference block (same counting scheme as rd8x8_c but
 * without the reconstruction/distortion term).
 */
static int bit8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    /* pick the VLC length tables for the current coding mode; intra
       additionally pays for the DC coefficient */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* count (run, level) code lengths in scan order; the final
           nonzero coefficient uses the "last" table */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;                    /* bias into 0..127 index range */
                if((level&(~127)) == 0){      /* fits a regular VLC code? */
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;        /* needs the escape code */
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);                   /* last coefficient is nonzero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
02386
/**
 * Intra vertical SAD: sum of absolute differences between vertically
 * adjacent pixels within one block (a vertical-smoothness measure).
 * Instantiated below for widths 8 and 16; inner loop unrolled by 4.
 */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                          \
    int x,y;                                                                              \
                                                                                          \
    for(y=1; y<h; y++){                                                                   \
        for(x=0; x<size; x+=4){                                                           \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])         \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);        \
        }                                                                                 \
        s+= stride;                                                                       \
    }                                                                                     \
                                                                                          \
    return score;                                                                         \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
02404
02405 static int vsad16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
02406 int score=0;
02407 int x,y;
02408
02409 for(y=1; y<h; y++){
02410 for(x=0; x<16; x++){
02411 score+= FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
02412 }
02413 s1+= stride;
02414 s2+= stride;
02415 }
02416
02417 return score;
02418 }
02419
/* squared value helper used by the VSSE metrics below */
#define SQ(a) ((a)*(a))
/**
 * Intra vertical SSE: sum of squared differences between vertically
 * adjacent pixels within one block. Instantiated below for widths 8
 * and 16; inner loop unrolled by 4.
 */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                          \
    int x,y;                                                                              \
                                                                                          \
    for(y=1; y<h; y++){                                                                   \
        for(x=0; x<size; x+=4){                                                           \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])               \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);              \
        }                                                                                 \
        s+= stride;                                                                       \
    }                                                                                     \
                                                                                          \
    return score;                                                                         \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
02438
02439 static int vsse16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
02440 int score=0;
02441 int x,y;
02442
02443 for(y=1; y<h; y++){
02444 for(x=0; x<16; x++){
02445 score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
02446 }
02447 s1+= stride;
02448 s2+= stride;
02449 }
02450
02451 return score;
02452 }
02453
02454 static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
02455 int size){
02456 int score=0;
02457 int i;
02458 for(i=0; i<size; i++)
02459 score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
02460 return score;
02461 }
02462
/* Build 16x16 variants of the 8x8 comparison functions above.
 * NOTE(review): WRAPPER8_16_SQ is defined earlier in this file (not in
 * this hunk); presumably it sums the 8x8 function over the four 8x8
 * quadrants — confirm against the macro definition. */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
02473
/* Element-wise float multiply: dst[i] = src0[i] * src1[i]. */
static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
    int k;

    for (k = 0; k < len; k++)
        dst[k] = src0[k] * src1[k];
}
02479
/* Element-wise multiply with src1 read back-to-front:
 * dst[i] = src0[i] * src1[len-1-i]. */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int k;

    for (k = 0; k < len; k++)
        dst[k] = src0[k] * src1[len - 1 - k];
}
02486
/* Fused element-wise multiply-add: dst[i] = src0[i]*src1[i] + src2[i]. */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    int k;

    for (k = 0; k < len; k++)
        dst[k] = src0[k] * src1[k] + src2[k];
}
02492
/**
 * Overlap-add windowing: combine src0 (read forward from its second
 * half, i.e. src0[len+k]) with src1 (read backward) through a 2*len
 * window, producing 2*len output samples:
 *   dst[k]         = src0[len+k]*win[2*len-1-k] - src1[len-1-k]*win[k]
 *   dst[2*len-1-k] = src0[len+k]*win[k]        + src1[len-1-k]*win[2*len-1-k]
 */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const int   m  = 2 * len - 1 - k;   /* mirrored index */
        const float s0 = src0[len + k];
        const float s1 = src1[len - 1 - k];
        const float wa = win[k];
        const float wb = win[m];

        dst[k] = s0 * wb - s1 * wa;
        dst[m] = s0 * wa + s1 * wb;
    }
}
02509
/* Scale a float vector by a scalar: dst[i] = src[i] * mul. */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k;

    for (k = 0; k < len; k++)
        dst[k] = src[k] * mul;
}
02517
/* Scalar multiply-accumulate: dst[i] += src[i] * mul. */
static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k;

    for (k = 0; k < len; k++)
        dst[k] += src[k] * mul;
}
02525
/* In-place butterfly: (v1[i], v2[i]) <- (v1[i]+v2[i], v1[i]-v2[i]). */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const float diff = v1[k] - v2[k];
        v1[k] = v1[k] + v2[k];
        v2[k] = diff;
    }
}
02536
/* Butterfly with interleaved output: dst[2i] = src0[i]+src1[i],
 * dst[2i+1] = src0[i]-src1[i]. */
static void butterflies_float_interleave_c(float *dst, const float *src0,
                                           const float *src1, int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const float a = src0[k];
        const float b = src1[k];

        dst[2 * k    ] = a + b;
        dst[2 * k + 1] = a - b;
    }
}
02548
/* Dot product of two float vectors. */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    int k;
    float acc = 0.0;

    for (k = 0; k < len; k++)
        acc += v1[k] * v2[k];

    return acc;
}
02559
02560 static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
02561 uint32_t maxi, uint32_t maxisign)
02562 {
02563
02564 if(a > mini) return mini;
02565 else if((a^(1U<<31)) > maxisign) return maxi;
02566 else return a;
02567 }
02568
02569 static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
02570 int i;
02571 uint32_t mini = *(uint32_t*)min;
02572 uint32_t maxi = *(uint32_t*)max;
02573 uint32_t maxisign = maxi ^ (1U<<31);
02574 uint32_t *dsti = (uint32_t*)dst;
02575 const uint32_t *srci = (const uint32_t*)src;
02576 for(i=0; i<len; i+=8) {
02577 dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
02578 dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
02579 dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
02580 dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
02581 dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
02582 dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
02583 dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
02584 dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
02585 }
02586 }
/* Clip each float in src to [min, max]; dispatches to the bit-twiddling
 * path when the bounds straddle zero. len is processed 8 at a time. */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i, j;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
    } else {
        for (i = 0; i < len; i += 8)
            for (j = 0; j < 8; j++)
                dst[i + j] = av_clipf(src[i + j], min, max);
    }
}
02604
02605 static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
02606 {
02607 int res = 0;
02608
02609 while (order--)
02610 res += *v1++ * *v2++;
02611
02612 return res;
02613 }
02614
02615 static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
02616 {
02617 int res = 0;
02618 while (order--) {
02619 res += *v1 * *v2++;
02620 *v1++ += mul * *v3++;
02621 }
02622 return res;
02623 }
02624
/**
 * Apply a symmetric 16-bit window with Q15 rounding: sample i and its
 * mirror len-1-i are both scaled by window[i], result is
 * (x*w + 2^14) >> 15. Assumes window holds at least len/2 coefficients —
 * TODO(review) confirm against callers. MUL16 is the project's
 * 16x16 multiply helper (mathops.h is included above).
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int i;
    int len2 = len >> 1;

    for (i = 0; i < len2; i++) {
        int16_t w = window[i];
        output[i] = (MUL16(input[i], w) + (1 << 14)) >> 15;
        output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
    }
}
02637
/**
 * Clip each int32 in src into [min, max] and store in dst.
 * NOTE(review): the loop is unrolled by 8 and decrements an *unsigned*
 * counter after the first pass, so len must be a non-zero multiple of 8;
 * len == 0 or a non-multiple would underflow and overrun. Presumably
 * callers guarantee this — confirm against the DSPContext contract.
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        len -= 8;
    } while (len > 0);
}
02653
/* 11-bit fixed-point IDCT weights used by the WMV2 IDCT below */
#define W0 2048
#define W1 2841
#define W2 2676
#define W3 2408
#define W4 2048
#define W5 1609
#define W6 1108
#define W7 565

/* One 8-point IDCT over a row of the block, in place, with the result
 * rounded down from the 11-bit weight precision (>> 8 with rounding). */
static void wmv2_idct_row(short * b)
{
    int od1, od3, od5, od7;   /* odd-coefficient partial sums  */
    int ev0, ev1, ev2, ev3;   /* even-coefficient partial sums */
    int m1, m2;

    /* even part */
    ev2 = W2 * b[2] + W6 * b[6];
    ev3 = W6 * b[2] - W2 * b[6];
    ev0 = W0 * b[0] + W0 * b[4];
    ev1 = W0 * b[0] - W0 * b[4];

    /* odd part */
    od1 = W1 * b[1] + W7 * b[7];
    od7 = W7 * b[1] - W1 * b[7];
    od5 = W5 * b[5] + W3 * b[3];
    od3 = W3 * b[5] - W5 * b[3];

    /* 181/256 ~= 1/sqrt(2) rotation of the odd part */
    m1 = (181 * (od1 - od5 + od7 - od3) + 128) >> 8;
    m2 = (181 * (od1 - od5 - od7 + od3) + 128) >> 8;

    b[0] = (ev0 + ev2 + od1 + od5 + (1 << 7)) >> 8;
    b[1] = (ev1 + ev3 + m1        + (1 << 7)) >> 8;
    b[2] = (ev1 - ev3 + m2        + (1 << 7)) >> 8;
    b[3] = (ev0 - ev2 + od7 + od3 + (1 << 7)) >> 8;
    b[4] = (ev0 - ev2 - od7 - od3 + (1 << 7)) >> 8;
    b[5] = (ev1 - ev3 - m2        + (1 << 7)) >> 8;
    b[6] = (ev1 + ev3 - m1        + (1 << 7)) >> 8;
    b[7] = (ev0 + ev2 - od1 - od5 + (1 << 7)) >> 8;
}
02689 static void wmv2_idct_col(short * b)
02690 {
02691 int s1,s2;
02692 int a0,a1,a2,a3,a4,a5,a6,a7;
02693
02694 a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
02695 a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
02696 a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
02697 a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
02698 a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
02699 a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
02700 a0 = (W0*b[8*0]+W0*b[8*4] )>>3;
02701 a4 = (W0*b[8*0]-W0*b[8*4] )>>3;
02702
02703 s1 = (181*(a1-a5+a7-a3)+128)>>8;
02704 s2 = (181*(a1-a5-a7+a3)+128)>>8;
02705
02706 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
02707 b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
02708 b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
02709 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
02710
02711 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
02712 b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
02713 b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
02714 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
02715 }
/* Full 8x8 WMV2 IDCT: rows first, then columns, in place. */
void ff_wmv2_idct_c(short * block){
    int i;

    for (i = 0; i < 8; i++)
        wmv2_idct_row(block + 8 * i);
    for (i = 0; i < 8; i++)
        wmv2_idct_col(block + i);
}
02726
02727
/* WMV2 IDCT followed by clamped store / clamped add to the picture. */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}
/* Reference (jrevdct) IDCT followed by clamped store / clamped add. */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct (block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct (block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}

/* 4x4 reduced-resolution (lowres=1) variants */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}

/* 2x2 reduced-resolution (lowres=2) variants */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}

/* 1x1 (lowres=3) variants: only the DC term survives, (DC+4)>>3 */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    dest[0] = av_clip_uint8((block[0] + 4)>>3);
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
}
02779
/* Callback that intentionally does nothing; all parameters are ignored. */
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
02781
02782
02783 av_cold void ff_dsputil_static_init(void)
02784 {
02785 int i;
02786
02787 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
02788 for(i=0;i<MAX_NEG_CROP;i++) {
02789 ff_cropTbl[i] = 0;
02790 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
02791 }
02792
02793 for(i=0;i<512;i++) {
02794 ff_squareTbl[i] = (i - 256) * (i - 256);
02795 }
02796
02797 for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
02798 }
02799
/**
 * Verify that the compiler honours 16-byte alignment of stack variables;
 * logs a one-time error on MMX/AltiVec builds if it does not.
 * @return 0 if the stack is properly aligned, -1 otherwise.
 */
int ff_check_alignment(void){
    static int did_fail=0;   /* ensures the warning is printed only once */
    LOCAL_ALIGNED_16(int, aligned, [4]);

    if((intptr_t)aligned & 15){
        if(!did_fail){
#if HAVE_MMX || HAVE_ALTIVEC
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to FFmpeg developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
02819
02820 av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
02821 {
02822 int i, j;
02823
02824 ff_check_alignment();
02825
02826 #if CONFIG_ENCODERS
02827 if (avctx->bits_per_raw_sample == 10) {
02828 c->fdct = ff_jpeg_fdct_islow_10;
02829 c->fdct248 = ff_fdct248_islow_10;
02830 } else {
02831 if(avctx->dct_algo==FF_DCT_FASTINT) {
02832 c->fdct = ff_fdct_ifast;
02833 c->fdct248 = ff_fdct_ifast248;
02834 }
02835 else if(avctx->dct_algo==FF_DCT_FAAN) {
02836 c->fdct = ff_faandct;
02837 c->fdct248 = ff_faandct248;
02838 }
02839 else {
02840 c->fdct = ff_jpeg_fdct_islow_8;
02841 c->fdct248 = ff_fdct248_islow_8;
02842 }
02843 }
02844 #endif //CONFIG_ENCODERS
02845
02846 if(avctx->lowres==1){
02847 c->idct_put= ff_jref_idct4_put;
02848 c->idct_add= ff_jref_idct4_add;
02849 c->idct = ff_j_rev_dct4;
02850 c->idct_permutation_type= FF_NO_IDCT_PERM;
02851 }else if(avctx->lowres==2){
02852 c->idct_put= ff_jref_idct2_put;
02853 c->idct_add= ff_jref_idct2_add;
02854 c->idct = ff_j_rev_dct2;
02855 c->idct_permutation_type= FF_NO_IDCT_PERM;
02856 }else if(avctx->lowres==3){
02857 c->idct_put= ff_jref_idct1_put;
02858 c->idct_add= ff_jref_idct1_add;
02859 c->idct = ff_j_rev_dct1;
02860 c->idct_permutation_type= FF_NO_IDCT_PERM;
02861 }else{
02862 if (avctx->bits_per_raw_sample == 10) {
02863 c->idct_put = ff_simple_idct_put_10;
02864 c->idct_add = ff_simple_idct_add_10;
02865 c->idct = ff_simple_idct_10;
02866 c->idct_permutation_type = FF_NO_IDCT_PERM;
02867 } else {
02868 if(avctx->idct_algo==FF_IDCT_INT){
02869 c->idct_put= ff_jref_idct_put;
02870 c->idct_add= ff_jref_idct_add;
02871 c->idct = ff_j_rev_dct;
02872 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
02873 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
02874 avctx->idct_algo==FF_IDCT_VP3){
02875 c->idct_put= ff_vp3_idct_put_c;
02876 c->idct_add= ff_vp3_idct_add_c;
02877 c->idct = ff_vp3_idct_c;
02878 c->idct_permutation_type= FF_NO_IDCT_PERM;
02879 }else if(avctx->idct_algo==FF_IDCT_WMV2){
02880 c->idct_put= ff_wmv2_idct_put_c;
02881 c->idct_add= ff_wmv2_idct_add_c;
02882 c->idct = ff_wmv2_idct_c;
02883 c->idct_permutation_type= FF_NO_IDCT_PERM;
02884 }else if(avctx->idct_algo==FF_IDCT_FAAN){
02885 c->idct_put= ff_faanidct_put;
02886 c->idct_add= ff_faanidct_add;
02887 c->idct = ff_faanidct;
02888 c->idct_permutation_type= FF_NO_IDCT_PERM;
02889 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
02890 c->idct_put= ff_ea_idct_put_c;
02891 c->idct_permutation_type= FF_NO_IDCT_PERM;
02892 }else{
02893 c->idct_put = ff_simple_idct_put_8;
02894 c->idct_add = ff_simple_idct_add_8;
02895 c->idct = ff_simple_idct_8;
02896 c->idct_permutation_type= FF_NO_IDCT_PERM;
02897 }
02898 }
02899 }
02900
02901 c->diff_pixels = diff_pixels_c;
02902 c->put_pixels_clamped = ff_put_pixels_clamped_c;
02903 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
02904 c->add_pixels_clamped = ff_add_pixels_clamped_c;
02905 c->sum_abs_dctelem = sum_abs_dctelem_c;
02906 c->gmc1 = gmc1_c;
02907 c->gmc = ff_gmc_c;
02908 c->pix_sum = pix_sum_c;
02909 c->pix_norm1 = pix_norm1_c;
02910
02911 c->fill_block_tab[0] = fill_block16_c;
02912 c->fill_block_tab[1] = fill_block8_c;
02913
02914
02915 c->pix_abs[0][0] = pix_abs16_c;
02916 c->pix_abs[0][1] = pix_abs16_x2_c;
02917 c->pix_abs[0][2] = pix_abs16_y2_c;
02918 c->pix_abs[0][3] = pix_abs16_xy2_c;
02919 c->pix_abs[1][0] = pix_abs8_c;
02920 c->pix_abs[1][1] = pix_abs8_x2_c;
02921 c->pix_abs[1][2] = pix_abs8_y2_c;
02922 c->pix_abs[1][3] = pix_abs8_xy2_c;
02923
02924 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
02925 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
02926 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
02927 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
02928 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
02929 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
02930 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
02931 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
02932 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
02933
02934 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
02935 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
02936 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
02937 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
02938 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
02939 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
02940 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
02941 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
02942 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
02943
02944 #define dspfunc(PFX, IDX, NUM) \
02945 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
02946 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
02947 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
02948 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
02949 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
02950 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
02951 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
02952 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
02953 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
02954 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
02955 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
02956 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
02957 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
02958 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
02959 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
02960 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
02961
02962 dspfunc(put_qpel, 0, 16);
02963 dspfunc(put_no_rnd_qpel, 0, 16);
02964
02965 dspfunc(avg_qpel, 0, 16);
02966
02967
02968 dspfunc(put_qpel, 1, 8);
02969 dspfunc(put_no_rnd_qpel, 1, 8);
02970
02971 dspfunc(avg_qpel, 1, 8);
02972
02973
02974 #undef dspfunc
02975
02976 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
02977 ff_mlp_init(c, avctx);
02978 #endif
02979 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
02980 ff_intrax8dsp_init(c,avctx);
02981 #endif
02982
02983 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
02984 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
02985 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
02986 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
02987 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
02988 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
02989 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
02990 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
02991
02992 #define SET_CMP_FUNC(name) \
02993 c->name[0]= name ## 16_c;\
02994 c->name[1]= name ## 8x8_c;
02995
02996 SET_CMP_FUNC(hadamard8_diff)
02997 c->hadamard8_diff[4]= hadamard8_intra16_c;
02998 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
02999 SET_CMP_FUNC(dct_sad)
03000 SET_CMP_FUNC(dct_max)
03001 #if CONFIG_GPL
03002 SET_CMP_FUNC(dct264_sad)
03003 #endif
03004 c->sad[0]= pix_abs16_c;
03005 c->sad[1]= pix_abs8_c;
03006 c->sse[0]= sse16_c;
03007 c->sse[1]= sse8_c;
03008 c->sse[2]= sse4_c;
03009 SET_CMP_FUNC(quant_psnr)
03010 SET_CMP_FUNC(rd)
03011 SET_CMP_FUNC(bit)
03012 c->vsad[0]= vsad16_c;
03013 c->vsad[4]= vsad_intra16_c;
03014 c->vsad[5]= vsad_intra8_c;
03015 c->vsse[0]= vsse16_c;
03016 c->vsse[4]= vsse_intra16_c;
03017 c->vsse[5]= vsse_intra8_c;
03018 c->nsse[0]= nsse16_c;
03019 c->nsse[1]= nsse8_c;
03020 #if CONFIG_DWT
03021 ff_dsputil_init_dwt(c);
03022 #endif
03023
03024 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
03025
03026 c->add_bytes= add_bytes_c;
03027 c->diff_bytes= diff_bytes_c;
03028 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
03029 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
03030 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
03031 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
03032 c->bswap_buf= bswap_buf;
03033 c->bswap16_buf = bswap16_buf;
03034
03035 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
03036 c->h263_h_loop_filter= h263_h_loop_filter_c;
03037 c->h263_v_loop_filter= h263_v_loop_filter_c;
03038 }
03039
03040 if (CONFIG_VP3_DECODER) {
03041 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
03042 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
03043 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
03044 }
03045
03046 c->h261_loop_filter= h261_loop_filter_c;
03047
03048 c->try_8x8basis= try_8x8basis_c;
03049 c->add_8x8basis= add_8x8basis_c;
03050
03051 #if CONFIG_VORBIS_DECODER
03052 c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling;
03053 #endif
03054 #if CONFIG_AC3_DECODER
03055 c->ac3_downmix = ff_ac3_downmix_c;
03056 #endif
03057 c->vector_fmul = vector_fmul_c;
03058 c->vector_fmul_reverse = vector_fmul_reverse_c;
03059 c->vector_fmul_add = vector_fmul_add_c;
03060 c->vector_fmul_window = vector_fmul_window_c;
03061 c->vector_clipf = vector_clipf_c;
03062 c->scalarproduct_int16 = scalarproduct_int16_c;
03063 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
03064 c->apply_window_int16 = apply_window_int16_c;
03065 c->vector_clip_int32 = vector_clip_int32_c;
03066 c->scalarproduct_float = scalarproduct_float_c;
03067 c->butterflies_float = butterflies_float_c;
03068 c->butterflies_float_interleave = butterflies_float_interleave_c;
03069 c->vector_fmul_scalar = vector_fmul_scalar_c;
03070 c->vector_fmac_scalar = vector_fmac_scalar_c;
03071
03072 c->shrink[0]= av_image_copy_plane;
03073 c->shrink[1]= ff_shrink22;
03074 c->shrink[2]= ff_shrink44;
03075 c->shrink[3]= ff_shrink88;
03076
03077 c->prefetch= just_return;
03078
03079 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
03080 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
03081
03082 #undef FUNC
03083 #undef FUNCC
03084 #define FUNC(f, depth) f ## _ ## depth
03085 #define FUNCC(f, depth) f ## _ ## depth ## _c
03086
03087 #define dspfunc1(PFX, IDX, NUM, depth)\
03088 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
03089 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
03090 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
03091 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
03092
03093 #define dspfunc2(PFX, IDX, NUM, depth)\
03094 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
03095 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
03096 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
03097 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
03098 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
03099 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
03100 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
03101 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
03102 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
03103 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
03104 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
03105 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
03106 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
03107 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
03108 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
03109 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
03110
03111
03112 #define BIT_DEPTH_FUNCS(depth, dct)\
03113 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
03114 c->draw_edges = FUNCC(draw_edges , depth);\
03115 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
03116 c->clear_block = FUNCC(clear_block ## dct , depth);\
03117 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
03118 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
03119 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
03120 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
03121 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
03122 \
03123 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
03124 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
03125 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
03126 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
03127 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
03128 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
03129 \
03130 dspfunc1(put , 0, 16, depth);\
03131 dspfunc1(put , 1, 8, depth);\
03132 dspfunc1(put , 2, 4, depth);\
03133 dspfunc1(put , 3, 2, depth);\
03134 dspfunc1(put_no_rnd, 0, 16, depth);\
03135 dspfunc1(put_no_rnd, 1, 8, depth);\
03136 dspfunc1(avg , 0, 16, depth);\
03137 dspfunc1(avg , 1, 8, depth);\
03138 dspfunc1(avg , 2, 4, depth);\
03139 dspfunc1(avg , 3, 2, depth);\
03140 dspfunc1(avg_no_rnd, 0, 16, depth);\
03141 dspfunc1(avg_no_rnd, 1, 8, depth);\
03142 \
03143 dspfunc2(put_h264_qpel, 0, 16, depth);\
03144 dspfunc2(put_h264_qpel, 1, 8, depth);\
03145 dspfunc2(put_h264_qpel, 2, 4, depth);\
03146 dspfunc2(put_h264_qpel, 3, 2, depth);\
03147 dspfunc2(avg_h264_qpel, 0, 16, depth);\
03148 dspfunc2(avg_h264_qpel, 1, 8, depth);\
03149 dspfunc2(avg_h264_qpel, 2, 4, depth);
03150
03151 switch (avctx->bits_per_raw_sample) {
03152 case 9:
03153 if (c->dct_bits == 32) {
03154 BIT_DEPTH_FUNCS(9, _32);
03155 } else {
03156 BIT_DEPTH_FUNCS(9, _16);
03157 }
03158 break;
03159 case 10:
03160 if (c->dct_bits == 32) {
03161 BIT_DEPTH_FUNCS(10, _32);
03162 } else {
03163 BIT_DEPTH_FUNCS(10, _16);
03164 }
03165 break;
03166 default:
03167 BIT_DEPTH_FUNCS(8, _16);
03168 break;
03169 }
03170
03171
03172 if (HAVE_MMX) ff_dsputil_init_mmx (c, avctx);
03173 if (ARCH_ARM) ff_dsputil_init_arm (c, avctx);
03174 if (HAVE_VIS) ff_dsputil_init_vis (c, avctx);
03175 if (ARCH_ALPHA) ff_dsputil_init_alpha (c, avctx);
03176 if (ARCH_PPC) ff_dsputil_init_ppc (c, avctx);
03177 if (HAVE_MMI) ff_dsputil_init_mmi (c, avctx);
03178 if (ARCH_SH4) ff_dsputil_init_sh4 (c, avctx);
03179 if (ARCH_BFIN) ff_dsputil_init_bfin (c, avctx);
03180
03181 for (i = 0; i < 4; i++) {
03182 for (j = 0; j < 16; j++) {
03183 if(!c->put_2tap_qpel_pixels_tab[i][j])
03184 c->put_2tap_qpel_pixels_tab[i][j] =
03185 c->put_h264_qpel_pixels_tab[i][j];
03186 if(!c->avg_2tap_qpel_pixels_tab[i][j])
03187 c->avg_2tap_qpel_pixels_tab[i][j] =
03188 c->avg_h264_qpel_pixels_tab[i][j];
03189 }
03190 }
03191
03192 ff_init_scantable_permutation(c->idct_permutation,
03193 c->idct_permutation_type);
03194 }
03195
/**
 * Public initialization entry point for DSPContext.
 * Thin wrapper that forwards directly to ff_dsputil_init();
 * NOTE(review): presumably retained as the older public name for
 * API/ABI compatibility — confirm against the header's deprecation notes.
 *
 * @param c     DSPContext to fill with function pointers
 * @param avctx codec context whose settings (idct_algo, lowres,
 *              bits_per_raw_sample, ...) select the implementations
 */
av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    ff_dsputil_init(c, avctx);
}