FFmpeg: libpostproc/postprocess.c Source File

00001 /*
00002  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
00003  *
00004  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
00005  *
00006  * This file is part of FFmpeg.
00007  *
00008  * FFmpeg is free software; you can redistribute it and/or modify
00009  * it under the terms of the GNU General Public License as published by
00010  * the Free Software Foundation; either version 2 of the License, or
00011  * (at your option) any later version.
00012  *
00013  * FFmpeg is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with FFmpeg; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00021  */
00022 
00028 /*
00029                         C       MMX     MMX2    3DNow   AltiVec
00030 isVertDC                Ec      Ec                      Ec
00031 isVertMinMaxOk          Ec      Ec                      Ec
00032 doVertLowPass           E               e       e       Ec
00033 doVertDefFilter         Ec      Ec      e       e       Ec
00034 isHorizDC               Ec      Ec                      Ec
00035 isHorizMinMaxOk         a       E                       Ec
00036 doHorizLowPass          E               e       e       Ec
00037 doHorizDefFilter        Ec      Ec      e       e       Ec
00038 do_a_deblock            Ec      E       Ec      E
00039 deRing                  E               e       e*      Ecp
00040 Vertical RKAlgo1        E               a       a
00041 Horizontal RKAlgo1                      a       a
00042 Vertical X1#            a               E       E
00043 Horizontal X1#          a               E       E
00044 LinIpolDeinterlace      e               E       E*
00045 CubicIpolDeinterlace    a               e       e*
00046 LinBlendDeinterlace     e               E       E*
00047 MedianDeinterlace#      E       Ec      Ec
00048 TempDeNoiser#           E               e       e       Ec
00049 
00050 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
00051 # more or less selfinvented filters so the exactness is not too meaningful
00052 E = Exact implementation
00053 e = almost exact implementation (slightly different rounding,...)
00054 a = alternative / approximate impl
00055 c = checked against the other implementations (-vo md5)
00056 p = partially optimized, still some work to do
00057 */
00058 
00059 /*
00060 TODO:
00061 reduce the time wasted on the mem transfer
00062 unroll stuff if instructions depend too much on the prior one
00063 move YScale thing to the end instead of fixing QP
00064 write a faster and higher quality deblocking filter :)
00065 make the main loop more flexible (variable number of blocks at once);
00066         the if/else stuff per block is slowing things down
00067 compare the quality & speed of all filters
00068 split this huge file
00069 optimize c versions
00070 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
00071 ...
00072 */
00073 
00074 //Changelog: use git log
00075 
00076 #include "config.h"
00077 #include "libavutil/avutil.h"
00078 #include "libavutil/avassert.h"
00079 #include <inttypes.h>
00080 #include <stdio.h>
00081 #include <stdlib.h>
00082 #include <string.h>
00083 //#undef HAVE_MMX2
00084 //#define HAVE_AMD3DNOW
00085 //#undef HAVE_MMX
00086 //#undef ARCH_X86
00087 //#define DEBUG_BRIGHTNESS
00088 #include "postprocess.h"
00089 #include "postprocess_internal.h"
00090 #include "libavutil/avstring.h"
00091 
00092 unsigned postproc_version(void)
00093 {
00094     av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
00095     return LIBPOSTPROC_VERSION_INT;
00096 }
00097 
00098 const char *postproc_configuration(void)
00099 {
00100     return FFMPEG_CONFIGURATION;
00101 }
00102 
00103 const char *postproc_license(void)
00104 {
00105 #define LICENSE_PREFIX "libpostproc license: "
00106     return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
00107 }
00108 
00109 #if HAVE_ALTIVEC_H
00110 #include <altivec.h>
00111 #endif
00112 
/* Size in bytes of the scratch buffer into which
 * pp_get_mode_by_name_and_quality() copies the mode string. */
00113 #define GET_MODE_BUFFER_SIZE 500
/* Number of slots in the options[] array filled by the mode parser. */
00114 #define OPTIONS_ARRAY_SIZE 10
/* Loop bound used by the C filter routines in this file when walking a block. */
00115 #define BLOCK_SIZE 8
/* NOTE(review): not referenced in the visible part of this file; presumably
 * used by the included template code - confirm. */
00116 #define TEMP_STRIDE 8
00117 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
00118 
/* 64-bit constants declared for x86 builds only.
 * wNN holds the 16-bit value 0x00NN repeated in all four words,
 * bNN holds the byte 0xNN repeated in all eight bytes.
 * NOTE(review): presumably consumed by the inline-asm templates - confirm. */
00119 #if ARCH_X86
00120 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
00121 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
00122 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
00123 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
00124 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
00125 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
00126 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
00127 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
00128 #endif
00129 
/* NOTE(review): judging by the name, a threshold for the deringing filter;
 * it is not referenced in the visible part of this file - confirm. */
00130 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
00131 
00132 
00133 static struct PPFilter filters[]=
00134 {
00135     {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
00136     {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
00137 /*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
00138     {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
00139     {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
00140     {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
00141     {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
00142     {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
00143     {"dr", "dering",                1, 5, 6, DERING},
00144     {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
00145     {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
00146     {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
00147     {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
00148     {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
00149     {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
00150     {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
00151     {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
00152     {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
00153     {"be", "bitexact",              1, 0, 0, BITEXACT},
00154     {NULL, NULL,0,0,0,0} //End Marker
00155 };
00156 
/**
 * Alias table for the mode parser.
 *
 * Entries come in pairs: an alias name at even indices followed by the
 * filter string it expands to. The list ends with a single NULL where the
 * next alias name would be. Both the strings and the pointer array itself
 * are read-only.
 */
static const char * const replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
00166 
00167 
00168 #if ARCH_X86
/** Execute the x86 "prefetchnta" instruction on the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile("prefetchnta (%0)\n\t" : /* no outputs */ : "r" (p));
}
00175 
/** Execute the x86 "prefetcht0" instruction on the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile("prefetcht0 (%0)\n\t" : /* no outputs */ : "r" (p));
}
00182 
/** Execute the x86 "prefetcht1" instruction on the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile("prefetcht1 (%0)\n\t" : /* no outputs */ : "r" (p));
}
00189 
/** Execute the x86 "prefetcht2" instruction on the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile("prefetcht2 (%0)\n\t" : /* no outputs */ : "r" (p));
}
00196 #endif
00197 
00198 /* The horizontal functions exist only in C because the MMX
00199  * code is faster with vertical filters and transposing. */
00200 
00204 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
00205 {
00206     int numEq= 0;
00207     int y;
00208     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00209     const int dcThreshold= dcOffset*2 + 1;
00210 
00211     for(y=0; y<BLOCK_SIZE; y++){
00212         if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
00213         if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
00214         if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
00215         if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
00216         if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
00217         if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
00218         if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
00219         src+= stride;
00220     }
00221     return numEq > c->ppMode.flatnessThreshold;
00222 }
00223 
00227 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
00228 {
00229     int numEq= 0;
00230     int y;
00231     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00232     const int dcThreshold= dcOffset*2 + 1;
00233 
00234     src+= stride*4; // src points to begin of the 8x8 Block
00235     for(y=0; y<BLOCK_SIZE-1; y++){
00236         if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
00237         if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
00238         if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
00239         if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
00240         if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
00241         if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
00242         if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
00243         if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
00244         src+= stride;
00245     }
00246     return numEq > c->ppMode.flatnessThreshold;
00247 }
00248 
/**
 * Sparse range check over 8 rows.
 *
 * One pair of columns is sampled per row, cycling through the pairs
 * (0,5), (2,7), (4,1), (6,3) every four rows. The check fails as soon as
 * a sampled difference falls outside [-2*QP, 2*QP].
 *
 * @return 1 if all 8 sampled differences are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const int leftCol[4]  = { 0, 2, 4, 6 };
    static const int rightCol[4] = { 5, 7, 1, 3 };
    const uint8_t *row = src;
    int y;

    for(y=0; y<8; y++, row+=stride){
        const int k= y & 3;

        /* a negative sum wraps to a huge unsigned value and fails too */
        if((unsigned)(row[leftCol[k]] - row[rightCol[k]] + 2*QP) > 4*QP)
            return 0;
    }
    return 1;
}
00264 
00265 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
00266 {
00267     int x;
00268     src+= stride*4;
00269     for(x=0; x<BLOCK_SIZE; x+=4){
00270         if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
00271         if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
00272         if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
00273         if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
00274     }
00275     return 1;
00276 }
00277 
00278 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
00279 {
00280     if( isHorizDC_C(src, stride, c) ){
00281         if( isHorizMinMaxOk_C(src, stride, c->QP) )
00282             return 1;
00283         else
00284             return 0;
00285     }else{
00286         return 2;
00287     }
00288 }
00289 
00290 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
00291 {
00292     if( isVertDC_C(src, stride, c) ){
00293         if( isVertMinMaxOk_C(src, stride, c->QP) )
00294             return 1;
00295         else
00296             return 0;
00297     }else{
00298         return 2;
00299     }
00300 }
00301 
00302 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
00303 {
00304     int y;
00305     for(y=0; y<BLOCK_SIZE; y++){
00306         const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
00307 
00308         if(FFABS(middleEnergy) < 8*c->QP){
00309             const int q=(dst[3] - dst[4])/2;
00310             const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
00311             const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
00312 
00313             int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00314             d= FFMAX(d, 0);
00315 
00316             d= (5*d + 32) >> 6;
00317             d*= FFSIGN(-middleEnergy);
00318 
00319             if(q>0)
00320             {
00321                 d= d<0 ? 0 : d;
00322                 d= d>q ? q : d;
00323             }
00324             else
00325             {
00326                 d= d>0 ? 0 : d;
00327                 d= d<q ? q : d;
00328             }
00329 
00330             dst[3]-= d;
00331             dst[4]+= d;
00332         }
00333         dst+= stride;
00334     }
00335 }
00336 
00341 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
00342 {
00343     int y;
00344     for(y=0; y<BLOCK_SIZE; y++){
00345         const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
00346         const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
00347 
00348         int sums[10];
00349         sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
00350         sums[1] = sums[0] - first  + dst[3];
00351         sums[2] = sums[1] - first  + dst[4];
00352         sums[3] = sums[2] - first  + dst[5];
00353         sums[4] = sums[3] - first  + dst[6];
00354         sums[5] = sums[4] - dst[0] + dst[7];
00355         sums[6] = sums[5] - dst[1] + last;
00356         sums[7] = sums[6] - dst[2] + last;
00357         sums[8] = sums[7] - dst[3] + last;
00358         sums[9] = sums[8] - dst[4] + last;
00359 
00360         dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
00361         dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
00362         dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
00363         dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
00364         dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
00365         dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
00366         dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
00367         dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
00368 
00369         dst+= stride;
00370     }
00371 }
00372 
00381 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
00382 {
00383     int y;
00384     static uint64_t *lut= NULL;
00385     if(lut==NULL)
00386     {
00387         int i;
00388         lut = av_malloc(256*8);
00389         for(i=0; i<256; i++)
00390         {
00391             int v= i < 128 ? 2*i : 2*(i-256);
00392 /*
00393 //Simulate 112242211 9-Tap filter
00394             uint64_t a= (v/16)  & 0xFF;
00395             uint64_t b= (v/8)   & 0xFF;
00396             uint64_t c= (v/4)   & 0xFF;
00397             uint64_t d= (3*v/8) & 0xFF;
00398 */
00399 //Simulate piecewise linear interpolation
00400             uint64_t a= (v/16)   & 0xFF;
00401             uint64_t b= (v*3/16) & 0xFF;
00402             uint64_t c= (v*5/16) & 0xFF;
00403             uint64_t d= (7*v/16) & 0xFF;
00404             uint64_t A= (0x100 - a)&0xFF;
00405             uint64_t B= (0x100 - b)&0xFF;
00406             uint64_t C= (0x100 - c)&0xFF;
00407             uint64_t D= (0x100 - c)&0xFF;
00408 
00409             lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
00410                        (D<<24) | (C<<16) | (B<<8)  | (A);
00411             //lut[i] = (v<<32) | (v<<24);
00412         }
00413     }
00414 
00415     for(y=0; y<BLOCK_SIZE; y++){
00416         int a= src[1] - src[2];
00417         int b= src[3] - src[4];
00418         int c= src[5] - src[6];
00419 
00420         int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
00421 
00422         if(d < QP){
00423             int v = d * FFSIGN(-b);
00424 
00425             src[1] +=v/8;
00426             src[2] +=v/4;
00427             src[3] +=3*v/8;
00428             src[4] -=3*v/8;
00429             src[5] -=v/4;
00430             src[6] -=v/8;
00431         }
00432         src+=stride;
00433     }
00434 }
00435 
00439 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
00440     int y;
00441     const int QP= c->QP;
00442     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00443     const int dcThreshold= dcOffset*2 + 1;
00444 //START_TIMER
00445     src+= step*4; // src points to begin of the 8x8 Block
00446     for(y=0; y<8; y++){
00447         int numEq= 0;
00448 
00449         if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
00450         if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
00451         if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
00452         if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
00453         if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
00454         if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
00455         if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
00456         if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
00457         if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
00458         if(numEq > c->ppMode.flatnessThreshold){
00459             int min, max, x;
00460 
00461             if(src[0] > src[step]){
00462                 max= src[0];
00463                 min= src[step];
00464             }else{
00465                 max= src[step];
00466                 min= src[0];
00467             }
00468             for(x=2; x<8; x+=2){
00469                 if(src[x*step] > src[(x+1)*step]){
00470                         if(src[x    *step] > max) max= src[ x   *step];
00471                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
00472                 }else{
00473                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
00474                         if(src[ x   *step] < min) min= src[ x   *step];
00475                 }
00476             }
00477             if(max-min < 2*QP){
00478                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
00479                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
00480 
00481                 int sums[10];
00482                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
00483                 sums[1] = sums[0] - first       + src[3*step];
00484                 sums[2] = sums[1] - first       + src[4*step];
00485                 sums[3] = sums[2] - first       + src[5*step];
00486                 sums[4] = sums[3] - first       + src[6*step];
00487                 sums[5] = sums[4] - src[0*step] + src[7*step];
00488                 sums[6] = sums[5] - src[1*step] + last;
00489                 sums[7] = sums[6] - src[2*step] + last;
00490                 sums[8] = sums[7] - src[3*step] + last;
00491                 sums[9] = sums[8] - src[4*step] + last;
00492 
00493                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
00494                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
00495                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
00496                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
00497                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
00498                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
00499                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
00500                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
00501             }
00502         }else{
00503             const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
00504 
00505             if(FFABS(middleEnergy) < 8*QP){
00506                 const int q=(src[3*step] - src[4*step])/2;
00507                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
00508                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
00509 
00510                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00511                 d= FFMAX(d, 0);
00512 
00513                 d= (5*d + 32) >> 6;
00514                 d*= FFSIGN(-middleEnergy);
00515 
00516                 if(q>0){
00517                     d= d<0 ? 0 : d;
00518                     d= d>q ? q : d;
00519                 }else{
00520                     d= d>0 ? 0 : d;
00521                     d= d<q ? q : d;
00522                 }
00523 
00524                 src[3*step]-= d;
00525                 src[4*step]+= d;
00526             }
00527         }
00528 
00529         src += stride;
00530     }
00531 /*if(step==16){
00532     STOP_TIMER("step16")
00533 }else{
00534     STOP_TIMER("stepX")
00535 }*/
00536 }
00537 
/*
 * Multi-versioning of the filter templates.
 *
 * Step 1 decides which variants to build (COMPILE_xxx). Step 2 resets the
 * HAVE_xxx feature macros to 0. Step 3 includes postprocess_template.c once
 * per selected variant, each time with the HAVE_xxx macros set for that
 * variant and with RENAME() appending a distinct suffix (_C, _altivec, _MMX,
 * _MMX2, _3DNow) to the generated names.
 *
 * The order of the directives matters: every inclusion sees the macro values
 * left by the directives before it.
 */
00538 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
00539 //Plain C versions
00540 //we always compile C for testing which needs bitexactness
00541 #define COMPILE_C
00542 
00543 #if HAVE_ALTIVEC
00544 #define COMPILE_ALTIVEC
00545 #endif //HAVE_ALTIVEC
00546 
00547 #if ARCH_X86
00548 
/* With CONFIG_RUNTIME_CPUDETECT all three x86 variants are built; otherwise
 * at most one of them is selected by the conditions below. */
00549 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00550 #define COMPILE_MMX
00551 #endif
00552 
00553 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
00554 #define COMPILE_MMX2
00555 #endif
00556 
00557 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00558 #define COMPILE_3DNOW
00559 #endif
00560 #endif /* ARCH_X86 */
00561 
/* Start from "no SIMD" so that the plain C instantiation below sees every
 * HAVE_xxx as 0. */
00562 #undef HAVE_MMX
00563 #define HAVE_MMX 0
00564 #undef HAVE_MMX2
00565 #define HAVE_MMX2 0
00566 #undef HAVE_AMD3DNOW
00567 #define HAVE_AMD3DNOW 0
00568 #undef HAVE_ALTIVEC
00569 #define HAVE_ALTIVEC 0
00570 
/* Plain C instantiation: names get the suffix _C. */
00571 #ifdef COMPILE_C
00572 #define RENAME(a) a ## _C
00573 #include "postprocess_template.c"
00574 #endif
00575 
/* AltiVec instantiation: names get the suffix _altivec. HAVE_ALTIVEC stays 1
 * from here on. */
00576 #ifdef COMPILE_ALTIVEC
00577 #undef RENAME
00578 #undef HAVE_ALTIVEC
00579 #define HAVE_ALTIVEC 1
00580 #define RENAME(a) a ## _altivec
00581 #include "postprocess_altivec_template.c"
00582 #include "postprocess_template.c"
00583 #endif
00584 
00585 //MMX versions
00586 #ifdef COMPILE_MMX
00587 #undef RENAME
00588 #undef HAVE_MMX
00589 #define HAVE_MMX 1
00590 #define RENAME(a) a ## _MMX
00591 #include "postprocess_template.c"
00592 #endif
00593 
00594 //MMX2 versions
00595 #ifdef COMPILE_MMX2
00596 #undef RENAME
00597 #undef HAVE_MMX
00598 #undef HAVE_MMX2
00599 #define HAVE_MMX 1
00600 #define HAVE_MMX2 1
00601 #define RENAME(a) a ## _MMX2
00602 #include "postprocess_template.c"
00603 #endif
00604 
00605 //3DNOW versions
/* HAVE_MMX2 is explicitly set back to 0 here because the MMX2 section above
 * may have set it to 1. */
00606 #ifdef COMPILE_3DNOW
00607 #undef RENAME
00608 #undef HAVE_MMX
00609 #undef HAVE_MMX2
00610 #undef HAVE_AMD3DNOW
00611 #define HAVE_MMX 1
00612 #define HAVE_MMX2 0
00613 #define HAVE_AMD3DNOW 1
00614 #define RENAME(a) a ## _3DNow
00615 #include "postprocess_template.c"
00616 #endif
00617 
/* From here on the HAVE_xxx macros hold whatever the last executed section
 * above assigned, not the values from config.h. */
00618 // minor note: the HAVE_xyz is messed up after that line so do not use it.
00619 
00620 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00621         const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
00622 {
00623     PPContext *c= (PPContext *)vc;
00624     PPMode *ppMode= (PPMode *)vm;
00625     c->ppMode= *ppMode; //FIXME
00626 
00627     if(ppMode->lumMode & BITEXACT)
00628         return postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00629 
00630     // Using ifs here as they are faster than function pointers although the
00631     // difference would not be measurable here but it is much better because
00632     // someone might exchange the CPU whithout restarting MPlayer ;)
00633 #if CONFIG_RUNTIME_CPUDETECT
00634 #if ARCH_X86
00635     // ordered per speed fastest first
00636     if(c->cpuCaps & PP_CPU_CAPS_MMX2)
00637         postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00638     else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
00639         postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00640     else if(c->cpuCaps & PP_CPU_CAPS_MMX)
00641         postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00642     else
00643         postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00644 #else
00645 #if HAVE_ALTIVEC
00646     if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
00647             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00648     else
00649 #endif
00650             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00651 #endif
00652 #else /* CONFIG_RUNTIME_CPUDETECT */
00653 #if   HAVE_MMX2
00654             postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00655 #elif HAVE_AMD3DNOW
00656             postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00657 #elif HAVE_MMX
00658             postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00659 #elif HAVE_ALTIVEC
00660             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00661 #else
00662             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00663 #endif
00664 #endif /* !CONFIG_RUNTIME_CPUDETECT */
00665 }
00666 
00667 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00668 //        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
00669 
/**
 * Help text for the postprocessing filter string syntax ("-pp" command line
 * help).
 *
 * The filter names listed here must match the names the mode parser
 * accepts, which are compared case-sensitively: "ahdeblock", "avdeblock"
 * and "forcequant".
 *
 * The declaration is a pointer for LIBPOSTPROC_VERSION_INT below 52<<16 and
 * an array otherwise.
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
00719 
00720 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
00721 {
00722     char temp[GET_MODE_BUFFER_SIZE];
00723     char *p= temp;
00724     static const char filterDelimiters[] = ",/";
00725     static const char optionDelimiters[] = ":";
00726     struct PPMode *ppMode;
00727     char *filterToken;
00728 
00729     if (!strcmp(name, "help")) {
00730         const char *p;
00731         for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
00732             av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
00733             av_log(NULL, AV_LOG_INFO, "%s", temp);
00734         }
00735         return NULL;
00736     }
00737 
00738     ppMode= av_malloc(sizeof(PPMode));
00739 
00740     ppMode->lumMode= 0;
00741     ppMode->chromMode= 0;
00742     ppMode->maxTmpNoise[0]= 700;
00743     ppMode->maxTmpNoise[1]= 1500;
00744     ppMode->maxTmpNoise[2]= 3000;
00745     ppMode->maxAllowedY= 234;
00746     ppMode->minAllowedY= 16;
00747     ppMode->baseDcDiff= 256/8;
00748     ppMode->flatnessThreshold= 56-16-1;
00749     ppMode->maxClippedThreshold= 0.01;
00750     ppMode->error=0;
00751 
00752     memset(temp, 0, GET_MODE_BUFFER_SIZE);
00753     av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
00754 
00755     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
00756 
00757     for(;;){
00758         char *filterName;
00759         int q= 1000000; //PP_QUALITY_MAX;
00760         int chrom=-1;
00761         int luma=-1;
00762         char *option;
00763         char *options[OPTIONS_ARRAY_SIZE];
00764         int i;
00765         int filterNameOk=0;
00766         int numOfUnknownOptions=0;
00767         int enable=1; //does the user want us to enabled or disabled the filter
00768 
00769         filterToken= strtok(p, filterDelimiters);
00770         if(filterToken == NULL) break;
00771         p+= strlen(filterToken) + 1; // p points to next filterToken
00772         filterName= strtok(filterToken, optionDelimiters);
00773         av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
00774 
00775         if(*filterName == '-'){
00776             enable=0;
00777             filterName++;
00778         }
00779 
00780         for(;;){ //for all options
00781             option= strtok(NULL, optionDelimiters);
00782             if(option == NULL) break;
00783 
00784             av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
00785             if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
00786             else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
00787             else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
00788             else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
00789             else{
00790                 options[numOfUnknownOptions] = option;
00791                 numOfUnknownOptions++;
00792             }
00793             if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
00794         }
00795         options[numOfUnknownOptions] = NULL;
00796 
00797         /* replace stuff from the replace Table */
00798         for(i=0; replaceTable[2*i]!=NULL; i++){
00799             if(!strcmp(replaceTable[2*i], filterName)){
00800                 int newlen= strlen(replaceTable[2*i + 1]);
00801                 int plen;
00802                 int spaceLeft;
00803 
00804                 p--, *p=',';
00805 
00806                 plen= strlen(p);
00807                 spaceLeft= p - temp + plen;
00808                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE - 1){
00809                     ppMode->error++;
00810                     break;
00811                 }
00812                 memmove(p + newlen, p, plen+1);
00813                 memcpy(p, replaceTable[2*i + 1], newlen);
00814                 filterNameOk=1;
00815             }
00816         }
00817 
00818         for(i=0; filters[i].shortName!=NULL; i++){
00819             if(   !strcmp(filters[i].longName, filterName)
00820                || !strcmp(filters[i].shortName, filterName)){
00821                 ppMode->lumMode &= ~filters[i].mask;
00822                 ppMode->chromMode &= ~filters[i].mask;
00823 
00824                 filterNameOk=1;
00825                 if(!enable) break; // user wants to disable it
00826 
00827                 if(q >= filters[i].minLumQuality && luma)
00828                     ppMode->lumMode|= filters[i].mask;
00829                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
00830                     if(q >= filters[i].minChromQuality)
00831                             ppMode->chromMode|= filters[i].mask;
00832 
00833                 if(filters[i].mask == LEVEL_FIX){
00834                     int o;
00835                     ppMode->minAllowedY= 16;
00836                     ppMode->maxAllowedY= 234;
00837                     for(o=0; options[o]!=NULL; o++){
00838                         if(  !strcmp(options[o],"fullyrange")
00839                            ||!strcmp(options[o],"f")){
00840                             ppMode->minAllowedY= 0;
00841                             ppMode->maxAllowedY= 255;
00842                             numOfUnknownOptions--;
00843                         }
00844                     }
00845                 }
00846                 else if(filters[i].mask == TEMP_NOISE_FILTER)
00847                 {
00848                     int o;
00849                     int numOfNoises=0;
00850 
00851                     for(o=0; options[o]!=NULL; o++){
00852                         char *tail;
00853                         ppMode->maxTmpNoise[numOfNoises]=
00854                             strtol(options[o], &tail, 0);
00855                         if(tail!=options[o]){
00856                             numOfNoises++;
00857                             numOfUnknownOptions--;
00858                             if(numOfNoises >= 3) break;
00859                         }
00860                     }
00861                 }
00862                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
00863                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
00864                     int o;
00865 
00866                     for(o=0; options[o]!=NULL && o<2; o++){
00867                         char *tail;
00868                         int val= strtol(options[o], &tail, 0);
00869                         if(tail==options[o]) break;
00870 
00871                         numOfUnknownOptions--;
00872                         if(o==0) ppMode->baseDcDiff= val;
00873                         else ppMode->flatnessThreshold= val;
00874                     }
00875                 }
00876                 else if(filters[i].mask == FORCE_QUANT){
00877                     int o;
00878                     ppMode->forcedQuant= 15;
00879 
00880                     for(o=0; options[o]!=NULL && o<1; o++){
00881                         char *tail;
00882                         int val= strtol(options[o], &tail, 0);
00883                         if(tail==options[o]) break;
00884 
00885                         numOfUnknownOptions--;
00886                         ppMode->forcedQuant= val;
00887                     }
00888                 }
00889             }
00890         }
00891         if(!filterNameOk) ppMode->error++;
00892         ppMode->error += numOfUnknownOptions;
00893     }
00894 
00895     av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
00896     if(ppMode->error){
00897         av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
00898         av_free(ppMode);
00899         return NULL;
00900     }
00901     return ppMode;
00902 }
00903 
00904 void pp_free_mode(pp_mode *mode){
00905     av_free(mode);
00906 }
00907 
/**
 * Replace the buffer referenced by *p with a newly allocated one.
 *
 * The previous buffer is freed first, so its contents are NOT carried over.
 * The new buffer comes from av_mallocz(); *p receives whatever that call
 * returns, including NULL on failure.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment accepted for the callers' benefit but not used here
 * @param size      number of bytes to request
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    (void)alignment;

    av_free(*p);
    fresh = av_mallocz(size);
    *p    = fresh;
}
00912 
00913 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
00914     int mbWidth = (width+15)>>4;
00915     int mbHeight= (height+15)>>4;
00916     int i;
00917 
00918     c->stride= stride;
00919     c->qpStride= qpStride;
00920 
00921     reallocAlign((void **)&c->tempDst, 8, stride*24);
00922     reallocAlign((void **)&c->tempSrc, 8, stride*24);
00923     reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
00924     reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
00925     for(i=0; i<256; i++)
00926             c->yHistogram[i]= width*height/64*15/256;
00927 
00928     for(i=0; i<3; i++){
00929         //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
00930         reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
00931         reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
00932     }
00933 
00934     reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
00935     reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00936     reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00937     reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
00938 }
00939 
/**
 * Name callback for the logging class of this file.
 *
 * @param ptr ignored; the returned name does not depend on the context
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    (void)ptr;
    return "postproc";
}
00943 
00944 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
00945 
00946 pp_context *pp_get_context(int width, int height, int cpuCaps){
00947     PPContext *c= av_malloc(sizeof(PPContext));
00948     int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
00949     int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
00950 
00951     memset(c, 0, sizeof(PPContext));
00952     c->av_class = &av_codec_context_class;
00953     c->cpuCaps= cpuCaps;
00954     if(cpuCaps&PP_FORMAT){
00955         c->hChromaSubSample= cpuCaps&0x3;
00956         c->vChromaSubSample= (cpuCaps>>4)&0x3;
00957     }else{
00958         c->hChromaSubSample= 1;
00959         c->vChromaSubSample= 1;
00960     }
00961 
00962     reallocBuffers(c, width, height, stride, qpStride);
00963 
00964     c->frameNum=-1;
00965 
00966     return c;
00967 }
00968 
00969 void pp_free_context(void *vc){
00970     PPContext *c = (PPContext*)vc;
00971     int i;
00972 
00973     for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
00974     for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
00975 
00976     av_free(c->tempBlocks);
00977     av_free(c->yHistogram);
00978     av_free(c->tempDst);
00979     av_free(c->tempSrc);
00980     av_free(c->deintTemp);
00981     av_free(c->stdQPTable);
00982     av_free(c->nonBQPTable);
00983     av_free(c->forcedQPTable);
00984 
00985     memset(c, 0, sizeof(PPContext));
00986 
00987     av_free(c);
00988 }
00989 
00990 void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
00991                      uint8_t * dst[3], const int dstStride[3],
00992                      int width, int height,
00993                      const QP_STORE_T *QP_store,  int QPStride,
00994                      pp_mode *vm,  void *vc, int pict_type)
00995 {
00996     int mbWidth = (width+15)>>4;
00997     int mbHeight= (height+15)>>4;
00998     PPMode *mode = (PPMode*)vm;
00999     PPContext *c = (PPContext*)vc;
01000     int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
01001     int absQPStride = FFABS(QPStride);
01002 
01003     // c->stride and c->QPStride are always positive
01004     if(c->stride < minStride || c->qpStride < absQPStride)
01005         reallocBuffers(c, width, height,
01006                        FFMAX(minStride, c->stride),
01007                        FFMAX(c->qpStride, absQPStride));
01008 
01009     if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
01010         int i;
01011         QP_store= c->forcedQPTable;
01012         absQPStride = QPStride = 0;
01013         if(mode->lumMode & FORCE_QUANT)
01014             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
01015         else
01016             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
01017     }
01018 
01019     if(pict_type & PP_PICT_TYPE_QP2){
01020         int i;
01021         const int count= mbHeight * absQPStride;
01022         for(i=0; i<(count>>2); i++){
01023             ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
01024         }
01025         for(i<<=2; i<count; i++){
01026             c->stdQPTable[i] = QP_store[i]>>1;
01027         }
01028         QP_store= c->stdQPTable;
01029         QPStride= absQPStride;
01030     }
01031 
01032     if(0){
01033         int x,y;
01034         for(y=0; y<mbHeight; y++){
01035             for(x=0; x<mbWidth; x++){
01036                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
01037             }
01038             av_log(c, AV_LOG_INFO, "\n");
01039         }
01040         av_log(c, AV_LOG_INFO, "\n");
01041     }
01042 
01043     if((pict_type&7)!=3){
01044         if (QPStride >= 0){
01045             int i;
01046             const int count= mbHeight * QPStride;
01047             for(i=0; i<(count>>2); i++){
01048                 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
01049             }
01050             for(i<<=2; i<count; i++){
01051                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
01052             }
01053         } else {
01054             int i,j;
01055             for(i=0; i<mbHeight; i++) {
01056                 for(j=0; j<absQPStride; j++) {
01057                     c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
01058                 }
01059             }
01060         }
01061     }
01062 
01063     av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
01064            mode->lumMode, mode->chromMode);
01065 
01066     postProcess(src[0], srcStride[0], dst[0], dstStride[0],
01067                 width, height, QP_store, QPStride, 0, mode, c);
01068 
01069     width  = (width )>>c->hChromaSubSample;
01070     height = (height)>>c->vChromaSubSample;
01071 
01072     if(mode->chromMode){
01073         postProcess(src[1], srcStride[1], dst[1], dstStride[1],
01074                     width, height, QP_store, QPStride, 1, mode, c);
01075         postProcess(src[2], srcStride[2], dst[2], dstStride[2],
01076                     width, height, QP_store, QPStride, 2, mode, c);
01077     }
01078     else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
01079         linecpy(dst[1], src[1], height, srcStride[1]);
01080         linecpy(dst[2], src[2], height, srcStride[2]);
01081     }else{
01082         int y;
01083         for(y=0; y<height; y++){
01084             memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
01085             memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
01086         }
01087     }
01088 }