00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 
00038 
00039 
00040 
00041 
00042 
00043 
00044 
00045 
00046 
00047 
00048 
00049 
00050 
00051 
00052 
00053 
00054 
00055 
00056 
00057 
00058 
00059 
00060 
00061 
00062 
00063 
00064 
00065 
00071 #include <stdlib.h>
00072 #include <stdio.h>
00073 #include "libavutil/common.h"
00074 #include "dsputil.h"
00075 
/* Edge length of the square block; the loops and index arithmetic below
 * are written out for exactly eight samples per row and column. */
#define DCTSIZE 8
/* Wraps the return type of the externally visible functions; here it
 * expands to the type unchanged. */
#define GLOBAL(x) x
/* Plain C right shift.  For negative operands the result of >> is
 * implementation-defined, so the transforms rely on the compiler
 * providing an arithmetic shift. */
#define RIGHT_SHIFT(x, n) ((x) >> (n))
/* Placed after the local declarations of each function; expands to
 * nothing in this configuration. */
#define SHIFT_TEMPS

/* Fail the build with a readable diagnostic instead of depending on stray
 * prose to provoke a syntax error. */
#if DCTSIZE != 8
#error "Sorry, this code only copes with 8x8 DCTs."
#endif
00088 
00089 
00090 
00091 
00092 
00093 
00094 
00095 
00096 
00097 
00098 
00099 
00100 
00101 
00102 
00103 
00104 
00105 
00106 
00107 
/* Number of fractional bits carried by the fixed-point multipliers below. */
#define CONST_BITS  8

/*
 * Multipliers used by the transforms.  Each macro name spells out the real
 * value; the macro itself is that value scaled by 2^CONST_BITS and rounded
 * to the nearest integer.  For the default CONST_BITS of 8 the results are
 * written out as literals.
 */
#if CONST_BITS == 8
#define FIX_0_382683433  ((int32_t)   98)       /* round(0.382683433 * 256) */
#define FIX_0_541196100  ((int32_t)  139)       /* round(0.541196100 * 256) */
#define FIX_0_707106781  ((int32_t)  181)       /* round(0.707106781 * 256) */
#define FIX_1_306562965  ((int32_t)  334)       /* round(1.306562965 * 256) */
#else
/* FIX() is not defined in the visible part of this file.  Supply a fallback
 * so that changing CONST_BITS still builds, unless an included header
 * already provides one. */
#ifndef FIX
#define FIX(x)  ((int32_t) ((x) * (1 << CONST_BITS) + 0.5))
#endif
#define FIX_0_382683433  FIX(0.382683433)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_707106781  FIX(0.707106781)
#define FIX_1_306562965  FIX(1.306562965)
#endif
00129 
00130 
00131 
00132 
00133 
00134 
00135 
/*
 * DESCALE(x, n) removes n fractional bits from x.
 *
 * Default build: any earlier definition is replaced by a bare right shift,
 * i.e. the discarded bits are simply dropped.
 * With USE_ACCURATE_ROUNDING: a DESCALE supplied by an included header is
 * kept; if none exists, fall back to adding half of the last kept unit
 * before shifting, so that the build does not end up with an undefined
 * macro.
 */
#ifndef USE_ACCURATE_ROUNDING
#undef DESCALE
#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
#elif !defined(DESCALE)
#define DESCALE(x,n)  RIGHT_SHIFT((x) + ((int32_t) 1 << ((n)-1)), n)
#endif

/*
 * Multiply a variable by one of the FIX_* constants and bring the product
 * back to integer scale.  The result is cast to DCTELEM.
 */
#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
00147 
00148 static av_always_inline void row_fdct(DCTELEM * data){
00149   int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
00150   int_fast16_t tmp10, tmp11, tmp12, tmp13;
00151   int_fast16_t z1, z2, z3, z4, z5, z11, z13;
00152   DCTELEM *dataptr;
00153   int ctr;
00154   SHIFT_TEMPS
00155 
00156   
00157 
00158   dataptr = data;
00159   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
00160     tmp0 = dataptr[0] + dataptr[7];
00161     tmp7 = dataptr[0] - dataptr[7];
00162     tmp1 = dataptr[1] + dataptr[6];
00163     tmp6 = dataptr[1] - dataptr[6];
00164     tmp2 = dataptr[2] + dataptr[5];
00165     tmp5 = dataptr[2] - dataptr[5];
00166     tmp3 = dataptr[3] + dataptr[4];
00167     tmp4 = dataptr[3] - dataptr[4];
00168 
00169     
00170 
00171     tmp10 = tmp0 + tmp3;        
00172     tmp13 = tmp0 - tmp3;
00173     tmp11 = tmp1 + tmp2;
00174     tmp12 = tmp1 - tmp2;
00175 
00176     dataptr[0] = tmp10 + tmp11; 
00177     dataptr[4] = tmp10 - tmp11;
00178 
00179     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); 
00180     dataptr[2] = tmp13 + z1;    
00181     dataptr[6] = tmp13 - z1;
00182 
00183     
00184 
00185     tmp10 = tmp4 + tmp5;        
00186     tmp11 = tmp5 + tmp6;
00187     tmp12 = tmp6 + tmp7;
00188 
00189     
00190     z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); 
00191     z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5;    
00192     z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5;    
00193     z3 = MULTIPLY(tmp11, FIX_0_707106781);         
00194 
00195     z11 = tmp7 + z3;            
00196     z13 = tmp7 - z3;
00197 
00198     dataptr[5] = z13 + z2;      
00199     dataptr[3] = z13 - z2;
00200     dataptr[1] = z11 + z4;
00201     dataptr[7] = z11 - z4;
00202 
00203     dataptr += DCTSIZE;         
00204   }
00205 }
00206 
00207 
00208 
00209 
00210 
00211 GLOBAL(void)
00212 fdct_ifast (DCTELEM * data)
00213 {
00214   int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
00215   int_fast16_t tmp10, tmp11, tmp12, tmp13;
00216   int_fast16_t z1, z2, z3, z4, z5, z11, z13;
00217   DCTELEM *dataptr;
00218   int ctr;
00219   SHIFT_TEMPS
00220 
00221   row_fdct(data);
00222 
00223   
00224 
00225   dataptr = data;
00226   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
00227     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
00228     tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
00229     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
00230     tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
00231     tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
00232     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
00233     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
00234     tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
00235 
00236     
00237 
00238     tmp10 = tmp0 + tmp3;        
00239     tmp13 = tmp0 - tmp3;
00240     tmp11 = tmp1 + tmp2;
00241     tmp12 = tmp1 - tmp2;
00242 
00243     dataptr[DCTSIZE*0] = tmp10 + tmp11; 
00244     dataptr[DCTSIZE*4] = tmp10 - tmp11;
00245 
00246     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); 
00247     dataptr[DCTSIZE*2] = tmp13 + z1; 
00248     dataptr[DCTSIZE*6] = tmp13 - z1;
00249 
00250     
00251 
00252     tmp10 = tmp4 + tmp5;        
00253     tmp11 = tmp5 + tmp6;
00254     tmp12 = tmp6 + tmp7;
00255 
00256     
00257     z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); 
00258     z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; 
00259     z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; 
00260     z3 = MULTIPLY(tmp11, FIX_0_707106781); 
00261 
00262     z11 = tmp7 + z3;            
00263     z13 = tmp7 - z3;
00264 
00265     dataptr[DCTSIZE*5] = z13 + z2; 
00266     dataptr[DCTSIZE*3] = z13 - z2;
00267     dataptr[DCTSIZE*1] = z11 + z4;
00268     dataptr[DCTSIZE*7] = z11 - z4;
00269 
00270     dataptr++;                  
00271   }
00272 }
00273 
00274 
00275 
00276 
00277 
00278 GLOBAL(void)
00279 fdct_ifast248 (DCTELEM * data)
00280 {
00281   int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
00282   int_fast16_t tmp10, tmp11, tmp12, tmp13;
00283   int_fast16_t z1;
00284   DCTELEM *dataptr;
00285   int ctr;
00286   SHIFT_TEMPS
00287 
00288   row_fdct(data);
00289 
00290   
00291 
00292   dataptr = data;
00293   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
00294     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
00295     tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
00296     tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
00297     tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
00298     tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
00299     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
00300     tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
00301     tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
00302 
00303     
00304 
00305     tmp10 = tmp0 + tmp3;
00306     tmp11 = tmp1 + tmp2;
00307     tmp12 = tmp1 - tmp2;
00308     tmp13 = tmp0 - tmp3;
00309 
00310     dataptr[DCTSIZE*0] = tmp10 + tmp11;
00311     dataptr[DCTSIZE*4] = tmp10 - tmp11;
00312 
00313     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
00314     dataptr[DCTSIZE*2] = tmp13 + z1;
00315     dataptr[DCTSIZE*6] = tmp13 - z1;
00316 
00317     tmp10 = tmp4 + tmp7;
00318     tmp11 = tmp5 + tmp6;
00319     tmp12 = tmp5 - tmp6;
00320     tmp13 = tmp4 - tmp7;
00321 
00322     dataptr[DCTSIZE*1] = tmp10 + tmp11;
00323     dataptr[DCTSIZE*5] = tmp10 - tmp11;
00324 
00325     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
00326     dataptr[DCTSIZE*3] = tmp13 + z1;
00327     dataptr[DCTSIZE*7] = tmp13 - z1;
00328 
00329     dataptr++;                        
00330   }
00331 }
00332 
00333 
/*
 * Drop the helper macros set up for the transforms above so that they do
 * not leak into whatever is compiled after this point.  MULTIPLY and all
 * four FIX_* constants are removed together: MULTIPLY expands to DESCALE
 * and CONST_BITS, so it would be unusable once those two are gone.
 */
#undef GLOBAL
#undef CONST_BITS
#undef DESCALE
#undef MULTIPLY
#undef FIX_0_382683433
#undef FIX_0_541196100
#undef FIX_0_707106781
#undef FIX_1_306562965