00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 #include "libavutil/cpu.h"
00024 #include "libavcodec/dsputil.h"
00025 #include "dsputil_altivec.h"
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 
00038 
00039 
00040 
00041 
00042 
00043 
00044 
00045 
00046 
00047 static void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
00048 {
00049     register int misal = ((unsigned long)blocks & 0x00000010);
00050     register int i = 0;
00051     if (misal) {
00052         ((unsigned long*)blocks)[0] = 0L;
00053         ((unsigned long*)blocks)[1] = 0L;
00054         ((unsigned long*)blocks)[2] = 0L;
00055         ((unsigned long*)blocks)[3] = 0L;
00056         i += 16;
00057     }
00058     for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) {
00059         __asm__ volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
00060     }
00061     if (misal) {
00062         ((unsigned long*)blocks)[188] = 0L;
00063         ((unsigned long*)blocks)[189] = 0L;
00064         ((unsigned long*)blocks)[190] = 0L;
00065         ((unsigned long*)blocks)[191] = 0L;
00066         i += 16;
00067     }
00068 }
00069 
00070 
00071 
00072 #if HAVE_DCBZL
00073 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
00074 {
00075     register int misal = ((unsigned long)blocks & 0x0000007f);
00076     register int i = 0;
00077     if (misal) {
00078         
00079         
00080         
00081         memset(blocks, 0, sizeof(DCTELEM)*6*64);
00082     }
00083     else
00084         for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
00085             __asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
00086         }
00087 }
00088 #else
00089 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
00090 {
00091     memset(blocks, 0, sizeof(DCTELEM)*6*64);
00092 }
00093 #endif
00094 
/**
 * Probe how many bytes one dcbzl instruction zeroes on the running CPU.
 *
 * A 1024-byte scratch buffer is filled with 0xFF, a single dcbzl is issued
 * on the address in the middle of it, and the bytes that read back as zero
 * are counted.
 *
 * @return the number of zeroed bytes; 0 when the scratch buffer cannot be
 *         allocated or when the build has no dcbzl support (HAVE_DCBZL
 *         is 0)
 */
static long check_dcbzl_effect(void)
{
    long count = 0;
#if HAVE_DCBZL
    register char *fakedata = av_malloc(1024);
    register char *fakedata_middle;
    register long zero = 0;
    register long i = 0;

    if (!fakedata) {
        return 0L;
    }

    fakedata_middle = (fakedata + 512);

    memset(fakedata, 0xFF, 1024);

    /* The "b" constraint asks for an address base register, which keeps the
     * pointer out of r0. The "memory" clobber tells the compiler that the
     * buffer contents change here; without it the compiler may assume the
     * buffer still holds the 0xFF pattern written above and reduce the
     * counting loop below to a constant. */
    __asm__ volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero) : "memory");

    for (i = 0; i < 1024 ; i ++) {
        if (fakedata[i] == (char)0)
            count++;
    }

    av_free(fakedata);
#endif

    return count;
}
00136 
/**
 * Issue one dcbt (data cache block touch) per row of a block of memory.
 *
 * @param mem    address of the first row
 * @param stride distance in bytes from one row to the next
 * @param h      number of rows to touch; nothing is done when h <= 0
 */
static void prefetch_ppc(void *mem, int stride, int h)
{
    register const uint8_t *p = mem;

    /* A do/while (--h) loop would never reach zero for h <= 0 and would count
     * down into signed overflow, so test the row count before each touch. */
    for ( ; h > 0; h--) {
        __asm__ volatile ("dcbt 0,%0" : : "r" (p));
        p += stride;
    }
}
00145 
00146 void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
00147 {
00148     const int high_bit_depth = avctx->bits_per_raw_sample > 8;
00149     int mm_flags = av_get_cpu_flags();
00150 
00151     if (avctx->dsp_mask) {
00152         if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
00153             mm_flags |= (avctx->dsp_mask & 0xffff);
00154         else
00155             mm_flags &= ~(avctx->dsp_mask & 0xffff);
00156     }
00157 
00158     
00159     c->prefetch = prefetch_ppc;
00160     if (!high_bit_depth) {
00161     switch (check_dcbzl_effect()) {
00162         case 32:
00163             c->clear_blocks = clear_blocks_dcbz32_ppc;
00164             break;
00165         case 128:
00166             c->clear_blocks = clear_blocks_dcbz128_ppc;
00167             break;
00168         default:
00169             break;
00170     }
00171     }
00172 
00173 #if HAVE_ALTIVEC
00174     if(CONFIG_H264_DECODER) ff_dsputil_h264_init_ppc(c, avctx);
00175 
00176     if (mm_flags & AV_CPU_FLAG_ALTIVEC) {
00177         ff_dsputil_init_altivec(c, avctx);
00178         ff_float_init_altivec(c, avctx);
00179         ff_int_init_altivec(c, avctx);
00180         c->gmc1 = ff_gmc1_altivec;
00181 
00182 #if CONFIG_ENCODERS
00183         if (avctx->bits_per_raw_sample <= 8 &&
00184             (avctx->dct_algo == FF_DCT_AUTO ||
00185              avctx->dct_algo == FF_DCT_ALTIVEC)) {
00186             c->fdct = ff_fdct_altivec;
00187         }
00188 #endif //CONFIG_ENCODERS
00189 
00190         if (avctx->lowres == 0 && avctx->bits_per_raw_sample <= 8) {
00191             if ((avctx->idct_algo == FF_IDCT_AUTO) ||
00192                 (avctx->idct_algo == FF_IDCT_ALTIVEC)) {
00193                 c->idct_put = ff_idct_put_altivec;
00194                 c->idct_add = ff_idct_add_altivec;
00195                 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
00196             }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) &&
00197                      avctx->idct_algo==FF_IDCT_VP3){
00198                 c->idct_put = ff_vp3_idct_put_altivec;
00199                 c->idct_add = ff_vp3_idct_add_altivec;
00200                 c->idct     = ff_vp3_idct_altivec;
00201                 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
00202             }
00203         }
00204 
00205     }
00206 #endif 
00207 }