00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include "libavcodec/dsputil.h"
00024
00025 #include "dsputil_ppc.h"
00026
00027 #include "dsputil_altivec.h"
00028
00029 void fdct_altivec(int16_t *block);
00030 void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h,
00031 int x16, int y16, int rounder);
00032 void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
00033 void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
00034
00035 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx);
00036
00037 void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx);
00038 void vc1dsp_init_altivec(DSPContext* c, AVCodecContext *avctx);
00039 void snow_init_altivec(DSPContext* c, AVCodecContext *avctx);
00040 void float_init_altivec(DSPContext* c, AVCodecContext *avctx);
00041 void int_init_altivec(DSPContext* c, AVCodecContext *avctx);
00042
00043 int mm_flags = 0;
00044
/**
 * Report the SIMD capabilities detected at run time.
 *
 * @return FF_MM_ALTIVEC when the build has HAVE_ALTIVEC enabled and
 *         has_altivec() returns non-zero, otherwise 0
 */
int mm_support(void)
{
#if HAVE_ALTIVEC
    if (has_altivec())
        return FF_MM_ALTIVEC;
#endif
    return 0;
}
00055
#if CONFIG_POWERPC_PERF
/* Counter storage, indexed as [PMC][function slot][statistic]. */
unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];

/*
 * Human-readable names printed by powerpc_display_perf_report(), indexed by
 * function slot. The strings are literals, so the table is declared as
 * read-only plain char pointers rather than unsigned char pointers.
 * NOTE(review): the order presumably has to match the powerpc_perf_* slot
 * enumeration declared elsewhere -- confirm before adding entries.
 */
static const char *const perfname[] = {
    "ff_fft_calc_altivec",
    "gmc1_altivec",
    "dct_unquantize_h263_altivec",
    "fdct_altivec",
    "idct_add_altivec",
    "idct_put_altivec",
    "put_pixels16_altivec",
    "avg_pixels16_altivec",
    "avg_pixels8_altivec",
    "put_pixels8_xy2_altivec",
    "put_no_rnd_pixels8_xy2_altivec",
    "put_pixels16_xy2_altivec",
    "put_no_rnd_pixels16_xy2_altivec",
    "hadamard8_diff8x8_altivec",
    "hadamard8_diff16_altivec",
    "avg_pixels8_xy2_altivec",
    "clear_blocks_dcbz32_ppc",
    "clear_blocks_dcbz128_ppc",
    "put_h264_chroma_mc8_altivec",
    "avg_h264_chroma_mc8_altivec",
    "put_h264_qpel16_h_lowpass_altivec",
    "avg_h264_qpel16_h_lowpass_altivec",
    "put_h264_qpel16_v_lowpass_altivec",
    "avg_h264_qpel16_v_lowpass_altivec",
    "put_h264_qpel16_hv_lowpass_altivec",
    "avg_h264_qpel16_hv_lowpass_altivec",
    ""
};
#include <stdio.h>
#endif
00090
#if CONFIG_POWERPC_PERF
/**
 * Log the statistics collected in perfdata[] at AV_LOG_INFO level.
 *
 * One entry is printed per (function slot, PMC) pair whose sample count is
 * non-zero: minimum, maximum, average and the number of samples.
 */
void powerpc_display_perf_report(void)
{
    int fn, pmc;

    av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
    for (fn = 0; fn < powerpc_perf_total; fn++) {
        for (pmc = 0; pmc < POWERPC_NUM_PMC_ENABLED; pmc++) {
            const unsigned long long *stats = perfdata[pmc][fn];

            /* Skip slots that never ran; this also keeps the average's
             * divisor non-zero. */
            if (stats[powerpc_data_num] == 0ULL)
                continue;

            /* perfdata holds unsigned long long, so the matching conversion
             * is %llu (PRIu64 is for uint64_t, which may be a different
             * type). */
            av_log(NULL, AV_LOG_INFO,
                   " Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
                   perfname[fn],
                   pmc + 1,
                   stats[powerpc_data_min],
                   stats[powerpc_data_max],
                   (double)stats[powerpc_data_sum] /
                   (double)stats[powerpc_data_num],
                   stats[powerpc_data_num]);
        }
    }
}
#endif
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133 void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
00134 {
00135 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1);
00136 register int misal = ((unsigned long)blocks & 0x00000010);
00137 register int i = 0;
00138 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
00139 #if 1
00140 if (misal) {
00141 ((unsigned long*)blocks)[0] = 0L;
00142 ((unsigned long*)blocks)[1] = 0L;
00143 ((unsigned long*)blocks)[2] = 0L;
00144 ((unsigned long*)blocks)[3] = 0L;
00145 i += 16;
00146 }
00147 for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) {
00148 __asm__ volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
00149 }
00150 if (misal) {
00151 ((unsigned long*)blocks)[188] = 0L;
00152 ((unsigned long*)blocks)[189] = 0L;
00153 ((unsigned long*)blocks)[190] = 0L;
00154 ((unsigned long*)blocks)[191] = 0L;
00155 i += 16;
00156 }
00157 #else
00158 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00159 #endif
00160 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
00161 }
00162
00163
00164
00165 #if HAVE_DCBZL
00166 void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
00167 {
00168 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1);
00169 register int misal = ((unsigned long)blocks & 0x0000007f);
00170 register int i = 0;
00171 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
00172 #if 1
00173 if (misal) {
00174
00175
00176
00177 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00178 }
00179 else
00180 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
00181 __asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
00182 }
00183 #else
00184 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00185 #endif
00186 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1);
00187 }
00188 #else
00189 void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
00190 {
00191 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00192 }
00193 #endif
00194
#if HAVE_DCBZL
/**
 * Measure how many bytes a single dcbzl instruction sets to zero.
 *
 * A 1024-byte scratch buffer is filled with 0xFF, one dcbzl is issued on its
 * midpoint, and the zero bytes in the buffer are counted.
 *
 * @return the number of zeroed bytes, or 0 if the scratch buffer could not
 *         be allocated
 */
long check_dcbzl_effect(void)
{
    register char *fakedata = av_malloc(1024);
    register char *fakedata_middle;
    register long zero = 0;
    register long i = 0;
    long count = 0;

    if (!fakedata) {
        return 0L;
    }

    fakedata_middle = (fakedata + 512);

    memset(fakedata, 0xFF, 1024);

    /* The "memory" clobber tells the compiler that the instruction rewrites
     * the buffer; without it the scan below could legally be evaluated
     * against the 0xFF fill and the result folded to 0. */
    __asm__ volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero) : "memory");

    for (i = 0; i < 1024 ; i ++) {
        if (fakedata[i] == (char)0)
            count++;
    }

    av_free(fakedata);

    return count;
}
#else
/**
 * Build without dcbzl support: nothing to measure.
 *
 * @return always 0
 */
long check_dcbzl_effect(void)
{
    return 0;
}
#endif
00236
/**
 * Issue one dcbt instruction per row, starting at mem and advancing by
 * stride bytes each time.
 *
 * The first dcbt is issued before h is tested, and the loop ends when the
 * decremented h reaches zero.
 * NOTE(review): callers presumably always pass h > 0 -- confirm.
 *
 * @param mem    address of the first row
 * @param stride distance in bytes between consecutive rows
 * @param h      number of rows
 */
static void prefetch_ppc(void *mem, int stride, int h)
{
    register const uint8_t *row = mem;

    for (;;) {
        __asm__ volatile ("dcbt 0,%0" : : "r" (row));
        row += stride;
        if (--h == 0)
            break;
    }
}
00245
00246 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
00247 {
00248
00249 c->prefetch = prefetch_ppc;
00250 switch (check_dcbzl_effect()) {
00251 case 32:
00252 c->clear_blocks = clear_blocks_dcbz32_ppc;
00253 break;
00254 case 128:
00255 c->clear_blocks = clear_blocks_dcbz128_ppc;
00256 break;
00257 default:
00258 break;
00259 }
00260
00261 #if HAVE_ALTIVEC
00262 if(CONFIG_H264_DECODER) dsputil_h264_init_ppc(c, avctx);
00263
00264 if (has_altivec()) {
00265 mm_flags |= FF_MM_ALTIVEC;
00266
00267 dsputil_init_altivec(c, avctx);
00268 if(CONFIG_SNOW_DECODER) snow_init_altivec(c, avctx);
00269 if(CONFIG_VC1_DECODER || CONFIG_WMV3_DECODER)
00270 vc1dsp_init_altivec(c, avctx);
00271 float_init_altivec(c, avctx);
00272 int_init_altivec(c, avctx);
00273 c->gmc1 = gmc1_altivec;
00274
00275 #if CONFIG_ENCODERS
00276 if (avctx->dct_algo == FF_DCT_AUTO ||
00277 avctx->dct_algo == FF_DCT_ALTIVEC) {
00278 c->fdct = fdct_altivec;
00279 }
00280 #endif //CONFIG_ENCODERS
00281
00282 if (avctx->lowres==0) {
00283 if ((avctx->idct_algo == FF_IDCT_AUTO) ||
00284 (avctx->idct_algo == FF_IDCT_ALTIVEC)) {
00285 c->idct_put = idct_put_altivec;
00286 c->idct_add = idct_add_altivec;
00287 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
00288 }
00289 }
00290
00291 #if CONFIG_POWERPC_PERF
00292 {
00293 int i, j;
00294 for (i = 0 ; i < powerpc_perf_total ; i++) {
00295 for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) {
00296 perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
00297 perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
00298 perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
00299 perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
00300 }
00301 }
00302 }
00303 #endif
00304 }
00305 #endif
00306 }