00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00028 #include "dsputil.h"
00029
00030
/**
 * Overlap filter applied across a horizontal edge (vertical direction).
 *
 * For each of 8 consecutive columns, the four vertically adjacent pixels
 * at rows -2, -1, 0 and +1 relative to @p src are read and rewritten.
 * The two outer pixels are adjusted by one correction term, the two inner
 * pixels by another (clipped to 8 bits). The rounding bias starts at 1 and
 * toggles between 1 and 0 from one column to the next.
 *
 * @param src    pointer to the first pixel of the row just below the edge
 * @param stride distance in bytes between two consecutive rows
 */
static void vc1_v_overlap_c(uint8_t* src, int stride)
{
    int rounding = 1;
    int col;

    for (col = 0; col < 8; col++, src++) {
        const int p0 = src[-2 * stride];
        const int p1 = src[-stride];
        const int p2 = src[0];
        const int p3 = src[stride];
        /* correction for the outer pair and for the inner pair */
        const int outer = (p0 - p3 + 3 + rounding) >> 3;
        const int inner = (p0 - p3 + p1 - p2 + 4 - rounding) >> 3;

        src[-2 * stride] = p0 - outer;
        src[-stride]     = av_clip_uint8(p1 - inner);
        src[0]           = av_clip_uint8(p2 + inner);
        src[stride]      = p3 + outer;

        /* alternate the rounding bias: 1, 0, 1, 0, ... */
        rounding ^= 1;
    }
}
00055
/**
 * Overlap filter applied across a vertical edge (horizontal direction).
 *
 * For each of 8 consecutive rows, the four horizontally adjacent pixels
 * at columns -2, -1, 0 and +1 relative to @p src are read and rewritten.
 * The two outer pixels are adjusted by one correction term, the two inner
 * pixels by another (clipped to 8 bits). The rounding bias starts at 1 and
 * toggles between 1 and 0 from one row to the next.
 *
 * @param src    pointer to the first pixel of the column just right of the edge
 * @param stride distance in bytes between two consecutive rows
 */
static void vc1_h_overlap_c(uint8_t* src, int stride)
{
    int rounding = 1;
    int row;

    for (row = 0; row < 8; row++, src += stride) {
        const int p0 = src[-2];
        const int p1 = src[-1];
        const int p2 = src[0];
        const int p3 = src[1];
        /* correction for the outer pair and for the inner pair */
        const int outer = (p0 - p3 + 3 + rounding) >> 3;
        const int inner = (p0 - p3 + p1 - p2 + 4 - rounding) >> 3;

        src[-2] = p0 - outer;
        src[-1] = av_clip_uint8(p1 - inner);
        src[0]  = av_clip_uint8(p2 + inner);
        src[1]  = p3 + outer;

        /* alternate the rounding bias: 1, 0, 1, 0, ... */
        rounding ^= 1;
    }
}
00080
00081
00084 static void vc1_inv_trans_8x8_c(DCTELEM block[64])
00085 {
00086 int i;
00087 register int t1,t2,t3,t4,t5,t6,t7,t8;
00088 DCTELEM *src, *dst;
00089
00090 src = block;
00091 dst = block;
00092 for(i = 0; i < 8; i++){
00093 t1 = 12 * (src[0] + src[4]) + 4;
00094 t2 = 12 * (src[0] - src[4]) + 4;
00095 t3 = 16 * src[2] + 6 * src[6];
00096 t4 = 6 * src[2] - 16 * src[6];
00097
00098 t5 = t1 + t3;
00099 t6 = t2 + t4;
00100 t7 = t2 - t4;
00101 t8 = t1 - t3;
00102
00103 t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7];
00104 t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7];
00105 t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7];
00106 t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7];
00107
00108 dst[0] = (t5 + t1) >> 3;
00109 dst[1] = (t6 + t2) >> 3;
00110 dst[2] = (t7 + t3) >> 3;
00111 dst[3] = (t8 + t4) >> 3;
00112 dst[4] = (t8 - t4) >> 3;
00113 dst[5] = (t7 - t3) >> 3;
00114 dst[6] = (t6 - t2) >> 3;
00115 dst[7] = (t5 - t1) >> 3;
00116
00117 src += 8;
00118 dst += 8;
00119 }
00120
00121 src = block;
00122 dst = block;
00123 for(i = 0; i < 8; i++){
00124 t1 = 12 * (src[ 0] + src[32]) + 64;
00125 t2 = 12 * (src[ 0] - src[32]) + 64;
00126 t3 = 16 * src[16] + 6 * src[48];
00127 t4 = 6 * src[16] - 16 * src[48];
00128
00129 t5 = t1 + t3;
00130 t6 = t2 + t4;
00131 t7 = t2 - t4;
00132 t8 = t1 - t3;
00133
00134 t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56];
00135 t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56];
00136 t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56];
00137 t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56];
00138
00139 dst[ 0] = (t5 + t1) >> 7;
00140 dst[ 8] = (t6 + t2) >> 7;
00141 dst[16] = (t7 + t3) >> 7;
00142 dst[24] = (t8 + t4) >> 7;
00143 dst[32] = (t8 - t4 + 1) >> 7;
00144 dst[40] = (t7 - t3 + 1) >> 7;
00145 dst[48] = (t6 - t2 + 1) >> 7;
00146 dst[56] = (t5 - t1 + 1) >> 7;
00147
00148 src++;
00149 dst++;
00150 }
00151 }
00152
00155 static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, DCTELEM *block)
00156 {
00157 int i;
00158 register int t1,t2,t3,t4,t5,t6,t7,t8;
00159 DCTELEM *src, *dst;
00160 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00161
00162 src = block;
00163 dst = block;
00164 for(i = 0; i < 4; i++){
00165 t1 = 12 * (src[0] + src[4]) + 4;
00166 t2 = 12 * (src[0] - src[4]) + 4;
00167 t3 = 16 * src[2] + 6 * src[6];
00168 t4 = 6 * src[2] - 16 * src[6];
00169
00170 t5 = t1 + t3;
00171 t6 = t2 + t4;
00172 t7 = t2 - t4;
00173 t8 = t1 - t3;
00174
00175 t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7];
00176 t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7];
00177 t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7];
00178 t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7];
00179
00180 dst[0] = (t5 + t1) >> 3;
00181 dst[1] = (t6 + t2) >> 3;
00182 dst[2] = (t7 + t3) >> 3;
00183 dst[3] = (t8 + t4) >> 3;
00184 dst[4] = (t8 - t4) >> 3;
00185 dst[5] = (t7 - t3) >> 3;
00186 dst[6] = (t6 - t2) >> 3;
00187 dst[7] = (t5 - t1) >> 3;
00188
00189 src += 8;
00190 dst += 8;
00191 }
00192
00193 src = block;
00194 for(i = 0; i < 8; i++){
00195 t1 = 17 * (src[ 0] + src[16]) + 64;
00196 t2 = 17 * (src[ 0] - src[16]) + 64;
00197 t3 = 22 * src[ 8] + 10 * src[24];
00198 t4 = 22 * src[24] - 10 * src[ 8];
00199
00200 dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3) >> 7)];
00201 dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4) >> 7)];
00202 dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4) >> 7)];
00203 dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3) >> 7)];
00204
00205 src ++;
00206 dest++;
00207 }
00208 }
00209
00212 static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, DCTELEM *block)
00213 {
00214 int i;
00215 register int t1,t2,t3,t4,t5,t6,t7,t8;
00216 DCTELEM *src, *dst;
00217 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00218
00219 src = block;
00220 dst = block;
00221 for(i = 0; i < 8; i++){
00222 t1 = 17 * (src[0] + src[2]) + 4;
00223 t2 = 17 * (src[0] - src[2]) + 4;
00224 t3 = 22 * src[1] + 10 * src[3];
00225 t4 = 22 * src[3] - 10 * src[1];
00226
00227 dst[0] = (t1 + t3) >> 3;
00228 dst[1] = (t2 - t4) >> 3;
00229 dst[2] = (t2 + t4) >> 3;
00230 dst[3] = (t1 - t3) >> 3;
00231
00232 src += 8;
00233 dst += 8;
00234 }
00235
00236 src = block;
00237 for(i = 0; i < 4; i++){
00238 t1 = 12 * (src[ 0] + src[32]) + 64;
00239 t2 = 12 * (src[ 0] - src[32]) + 64;
00240 t3 = 16 * src[16] + 6 * src[48];
00241 t4 = 6 * src[16] - 16 * src[48];
00242
00243 t5 = t1 + t3;
00244 t6 = t2 + t4;
00245 t7 = t2 - t4;
00246 t8 = t1 - t3;
00247
00248 t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56];
00249 t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56];
00250 t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56];
00251 t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56];
00252
00253 dest[0*linesize] = cm[dest[0*linesize] + ((t5 + t1) >> 7)];
00254 dest[1*linesize] = cm[dest[1*linesize] + ((t6 + t2) >> 7)];
00255 dest[2*linesize] = cm[dest[2*linesize] + ((t7 + t3) >> 7)];
00256 dest[3*linesize] = cm[dest[3*linesize] + ((t8 + t4) >> 7)];
00257 dest[4*linesize] = cm[dest[4*linesize] + ((t8 - t4 + 1) >> 7)];
00258 dest[5*linesize] = cm[dest[5*linesize] + ((t7 - t3 + 1) >> 7)];
00259 dest[6*linesize] = cm[dest[6*linesize] + ((t6 - t2 + 1) >> 7)];
00260 dest[7*linesize] = cm[dest[7*linesize] + ((t5 - t1 + 1) >> 7)];
00261
00262 src ++;
00263 dest++;
00264 }
00265 }
00266
00269 static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, DCTELEM *block)
00270 {
00271 int i;
00272 register int t1,t2,t3,t4;
00273 DCTELEM *src, *dst;
00274 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00275
00276 src = block;
00277 dst = block;
00278 for(i = 0; i < 4; i++){
00279 t1 = 17 * (src[0] + src[2]) + 4;
00280 t2 = 17 * (src[0] - src[2]) + 4;
00281 t3 = 22 * src[1] + 10 * src[3];
00282 t4 = 22 * src[3] - 10 * src[1];
00283
00284 dst[0] = (t1 + t3) >> 3;
00285 dst[1] = (t2 - t4) >> 3;
00286 dst[2] = (t2 + t4) >> 3;
00287 dst[3] = (t1 - t3) >> 3;
00288
00289 src += 8;
00290 dst += 8;
00291 }
00292
00293 src = block;
00294 for(i = 0; i < 4; i++){
00295 t1 = 17 * (src[ 0] + src[16]) + 64;
00296 t2 = 17 * (src[ 0] - src[16]) + 64;
00297 t3 = 22 * src[ 8] + 10 * src[24];
00298 t4 = 22 * src[24] - 10 * src[ 8];
00299
00300 dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3) >> 7)];
00301 dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4) >> 7)];
00302 dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4) >> 7)];
00303 dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3) >> 7)];
00304
00305 src ++;
00306 dest++;
00307 }
00308 }
00309
00310
00312 #define VC1_MSPEL_FILTER_16B(DIR, TYPE) \
00313 static av_always_inline int vc1_mspel_ ## DIR ## _filter_16bits(const TYPE *src, int stride, int mode) \
00314 { \
00315 switch(mode){ \
00316 case 0: \
00317 return 0; \
00318 case 1: \
00319 return -4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2]; \
00320 case 2: \
00321 return -src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2]; \
00322 case 3: \
00323 return -3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2]; \
00324 } \
00325 return 0; \
00326 }
00327
00328 VC1_MSPEL_FILTER_16B(ver, uint8_t);
00329 VC1_MSPEL_FILTER_16B(hor, int16_t);
00330
00331
00334 static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r)
00335 {
00336 switch(mode){
00337 case 0:
00338 return src[0];
00339 case 1:
00340 return (-4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2] + 32 - r) >> 6;
00341 case 2:
00342 return (-src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2] + 8 - r) >> 4;
00343 case 3:
00344 return (-3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2] + 32 - r) >> 6;
00345 }
00346 return 0;
00347 }
00348
/**
 * Interpolate an 8x8 block with independent horizontal and vertical modes.
 *
 * Three cases are handled:
 *  - vmode == 0: each output is the horizontal filter of the source row
 *    (hmode == 0 degenerates to a copy);
 *  - vmode != 0, hmode == 0: each output is the vertical filter of the
 *    source column, with rounding term 1 - rnd;
 *  - both non-zero: the vertical filter is run first over 8 rows of 11
 *    columns (starting one pixel left of @p src) into a 16-bit scratch
 *    buffer, then the horizontal filter is run over that buffer.
 *
 * @param dst    destination block; rows are @p stride bytes apart
 * @param src    source block; rows are @p stride bytes apart
 * @param stride row distance in bytes, shared by @p dst and @p src
 * @param hmode  horizontal filter selector (0..3)
 * @param vmode  vertical filter selector (0..3)
 * @param rnd    rounding control folded into the rounding constants
 */
static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd)
{
    int x, y;

    if (!vmode) {
        /* horizontal filter only */
        for (y = 0; y < 8; y++, dst += stride, src += stride) {
            for (x = 0; x < 8; x++)
                dst[x] = av_clip_uint8(vc1_mspel_filter(src + x, 1, hmode, rnd));
        }
        return;
    }

    if (!hmode) {
        /* vertical filter only */
        const int vr = 1 - rnd;

        for (y = 0; y < 8; y++, src += stride, dst += stride) {
            for (x = 0; x < 8; x++)
                dst[x] = av_clip_uint8(vc1_mspel_filter(src + x, stride, vmode, vr));
        }
        return;
    }

    /* both directions: vertical pass into a scratch buffer, then horizontal */
    {
        static const int shift_value[] = { 0, 5, 1, 5 };
        const int shift  = (shift_value[hmode] + shift_value[vmode]) >> 1;
        const int vround = (1 << (shift - 1)) + rnd - 1;
        const int hround = 64 - rnd;
        int16_t scratch[11 * 8];
        int16_t *line;

        /* the horizontal taps reach one sample left and two right of each
         * output, hence 11 columns starting at src - 1 */
        src -= 1;
        line = scratch;
        for (y = 0; y < 8; y++, src += stride, line += 11) {
            for (x = 0; x < 11; x++)
                line[x] = (vc1_mspel_ver_filter_16bits(src + x, stride, vmode) + vround) >> shift;
        }

        /* skip the extra left column so that line[x] lines up with dst[x] */
        line = scratch + 1;
        for (y = 0; y < 8; y++, dst += stride, line += 11) {
            for (x = 0; x < 8; x++)
                dst[x] = av_clip_uint8((vc1_mspel_hor_filter_16bits(line + x, 1, hmode) + hround) >> 7);
        }
    }
}
00405
00406
00407
00408
/* Defined outside this file; presumably the (0,0) case with no
 * interpolation -- confirm against its definition. */
void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);

/**
 * Generate put_vc1_mspel_mc<HMODE><VMODE>_c(), a wrapper that forwards to
 * vc1_mspel_mc() with the given constant horizontal and vertical modes.
 */
#define PUT_VC1_MSPEL(HMODE, VMODE)                                           \
static void put_vc1_mspel_mc ## HMODE ## VMODE ## _c(uint8_t *dst,           \
                                                     const uint8_t *src,      \
                                                     int stride, int rnd)     \
{                                                                             \
    vc1_mspel_mc(dst, src, stride, HMODE, VMODE, rnd);                        \
}

/* vertical mode 0 (the 0,0 combination is provided externally) */
PUT_VC1_MSPEL(1, 0)
PUT_VC1_MSPEL(2, 0)
PUT_VC1_MSPEL(3, 0)

/* vertical mode 1 */
PUT_VC1_MSPEL(0, 1)
PUT_VC1_MSPEL(1, 1)
PUT_VC1_MSPEL(2, 1)
PUT_VC1_MSPEL(3, 1)

/* vertical mode 2 */
PUT_VC1_MSPEL(0, 2)
PUT_VC1_MSPEL(1, 2)
PUT_VC1_MSPEL(2, 2)
PUT_VC1_MSPEL(3, 2)

/* vertical mode 3 */
PUT_VC1_MSPEL(0, 3)
PUT_VC1_MSPEL(1, 3)
PUT_VC1_MSPEL(2, 3)
PUT_VC1_MSPEL(3, 3)
00434
00435 void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) {
00436 dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c;
00437 dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c;
00438 dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c;
00439 dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c;
00440 dsp->vc1_h_overlap = vc1_h_overlap_c;
00441 dsp->vc1_v_overlap = vc1_v_overlap_c;
00442
00443 dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_c;
00444 dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_c;
00445 dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_c;
00446 dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_c;
00447 dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_c;
00448 dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_c;
00449 dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_c;
00450 dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_c;
00451 dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_c;
00452 dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_c;
00453 dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_c;
00454 dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_c;
00455 dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_c;
00456 dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_c;
00457 dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_c;
00458 dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_c;
00459 }