00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include "libavutil/x86_cpu.h"
00026 #include "libavcodec/avcodec.h"
00027 #include "libavcodec/dsputil.h"
00028 #include "libavcodec/mpegvideo.h"
00029 #include "dsputil_mmx.h"
00030
/* Table defined outside this file. No function visible in this file refers to
 * it; presumably it is consumed by the mpegvideo_mmx_template.c code included
 * further down (TODO confirm against the template). */
extern uint16_t inv_zigzag_direct16[64];
00032
00033
00034 static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
00035 DCTELEM *block, int n, int qscale)
00036 {
00037 x86_reg level, qmul, qadd, nCoeffs;
00038
00039 qmul = qscale << 1;
00040
00041 assert(s->block_last_index[n]>=0 || s->h263_aic);
00042
00043 if (!s->h263_aic) {
00044 if (n < 4)
00045 level = block[0] * s->y_dc_scale;
00046 else
00047 level = block[0] * s->c_dc_scale;
00048 qadd = (qscale - 1) | 1;
00049 }else{
00050 qadd = 0;
00051 level= block[0];
00052 }
00053 if(s->ac_pred)
00054 nCoeffs=63;
00055 else
00056 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
00057
00058 __asm__ volatile(
00059 "movd %1, %%mm6 \n\t"
00060 "packssdw %%mm6, %%mm6 \n\t"
00061 "packssdw %%mm6, %%mm6 \n\t"
00062 "movd %2, %%mm5 \n\t"
00063 "pxor %%mm7, %%mm7 \n\t"
00064 "packssdw %%mm5, %%mm5 \n\t"
00065 "packssdw %%mm5, %%mm5 \n\t"
00066 "psubw %%mm5, %%mm7 \n\t"
00067 "pxor %%mm4, %%mm4 \n\t"
00068 ASMALIGN(4)
00069 "1: \n\t"
00070 "movq (%0, %3), %%mm0 \n\t"
00071 "movq 8(%0, %3), %%mm1 \n\t"
00072
00073 "pmullw %%mm6, %%mm0 \n\t"
00074 "pmullw %%mm6, %%mm1 \n\t"
00075
00076 "movq (%0, %3), %%mm2 \n\t"
00077 "movq 8(%0, %3), %%mm3 \n\t"
00078
00079 "pcmpgtw %%mm4, %%mm2 \n\t"
00080 "pcmpgtw %%mm4, %%mm3 \n\t"
00081
00082 "pxor %%mm2, %%mm0 \n\t"
00083 "pxor %%mm3, %%mm1 \n\t"
00084
00085 "paddw %%mm7, %%mm0 \n\t"
00086 "paddw %%mm7, %%mm1 \n\t"
00087
00088 "pxor %%mm0, %%mm2 \n\t"
00089 "pxor %%mm1, %%mm3 \n\t"
00090
00091 "pcmpeqw %%mm7, %%mm0 \n\t"
00092 "pcmpeqw %%mm7, %%mm1 \n\t"
00093
00094 "pandn %%mm2, %%mm0 \n\t"
00095 "pandn %%mm3, %%mm1 \n\t"
00096
00097 "movq %%mm0, (%0, %3) \n\t"
00098 "movq %%mm1, 8(%0, %3) \n\t"
00099
00100 "add $16, %3 \n\t"
00101 "jng 1b \n\t"
00102 ::"r" (block+nCoeffs), "rm"(qmul), "rm" (qadd), "r" (2*(-nCoeffs))
00103 : "memory"
00104 );
00105 block[0]= level;
00106 }
00107
00108
00109 static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
00110 DCTELEM *block, int n, int qscale)
00111 {
00112 x86_reg qmul, qadd, nCoeffs;
00113
00114 qmul = qscale << 1;
00115 qadd = (qscale - 1) | 1;
00116
00117 assert(s->block_last_index[n]>=0 || s->h263_aic);
00118
00119 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
00120
00121 __asm__ volatile(
00122 "movd %1, %%mm6 \n\t"
00123 "packssdw %%mm6, %%mm6 \n\t"
00124 "packssdw %%mm6, %%mm6 \n\t"
00125 "movd %2, %%mm5 \n\t"
00126 "pxor %%mm7, %%mm7 \n\t"
00127 "packssdw %%mm5, %%mm5 \n\t"
00128 "packssdw %%mm5, %%mm5 \n\t"
00129 "psubw %%mm5, %%mm7 \n\t"
00130 "pxor %%mm4, %%mm4 \n\t"
00131 ASMALIGN(4)
00132 "1: \n\t"
00133 "movq (%0, %3), %%mm0 \n\t"
00134 "movq 8(%0, %3), %%mm1 \n\t"
00135
00136 "pmullw %%mm6, %%mm0 \n\t"
00137 "pmullw %%mm6, %%mm1 \n\t"
00138
00139 "movq (%0, %3), %%mm2 \n\t"
00140 "movq 8(%0, %3), %%mm3 \n\t"
00141
00142 "pcmpgtw %%mm4, %%mm2 \n\t"
00143 "pcmpgtw %%mm4, %%mm3 \n\t"
00144
00145 "pxor %%mm2, %%mm0 \n\t"
00146 "pxor %%mm3, %%mm1 \n\t"
00147
00148 "paddw %%mm7, %%mm0 \n\t"
00149 "paddw %%mm7, %%mm1 \n\t"
00150
00151 "pxor %%mm0, %%mm2 \n\t"
00152 "pxor %%mm1, %%mm3 \n\t"
00153
00154 "pcmpeqw %%mm7, %%mm0 \n\t"
00155 "pcmpeqw %%mm7, %%mm1 \n\t"
00156
00157 "pandn %%mm2, %%mm0 \n\t"
00158 "pandn %%mm3, %%mm1 \n\t"
00159
00160 "movq %%mm0, (%0, %3) \n\t"
00161 "movq %%mm1, 8(%0, %3) \n\t"
00162
00163 "add $16, %3 \n\t"
00164 "jng 1b \n\t"
00165 ::"r" (block+nCoeffs), "rm"(qmul), "rm" (qadd), "r" (2*(-nCoeffs))
00166 : "memory"
00167 );
00168 }
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200 static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
00201 DCTELEM *block, int n, int qscale)
00202 {
00203 x86_reg nCoeffs;
00204 const uint16_t *quant_matrix;
00205 int block0;
00206
00207 assert(s->block_last_index[n]>=0);
00208
00209 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
00210
00211 if (n < 4)
00212 block0 = block[0] * s->y_dc_scale;
00213 else
00214 block0 = block[0] * s->c_dc_scale;
00215
00216 quant_matrix = s->intra_matrix;
00217 __asm__ volatile(
00218 "pcmpeqw %%mm7, %%mm7 \n\t"
00219 "psrlw $15, %%mm7 \n\t"
00220 "movd %2, %%mm6 \n\t"
00221 "packssdw %%mm6, %%mm6 \n\t"
00222 "packssdw %%mm6, %%mm6 \n\t"
00223 "mov %3, %%"REG_a" \n\t"
00224 ASMALIGN(4)
00225 "1: \n\t"
00226 "movq (%0, %%"REG_a"), %%mm0 \n\t"
00227 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
00228 "movq (%1, %%"REG_a"), %%mm4 \n\t"
00229 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
00230 "pmullw %%mm6, %%mm4 \n\t"
00231 "pmullw %%mm6, %%mm5 \n\t"
00232 "pxor %%mm2, %%mm2 \n\t"
00233 "pxor %%mm3, %%mm3 \n\t"
00234 "pcmpgtw %%mm0, %%mm2 \n\t"
00235 "pcmpgtw %%mm1, %%mm3 \n\t"
00236 "pxor %%mm2, %%mm0 \n\t"
00237 "pxor %%mm3, %%mm1 \n\t"
00238 "psubw %%mm2, %%mm0 \n\t"
00239 "psubw %%mm3, %%mm1 \n\t"
00240 "pmullw %%mm4, %%mm0 \n\t"
00241 "pmullw %%mm5, %%mm1 \n\t"
00242 "pxor %%mm4, %%mm4 \n\t"
00243 "pxor %%mm5, %%mm5 \n\t"
00244 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t"
00245 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t"
00246 "psraw $3, %%mm0 \n\t"
00247 "psraw $3, %%mm1 \n\t"
00248 "psubw %%mm7, %%mm0 \n\t"
00249 "psubw %%mm7, %%mm1 \n\t"
00250 "por %%mm7, %%mm0 \n\t"
00251 "por %%mm7, %%mm1 \n\t"
00252 "pxor %%mm2, %%mm0 \n\t"
00253 "pxor %%mm3, %%mm1 \n\t"
00254 "psubw %%mm2, %%mm0 \n\t"
00255 "psubw %%mm3, %%mm1 \n\t"
00256 "pandn %%mm0, %%mm4 \n\t"
00257 "pandn %%mm1, %%mm5 \n\t"
00258 "movq %%mm4, (%0, %%"REG_a") \n\t"
00259 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
00260
00261 "add $16, %%"REG_a" \n\t"
00262 "js 1b \n\t"
00263 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
00264 : "%"REG_a, "memory"
00265 );
00266 block[0]= block0;
00267 }
00268
00269 static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
00270 DCTELEM *block, int n, int qscale)
00271 {
00272 x86_reg nCoeffs;
00273 const uint16_t *quant_matrix;
00274
00275 assert(s->block_last_index[n]>=0);
00276
00277 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
00278
00279 quant_matrix = s->inter_matrix;
00280 __asm__ volatile(
00281 "pcmpeqw %%mm7, %%mm7 \n\t"
00282 "psrlw $15, %%mm7 \n\t"
00283 "movd %2, %%mm6 \n\t"
00284 "packssdw %%mm6, %%mm6 \n\t"
00285 "packssdw %%mm6, %%mm6 \n\t"
00286 "mov %3, %%"REG_a" \n\t"
00287 ASMALIGN(4)
00288 "1: \n\t"
00289 "movq (%0, %%"REG_a"), %%mm0 \n\t"
00290 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
00291 "movq (%1, %%"REG_a"), %%mm4 \n\t"
00292 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
00293 "pmullw %%mm6, %%mm4 \n\t"
00294 "pmullw %%mm6, %%mm5 \n\t"
00295 "pxor %%mm2, %%mm2 \n\t"
00296 "pxor %%mm3, %%mm3 \n\t"
00297 "pcmpgtw %%mm0, %%mm2 \n\t"
00298 "pcmpgtw %%mm1, %%mm3 \n\t"
00299 "pxor %%mm2, %%mm0 \n\t"
00300 "pxor %%mm3, %%mm1 \n\t"
00301 "psubw %%mm2, %%mm0 \n\t"
00302 "psubw %%mm3, %%mm1 \n\t"
00303 "paddw %%mm0, %%mm0 \n\t"
00304 "paddw %%mm1, %%mm1 \n\t"
00305 "paddw %%mm7, %%mm0 \n\t"
00306 "paddw %%mm7, %%mm1 \n\t"
00307 "pmullw %%mm4, %%mm0 \n\t"
00308 "pmullw %%mm5, %%mm1 \n\t"
00309 "pxor %%mm4, %%mm4 \n\t"
00310 "pxor %%mm5, %%mm5 \n\t"
00311 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t"
00312 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t"
00313 "psraw $4, %%mm0 \n\t"
00314 "psraw $4, %%mm1 \n\t"
00315 "psubw %%mm7, %%mm0 \n\t"
00316 "psubw %%mm7, %%mm1 \n\t"
00317 "por %%mm7, %%mm0 \n\t"
00318 "por %%mm7, %%mm1 \n\t"
00319 "pxor %%mm2, %%mm0 \n\t"
00320 "pxor %%mm3, %%mm1 \n\t"
00321 "psubw %%mm2, %%mm0 \n\t"
00322 "psubw %%mm3, %%mm1 \n\t"
00323 "pandn %%mm0, %%mm4 \n\t"
00324 "pandn %%mm1, %%mm5 \n\t"
00325 "movq %%mm4, (%0, %%"REG_a") \n\t"
00326 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
00327
00328 "add $16, %%"REG_a" \n\t"
00329 "js 1b \n\t"
00330 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
00331 : "%"REG_a, "memory"
00332 );
00333 }
00334
00335 static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
00336 DCTELEM *block, int n, int qscale)
00337 {
00338 x86_reg nCoeffs;
00339 const uint16_t *quant_matrix;
00340 int block0;
00341
00342 assert(s->block_last_index[n]>=0);
00343
00344 if(s->alternate_scan) nCoeffs= 63;
00345 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
00346
00347 if (n < 4)
00348 block0 = block[0] * s->y_dc_scale;
00349 else
00350 block0 = block[0] * s->c_dc_scale;
00351 quant_matrix = s->intra_matrix;
00352 __asm__ volatile(
00353 "pcmpeqw %%mm7, %%mm7 \n\t"
00354 "psrlw $15, %%mm7 \n\t"
00355 "movd %2, %%mm6 \n\t"
00356 "packssdw %%mm6, %%mm6 \n\t"
00357 "packssdw %%mm6, %%mm6 \n\t"
00358 "mov %3, %%"REG_a" \n\t"
00359 ASMALIGN(4)
00360 "1: \n\t"
00361 "movq (%0, %%"REG_a"), %%mm0 \n\t"
00362 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
00363 "movq (%1, %%"REG_a"), %%mm4 \n\t"
00364 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
00365 "pmullw %%mm6, %%mm4 \n\t"
00366 "pmullw %%mm6, %%mm5 \n\t"
00367 "pxor %%mm2, %%mm2 \n\t"
00368 "pxor %%mm3, %%mm3 \n\t"
00369 "pcmpgtw %%mm0, %%mm2 \n\t"
00370 "pcmpgtw %%mm1, %%mm3 \n\t"
00371 "pxor %%mm2, %%mm0 \n\t"
00372 "pxor %%mm3, %%mm1 \n\t"
00373 "psubw %%mm2, %%mm0 \n\t"
00374 "psubw %%mm3, %%mm1 \n\t"
00375 "pmullw %%mm4, %%mm0 \n\t"
00376 "pmullw %%mm5, %%mm1 \n\t"
00377 "pxor %%mm4, %%mm4 \n\t"
00378 "pxor %%mm5, %%mm5 \n\t"
00379 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t"
00380 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t"
00381 "psraw $3, %%mm0 \n\t"
00382 "psraw $3, %%mm1 \n\t"
00383 "pxor %%mm2, %%mm0 \n\t"
00384 "pxor %%mm3, %%mm1 \n\t"
00385 "psubw %%mm2, %%mm0 \n\t"
00386 "psubw %%mm3, %%mm1 \n\t"
00387 "pandn %%mm0, %%mm4 \n\t"
00388 "pandn %%mm1, %%mm5 \n\t"
00389 "movq %%mm4, (%0, %%"REG_a") \n\t"
00390 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
00391
00392 "add $16, %%"REG_a" \n\t"
00393 "jng 1b \n\t"
00394 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
00395 : "%"REG_a, "memory"
00396 );
00397 block[0]= block0;
00398
00399 }
00400
00401 static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
00402 DCTELEM *block, int n, int qscale)
00403 {
00404 x86_reg nCoeffs;
00405 const uint16_t *quant_matrix;
00406
00407 assert(s->block_last_index[n]>=0);
00408
00409 if(s->alternate_scan) nCoeffs= 63;
00410 else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
00411
00412 quant_matrix = s->inter_matrix;
00413 __asm__ volatile(
00414 "pcmpeqw %%mm7, %%mm7 \n\t"
00415 "psrlq $48, %%mm7 \n\t"
00416 "movd %2, %%mm6 \n\t"
00417 "packssdw %%mm6, %%mm6 \n\t"
00418 "packssdw %%mm6, %%mm6 \n\t"
00419 "mov %3, %%"REG_a" \n\t"
00420 ASMALIGN(4)
00421 "1: \n\t"
00422 "movq (%0, %%"REG_a"), %%mm0 \n\t"
00423 "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
00424 "movq (%1, %%"REG_a"), %%mm4 \n\t"
00425 "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
00426 "pmullw %%mm6, %%mm4 \n\t"
00427 "pmullw %%mm6, %%mm5 \n\t"
00428 "pxor %%mm2, %%mm2 \n\t"
00429 "pxor %%mm3, %%mm3 \n\t"
00430 "pcmpgtw %%mm0, %%mm2 \n\t"
00431 "pcmpgtw %%mm1, %%mm3 \n\t"
00432 "pxor %%mm2, %%mm0 \n\t"
00433 "pxor %%mm3, %%mm1 \n\t"
00434 "psubw %%mm2, %%mm0 \n\t"
00435 "psubw %%mm3, %%mm1 \n\t"
00436 "paddw %%mm0, %%mm0 \n\t"
00437 "paddw %%mm1, %%mm1 \n\t"
00438 "pmullw %%mm4, %%mm0 \n\t"
00439 "pmullw %%mm5, %%mm1 \n\t"
00440 "paddw %%mm4, %%mm0 \n\t"
00441 "paddw %%mm5, %%mm1 \n\t"
00442 "pxor %%mm4, %%mm4 \n\t"
00443 "pxor %%mm5, %%mm5 \n\t"
00444 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t"
00445 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t"
00446 "psrlw $4, %%mm0 \n\t"
00447 "psrlw $4, %%mm1 \n\t"
00448 "pxor %%mm2, %%mm0 \n\t"
00449 "pxor %%mm3, %%mm1 \n\t"
00450 "psubw %%mm2, %%mm0 \n\t"
00451 "psubw %%mm3, %%mm1 \n\t"
00452 "pandn %%mm0, %%mm4 \n\t"
00453 "pandn %%mm1, %%mm5 \n\t"
00454 "pxor %%mm4, %%mm7 \n\t"
00455 "pxor %%mm5, %%mm7 \n\t"
00456 "movq %%mm4, (%0, %%"REG_a") \n\t"
00457 "movq %%mm5, 8(%0, %%"REG_a") \n\t"
00458
00459 "add $16, %%"REG_a" \n\t"
00460 "jng 1b \n\t"
00461 "movd 124(%0, %3), %%mm0 \n\t"
00462 "movq %%mm7, %%mm6 \n\t"
00463 "psrlq $32, %%mm7 \n\t"
00464 "pxor %%mm6, %%mm7 \n\t"
00465 "movq %%mm7, %%mm6 \n\t"
00466 "psrlq $16, %%mm7 \n\t"
00467 "pxor %%mm6, %%mm7 \n\t"
00468 "pslld $31, %%mm7 \n\t"
00469 "psrlq $15, %%mm7 \n\t"
00470 "pxor %%mm7, %%mm0 \n\t"
00471 "movd %%mm0, 124(%0, %3) \n\t"
00472
00473 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "r" (-2*nCoeffs)
00474 : "%"REG_a, "memory"
00475 );
00476 }
00477
00478 static void denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){
00479 const int intra= s->mb_intra;
00480 int *sum= s->dct_error_sum[intra];
00481 uint16_t *offset= s->dct_offset[intra];
00482
00483 s->dct_count[intra]++;
00484
00485 __asm__ volatile(
00486 "pxor %%mm7, %%mm7 \n\t"
00487 "1: \n\t"
00488 "pxor %%mm0, %%mm0 \n\t"
00489 "pxor %%mm1, %%mm1 \n\t"
00490 "movq (%0), %%mm2 \n\t"
00491 "movq 8(%0), %%mm3 \n\t"
00492 "pcmpgtw %%mm2, %%mm0 \n\t"
00493 "pcmpgtw %%mm3, %%mm1 \n\t"
00494 "pxor %%mm0, %%mm2 \n\t"
00495 "pxor %%mm1, %%mm3 \n\t"
00496 "psubw %%mm0, %%mm2 \n\t"
00497 "psubw %%mm1, %%mm3 \n\t"
00498 "movq %%mm2, %%mm4 \n\t"
00499 "movq %%mm3, %%mm5 \n\t"
00500 "psubusw (%2), %%mm2 \n\t"
00501 "psubusw 8(%2), %%mm3 \n\t"
00502 "pxor %%mm0, %%mm2 \n\t"
00503 "pxor %%mm1, %%mm3 \n\t"
00504 "psubw %%mm0, %%mm2 \n\t"
00505 "psubw %%mm1, %%mm3 \n\t"
00506 "movq %%mm2, (%0) \n\t"
00507 "movq %%mm3, 8(%0) \n\t"
00508 "movq %%mm4, %%mm2 \n\t"
00509 "movq %%mm5, %%mm3 \n\t"
00510 "punpcklwd %%mm7, %%mm4 \n\t"
00511 "punpckhwd %%mm7, %%mm2 \n\t"
00512 "punpcklwd %%mm7, %%mm5 \n\t"
00513 "punpckhwd %%mm7, %%mm3 \n\t"
00514 "paddd (%1), %%mm4 \n\t"
00515 "paddd 8(%1), %%mm2 \n\t"
00516 "paddd 16(%1), %%mm5 \n\t"
00517 "paddd 24(%1), %%mm3 \n\t"
00518 "movq %%mm4, (%1) \n\t"
00519 "movq %%mm2, 8(%1) \n\t"
00520 "movq %%mm5, 16(%1) \n\t"
00521 "movq %%mm3, 24(%1) \n\t"
00522 "add $16, %0 \n\t"
00523 "add $32, %1 \n\t"
00524 "add $16, %2 \n\t"
00525 "cmp %3, %0 \n\t"
00526 " jb 1b \n\t"
00527 : "+r" (block), "+r" (sum), "+r" (offset)
00528 : "r"(block+64)
00529 );
00530 }
00531
00532 static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
00533 const int intra= s->mb_intra;
00534 int *sum= s->dct_error_sum[intra];
00535 uint16_t *offset= s->dct_offset[intra];
00536
00537 s->dct_count[intra]++;
00538
00539 __asm__ volatile(
00540 "pxor %%xmm7, %%xmm7 \n\t"
00541 "1: \n\t"
00542 "pxor %%xmm0, %%xmm0 \n\t"
00543 "pxor %%xmm1, %%xmm1 \n\t"
00544 "movdqa (%0), %%xmm2 \n\t"
00545 "movdqa 16(%0), %%xmm3 \n\t"
00546 "pcmpgtw %%xmm2, %%xmm0 \n\t"
00547 "pcmpgtw %%xmm3, %%xmm1 \n\t"
00548 "pxor %%xmm0, %%xmm2 \n\t"
00549 "pxor %%xmm1, %%xmm3 \n\t"
00550 "psubw %%xmm0, %%xmm2 \n\t"
00551 "psubw %%xmm1, %%xmm3 \n\t"
00552 "movdqa %%xmm2, %%xmm4 \n\t"
00553 "movdqa %%xmm3, %%xmm5 \n\t"
00554 "psubusw (%2), %%xmm2 \n\t"
00555 "psubusw 16(%2), %%xmm3 \n\t"
00556 "pxor %%xmm0, %%xmm2 \n\t"
00557 "pxor %%xmm1, %%xmm3 \n\t"
00558 "psubw %%xmm0, %%xmm2 \n\t"
00559 "psubw %%xmm1, %%xmm3 \n\t"
00560 "movdqa %%xmm2, (%0) \n\t"
00561 "movdqa %%xmm3, 16(%0) \n\t"
00562 "movdqa %%xmm4, %%xmm6 \n\t"
00563 "movdqa %%xmm5, %%xmm0 \n\t"
00564 "punpcklwd %%xmm7, %%xmm4 \n\t"
00565 "punpckhwd %%xmm7, %%xmm6 \n\t"
00566 "punpcklwd %%xmm7, %%xmm5 \n\t"
00567 "punpckhwd %%xmm7, %%xmm0 \n\t"
00568 "paddd (%1), %%xmm4 \n\t"
00569 "paddd 16(%1), %%xmm6 \n\t"
00570 "paddd 32(%1), %%xmm5 \n\t"
00571 "paddd 48(%1), %%xmm0 \n\t"
00572 "movdqa %%xmm4, (%1) \n\t"
00573 "movdqa %%xmm6, 16(%1) \n\t"
00574 "movdqa %%xmm5, 32(%1) \n\t"
00575 "movdqa %%xmm0, 48(%1) \n\t"
00576 "add $32, %0 \n\t"
00577 "add $64, %1 \n\t"
00578 "add $32, %2 \n\t"
00579 "cmp %3, %0 \n\t"
00580 " jb 1b \n\t"
00581 : "+r" (block), "+r" (sum), "+r" (offset)
00582 : "r"(block+64)
00583 );
00584 }
00585
/*
 * mpegvideo_mmx_template.c is included once per instruction-set variant.
 * Before each inclusion the HAVE_* feature macros are forced to the values
 * for that variant, and RENAME()/RENAMEl() are redefined so that names built
 * with them get a matching suffix. The template presumably defines
 * RENAME(dct_quantize), since MPV_common_init_mmx() refers to
 * dct_quantize_MMX/_MMX2/_SSE2/_SSSE3 (TODO confirm against the template).
 *
 * The configured HAVE_SSE2 and HAVE_MMX2 values are overwritten and not
 * restored: after this section both are 1, and HAVE_SSSE3 is 1 only if it was
 * non-zero on entry.
 */

/* Remember whether SSSE3 was enabled, then disable it for the first passes. */
#if HAVE_SSSE3
#define HAVE_SSSE3_BAK
#endif
#undef HAVE_SSSE3
#define HAVE_SSSE3 0

/* Pass 1: plain MMX (suffixes _MMX / _mmx). */
#undef HAVE_SSE2
#undef HAVE_MMX2
#define HAVE_SSE2 0
#define HAVE_MMX2 0
#define RENAME(a) a ## _MMX
#define RENAMEl(a) a ## _mmx
#include "mpegvideo_mmx_template.c"

/* Pass 2: MMX2 enabled (suffixes _MMX2 / _mmx2). */
#undef HAVE_MMX2
#define HAVE_MMX2 1
#undef RENAME
#undef RENAMEl
#define RENAME(a) a ## _MMX2
#define RENAMEl(a) a ## _mmx2
#include "mpegvideo_mmx_template.c"

/* Pass 3: SSE2 enabled on top of MMX2 (suffixes _SSE2 / _sse2). */
#undef HAVE_SSE2
#define HAVE_SSE2 1
#undef RENAME
#undef RENAMEl
#define RENAME(a) a ## _SSE2
#define RENAMEl(a) a ## _sse2
#include "mpegvideo_mmx_template.c"

/* Pass 4, only if SSSE3 was enabled on entry: suffix _SSSE3 for RENAME(),
 * while RENAMEl() keeps the _sse2 suffix. */
#ifdef HAVE_SSSE3_BAK
#undef HAVE_SSSE3
#define HAVE_SSSE3 1
#undef RENAME
#undef RENAMEl
#define RENAME(a) a ## _SSSE3
#define RENAMEl(a) a ## _sse2
#include "mpegvideo_mmx_template.c"
#endif
00625
00626 void MPV_common_init_mmx(MpegEncContext *s)
00627 {
00628 if (mm_flags & FF_MM_MMX) {
00629 const int dct_algo = s->avctx->dct_algo;
00630
00631 s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
00632 s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx;
00633 s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
00634 s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx;
00635 if(!(s->flags & CODEC_FLAG_BITEXACT))
00636 s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
00637 s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
00638
00639 if (mm_flags & FF_MM_SSE2) {
00640 s->denoise_dct= denoise_dct_sse2;
00641 } else {
00642 s->denoise_dct= denoise_dct_mmx;
00643 }
00644
00645 if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
00646 #if HAVE_SSSE3
00647 if(mm_flags & FF_MM_SSSE3){
00648 s->dct_quantize= dct_quantize_SSSE3;
00649 } else
00650 #endif
00651 if(mm_flags & FF_MM_SSE2){
00652 s->dct_quantize= dct_quantize_SSE2;
00653 } else if(mm_flags & FF_MM_MMXEXT){
00654 s->dct_quantize= dct_quantize_MMX2;
00655 } else {
00656 s->dct_quantize= dct_quantize_MMX;
00657 }
00658 }
00659 }
00660 }