DECLARE_ASM_CONST(8, uint64_t, ff_dither4)[2] = {
    0x0103010301030103LL,
    0x0200020002000200LL,};

DECLARE_ASM_CONST(8, uint64_t, ff_dither8)[2] = {
    0x0602060206020602LL,
    0x0004000400040004LL,};
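/* A reading note (inferred from the names and their use in the vertical
 * scaler, not spelled out here): each table holds two rows of packed 16-bit
 * rounding offsets that are added before the final right shift when the
 * 16-bit intermediate planes are reduced to 8 bits; alternating the rows
 * between output lines dithers the truncation error instead of banding. */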
 
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000020E540830C8BULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000ED0FDAC23831ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00003831D0E6F6EAULL;

DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
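/* A reading sketch of the packed constants above, assuming the usual BT.601
 * limited-range RGB->YUV conversion: each 64-bit word holds signed 16-bit
 * fixed-point taps, e.g. ff_bgr2YCoeff splits (high word to low) into
 * 0x0000 | 0x20E5 | 0x4083 | 0x0C8B, roughly 0.257, 0.504 and 0.098 in Q15
 * for R, G and B (B in the low word, matching BGR memory order), ready for
 * a packed multiply-add. The offset constants replicate 16 (the Y black
 * level) and 128 (the chroma bias) into every byte so one packed add
 * applies them to eight pixels at once. */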
 
#define COMPILE_TEMPLATE_MMXEXT 0
#define RENAME(a) a ## _mmx
#include "swscale_template.c"

#if HAVE_MMXEXT_INLINE
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#define COMPILE_TEMPLATE_MMXEXT 1
#define RENAME(a) a ## _mmxext
#include "swscale_template.c"
#endif
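/* Illustrative expansion of the template trick (yuv2yuvX is an assumed
 * example name; any symbol the template wraps in RENAME() behaves the same):
 *
 *     static void RENAME(yuv2yuvX)(...)   // in swscale_template.c
 *
 * becomes yuv2yuvX_mmx() in the first inclusion (COMPILE_TEMPLATE_MMXEXT 0)
 * and yuv2yuvX_mmxext() in the second (COMPILE_TEMPLATE_MMXEXT 1), so one
 * template source yields both instruction-set variants. */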
    const int firstLumSrcY = vLumFilterPos[dstY];
    const int firstChrSrcY = vChrFilterPos[chrDstY];
 
    if (dstY < dstH - 2) {
        const int16_t **lumSrcPtr  = (const int16_t **)(void*) lumPixBuf  + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
        const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
        const int16_t **alpSrcPtr  = (CONFIG_SWSCALE_ALPHA && alpPixBuf) ?
            (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
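        /* lumPixBuf/chrUPixBuf/alpPixBuf are tables of row pointers into a
         * ring of already horizontally scaled lines. Judging from the usual
         * swscale allocation pattern, each table is over-allocated and its
         * entries duplicated at i and i + vLumBufSize/vChrBufSize, so adding
         * the buffer size above keeps the computed index in range without an
         * explicit modulo; the last third of the table is the scratch area
         * used by the edge-clamp code below. */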
 
        if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
            const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
            int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
            for (i = 0; i < neg;            i++)
                tmpY[i] = lumSrcPtr[neg];
            for (     ; i < end;            i++)
                tmpY[i] = lumSrcPtr[i];
            for (     ; i < vLumFilterSize; i++)
                tmpY[i] = tmpY[i - 1];
            lumSrcPtr = tmpY;

            if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
                const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
                for (i = 0; i < neg;            i++)
                    tmpA[i] = alpSrcPtr[neg];
                for (     ; i < end;            i++)
                    tmpA[i] = alpSrcPtr[i];
                for (     ; i < vLumFilterSize; i++)
                    tmpA[i] = tmpA[i - 1];
                alpSrcPtr = tmpA;
            }
        }
 
        if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
            const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize;
            int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
            for (i = 0; i < neg;            i++) {
                tmpU[i] = chrUSrcPtr[neg];
            }
            for (     ; i < end;            i++) {
                tmpU[i] = chrUSrcPtr[i];
            }
            for (     ; i < vChrFilterSize; i++) {
                tmpU[i] = tmpU[i - 1];
            }
            chrUSrcPtr = tmpU;
        }
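        /* Both blocks above implement edge clamping: filter taps that would
         * read above row 0 reuse the first valid line (srcPtr[neg]), and taps
         * past the last line reuse the previous entry (tmp[i - 1]). A minimal
         * stand-alone sketch of the same idea, with hypothetical names:
         *
         *     for (i = 0; i < taps; i++)
         *         clamped[i] = rows[av_clip(firstSrcY + i, 0, srcH - 1)];
         *
         * (av_clip() as in libavutil; the real code avoids the per-tap clip
         * by splitting the loop into the three ranges seen above.) */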
 
                *(const void**)&lumMmxFilter[s*i              ]= lumSrcPtr[i  ];
                *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4  ]= lumSrcPtr[i+(vLumFilterSize>1)];
                lumMmxFilter[s*i+APCK_COEF/4  ]=
                lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i    ]
                    + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
                    *(const void**)&alpMmxFilter[s*i              ]= alpSrcPtr[i  ];
                    *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4  ]= alpSrcPtr[i+(vLumFilterSize>1)];
                    alpMmxFilter[s*i+APCK_COEF/4  ]= lumMmxFilter[s*i+APCK_COEF/4  ];
                    alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4+1];
                }
                *(const void**)&chrMmxFilter[s*i              ]= chrUSrcPtr[i  ];
                *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrUSrcPtr[i+(vChrFilterSize>1)];
                chrMmxFilter[s*i+APCK_COEF/4  ]=
                chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i    ]
                    + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
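                /* A layout sketch for the packed ("APCK") filter above,
                 * assuming the usual swscale_internal.h byte offsets
                 * APCK_PTR2 == 4 and APCK_COEF == 8 (divided by 4 here
                 * because the filter arrays are indexed as 32-bit slots):
                 *
                 *     s*i + 0           -> pointer to source row i
                 *     s*i + APCK_PTR2/4 -> pointer to source row i+1
                 *     s*i + APCK_COEF/4 -> coeff(i) | coeff(i+1) << 16, twice
                 *
                 * so the MMXEXT inner loop consumes two taps per step with a
                 * single packed coefficient load. */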
 
                *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
                lumMmxFilter[4*i+2]=
                lumMmxFilter[4*i+3]=
                    ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
                    *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
                    alpMmxFilter[4*i+2]=
                    alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
                }
                *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
                chrMmxFilter[4*i+2]=
                chrMmxFilter[4*i+3]=
                    ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
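                /* Multiplying the 16-bit coefficient by 0x10001U duplicates
                 * it into both halves of a 32-bit word (0x1234 * 0x10001 ==
                 * 0x12341234); storing that in slots 4*i+2 and 4*i+3 gives
                 * the splatted coefficient that the packed-multiply inner
                 * loop consumes. */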
 
static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
                          const int16_t **src, uint8_t *dest, int dstW,
                          const uint8_t *dither, int offset)
{
    /* cast through uintptr_t rather than int: only the low four bits matter
     * for the 16-byte alignment test, and a plain int cast truncates the
     * pointer on 64-bit targets */
    if (((uintptr_t)dest) & 15) {
        return yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset);
    }
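    /* The SSE3 body below writes with 16-byte movdqa/movntdq stores, which
     * fault on unaligned addresses, so an unaligned destination falls back
     * to the MMXEXT version instead. */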
 
    if (offset) {
        __asm__ volatile("movq       (%0), %%xmm3\n\t"
                         "movdqa    %%xmm3, %%xmm4\n\t"
                         "psrlq       $24, %%xmm3\n\t"
                         "psllq       $40, %%xmm4\n\t"
                         /* rotate the 8 dither bytes right by 3 positions */
                         "por       %%xmm4, %%xmm3\n\t"
                         :: "r"(dither)
                         );
    } else {
        __asm__ volatile("movq       (%0), %%xmm3\n\t"
                         :: "r"(dither)
                         );
    }
    __asm__ volatile(
        "pxor      %%xmm0, %%xmm0\n\t"
        /* zero-extend the dither bytes to words */
        "punpcklbw %%xmm0, %%xmm3\n\t"
        "movd          %0, %%xmm1\n\t"
        "punpcklwd %%xmm1, %%xmm1\n\t"
        "punpckldq %%xmm1, %%xmm1\n\t"
        /* broadcast %0 to all eight words */
        "punpcklqdq %%xmm1, %%xmm1\n\t"
        "psllw         $3, %%xmm1\n\t"
        "paddw     %%xmm1, %%xmm3\n\t"
        /* pre-scaled per-column rounding base */
        "psraw         $4, %%xmm3\n\t"
        :: "m"(filterSize)
        );
    __asm__ volatile(
        "movdqa    %%xmm3, %%xmm4\n\t"
        /* keep a copy of the rounding base for the next 16 pixels */
        "movdqa    %%xmm3, %%xmm7\n\t"

        "mov                                 %0, %%"REG_d"  \n\t"\
        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\

        "movddup                  8(%%"REG_d"), %%xmm0      \n\t" \
        "movdqa              (%%"REG_S", %%"REG_c", 2), %%xmm2      \n\t" \
        "movdqa            16(%%"REG_S", %%"REG_c", 2), %%xmm5      \n\t" \
        "add                                $16, %%"REG_d"  \n\t"\
        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
        "test                         %%"REG_S", %%"REG_S"  \n\t"\
        "pmulhw                           %%xmm0, %%xmm2      \n\t"\
        "pmulhw                           %%xmm0, %%xmm5      \n\t"\
        "paddw                            %%xmm2, %%xmm3      \n\t"\
        "paddw                            %%xmm5, %%xmm4      \n\t"\

        "psraw                               $3, %%xmm3      \n\t"\
        "psraw                               $3, %%xmm4      \n\t"\
        "packuswb                         %%xmm4, %%xmm3      \n\t"
        /* non-temporal store of 16 finished output pixels */
        "movntdq                          %%xmm3, (%1, %%"REG_c")\n\t"
        "add                         $16, %%"REG_c"         \n\t"\
        "cmp                          %2, %%"REG_c"         \n\t"\
        "movdqa    %%xmm7, %%xmm3\n\t"
        "movdqa    %%xmm7, %%xmm4\n\t"
        "mov                                 %0, %%"REG_d"  \n\t"\
        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
 
  268            "r" (dest-offset), 
"g" ((
x86_reg)(dstW+offset)), 
"m" (offset)
 
  269         : 
XMM_CLOBBERS(
"%xmm0" , 
"%xmm1" , 
"%xmm2" , 
"%xmm3" , 
"%xmm4" , 
"%xmm5" , 
"%xmm7" ,)
 
  270          "%"REG_d, 
"%"REG_S, 
"%"REG_c
 
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
                                                SwsContext *c, int16_t *data, \
                                                int dstW, const uint8_t *src, \
                                                const int16_t *filter, \
                                                const int32_t *filterPos, int filterSize)

#define SCALE_FUNCS(filter_n, opt) \
    SCALE_FUNC(filter_n,  8, 15, opt); \
    SCALE_FUNC(filter_n,  9, 15, opt); \
    SCALE_FUNC(filter_n, 10, 15, opt); \
    SCALE_FUNC(filter_n, 12, 15, opt); \
    SCALE_FUNC(filter_n, 14, 15, opt); \
    SCALE_FUNC(filter_n, 16, 15, opt); \
    SCALE_FUNC(filter_n,  8, 19, opt); \
    SCALE_FUNC(filter_n,  9, 19, opt); \
    SCALE_FUNC(filter_n, 10, 19, opt); \
    SCALE_FUNC(filter_n, 12, 19, opt); \
    SCALE_FUNC(filter_n, 14, 19, opt); \
    SCALE_FUNC(filter_n, 16, 19, opt)
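/* Example expansion (sse2 as an assumed opt tag): SCALE_FUNC(4, 8, 15, sse2)
 * declares
 *
 *     void ff_hscale8to15_4_sse2(SwsContext *c, int16_t *data, int dstW,
 *                                const uint8_t *src, const int16_t *filter,
 *                                const int32_t *filterPos, int filterSize);
 *
 * i.e. a 4-tap horizontal scaler from 8-bit input to the 15-bit
 * intermediate format. */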
#define SCALE_FUNCS_MMX(opt) \
    SCALE_FUNCS(4, opt); \
    SCALE_FUNCS(8, opt); \
    SCALE_FUNCS(X, opt)

#define SCALE_FUNCS_SSE(opt) \
    SCALE_FUNCS(4, opt); \
    SCALE_FUNCS(8, opt); \
    SCALE_FUNCS(X4, opt); \
    SCALE_FUNCS(X8, opt)
#define VSCALEX_FUNC(size, opt) \
void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
                                        const int16_t **src, uint8_t *dest, int dstW, \
                                        const uint8_t *dither, int offset)
#define VSCALEX_FUNCS(opt) \
    VSCALEX_FUNC(8,  opt); \
    VSCALEX_FUNC(9,  opt); \
    VSCALEX_FUNC(10, opt)
#define VSCALE_FUNC(size, opt) \
void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
                                        const uint8_t *dither, int offset)
#define VSCALE_FUNCS(opt1, opt2) \
    VSCALE_FUNC(8,  opt1); \
    VSCALE_FUNC(9,  opt2); \
    VSCALE_FUNC(10, opt2); \
    VSCALE_FUNC(16, opt1)
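/* Likewise, VSCALEX_FUNC(10, sse2) declares ff_yuv2planeX_10_sse2(), the
 * multi-tap vertical scaler writing 10-bit output, and VSCALE_FUNC(8, sse2)
 * declares ff_yuv2plane1_8_sse2(), the single-tap (1:1 vertical) case;
 * sse2 is again an assumed opt tag for illustration. */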
#define INPUT_Y_FUNC(fmt, opt) \
void ff_ ## fmt ## ToY_  ## opt(uint8_t *dst, const uint8_t *src, \
                                const uint8_t *unused1, const uint8_t *unused2, \
                                int w, uint32_t *unused)
#define INPUT_UV_FUNC(fmt, opt) \
void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
                                const uint8_t *unused0, \
                                const uint8_t *src1, \
                                const uint8_t *src2, \
                                int w, uint32_t *unused)
#define INPUT_FUNC(fmt, opt) \
    INPUT_Y_FUNC(fmt, opt); \
    INPUT_UV_FUNC(fmt, opt)
#define INPUT_FUNCS(opt) \
    INPUT_FUNC(uyvy, opt); \
    INPUT_FUNC(yuyv, opt); \
    INPUT_UV_FUNC(nv12, opt); \
    INPUT_UV_FUNC(nv21, opt); \
    INPUT_FUNC(rgba, opt); \
    INPUT_FUNC(bgra, opt); \
    INPUT_FUNC(argb, opt); \
    INPUT_FUNC(abgr, opt); \
    INPUT_FUNC(rgb24, opt); \
    INPUT_FUNC(bgr24, opt)
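/* INPUT_FUNC(rgba, sse2), for example, declares both ff_rgbaToY_sse2() and
 * ff_rgbaToUV_sse2(); NV12/NV21 get only the UV unpacker because their luma
 * plane needs no conversion (sse2 again an assumed opt tag). */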
#if HAVE_MMX_INLINE
    if (INLINE_MMX(cpu_flags))
        sws_init_swscale_mmx(c);
#endif
#if HAVE_MMXEXT_INLINE
    if (INLINE_MMXEXT(cpu_flags))
        sws_init_swscale_mmxext(c);
#endif
#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
    if (c->srcBpc == 8) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale8to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 9) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale9to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 10) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale10to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 12) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale12to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth_minus1<15)) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale14to19_ ## filtersize ## _ ## opt1; \
    } else { \
        av_assert0(c->srcBpc == 16);\
        hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale16to19_ ## filtersize ## _ ## opt1; \
    } \
} while (0)
#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
    switch (filtersize) { \
    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
    default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
    }
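/* Dispatch sketch: filter sizes 4 and 8 pick the fully unrolled kernels and
 * anything else falls to the generic X variant; within each size the source
 * and destination bit depths select the ff_hscale<from>to<to> entry point,
 * e.g. srcBpc == 8 with dstBpc <= 14 yields ff_hscale8to15_4_<opt>. */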
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
    switch (c->dstBpc) { \
    case 16:                          do_16_case;                          break; \
    case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
    case 9:  if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_  ## opt; break; \
    default: if (condition_8bit)      break; \
    }
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
    switch (c->dstBpc) { \
    case 16: if (!isBE(c->dstFormat))            vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
    case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
    case 9:  if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_  ## opt2; break; \
    case 8:                                      vscalefn = ff_yuv2plane1_8_  ## opt1; break; \
    default: av_assert0(c->dstBpc>8); \
    }
#define case_rgb(x, X, opt) \
        case AV_PIX_FMT_ ## X: \
            c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
            if (!c->chrSrcHSubSample) \
                c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
            break
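/* case_rgb(rgb24, RGB24, sse2), with sse2 assumed for illustration, expands
 * to
 *
 *     case AV_PIX_FMT_RGB24:
 *         c->lumToYV12 = ff_rgb24ToY_sse2;
 *         if (!c->chrSrcHSubSample)
 *             c->chrToYV12 = ff_rgb24ToUV_sse2;
 *         break;
 *
 * i.e. the packed-RGB chroma unpacker is installed only when no horizontal
 * chroma subsampling is requested. */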
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
    switch (filtersize) { \
    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
    default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
             else                ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
             break; \
    }
                            HAVE_ALIGNED_STACK || ARCH_X86_64);

                            HAVE_ALIGNED_STACK || ARCH_X86_64);

                            HAVE_ALIGNED_STACK || ARCH_X86_64);