28 static void gmc_mmx(uint8_t *dst, 
const uint8_t *
src,
 
   29                     int stride, 
int h, 
int ox, 
int oy,
 
   30                     int dxx, 
int dxy, 
int dyx, 
int dyy,
 
   34     const int ix   = ox  >> (16 + 
shift);
 
   35     const int iy   = oy  >> (16 + 
shift);
 
   36     const int oxs  = ox  >> 4;
 
   37     const int oys  = oy  >> 4;
 
   38     const int dxxs = dxx >> 4;
 
   39     const int dxys = dxy >> 4;
 
   40     const int dyxs = dyx >> 4;
 
   41     const int dyys = dyy >> 4;
 
   42     const uint16_t r4[4]   = { 
r, 
r, 
r, 
r };
 
   43     const uint16_t dxy4[4] = { dxys, dxys, dxys, dxys };
 
   44     const uint16_t dyy4[4] = { dyys, dyys, dyys, dyys };
 
   46 #define MAX_STRIDE 4096U 
   51     const int dxw = (dxx - (1 << (16 + 
shift))) * (
w - 1);
 
   52     const int dyh = (dyy - (1 << (16 + 
shift))) * (
h - 1);
 
   53     const int dxh = dxy * (
h - 1);
 
   54     const int dyw = dyx * (
w - 1);
 
   55     int need_emu  =  (unsigned) ix >= 
width  - 
w || 
width < 
w ||
 
   60         ((ox ^ (ox + dxw)) | (ox ^ (ox + dxh)) | (ox ^ (ox + dxw + dxh)) |
 
   61          (oy ^ (oy + dyw)) | (oy ^ (oy + dyh)) | (oy ^ (oy + dyw + dyh))) >> (16 + 
shift) ||
 
   63         (dxx | dxy | dyx | dyy) & 15 ||
 
   66         ff_gmc_c(dst, 
src, 
stride, 
h, ox, oy, dxx, dxy, dyx, dyy,
 
   79         "pxor      %%mm7, %%mm7         \n\t" 
   80         "punpcklwd %%mm6, %%mm6         \n\t" 
   81         "punpcklwd %%mm6, %%mm6         \n\t" 
   84     for (x = 0; x < 
w; x += 4) {
 
   85         uint16_t dx4[4] = { oxs - dxys + dxxs * (x + 0),
 
   86                             oxs - dxys + dxxs * (x + 1),
 
   87                             oxs - dxys + dxxs * (x + 2),
 
   88                             oxs - dxys + dxxs * (x + 3) };
 
   89         uint16_t dy4[4] = { oys - dyys + dyxs * (x + 0),
 
   90                             oys - dyys + dyxs * (x + 1),
 
   91                             oys - dyys + dyxs * (x + 2),
 
   92                             oys - dyys + dyxs * (x + 3) };
 
   94         for (y = 0; y < 
h; y++) {
 
   98                 "paddw     %2, %%mm4    \n\t" 
   99                 "paddw     %3, %%mm5    \n\t" 
  100                 "movq   %%mm4, %0       \n\t" 
  101                 "movq   %%mm5, %1       \n\t" 
  102                 "psrlw    $12, %%mm4    \n\t" 
  103                 "psrlw    $12, %%mm5    \n\t" 
  104                 : 
"+m" (*dx4), 
"+m" (*dy4)
 
  105                 : 
"m" (*dxy4), 
"m" (*dyy4));
 
  108                 "movq      %%mm6, %%mm2 \n\t" 
  109                 "movq      %%mm6, %%mm1 \n\t" 
  110                 "psubw     %%mm4, %%mm2 \n\t" 
  111                 "psubw     %%mm5, %%mm1 \n\t" 
  112                 "movq      %%mm2, %%mm0 \n\t" 
  113                 "movq      %%mm4, %%mm3 \n\t" 
  114                 "pmullw    %%mm1, %%mm0 \n\t"  
  115                 "pmullw    %%mm5, %%mm3 \n\t"  
  116                 "pmullw    %%mm5, %%mm2 \n\t"  
  117                 "pmullw    %%mm4, %%mm1 \n\t"  
  119                 "movd         %4, %%mm5 \n\t" 
  120                 "movd         %3, %%mm4 \n\t" 
  121                 "punpcklbw %%mm7, %%mm5 \n\t" 
  122                 "punpcklbw %%mm7, %%mm4 \n\t" 
  123                 "pmullw    %%mm5, %%mm3 \n\t"  
  124                 "pmullw    %%mm4, %%mm2 \n\t"  
  126                 "movd         %2, %%mm5 \n\t" 
  127                 "movd         %1, %%mm4 \n\t" 
  128                 "punpcklbw %%mm7, %%mm5 \n\t" 
  129                 "punpcklbw %%mm7, %%mm4 \n\t" 
  130                 "pmullw    %%mm5, %%mm1 \n\t"  
  131                 "pmullw    %%mm4, %%mm0 \n\t"  
  132                 "paddw        %5, %%mm1 \n\t" 
  133                 "paddw     %%mm3, %%mm2 \n\t" 
  134                 "paddw     %%mm1, %%mm0 \n\t" 
  135                 "paddw     %%mm2, %%mm0 \n\t" 
  137                 "psrlw        %6, %%mm0 \n\t" 
  138                 "packuswb  %%mm0, %%mm0 \n\t" 
  139                 "movd      %%mm0, %0    \n\t" 
  141                 : 
"=m" (dst[x + y * 
stride])
 
  142                 : 
"m" (
src[0]), 
"m" (
src[1]),