33                         ptrdiff_t line_size, 
int h);
 
   35                            ptrdiff_t line_size, 
int h);
 
   37                               int dstStride, 
int src1Stride, 
int h);
 
   39                               int dstStride, 
int src1Stride, 
int h);
 
   41                               int dstStride, 
int src1Stride, 
int h);
 
   43                               int dstStride, 
int src1Stride, 
int h);
 
   45                                int dstStride, 
int src1Stride, 
int h);
 
   47                                int dstStride, 
int src1Stride, 
int h);
 
   // Suffix aliases: the four *_l2_sse2 names resolve to the mmxext routines
   // and the put_pixels{16,8,4}_mmxext names resolve to the mmx routines, so
   // identifiers assembled by token pasting with those suffixes still name
   // an existing function.
   48 #define ff_put_pixels8_l2_sse2  ff_put_pixels8_l2_mmxext 
   49 #define ff_avg_pixels8_l2_sse2  ff_avg_pixels8_l2_mmxext 
   50 #define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext 
   51 #define ff_avg_pixels16_l2_sse2 ff_avg_pixels16_l2_mmxext 
   52 #define ff_put_pixels16_mmxext  ff_put_pixels16_mmx 
   53 #define ff_put_pixels8_mmxext   ff_put_pixels8_mmx 
   54 #define ff_put_pixels4_mmxext   ff_put_pixels4_mmx 
   // DEF_QPEL(OPNAME): declares (prototypes only, no bodies) the
   // ff_<OPNAME>_h264_qpel* low-pass primitives and the
   // ff_<OPNAME>_pixels{4,8}_l2_shift5_mmxext helpers for one operation name.
   // OPNAME is pasted with a following '_', so it is passed without a trailing
   // underscore. The definitions of these functions are not in this excerpt.
   56 #define DEF_QPEL(OPNAME)\ 
   57 void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, int dstStride, int srcStride);\ 
   58 void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, int dstStride, int srcStride);\ 
   59 void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_ssse3(uint8_t *dst, uint8_t *src, int dstStride, int srcStride);\ 
   60 void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_l2_mmxext(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride);\ 
   61 void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_mmxext(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride);\ 
   62 void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_ssse3(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride);\ 
   63 void ff_ ## OPNAME ## _h264_qpel4_v_lowpass_mmxext(uint8_t *dst, uint8_t *src, int dstStride, int srcStride);\ 
   64 void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_op_mmxext(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h);\ 
   65 void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_sse2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h);\ 
   66 void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_v_mmxext(uint8_t *src, int16_t *tmp, int srcStride);\ 
   67 void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_h_mmxext(int16_t *tmp, uint8_t *dst, int dstStride);\ 
   68 void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_mmxext(uint8_t *src, int16_t *tmp, int srcStride, int size);\ 
   69 void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_sse2(uint8_t *src, int16_t *tmp, int srcStride, int size);\ 
   70 void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_op_mmxext(uint8_t *dst, int16_t *tmp, int dstStride, int unused, int h);\ 
   71 void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size);\ 
   72 void ff_ ## OPNAME ## _pixels4_l2_shift5_mmxext(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h);\ 
   73 void ff_ ## OPNAME ## _pixels8_l2_shift5_mmxext(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h); 
   // QPEL_H264(OPNAME, OP, MMX): generates static av_always_inline wrappers
   // named ff_<OPNAME>h264_qpel*_<MMX> on top of the *_mmxext primitives.
   // OPNAME is pasted directly before "h264", so it is passed with its
   // trailing underscore (e.g. put_). OP is not referenced in the visible lines.
   //
   // Visible wrappers:
   //  - qpel4_hv_lowpass: rewinds src by 2 rows + 2 columns, runs the vertical
   //    pass into tmp, then the horizontal pass from tmp into dst.
   //  - qpel8or16_{v,hv1,hv2}_lowpass: forward to the matching *_op_mmxext
   //    routine; hv2 passes 0 for the routine's fourth argument.
   //  - qpel8_* / qpel16_*: built from the 8or16 or 8-wide wrappers, the
   //    16-wide ones calling at column offsets 0 and +8.
   //  - qpel8or16_hv_lowpass: the first (hv1) pass always uses the put_
   //    variant regardless of OPNAME; only the hv2 pass is OPNAME-specific.
   //  - pixels16_l2_shift5: two 8-wide pixels8_l2_shift5 calls at offsets 0, +8.
   //
   // NOTE(review): this listing omits some lines of the macro (braces, loop
   // headers and the pointer updates between repeated calls); the summary
   // above covers only what is shown.
   78 #define QPEL_H264(OPNAME, OP, MMX)\ 
   79 static av_always_inline void ff_ ## OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ 
   81     src -= 2*srcStride+2;\ 
   83         ff_ ## OPNAME ## h264_qpel4_hv_lowpass_v_mmxext(src, tmp, srcStride);\ 
   88     ff_ ## OPNAME ## h264_qpel4_hv_lowpass_h_mmxext(tmp, dst, dstStride);\ 
   91 static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ 
   93     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_op_mmxext(dst, src, dstStride, srcStride, h);\ 
   96     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_op_mmxext(dst, src, dstStride, srcStride, h);\ 
   98 static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv1_lowpass_ ## MMX(int16_t *tmp, uint8_t *src, int tmpStride, int srcStride, int size){\ 
  100     src -= 2*srcStride+2;\ 
  102         ff_ ## OPNAME ## h264_qpel8or16_hv1_lowpass_op_mmxext(src, tmp, srcStride, size);\ 
  107 static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\ 
  110     ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_op_mmxext(dst, tmp, dstStride, 0, size);\ 
  116 static av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 
  117     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 8);\ 
  119 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 
  120     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 16);\ 
  121     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ 
  124 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 
  125     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\ 
  126     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ 
  129     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\ 
  130     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ 
  133 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ 
  134     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\ 
  135     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ 
  138     src2 += 8*src2Stride;\ 
  139     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\ 
  140     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ 
  143 static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\ 
  144     ff_put_h264_qpel8or16_hv1_lowpass_ ## MMX(tmp, src, tmpStride, srcStride, size);\ 
  145     ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\ 
  147 static av_always_inline void ff_ ## OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ 
  148     ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst  , tmp  , src  , dstStride, tmpStride, srcStride, 8);\ 
  151 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ 
  152     ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst  , tmp  , src  , dstStride, tmpStride, srcStride, 16);\ 
  155 static av_always_inline void ff_ ## OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ 
  157     ff_ ## OPNAME ## pixels8_l2_shift5_ ## MMX(dst  , src16  , src8  , dstStride, src8Stride, h);\ 
  158     ff_ ## OPNAME ## pixels8_l2_shift5_ ## MMX(dst+8, src16+8, src8+8, dstStride, src8Stride, h);\ 
  // QPEL_H264_H16_XMM(OPNAME, OP, MMX): two alternative definitions. The
  // closing "#endif // ARCH_X86_64" shows they are selected by a preprocessor
  // conditional whose opening lines are not part of this listing.
  //  - First alternative: no generated body is visible; the 16-wide ssse3
  //    h_lowpass_l2 routines for avg and put are declared as external
  //    functions instead.
  //  - Second alternative: generates ff_<OPNAME>h264_qpel16_h_lowpass_l2_<MMX>
  //    from 8-wide h_lowpass_l2 calls at column offsets 0 and +8, issued
  //    twice, with src2 advanced by 8 rows in between. Any matching src/dst
  //    advance is not visible in this excerpt.
  163 #define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\ 
  165 void ff_avg_h264_qpel16_h_lowpass_l2_ssse3(
uint8_t *dst, 
uint8_t *
src, 
uint8_t *src2, 
int dstStride, 
int src2Stride);
 
  166 void ff_put_h264_qpel16_h_lowpass_l2_ssse3(
uint8_t *dst, 
uint8_t *
src, 
uint8_t *src2, 
int dstStride, 
int src2Stride);
 
  169 #define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\ 
  170 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ 
  171     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\ 
  172     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ 
  175     src2 += 8*src2Stride;\ 
  176     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\ 
  177     ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ 
  179 #endif // ARCH_X86_64 
  // QPEL_H264_H_XMM(OPNAME, OP, MMX): expands QPEL_H264_H16_XMM for the same
  // arguments, then generates ff_<OPNAME>h264_qpel16_h_lowpass_<MMX> from
  // 8-wide h_lowpass calls at column offsets 0 and +8. Two such pairs are
  // visible; the statements between them are not part of this listing.
  181 #define QPEL_H264_H_XMM(OPNAME, OP, MMX)\ 
  182 QPEL_H264_H16_XMM(OPNAME, OP, MMX)\ 
  183 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 
  184     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\ 
  185     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ 
  188     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\ 
  189     ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ 
  // QPEL_H264_V_XMM(OPNAME, OP, MMX): generates the fixed-size vertical
  // low-pass wrappers on top of ff_<OPNAME>h264_qpel8or16_v_lowpass_<MMX>:
  // the 8-wide one passes h = 8; the 16-wide one makes two calls with h = 16
  // at column offsets 0 and +8.
  192 #define QPEL_H264_V_XMM(OPNAME, OP, MMX)\ 
  193 static av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 
  194     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 8);\ 
  196 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 
  197     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 16);\ 
  198     ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ 
  201 static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp,
 
  208     src -= 2*srcStride+2;
 
  210         ff_put_h264_qpel8or16_hv1_lowpass_op_sse2(src, tmp, srcStride, size);
 
  // QPEL_H264_HV_XMM(OPNAME, OP, MMX): generates the two-pass (hv) low-pass
  // wrappers. The first pass always goes through
  // put_h264_qpel8or16_hv1_lowpass_sse2, independent of OPNAME and MMX; only
  // the second (hv2) pass is OPNAME/MMX-specific. The qpel8 and qpel16
  // wrappers forward to the 8or16 wrapper with size 8 and 16 respectively.
  216 #define QPEL_H264_HV_XMM(OPNAME, OP, MMX)\ 
  217 static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\ 
  218     put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, tmpStride, srcStride, size);\ 
  219     ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\ 
  221 static av_always_inline void ff_ ## OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ 
  222     ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 8);\ 
  224 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ 
  225     ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 16);\ 
  // Suffix aliases for names produced by token pasting:
  //  - h_lowpass_l2 with suffix sse2  -> the mmxext wrappers,
  //  - v_lowpass with suffix ssse3    -> the sse2 wrappers,
  //  - 8or16 hv2_lowpass with sse2    -> the mmxext wrappers.
  228 #define ff_put_h264_qpel8_h_lowpass_l2_sse2  ff_put_h264_qpel8_h_lowpass_l2_mmxext 
  229 #define ff_avg_h264_qpel8_h_lowpass_l2_sse2  ff_avg_h264_qpel8_h_lowpass_l2_mmxext 
  230 #define ff_put_h264_qpel16_h_lowpass_l2_sse2 ff_put_h264_qpel16_h_lowpass_l2_mmxext 
  231 #define ff_avg_h264_qpel16_h_lowpass_l2_sse2 ff_avg_h264_qpel16_h_lowpass_l2_mmxext 
  233 #define ff_put_h264_qpel8_v_lowpass_ssse3  ff_put_h264_qpel8_v_lowpass_sse2 
  234 #define ff_avg_h264_qpel8_v_lowpass_ssse3  ff_avg_h264_qpel8_v_lowpass_sse2 
  235 #define ff_put_h264_qpel16_v_lowpass_ssse3 ff_put_h264_qpel16_v_lowpass_sse2 
  236 #define ff_avg_h264_qpel16_v_lowpass_ssse3 ff_avg_h264_qpel16_v_lowpass_sse2 
  238 #define ff_put_h264_qpel8or16_hv2_lowpass_sse2 ff_put_h264_qpel8or16_hv2_lowpass_mmxext 
  239 #define ff_avg_h264_qpel8or16_hv2_lowpass_sse2 ff_avg_h264_qpel8or16_hv2_lowpass_mmxext 
  // H264_MC(OPNAME, SIZE, MMX, ALIGN): expands all four entry-point generators
  // (H264_MC_C, _V, _H and _HV) for one operation / block size / CPU suffix.
  241 #define H264_MC(OPNAME, SIZE, MMX, ALIGN) \ 
  242 H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\ 
  243 H264_MC_V(OPNAME, SIZE, MMX, ALIGN)\ 
  244 H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\ 
  245 H264_MC_HV(OPNAME, SIZE, MMX, ALIGN)\ 
  247 static void put_h264_qpel16_mc00_sse2 (
uint8_t *dst, 
uint8_t *src,
 
  252 static void avg_h264_qpel16_mc00_sse2 (
uint8_t *dst, 
uint8_t *src,
 
  // The 8-wide mc00 entry points with the sse2 suffix are aliases of the
  // mmxext ones.
  257 #define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmxext 
  258 #define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmxext 
  // H264_MC_C(OPNAME, SIZE, MMX, ALIGN): generates the mc00 entry point, which
  // forwards to ff_<OPNAME>pixels<SIZE>_<MMX>(dst, src, stride, SIZE).
  // ALIGN is not referenced in the visible lines.
  260 #define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \ 
  261 static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, uint8_t *src, ptrdiff_t stride)\ 
  263     ff_ ## OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE);\ 
  // H264_MC_H(OPNAME, SIZE, MMX, ALIGN): generates the entry points mc10, mc20
  // and mc30.
  //  - mc10: h_lowpass_l2 with src itself as the second source.
  //  - mc20: plain h_lowpass.
  //  - mc30: h_lowpass_l2 with src+1 as the second source.
  // ALIGN is not referenced in the visible lines.
  266 #define H264_MC_H(OPNAME, SIZE, MMX, ALIGN) \ 
  267 static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\ 
  269     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride);\ 
  272 static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\ 
  274     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride);\ 
  277 static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\ 
  279     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\ 
  // H264_MC_V(OPNAME, SIZE, MMX, ALIGN): generates the entry points mc01, mc02
  // and mc03.
  //  - mc01: put-style v_lowpass of src into an ALIGN-aligned SIZE*SIZE temp
  //    (row stride SIZE), then pixels<SIZE>_l2 of src and temp into dst.
  //  - mc02: v_lowpass straight into dst.
  //  - mc03: as mc01, but the l2 step uses src+stride instead of src.
  282 #define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \ 
  283 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\ 
  285     DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ 
  286     ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ 
  287     ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\ 
  290 static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\ 
  292     ff_ ## OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\ 
  295 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\ 
  297     DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ 
  298     ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ 
  299     ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\ 
  // H264_MC_HV(OPNAME, SIZE, MMX, ALIGN): generates the nine entry points that
  // combine filtering in both directions.
  //  - mc11 / mc31 / mc13 / mc33: put-style v_lowpass into a SIZE*SIZE temp
  //    (from src for mc11 and mc13, from src+1 for mc31 and mc33), followed by
  //    h_lowpass_l2 with temp as second source (reading from src for mc11 and
  //    mc31, from src+stride for mc13 and mc33).
  //  - mc22: hv_lowpass straight into dst, using a uint16_t scratch of
  //    SIZE*(SIZE<8?12:24) elements.
  //  - mc21 / mc23: put-style hv_lowpass into halfHV, then h_lowpass_l2 of
  //    src (mc21) or src+stride (mc23) with halfHV.
  //  - mc12 / mc32: put-style hv_lowpass into halfHV, then
  //    pixels<SIZE>_l2_shift5_mmxext on halfV+2 (mc12) or halfV+3 (mc32)
  //    together with halfHV.
  // For mc21/mc23/mc12/mc32 one byte buffer holds both results: halfHV is the
  // first SIZE*SIZE bytes and halfV is the int16_t area that follows it.
  //
  // NOTE(review): av_assert2(((int)temp & 7) == 0) converts a pointer to int.
  // C11 6.3.2.3p6 makes that undefined when the value does not fit, which is
  // possible where pointers are wider than int; (uintptr_t)temp is the
  // well-defined way to test the low address bits.
  // NOTE(review): this listing omits some lines of the macro (the braces and
  // separators between the generated functions).
  302 #define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \ 
  303 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\ 
  305     DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ 
  306     ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ 
  307     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\ 
  310 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\ 
  312     DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ 
  313     ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\ 
  314     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\ 
  317 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\ 
  319     DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ 
  320     ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ 
  321     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\ 
  324 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\ 
  326     DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ 
  327     ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\ 
  328     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\ 
  331 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\ 
  333     DECLARE_ALIGNED(ALIGN, uint16_t, temp)[SIZE*(SIZE<8?12:24)];\ 
  334     ff_ ## OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\ 
  337 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\ 
  339     DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ 
  340     uint8_t * const halfHV= temp;\ 
  341     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ 
  342     av_assert2(((int)temp & 7) == 0);\ 
  343     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ 
  344     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\ 
  347 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\ 
  349     DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ 
  350     uint8_t * const halfHV= temp;\ 
  351     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ 
  352     av_assert2(((int)temp & 7) == 0);\ 
  353     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ 
  354     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\ 
  357 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\ 
  359     DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ 
  360     uint8_t * const halfHV= temp;\ 
  361     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ 
  362     av_assert2(((int)temp & 7) == 0);\ 
  363     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ 
  364     ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+2, halfHV, stride, SIZE, SIZE);\ 
  367 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\ 
  369     DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ 
  370     uint8_t * const halfHV= temp;\ 
  371     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ 
  372     av_assert2(((int)temp & 7) == 0);\ 
  373     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ 
  374     ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+3, halfHV, stride, SIZE, SIZE);\ 
  // H264_MC_4816(MMX): expands H264_MC for put_ and avg_ at block sizes 4, 8
  // and 16, always with ALIGN = 8.
  377 #define H264_MC_4816(MMX)\ 
  378 H264_MC(put_, 4, MMX, 8)\ 
  379 H264_MC(put_, 8, MMX, 8)\ 
  380 H264_MC(put_, 16,MMX, 8)\ 
  381 H264_MC(avg_, 4, MMX, 8)\ 
  382 H264_MC(avg_, 8, MMX, 8)\ 
  383 H264_MC(avg_, 16,MMX, 8)\ 
  // H264_MC_816(QPEL, XMM): invokes the generator macro passed as QPEL for
  // put_ and avg_ at block sizes 8 and 16, always with ALIGN = 16.
  385 #define H264_MC_816(QPEL, XMM)\ 
  386 QPEL(put_, 8, XMM, 16)\ 
  387 QPEL(put_, 16,XMM, 16)\ 
  388 QPEL(avg_, 8, XMM, 16)\ 
  389 QPEL(avg_, 16,XMM, 16)\ 
  // Instantiate the wrapper generators for put_ and avg_:
  //  - QPEL_H264 with suffix mmxext,
  //  - the V and HV XMM wrappers with suffix sse2,
  //  - the H and HV XMM wrappers with suffix ssse3,
  // then emit the 8- and 16-wide mcXY entry points: V and HV for sse2,
  // H and HV for ssse3.
  391 QPEL_H264(put_,        PUT_OP, mmxext)
 
  392 QPEL_H264(avg_, AVG_MMXEXT_OP, mmxext)
 
  393 QPEL_H264_V_XMM(put_,       PUT_OP, sse2)
 
  394 QPEL_H264_V_XMM(avg_,AVG_MMXEXT_OP, sse2)
 
  395 QPEL_H264_HV_XMM(put_,       PUT_OP, sse2)
 
  396 QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, sse2)
 
  397 QPEL_H264_H_XMM(put_,       PUT_OP, ssse3)
 
  398 QPEL_H264_H_XMM(avg_,AVG_MMXEXT_OP, ssse3)
 
  399 QPEL_H264_HV_XMM(put_,       PUT_OP, ssse3)
 
  400 QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3)
 
  403 H264_MC_816(H264_MC_V, sse2)
 
  404 H264_MC_816(H264_MC_HV, sse2)
 
  405 H264_MC_816(H264_MC_H, ssse3)
 
  406 H264_MC_816(H264_MC_HV, ssse3)
 
  // LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT): declares (prototype only) the
  // function ff_<OP>_h264_qpel<NUM>_<TYPE>_<DEPTH>_<OPT>(dst, src, stride).
  410 #define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \ 
  411 void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \ 
  412     (uint8_t *dst, uint8_t *src, ptrdiff_t stride); 
  // LUMA_MC_ALL(DEPTH, TYPE, OPT): declares the put and avg prototypes for
  // block sizes 4, 8 and 16 via LUMA_MC_OP.
  414 #define LUMA_MC_ALL(DEPTH, TYPE, OPT) \ 
  415     LUMA_MC_OP(put,  4, DEPTH, TYPE, OPT) \ 
  416     LUMA_MC_OP(avg,  4, DEPTH, TYPE, OPT) \ 
  417     LUMA_MC_OP(put,  8, DEPTH, TYPE, OPT) \ 
  418     LUMA_MC_OP(avg,  8, DEPTH, TYPE, OPT) \ 
  419     LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \ 
  420     LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT) 
  // LUMA_MC_816(DEPTH, TYPE, OPT): declares the put and avg prototypes for
  // block sizes 8 and 16 only via LUMA_MC_OP.
  422 #define LUMA_MC_816(DEPTH, TYPE, OPT) \ 
  423     LUMA_MC_OP(put,  8, DEPTH, TYPE, OPT) \ 
  424     LUMA_MC_OP(avg,  8, DEPTH, TYPE, OPT) \ 
  425     LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \ 
  426     LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT) 
  // Prototype declarations for the DEPTH = 10 variants:
  //  - mmxext: all sixteen mcXY positions at sizes 4, 8 and 16;
  //  - sse2:   all sixteen mcXY positions at sizes 8 and 16;
  //  - sse2_cache64 and ssse3_cache64: only mc10, mc20 and mc30, sizes 8 and 16.
  428 LUMA_MC_ALL(10, mc00, mmxext)
 
  429 LUMA_MC_ALL(10, mc10, mmxext)
 
  430 LUMA_MC_ALL(10, mc20, mmxext)
 
  431 LUMA_MC_ALL(10, mc30, mmxext)
 
  432 LUMA_MC_ALL(10, mc01, mmxext)
 
  433 LUMA_MC_ALL(10, mc11, mmxext)
 
  434 LUMA_MC_ALL(10, mc21, mmxext)
 
  435 LUMA_MC_ALL(10, mc31, mmxext)
 
  436 LUMA_MC_ALL(10, mc02, mmxext)
 
  437 LUMA_MC_ALL(10, mc12, mmxext)
 
  438 LUMA_MC_ALL(10, mc22, mmxext)
 
  439 LUMA_MC_ALL(10, mc32, mmxext)
 
  440 LUMA_MC_ALL(10, mc03, mmxext)
 
  441 LUMA_MC_ALL(10, mc13, mmxext)
 
  442 LUMA_MC_ALL(10, mc23, mmxext)
 
  443 LUMA_MC_ALL(10, mc33, mmxext)
 
  445 LUMA_MC_816(10, mc00, sse2)
 
  446 LUMA_MC_816(10, mc10, sse2)
 
  447 LUMA_MC_816(10, mc10, sse2_cache64)
 
  448 LUMA_MC_816(10, mc10, ssse3_cache64)
 
  449 LUMA_MC_816(10, mc20, sse2)
 
  450 LUMA_MC_816(10, mc20, sse2_cache64)
 
  451 LUMA_MC_816(10, mc20, ssse3_cache64)
 
  452 LUMA_MC_816(10, mc30, sse2)
 
  453 LUMA_MC_816(10, mc30, sse2_cache64)
 
  454 LUMA_MC_816(10, mc30, ssse3_cache64)
 
  455 LUMA_MC_816(10, mc01, sse2)
 
  456 LUMA_MC_816(10, mc11, sse2)
 
  457 LUMA_MC_816(10, mc21, sse2)
 
  458 LUMA_MC_816(10, mc31, sse2)
 
  459 LUMA_MC_816(10, mc02, sse2)
 
  460 LUMA_MC_816(10, mc12, sse2)
 
  461 LUMA_MC_816(10, mc22, sse2)
 
  462 LUMA_MC_816(10, mc32, sse2)
 
  463 LUMA_MC_816(10, mc03, sse2)
 
  464 LUMA_MC_816(10, mc13, sse2)
 
  465 LUMA_MC_816(10, mc23, sse2)
 
  466 LUMA_MC_816(10, mc33, sse2)
 
  // QPEL16_OPMC(OP, MC, MMX): defines ff_<OP>_h264_qpel16_<MC>_10_<MMX> in
  // terms of the 8-wide function of the same kind. Two pairs of calls are
  // visible, each at byte offsets 0 and +16 on dst and src.
  // NOTE(review): +16 bytes presumably corresponds to 8 samples of 2 bytes
  // each for these _10_ variants - confirm. The statements between the two
  // pairs are not part of this listing; presumably they advance src/dst to
  // the lower half of the block - confirm against the full source.
  468 #define QPEL16_OPMC(OP, MC, MMX)\ 
  469 void ff_ ## OP ## _h264_qpel16_ ## MC ## _10_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride){\ 
  470     ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst   , src   , stride);\ 
  471     ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\ 
  474     ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst   , src   , stride);\ 
  475     ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\ 
  // QPEL16_OP(MC, MMX): expands QPEL16_OPMC for both put and avg.
  478 #define QPEL16_OP(MC, MMX)\ 
  479 QPEL16_OPMC(put, MC, MMX)\ 
  480 QPEL16_OPMC(avg, MC, MMX) 
  483 QPEL16_OP(mc00, MMX)\ 
  484 QPEL16_OP(mc01, MMX)\ 
  485 QPEL16_OP(mc02, MMX)\ 
  486 QPEL16_OP(mc03, MMX)\ 
  487 QPEL16_OP(mc10, MMX)\ 
  488 QPEL16_OP(mc11, MMX)\ 
  489 QPEL16_OP(mc12, MMX)\ 
  490 QPEL16_OP(mc13, MMX)\ 
  491 QPEL16_OP(mc20, MMX)\ 
  492 QPEL16_OP(mc21, MMX)\ 
  493 QPEL16_OP(mc22, MMX)\ 
  494 QPEL16_OP(mc23, MMX)\ 
  495 QPEL16_OP(mc30, MMX)\ 
  496 QPEL16_OP(mc31, MMX)\ 
  497 QPEL16_OP(mc32, MMX)\ 
  500 #if ARCH_X86_32 // ARCH_X86_64 implies SSE2+ 
  // SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX): fills the sixteen entries of
  // c-><PFX>_pixels_tab[IDX] with the functions named
  // <PREFIX><PFX><SIZE>_mcXY_<CPU>. Entry index is X + 4*Y (entry 1 is mc10,
  // entry 4 is mc01, entry 15 is mc33). A variable named c must be in scope
  // at the expansion site.
  // NOTE(review): the lines wrapping these assignments are not part of this
  // listing.
  506 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)                          \ 
  508     c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \ 
  509     c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \ 
  510     c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \ 
  511     c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \ 
  512     c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \ 
  513     c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \ 
  514     c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \ 
  515     c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \ 
  516     c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \ 
  517     c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \ 
  518     c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \ 
  519     c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \ 
  520     c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \ 
  521     c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \ 
  522     c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \ 
  523     c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \ 
  // H264_QPEL_FUNCS(x, y, CPU): installs the mc<x><y> entry points for one CPU
  // suffix at table index x + y*4: row [0] receives the qpel16 function and
  // row [1] the qpel8 function, for both the put and the avg tables of c.
  526 #define H264_QPEL_FUNCS(x, y, CPU)                                                            \ 
  528         c->put_h264_qpel_pixels_tab[0][x + y * 4] = put_h264_qpel16_mc ## x ## y ## _ ## CPU; \ 
  529         c->put_h264_qpel_pixels_tab[1][x + y * 4] = put_h264_qpel8_mc  ## x ## y ## _ ## CPU; \ 
  530         c->avg_h264_qpel_pixels_tab[0][x + y * 4] = avg_h264_qpel16_mc ## x ## y ## _ ## CPU; \ 
  531         c->avg_h264_qpel_pixels_tab[1][x + y * 4] = avg_h264_qpel8_mc  ## x ## y ## _ ## CPU; \ 
  // H264_QPEL_FUNCS_10(x, y, CPU): same table layout as H264_QPEL_FUNCS
  // (index x + y*4, row [0] = qpel16, row [1] = qpel8, put and avg), but it
  // installs the ff_*_mc<x><y>_10_<CPU> functions.
  534 #define H264_QPEL_FUNCS_10(x, y, CPU)                                                               \ 
  536         c->put_h264_qpel_pixels_tab[0][x + y * 4] = ff_put_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \ 
  537         c->put_h264_qpel_pixels_tab[1][x + y * 4] = ff_put_h264_qpel8_mc  ## x ## y ## _10_ ## CPU; \ 
  538         c->avg_h264_qpel_pixels_tab[0][x + y * 4] = ff_avg_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \ 
  539         c->avg_h264_qpel_pixels_tab[1][x + y * 4] = ff_avg_h264_qpel8_mc  ## x ## y ## _10_ ## CPU; \ 
  545     int high_bit_depth = bit_depth > 8;
 
  549         if (!high_bit_depth) {
 
  556         } 
else if (bit_depth == 10) {
 
  574         if (!high_bit_depth) {
 
  589         if (bit_depth == 10) {
 
  601         if (!high_bit_depth) {
 
  616         if (bit_depth == 10) {
 
  628         if (bit_depth == 10) {