00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
/*
 * Instruction-set dependent building blocks for the inline assembly of this
 * template.  They all expand to string literals that are pasted into the
 * asm statements further down.
 *
 *   MM     register name prefix ("%%mm" or "%%xmm"); a digit is appended
 *   STEP   number of bytes consumed from each input per loop iteration
 *   MOV    move of STEP bytes (movd = 4 bytes, movq = 8 bytes)
 *   MOVQ   full-register move; movdqa in the XMM build, so memory operands
 *          used with it must be 16-byte aligned there
 *   MOVQU  full-register load without that alignment requirement
 *   PSRL1  shift the whole register right by one byte
 *   PSRL2  shift the whole register right by two bytes
 *   PSHUF  despite the parameter names, the FIRST argument is written: it
 *          receives the SECOND argument moved down by two bytes (the MMX
 *          form fills the upper words with copies of word 0 instead of
 *          zeros; only the low part is consumed afterwards)
 */
#ifndef COMPILE_TEMPLATE_SSE
/* 64-bit MMX registers */
#define MM    "%%mm"
#define STEP  4
#define MOV   "movd"
#define MOVQ  "movq"
#define MOVQU "movq"
#define PSRL1(reg) "psrlq $8, "reg" \n\t"
#define PSRL2(reg) "psrlq $16, "reg" \n\t"
#define PSHUF(src,dst) "pshufw $9, "dst", "src" \n\t"
#else
/* 128-bit XMM registers */
#define MM    "%%xmm"
#define STEP  8
#define MOV   "movq"
#define MOVQ  "movdqa"
#define MOVQU "movdqu"
#define PSRL1(reg) "psrldq $1, "reg" \n\t"
#define PSRL2(reg) "psrldq $2, "reg" \n\t"
#define PSHUF(src,dst) "movdqa "dst", "src" \n\t"\
                       "psrldq $2, "src"     \n\t"
#endif

/*
 * LOAD(mem,dst): fetch STEP bytes from mem into dst and interleave them with
 * the bytes of register 7.  With register 7 cleared (the filter does this
 * first thing in every iteration) the result is the input bytes widened to
 * 16-bit words.  The text is the same for both register sets, so it is
 * defined once here; MOV and MM are resolved when LOAD is expanded.
 */
#define LOAD(mem,dst) \
            MOV"       "mem", "dst" \n\t"\
            "punpcklbw "MM"7, "dst" \n\t"
00047 
/*
 * PABS(tmp,dst): replace every signed 16-bit word in dst by its absolute
 * value.
 *
 * Generic form: tmp = 0 - dst, then dst = max(dst, tmp); tmp is clobbered.
 * SSSE3 form:   a single pabsw; tmp is accepted but not touched.
 */
#ifndef COMPILE_TEMPLATE_SSSE3
#define PABS(tmp,dst) \
            "pxor     "tmp", "tmp" \n\t"\
            "psubw    "dst", "tmp" \n\t"\
            "pmaxsw   "tmp", "dst" \n\t"
#else
#define PABS(tmp,dst) \
            "pabsw     "dst", "dst" \n\t"
#endif
00057 
/*
 * CHECK(pj,mj): evaluate one candidate pair of rows for the filter.
 *
 * pj and mj are stringized and used as byte displacements into the row at
 * cur+mrefs and the row at cur+prefs respectively.
 *
 * Register use (register 7 must be zero on entry):
 *   out MM5  per-byte average of the two rows, taken one byte in from the
 *            load position and widened to words
 *   out MM2  sum, as words, of the absolute byte differences at three
 *            consecutive positions starting at the load position
 *   MM3/MM4  scratch
 */
#define CHECK(pj,mj) \
            /* fetch both rows */ \
            MOVQU" "#pj"(%[cur],%[mrefs]), "MM"2 \n\t" \
            MOVQU" "#mj"(%[cur],%[prefs]), "MM"3 \n\t" \
            /* MM5 = pavgb(MM2,MM3) minus ((MM2^MM3) & pb_1); with pb_1   */ \
            /* presumably bytes of 1 this turns the rounded-up average    */ \
            /* into a rounded-down one (pb_1 is defined outside this file) */ \
            MOVQ"      "MM"2, "MM"4 \n\t"\
            MOVQ"      "MM"2, "MM"5 \n\t"\
            "pxor      "MM"3, "MM"4 \n\t"\
            "pavgb     "MM"3, "MM"5 \n\t"\
            "pand     "MANGLE(pb_1)", "MM"4 \n\t"\
            "psubusb   "MM"4, "MM"5 \n\t"\
            /* drop the first byte, then widen the averages to words */ \
            PSRL1(MM"5")                 \
            "punpcklbw "MM"7, "MM"5 \n\t" \
            /* MM2 = |row_m - row_p| per byte, via two saturating subtractions */ \
            MOVQ"      "MM"2, "MM"4 \n\t"\
            "psubusb   "MM"3, "MM"2 \n\t"\
            "psubusb   "MM"4, "MM"3 \n\t"\
            "pmaxub    "MM"3, "MM"2 \n\t"\
            /* copies of the differences moved down by one and by two bytes */ \
            MOVQ"      "MM"2, "MM"3 \n\t"\
            MOVQ"      "MM"2, "MM"4 \n\t" \
            PSRL1(MM"3")                  \
            PSRL2(MM"4")                  \
            /* widen all three and add them up into MM2 */ \
            "punpcklbw "MM"7, "MM"2 \n\t"\
            "punpcklbw "MM"7, "MM"3 \n\t"\
            "punpcklbw "MM"7, "MM"4 \n\t"\
            "paddw     "MM"3, "MM"2 \n\t"\
            "paddw     "MM"4, "MM"2 \n\t"
00082 
/*
 * CHECK1: accept the candidate produced by CHECK where it beats the best
 * score seen so far.
 *
 *   in     MM0 best score, MM1 current prediction,
 *          MM2 candidate score, MM5 candidate prediction
 *   out    MM0 = min(MM0, MM2)
 *          MM1 = MM5 in the words where MM0 > MM2, unchanged elsewhere
 *          MM6 = copy of that comparison mask (consumed by CHECK2)
 *   scratch MM3; MM5 is masked in place
 */
#define CHECK1 \
            /* MM3 = mask of words where the old best score exceeds MM2 */ \
            MOVQ"      "MM"0, "MM"3 \n\t"\
            "pcmpgtw   "MM"2, "MM"3 \n\t" \
            /* keep the smaller score and remember the mask */ \
            "pminsw    "MM"2, "MM"0 \n\t" \
            MOVQ"      "MM"3, "MM"6 \n\t"\
            /* MM1 = (MM5 & mask) | (MM1 & ~mask) */ \
            "pand      "MM"3, "MM"5 \n\t"\
            "pandn     "MM"1, "MM"3 \n\t"\
            "por       "MM"5, "MM"3 \n\t"\
            MOVQ"      "MM"3, "MM"1 \n\t"
00092 
/*
 * CHECK2: like CHECK1, but the candidate score in MM2 is first penalised in
 * every word where the preceding CHECK1 did not accept its candidate.
 *
 * MM6 holds the CHECK1 mask (all ones where accepted, zero elsewhere).
 * Adding pw_1 (defined outside this file, presumably words of 1) gives 0 for
 * accepted words and 1 for rejected ones; shifted left by 14 that becomes
 * 0 or 0x4000, which is added to the score with signed saturation.  The
 * compare-and-select that follows is the CHECK1 sequence without the mask
 * save.
 *
 *   in      MM0 best score, MM1 current prediction, MM2 candidate score,
 *           MM5 candidate prediction, MM6 mask left by CHECK1
 *   out     MM0, MM1 updated as in CHECK1
 *   scratch MM3; MM5 and MM6 are modified in place
 *
 * The continuation backslash on the #define line is required: without it
 * the macro is empty and the strings below end up at file scope.
 */
#define CHECK2 \
            /* penalty: 0 where CHECK1 accepted, 0x4000 where it did not */ \
            "paddw    "MANGLE(pw_1)", "MM"6 \n\t"\
            "psllw     $14,   "MM"6 \n\t"\
            "paddsw    "MM"6, "MM"2 \n\t"\
            /* MM3 = mask of words where the old best score exceeds MM2 */ \
            MOVQ"      "MM"0, "MM"3 \n\t"\
            "pcmpgtw   "MM"2, "MM"3 \n\t"\
            "pminsw    "MM"2, "MM"0 \n\t"\
            /* MM1 = (MM5 & mask) | (MM1 & ~mask) */ \
            "pand      "MM"3, "MM"5 \n\t"\
            "pandn     "MM"1, "MM"3 \n\t"\
            "por       "MM"5, "MM"3 \n\t"\
            MOVQ"      "MM"3, "MM"1 \n\t"
00105 
00106 void RENAME(ff_yadif_filter_line)(uint8_t *dst,
00107                                   uint8_t *prev, uint8_t *cur, uint8_t *next,
00108                                   int w, int prefs, int mrefs, int parity, int mode)
00109 {
00110     uint8_t tmp[5*16];
00111     uint8_t *tmpA= (uint8_t*)(((uint64_t)(tmp+15)) & ~15);
00112     int x;
00113 
00114 #define FILTER\
00115     for(x=0; x<w; x+=STEP){\
00116         __asm__ volatile(\
00117             "pxor      "MM"7, "MM"7 \n\t"\
00118             LOAD("(%[cur],%[mrefs])", MM"0") \
00119             LOAD("(%[cur],%[prefs])", MM"1") \
00120             LOAD("(%["prev2"])", MM"2") \
00121             LOAD("(%["next2"])", MM"3") \
00122             MOVQ"      "MM"3, "MM"4 \n\t"\
00123             "paddw     "MM"2, "MM"3 \n\t"\
00124             "psraw     $1,    "MM"3 \n\t" \
00125             MOVQ"      "MM"0, (%[tmpA]) \n\t" \
00126             MOVQ"      "MM"3, 16(%[tmpA]) \n\t" \
00127             MOVQ"      "MM"1, 32(%[tmpA]) \n\t" \
00128             "psubw     "MM"4, "MM"2 \n\t"\
00129             PABS(      MM"4", MM"2") \
00130             LOAD("(%[prev],%[mrefs])", MM"3") \
00131             LOAD("(%[prev],%[prefs])", MM"4") \
00132             "psubw     "MM"0, "MM"3 \n\t"\
00133             "psubw     "MM"1, "MM"4 \n\t"\
00134             PABS(      MM"5", MM"3")\
00135             PABS(      MM"5", MM"4")\
00136             "paddw     "MM"4, "MM"3 \n\t" \
00137             "psrlw     $1,    "MM"2 \n\t"\
00138             "psrlw     $1,    "MM"3 \n\t"\
00139             "pmaxsw    "MM"3, "MM"2 \n\t"\
00140             LOAD("(%[next],%[mrefs])", MM"3") \
00141             LOAD("(%[next],%[prefs])", MM"4") \
00142             "psubw     "MM"0, "MM"3 \n\t"\
00143             "psubw     "MM"1, "MM"4 \n\t"\
00144             PABS(      MM"5", MM"3")\
00145             PABS(      MM"5", MM"4")\
00146             "paddw     "MM"4, "MM"3 \n\t" \
00147             "psrlw     $1,    "MM"3 \n\t"\
00148             "pmaxsw    "MM"3, "MM"2 \n\t"\
00149             MOVQ"      "MM"2, 48(%[tmpA]) \n\t" \
00150 \
00151             "paddw     "MM"0, "MM"1 \n\t"\
00152             "paddw     "MM"0, "MM"0 \n\t"\
00153             "psubw     "MM"1, "MM"0 \n\t"\
00154             "psrlw     $1,    "MM"1 \n\t" \
00155             PABS(      MM"2", MM"0")      \
00156 \
00157             MOVQU" -1(%[cur],%[mrefs]), "MM"2 \n\t" \
00158             MOVQU" -1(%[cur],%[prefs]), "MM"3 \n\t" \
00159             MOVQ"      "MM"2, "MM"4 \n\t"\
00160             "psubusb   "MM"3, "MM"2 \n\t"\
00161             "psubusb   "MM"4, "MM"3 \n\t"\
00162             "pmaxub    "MM"3, "MM"2 \n\t"\
00163             PSHUF(MM"3", MM"2") \
00164             "punpcklbw "MM"7, "MM"2 \n\t" \
00165             "punpcklbw "MM"7, "MM"3 \n\t" \
00166             "paddw     "MM"2, "MM"0 \n\t"\
00167             "paddw     "MM"3, "MM"0 \n\t"\
00168             "psubw    "MANGLE(pw_1)", "MM"0 \n\t" \
00169 \
00170             CHECK(-2,0)\
00171             CHECK1\
00172             CHECK(-3,1)\
00173             CHECK2\
00174             CHECK(0,-2)\
00175             CHECK1\
00176             CHECK(1,-3)\
00177             CHECK2\
00178 \
00179             \
00180             MOVQ"    48(%[tmpA]), "MM"6 \n\t" \
00181             "cmpl      $2, %[mode] \n\t"\
00182             "jge       1f \n\t"\
00183             LOAD("(%["prev2"],%[mrefs],2)", MM"2") \
00184             LOAD("(%["next2"],%[mrefs],2)", MM"4") \
00185             LOAD("(%["prev2"],%[prefs],2)", MM"3") \
00186             LOAD("(%["next2"],%[prefs],2)", MM"5") \
00187             "paddw     "MM"4, "MM"2 \n\t"\
00188             "paddw     "MM"5, "MM"3 \n\t"\
00189             "psrlw     $1,    "MM"2 \n\t" \
00190             "psrlw     $1,    "MM"3 \n\t" \
00191             MOVQ"    (%[tmpA]), "MM"4 \n\t" \
00192             MOVQ"    16(%[tmpA]), "MM"5 \n\t" \
00193             MOVQ"    32(%[tmpA]), "MM"7 \n\t" \
00194             "psubw     "MM"4, "MM"2 \n\t" \
00195             "psubw     "MM"7, "MM"3 \n\t" \
00196             MOVQ"      "MM"5, "MM"0 \n\t"\
00197             "psubw     "MM"4, "MM"5 \n\t" \
00198             "psubw     "MM"7, "MM"0 \n\t" \
00199             MOVQ"      "MM"2, "MM"4 \n\t"\
00200             "pminsw    "MM"3, "MM"2 \n\t"\
00201             "pmaxsw    "MM"4, "MM"3 \n\t"\
00202             "pmaxsw    "MM"5, "MM"2 \n\t"\
00203             "pminsw    "MM"5, "MM"3 \n\t"\
00204             "pmaxsw    "MM"0, "MM"2 \n\t" \
00205             "pminsw    "MM"0, "MM"3 \n\t" \
00206             "pxor      "MM"4, "MM"4 \n\t"\
00207             "pmaxsw    "MM"3, "MM"6 \n\t"\
00208             "psubw     "MM"2, "MM"4 \n\t" \
00209             "pmaxsw    "MM"4, "MM"6 \n\t" \
00210             "1: \n\t"\
00211 \
00212             MOVQ"    16(%[tmpA]), "MM"2 \n\t" \
00213             MOVQ"      "MM"2, "MM"3 \n\t"\
00214             "psubw     "MM"6, "MM"2 \n\t" \
00215             "paddw     "MM"6, "MM"3 \n\t" \
00216             "pmaxsw    "MM"2, "MM"1 \n\t"\
00217             "pminsw    "MM"3, "MM"1 \n\t" \
00218             "packuswb  "MM"1, "MM"1 \n\t"\
00219 \
00220             :\
00221             :[tmpA] "r"(tmpA),\
00222              [prev] "r"(prev),\
00223              [cur]  "r"(cur),\
00224              [next] "r"(next),\
00225              [prefs]"r"((x86_reg)prefs),\
00226              [mrefs]"r"((x86_reg)mrefs),\
00227              [mode] "g"(mode)\
00228         );\
00229         __asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
00230         dst += STEP;\
00231         prev+= STEP;\
00232         cur += STEP;\
00233         next+= STEP;\
00234     }
00235 
00236     if (parity) {
00237 #define prev2 "prev"
00238 #define next2 "cur"
00239         FILTER
00240 #undef prev2
00241 #undef next2
00242     } else {
00243 #define prev2 "cur"
00244 #define next2 "next"
00245         FILTER
00246 #undef prev2
00247 #undef next2
00248     }
00249 }
/*
 * Remove every helper macro defined by this template, in the order the
 * macros were introduced, so that a later inclusion with different
 * COMPILE_TEMPLATE_* settings can define them again.
 */

/* register set selection */
#undef MM
#undef MOV
#undef MOVQ
#undef MOVQU
#undef STEP
#undef LOAD
#undef PSRL1
#undef PSRL2
#undef PSHUF

/* arithmetic helper */
#undef PABS

/* candidate evaluation */
#undef CHECK
#undef CHECK1
#undef CHECK2

/* loop body */
#undef FILTER
00264