Go to the documentation of this file.
   /*
    * VC1_INV_TRANCS_8_TYPE1(o1, o2, r1, r2, r3, r4, c0)
    *
    * Expands to a sequence of inline-assembly string fragments. It is only
    * usable inside an asm statement that defines the named operands
    * %[tmp0] and %[ftmp0] .. %[ftmp14], because the text below refers to
    * them directly.
    *
    * Arguments (each one is stringified with # and pasted into the asm text):
    *   o1, o2 - destinations of the two final punpcklhw instructions
    *   r1, r2 - immediates loaded with "li" and moved into ftmp13 / ftmp14
    *            for the first group of pmaddhw instructions
    *   r3, r4 - immediates loaded the same way for the second group
    *   c0     - operand added with paddw to ftmp1 and ftmp2 before the
    *            add/subtract stage
    *
    * Register usage as written below:
    *   read only : ftmp5 .. ftmp12 (pmaddhw sources), ftmp0 (psraw shift)
    *   clobbered : tmp0, ftmp1 .. ftmp4, ftmp13, ftmp14
    *
    * Flow:
    *   ftmp1 = pmaddhw(ftmp5, r1) + pmaddhw(ftmp7,  r2)
    *   ftmp2 = pmaddhw(ftmp6, r1) + pmaddhw(ftmp8,  r2)
    *   ftmp3 = pmaddhw(ftmp9, r3) + pmaddhw(ftmp11, r4)
    *   ftmp4 = pmaddhw(ftmp10, r3) + pmaddhw(ftmp12, r4)
    *   ftmp1 += c0; ftmp2 += c0
    *   sums (ftmp1+ftmp3, ftmp2+ftmp4) and differences (ftmp1-ftmp3,
    *   ftmp2-ftmp4) are shifted right by ftmp0, then interleaved with
    *   punpcklhw/punpckhhw into o1 (from the sums) and o2 (from the
    *   differences).
    *
    * NOTE(review): "punpcklwd x, x, x" presumably replicates the low 32-bit
    * word of x into both halves, so r1..r4 each pack a pair of 16-bit
    * coefficients - confirm against the Loongson MMI instruction reference.
    */
   33 #define VC1_INV_TRANCS_8_TYPE1(o1, o2, r1, r2, r3, r4, c0)                  \ 
   34         "li         %[tmp0],    "#r1"                                 \n\t" \ 
   35         "mtc1       %[tmp0],    %[ftmp13]                             \n\t" \ 
   36         "punpcklwd  %[ftmp13],  %[ftmp13],  %[ftmp13]                 \n\t" \ 
   37         "li         %[tmp0],    "#r2"                                 \n\t" \ 
   38         "mtc1       %[tmp0],    %[ftmp14]                             \n\t" \ 
   39         "punpcklwd  %[ftmp14],  %[ftmp14],  %[ftmp14]                 \n\t" \ 
   40         "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp13]                 \n\t" \ 
   41         "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp14]                 \n\t" \ 
   42         "paddw      %[ftmp1],   %[ftmp1],   %[ftmp2]                  \n\t" \ 
   43         "pmaddhw    %[ftmp2],   %[ftmp6],   %[ftmp13]                 \n\t" \ 
   44         "pmaddhw    %[ftmp3],   %[ftmp8],   %[ftmp14]                 \n\t" \ 
   45         "paddw      %[ftmp2],   %[ftmp2],   %[ftmp3]                  \n\t" \ 
   47         "li         %[tmp0],    "#r3"                                 \n\t" \ 
   48         "mtc1       %[tmp0],    %[ftmp13]                             \n\t" \ 
   49         "punpcklwd  %[ftmp13],  %[ftmp13],  %[ftmp13]                 \n\t" \ 
   50         "li         %[tmp0],    "#r4"                                 \n\t" \ 
   51         "mtc1       %[tmp0],    %[ftmp14]                             \n\t" \ 
   52         "punpcklwd  %[ftmp14],  %[ftmp14],  %[ftmp14]                 \n\t" \ 
   53         "pmaddhw    %[ftmp3],   %[ftmp9],   %[ftmp13]                 \n\t" \ 
   54         "pmaddhw    %[ftmp4],   %[ftmp11],  %[ftmp14]                 \n\t" \ 
   55         "paddw      %[ftmp3],   %[ftmp3],   %[ftmp4]                  \n\t" \ 
   56         "pmaddhw    %[ftmp4],   %[ftmp10],  %[ftmp13]                 \n\t" \ 
   57         "pmaddhw    %[ftmp13],  %[ftmp12],  %[ftmp14]                 \n\t" \ 
   58         "paddw      %[ftmp4],   %[ftmp4],   %[ftmp13]                 \n\t" \ 
   60         "paddw      %[ftmp1],   %[ftmp1],   "#c0"                     \n\t" \ 
   61         "paddw      %[ftmp2],   %[ftmp2],   "#c0"                     \n\t" \ 
   62         "paddw      %[ftmp13],  %[ftmp1],   %[ftmp3]                  \n\t" \ 
   63         "psubw      %[ftmp14],  %[ftmp1],   %[ftmp3]                  \n\t" \ 
   64         "paddw      %[ftmp1],   %[ftmp2],   %[ftmp4]                  \n\t" \ 
   65         "psubw      %[ftmp3],   %[ftmp2],   %[ftmp4]                  \n\t" \ 
   66         "psraw      %[ftmp13],  %[ftmp13],  %[ftmp0]                  \n\t" \ 
   67         "psraw      %[ftmp1],   %[ftmp1],   %[ftmp0]                  \n\t" \ 
   68         "psraw      %[ftmp14],  %[ftmp14],  %[ftmp0]                  \n\t" \ 
   69         "psraw      %[ftmp3],   %[ftmp3],   %[ftmp0]                  \n\t" \ 
   70         "punpcklhw  %[ftmp2],   %[ftmp13],  %[ftmp1]                  \n\t" \ 
   71         "punpckhhw  %[ftmp4],   %[ftmp13],  %[ftmp1]                  \n\t" \ 
   72         "punpcklhw  "#o1",      %[ftmp2],   %[ftmp4]                  \n\t" \ 
   73         "punpcklhw  %[ftmp2],   %[ftmp14],  %[ftmp3]                  \n\t" \ 
   74         "punpckhhw  %[ftmp4],   %[ftmp14],  %[ftmp3]                  \n\t" \ 
   75         "punpcklhw  "#o2",      %[ftmp2],   %[ftmp4]                  \n\t" 
   /*
    * VC1_INV_TRANCS_8_TYPE2(o1, o2, r1, r2, r3, r4, c0, c1)
    *
    * Expands to a sequence of inline-assembly string fragments. It is only
    * usable inside an asm statement that defines the named operands
    * %[tmp0] and %[ftmp0] .. %[ftmp14], because the text below refers to
    * them directly.
    *
    * Arguments (each one is stringified with # and pasted into the asm text):
    *   o1, o2 - destinations of the two final punpcklhw instructions
    *   r1, r2 - immediates loaded with "li" and moved into ftmp13 / ftmp14
    *            for the first group of pmaddhw instructions
    *   r3, r4 - immediates loaded the same way for the second group
    *   c0     - operand added with paddw to all four add/subtract results
    *   c1     - operand added with paddw only to the two psubw results
    *            (ftmp14 and ftmp3), before c0 is added
    *
    * Register usage as written below:
    *   read only : ftmp5 .. ftmp12 (pmaddhw sources), ftmp0 (psraw shift)
    *   clobbered : tmp0, ftmp1 .. ftmp4, ftmp13, ftmp14
    *
    * The multiply-accumulate part is the same instruction sequence as in
    * VC1_INV_TRANCS_8_TYPE1. The difference is the rounding stage: here no
    * constant is added before the add/subtract step; instead c0 is added to
    * every result afterwards and c1 is added on top to the differences.
    * The four results are then shifted right by ftmp0 and interleaved with
    * punpcklhw/punpckhhw into o1 (from the sums) and o2 (from the
    * differences).
    *
    * NOTE(review): "punpcklwd x, x, x" presumably replicates the low 32-bit
    * word of x into both halves, so r1..r4 each pack a pair of 16-bit
    * coefficients - confirm against the Loongson MMI instruction reference.
    */
   77 #define VC1_INV_TRANCS_8_TYPE2(o1, o2, r1, r2, r3, r4, c0, c1)              \ 
   78         "li         %[tmp0],    "#r1"                                 \n\t" \ 
   79         "mtc1       %[tmp0],    %[ftmp13]                             \n\t" \ 
   80         "punpcklwd  %[ftmp13],  %[ftmp13],  %[ftmp13]                 \n\t" \ 
   81         "li         %[tmp0],    "#r2"                                 \n\t" \ 
   82         "mtc1       %[tmp0],    %[ftmp14]                             \n\t" \ 
   83         "punpcklwd  %[ftmp14],  %[ftmp14],  %[ftmp14]                 \n\t" \ 
   84         "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp13]                 \n\t" \ 
   85         "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp14]                 \n\t" \ 
   86         "paddw      %[ftmp1],   %[ftmp1],   %[ftmp2]                  \n\t" \ 
   87         "pmaddhw    %[ftmp2],   %[ftmp6],   %[ftmp13]                 \n\t" \ 
   88         "pmaddhw    %[ftmp3],   %[ftmp8],   %[ftmp14]                 \n\t" \ 
   89         "paddw      %[ftmp2],   %[ftmp2],   %[ftmp3]                  \n\t" \ 
   91         "li         %[tmp0],    "#r3"                                 \n\t" \ 
   92         "mtc1       %[tmp0],    %[ftmp13]                             \n\t" \ 
   93         "punpcklwd  %[ftmp13],  %[ftmp13],  %[ftmp13]                 \n\t" \ 
   94         "li         %[tmp0],    "#r4"                                 \n\t" \ 
   95         "mtc1       %[tmp0],    %[ftmp14]                             \n\t" \ 
   96         "punpcklwd  %[ftmp14],  %[ftmp14],  %[ftmp14]                 \n\t" \ 
   97         "pmaddhw    %[ftmp3],   %[ftmp9],   %[ftmp13]                 \n\t" \ 
   98         "pmaddhw    %[ftmp4],   %[ftmp11],  %[ftmp14]                 \n\t" \ 
   99         "paddw      %[ftmp3],   %[ftmp3],   %[ftmp4]                  \n\t" \ 
  100         "pmaddhw    %[ftmp4],   %[ftmp10],  %[ftmp13]                 \n\t" \ 
  101         "pmaddhw    %[ftmp13],  %[ftmp12],  %[ftmp14]                 \n\t" \ 
  102         "paddw      %[ftmp4],   %[ftmp4],   %[ftmp13]                 \n\t" \ 
  104         "paddw      %[ftmp13],  %[ftmp1],   %[ftmp3]                  \n\t" \ 
  105         "psubw      %[ftmp14],  %[ftmp1],   %[ftmp3]                  \n\t" \ 
  106         "paddw      %[ftmp14],  %[ftmp14],  "#c1"                     \n\t" \ 
  107         "paddw      %[ftmp1],   %[ftmp2],   %[ftmp4]                  \n\t" \ 
  108         "psubw      %[ftmp3],   %[ftmp2],   %[ftmp4]                  \n\t" \ 
  109         "paddw      %[ftmp3],   %[ftmp3],   "#c1"                     \n\t" \ 
  110         "paddw      %[ftmp13],  %[ftmp13],  "#c0"                     \n\t" \ 
  111         "paddw      %[ftmp14],  %[ftmp14],  "#c0"                     \n\t" \ 
  112         "paddw      %[ftmp1],   %[ftmp1],   "#c0"                     \n\t" \ 
  113         "paddw      %[ftmp3],   %[ftmp3],   "#c0"                     \n\t" \ 
  114         "psraw      %[ftmp13],  %[ftmp13],  %[ftmp0]                  \n\t" \ 
  115         "psraw      %[ftmp1],   %[ftmp1],   %[ftmp0]                  \n\t" \ 
  116         "psraw      %[ftmp14],  %[ftmp14],  %[ftmp0]                  \n\t" \ 
  117         "psraw      %[ftmp3],   %[ftmp3],   %[ftmp0]                  \n\t" \ 
  118         "punpcklhw  %[ftmp2],   %[ftmp13],  %[ftmp1]                  \n\t" \ 
  119         "punpckhhw  %[ftmp4],   %[ftmp13],  %[ftmp1]                  \n\t" \ 
  120         "punpcklhw  "#o1",      %[ftmp2],   %[ftmp4]                  \n\t" \ 
  121         "punpcklhw  %[ftmp2],   %[ftmp14],  %[ftmp3]                  \n\t" \ 
  122         "punpckhhw  %[ftmp4],   %[ftmp14],  %[ftmp3]                  \n\t" \ 
  123         "punpcklhw  "#o2",      %[ftmp2],   %[ftmp4]                  \n\t" 
  134     dc = (3 * 
dc +  1) >> 1;
 
  135     dc = (3 * 
dc + 16) >> 5;
 
  139         "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t" 
  140         "pshufh     %[dc],      %[dc],          %[ftmp0]                \n\t" 
  141         "li         %[count],   0x02                                    \n\t" 
  144         MMI_LDC1(%[ftmp1], %[dest], 0x00)
 
  145         PTR_ADDU   "%[addr0],   %[dest],        %[linesize]             \n\t" 
  146         MMI_LDC1(%[ftmp2], %[addr0], 0x00)
 
  147         PTR_ADDU   "%[addr0],   %[addr0],       %[linesize]             \n\t" 
  148         MMI_LDC1(%[ftmp3], %[addr0], 0x00)
 
  149         PTR_ADDU   "%[addr0],   %[addr0],       %[linesize]             \n\t" 
  150         MMI_LDC1(%[ftmp4], %[addr0], 0x00)
 
  152         "punpckhbh  %[ftmp5],   %[ftmp1],       %[ftmp0]                \n\t" 
  153         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t" 
  154         "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]                \n\t" 
  155         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t" 
  156         "punpckhbh  %[ftmp7],   %[ftmp3],       %[ftmp0]                \n\t" 
  157         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t" 
  158         "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]                \n\t" 
  159         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t" 
  161         "paddsh     %[ftmp1],   %[ftmp1],       %[dc]                   \n\t" 
  162         "paddsh     %[ftmp2],   %[ftmp2],       %[dc]                   \n\t" 
  163         "paddsh     %[ftmp3],   %[ftmp3],       %[dc]                   \n\t" 
  164         "paddsh     %[ftmp4],   %[ftmp4],       %[dc]                   \n\t" 
  165         "paddsh     %[ftmp5],   %[ftmp5],       %[dc]                   \n\t" 
  166         "paddsh     %[ftmp6],   %[ftmp6],       %[dc]                   \n\t" 
  167         "paddsh     %[ftmp7],   %[ftmp7],       %[dc]                   \n\t" 
  168         "paddsh     %[ftmp8],   %[ftmp8],       %[dc]                   \n\t" 
  170         "packushb   %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t" 
  171         "packushb   %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t" 
  172         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t" 
  173         "packushb   %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t" 
  175         MMI_SDC1(%[ftmp1], %[dest], 0x00)
 
  176         PTR_ADDU   "%[addr0],   %[dest],        %[linesize]             \n\t" 
  177         MMI_SDC1(%[ftmp2], %[addr0], 0x00)
 
  178         PTR_ADDU   "%[addr0],   %[addr0],       %[linesize]             \n\t" 
  179         MMI_SDC1(%[ftmp3], %[addr0], 0x00)
 
  180         PTR_ADDU   "%[addr0],   %[addr0],       %[linesize]             \n\t" 
  181         MMI_SDC1(%[ftmp4], %[addr0], 0x00)
 
  183         "addiu      %[count],   %[count],       -0x01                   \n\t" 
  184         PTR_ADDU   "%[dest],    %[addr0],       %[linesize]             \n\t" 
  185         "bnez       %[count],   1b                                      \n\t" 
  186         : [ftmp0]
"=&f"(ftmp[0]),        [ftmp1]
"=&f"(ftmp[1]),
 
  187           [ftmp2]
"=&f"(ftmp[2]),        [ftmp3]
"=&f"(ftmp[3]),
 
  188           [ftmp4]
"=&f"(ftmp[4]),        [ftmp5]
"=&f"(ftmp[5]),
 
  189           [ftmp6]
"=&f"(ftmp[6]),        [ftmp7]
"=&f"(ftmp[7]),
 
  190           [ftmp8]
"=&f"(ftmp[8]),
 
  191           [addr0]
"=&r"(addr[0]),
 
  192           [count]
"=&r"(count),          [dest]
"+&r"(dest)
 
  193         : [linesize]
"r"((
mips_reg)linesize),
 
  199 #if _MIPS_SIM != _ABIO32 
  208         "li         %[tmp0],    0x03                                    \n\t" 
  209         "mtc1       %[tmp0],    %[ftmp0]                                \n\t" 
  212         MMI_LDC1(%[ftmp1], %[
block], 0x00)
 
  213         MMI_LDC1(%[ftmp11], %[
block], 0x10)
 
  214         MMI_LDC1(%[ftmp2], %[
block], 0x20)
 
  215         MMI_LDC1(%[ftmp12], %[
block], 0x30)
 
  216         MMI_LDC1(%[ftmp3], %[
block], 0x40)
 
  217         MMI_LDC1(%[ftmp13], %[
block], 0x50)
 
  218         MMI_LDC1(%[ftmp4], %[
block], 0x60)
 
  219         MMI_LDC1(%[ftmp14], %[
block], 0x70)
 
  220         "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t" 
  221         "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t" 
  222         "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t" 
  223         "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t" 
  225         "punpcklhw  %[ftmp9],  %[ftmp11],  %[ftmp12]                    \n\t" 
  226         "punpckhhw  %[ftmp10], %[ftmp11],  %[ftmp12]                    \n\t" 
  227         "punpcklhw  %[ftmp11], %[ftmp13],  %[ftmp14]                    \n\t" 
  228         "punpckhhw  %[ftmp12], %[ftmp13],  %[ftmp14]                    \n\t" 
  232                                0x000f0010, 0x00040009, %[
ff_pw_4])
 
  236                                0xfffc000f, 0xfff7fff0, %[
ff_pw_4])
 
  240                                0xfff00009, 0x000f0004, %[
ff_pw_4])
 
  244                                0xfff70004, 0xfff0000f, %[
ff_pw_4])
 
  246         TRANSPOSE_4H(%[ftmp15], %[ftmp16], %[ftmp17], %[ftmp18],
 
  247                      %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
 
  249         TRANSPOSE_4H(%[ftmp19], %[ftmp20], %[ftmp21], %[ftmp22],
 
  250                      %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
 
  252         MMI_SDC1(%[ftmp15], %[
temp], 0x00)
 
  253         MMI_SDC1(%[ftmp19], %[
temp], 0x08)
 
  254         MMI_SDC1(%[ftmp16], %[
temp], 0x10)
 
  255         MMI_SDC1(%[ftmp20], %[
temp], 0x18)
 
  256         MMI_SDC1(%[ftmp17], %[
temp], 0x20)
 
  257         MMI_SDC1(%[ftmp21], %[
temp], 0x28)
 
  258         MMI_SDC1(%[ftmp18], %[
temp], 0x30)
 
  259         MMI_SDC1(%[ftmp22], %[
temp], 0x38)
 
  262         MMI_LDC1(%[ftmp1], %[
block], 0x08)
 
  263         MMI_LDC1(%[ftmp11], %[
block], 0x18)
 
  264         MMI_LDC1(%[ftmp2], %[
block], 0x28)
 
  265         MMI_LDC1(%[ftmp12], %[
block], 0x38)
 
  266         MMI_LDC1(%[ftmp3], %[
block], 0x48)
 
  267         MMI_LDC1(%[ftmp13], %[
block], 0x58)
 
  268         MMI_LDC1(%[ftmp4], %[
block], 0x68)
 
  269         MMI_LDC1(%[ftmp14], %[
block], 0x78)
 
  270         "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t" 
  271         "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t" 
  272         "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t" 
  273         "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t" 
  275         "punpcklhw  %[ftmp9],   %[ftmp11],  %[ftmp12]                   \n\t" 
  276         "punpckhhw  %[ftmp10],  %[ftmp11],  %[ftmp12]                   \n\t" 
  277         "punpcklhw  %[ftmp11],  %[ftmp13],  %[ftmp14]                   \n\t" 
  278         "punpckhhw  %[ftmp12],  %[ftmp13],  %[ftmp14]                   \n\t" 
  282                                0x000f0010, 0x00040009, %[
ff_pw_4])
 
  286                                0xfffc000f, 0xfff7fff0, %[
ff_pw_4])
 
  290                                0xfff00009, 0x000f0004, %[
ff_pw_4])
 
  294                                0xfff70004, 0xfff0000f, %[
ff_pw_4])
 
  296         TRANSPOSE_4H(%[ftmp15], %[ftmp16], %[ftmp17], %[ftmp18],
 
  297                      %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
 
  299         TRANSPOSE_4H(%[ftmp19], %[ftmp20], %[ftmp21], %[ftmp22],
 
  300                      %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
 
  302         MMI_SDC1(%[ftmp19], %[
temp], 0x48)
 
  303         MMI_SDC1(%[ftmp20], %[
temp], 0x58)
 
  304         MMI_SDC1(%[ftmp21], %[
temp], 0x68)
 
  305         MMI_SDC1(%[ftmp22], %[
temp], 0x78)
 
  309         "li         %[tmp0],    0x07                                    \n\t" 
  310         "mtc1       %[tmp0],    %[ftmp0]                                \n\t" 
  313         MMI_LDC1(%[ftmp1], %[
temp], 0x00)
 
  314         MMI_LDC1(%[ftmp11], %[
temp], 0x10)
 
  315         MMI_LDC1(%[ftmp2], %[
temp], 0x20)
 
  316         MMI_LDC1(%[ftmp12], %[
temp], 0x30)
 
  317         "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t" 
  318         "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t" 
  319         "punpcklhw  %[ftmp7],   %[ftmp15],  %[ftmp17]                   \n\t" 
  320         "punpckhhw  %[ftmp8],   %[ftmp15],  %[ftmp17]                   \n\t" 
  322         "punpcklhw  %[ftmp9],   %[ftmp11],  %[ftmp12]                   \n\t" 
  323         "punpckhhw  %[ftmp10],  %[ftmp11],  %[ftmp12]                   \n\t" 
  324         "punpcklhw  %[ftmp11],  %[ftmp16],  %[ftmp18]                   \n\t" 
  325         "punpckhhw  %[ftmp12],  %[ftmp16],  %[ftmp18]                   \n\t" 
  343         MMI_SDC1(%[ftmp15], %[
block], 0x00)
 
  344         MMI_SDC1(%[ftmp16], %[
block], 0x10)
 
  345         MMI_SDC1(%[ftmp17], %[
block], 0x20)
 
  346         MMI_SDC1(%[ftmp18], %[
block], 0x30)
 
  347         MMI_SDC1(%[ftmp19], %[
block], 0x40)
 
  348         MMI_SDC1(%[ftmp20], %[
block], 0x50)
 
  349         MMI_SDC1(%[ftmp21], %[
block], 0x60)
 
  350         MMI_SDC1(%[ftmp22], %[
block], 0x70)
 
  353         MMI_LDC1(%[ftmp1], %[
temp], 0x08)
 
  354         MMI_LDC1(%[ftmp11], %[
temp], 0x18)
 
  355         MMI_LDC1(%[ftmp2], %[
temp], 0x28)
 
  356         MMI_LDC1(%[ftmp12], %[
temp], 0x38)
 
  357         MMI_LDC1(%[ftmp3], %[
temp], 0x48)
 
  358         MMI_LDC1(%[ftmp13], %[
temp], 0x58)
 
  359         MMI_LDC1(%[ftmp4], %[
temp], 0x68)
 
  360         MMI_LDC1(%[ftmp14], %[
temp], 0x78)
 
  361         "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t" 
  362         "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t" 
  363         "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t" 
  364         "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t" 
  366         "punpcklhw  %[ftmp9],   %[ftmp11],  %[ftmp12]                   \n\t" 
  367         "punpckhhw  %[ftmp10],  %[ftmp11],  %[ftmp12]                   \n\t" 
  368         "punpcklhw  %[ftmp11],  %[ftmp13],  %[ftmp14]                   \n\t" 
  369         "punpckhhw  %[ftmp12],  %[ftmp13],  %[ftmp14]                   \n\t" 
  387         MMI_SDC1(%[ftmp15], %[
block], 0x08)
 
  388         MMI_SDC1(%[ftmp16], %[
block], 0x18)
 
  389         MMI_SDC1(%[ftmp17], %[
block], 0x28)
 
  390         MMI_SDC1(%[ftmp18], %[
block], 0x38)
 
  391         MMI_SDC1(%[ftmp19], %[
block], 0x48)
 
  392         MMI_SDC1(%[ftmp20], %[
block], 0x58)
 
  393         MMI_SDC1(%[ftmp21], %[
block], 0x68)
 
  394         MMI_SDC1(%[ftmp22], %[
block], 0x78)
 
  396         : [ftmp0]
"=&f"(ftmp[0]),        [ftmp1]
"=&f"(ftmp[1]),
 
  397           [ftmp2]
"=&f"(ftmp[2]),        [ftmp3]
"=&f"(ftmp[3]),
 
  398           [ftmp4]
"=&f"(ftmp[4]),        [ftmp5]
"=&f"(ftmp[5]),
 
  399           [ftmp6]
"=&f"(ftmp[6]),        [ftmp7]
"=&f"(ftmp[7]),
 
  400           [ftmp8]
"=&f"(ftmp[8]),        [ftmp9]
"=&f"(ftmp[9]),
 
  401           [ftmp10]
"=&f"(ftmp[10]),      [ftmp11]
"=&f"(ftmp[11]),
 
  402           [ftmp12]
"=&f"(ftmp[12]),      [ftmp13]
"=&f"(ftmp[13]),
 
  403           [ftmp14]
"=&f"(ftmp[14]),      [ftmp15]
"=&f"(ftmp[15]),
 
  404           [ftmp16]
"=&f"(ftmp[16]),      [ftmp17]
"=&f"(ftmp[17]),
 
  405           [ftmp18]
"=&f"(ftmp[18]),      [ftmp19]
"=&f"(ftmp[19]),
 
  406           [ftmp20]
"=&f"(ftmp[20]),      [ftmp21]
"=&f"(ftmp[21]),
 
  407           [ftmp22]
"=&f"(ftmp[22]),
 
  424     dc = ( 3 * 
dc +  1) >> 1;
 
  425     dc = (17 * 
dc + 64) >> 7;
 
  429         "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t" 
  430         "pshufh     %[dc],      %[dc],          %[ftmp0]                \n\t" 
  432         MMI_LDC1(%[ftmp1], %[dest0], 0x00)
 
  433         MMI_LDC1(%[ftmp2], %[dest1], 0x00)
 
  434         MMI_LDC1(%[ftmp3], %[dest2], 0x00)
 
  435         MMI_LDC1(%[ftmp4], %[dest3], 0x00)
 
  437         "punpckhbh  %[ftmp5],   %[ftmp1],       %[ftmp0]                \n\t" 
  438         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t" 
  439         "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]                \n\t" 
  440         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t" 
  441         "punpckhbh  %[ftmp7],   %[ftmp3],       %[ftmp0]                \n\t" 
  442         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t" 
  443         "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]                \n\t" 
  444         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t" 
  446         "paddsh     %[ftmp1],   %[ftmp1],       %[dc]                   \n\t" 
  447         "paddsh     %[ftmp2],   %[ftmp2],       %[dc]                   \n\t" 
  448         "paddsh     %[ftmp3],   %[ftmp3],       %[dc]                   \n\t" 
  449         "paddsh     %[ftmp4],   %[ftmp4],       %[dc]                   \n\t" 
  450         "paddsh     %[ftmp5],   %[ftmp5],       %[dc]                   \n\t" 
  451         "paddsh     %[ftmp6],   %[ftmp6],       %[dc]                   \n\t" 
  452         "paddsh     %[ftmp7],   %[ftmp7],       %[dc]                   \n\t" 
  453         "paddsh     %[ftmp8],   %[ftmp8],       %[dc]                   \n\t" 
  455         "packushb   %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t" 
  456         "packushb   %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t" 
  457         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t" 
  458         "packushb   %[ftmp4],   %[ftmp4],       %[ftmp8]                \n\t" 
  460         MMI_SDC1(%[ftmp1], %[dest0], 0x00)
 
  461         MMI_SDC1(%[ftmp2], %[dest1], 0x00)
 
  462         MMI_SDC1(%[ftmp3], %[dest2], 0x00)
 
  463         MMI_SDC1(%[ftmp4], %[dest3], 0x00)
 
  464         : [ftmp0]
"=&f"(ftmp[0]),        [ftmp1]
"=&f"(ftmp[1]),
 
  465           [ftmp2]
"=&f"(ftmp[2]),        [ftmp3]
"=&f"(ftmp[3]),
 
  466           [ftmp4]
"=&f"(ftmp[4]),        [ftmp5]
"=&f"(ftmp[5]),
 
  467           [ftmp6]
"=&f"(ftmp[6]),        [ftmp7]
"=&f"(ftmp[7]),
 
  468           [ftmp8]
"=&f"(ftmp[8])
 
  469         : [dest0]
"r"(dest+0*linesize),  [dest1]
"r"(dest+1*linesize),
 
  470           [dest2]
"r"(dest+2*linesize),  [dest3]
"r"(dest+3*linesize),
 
  476 #if _MIPS_SIM != _ABIO32 
  484     int16_t 
coeff[64] = {12, 16,  16,  15,  12,   9,   6,   4,
 
  485                          12, 15,   6,  -4, -12, -16, -16,  -9,
 
  486                          12,  9,  -6, -16, -12,   4,  16,  15,
 
  487                          12,  4, -16,  -9,  12,  15,  -6, -16,
 
  488                          12, -4, -16,   9,  12, -15,  -6,  16,
 
  489                          12, -9,  -6,  16, -12,  -4,  16, -15,
 
  490                          12, -15,  6,   4, -12,  16, -16,   9,
 
  491                          12, -16, 16, -15,  12,  -9,   6,  -4};
 
  495         "li         %[tmp0],    0x03                                    \n\t" 
  496         "mtc1       %[tmp0],    %[ftmp0]                                \n\t" 
  499         MMI_LDC1(%[ftmp1], %[
src], 0x00)
 
  500         MMI_LDC1(%[ftmp2], %[
src], 0x08)
 
  503         MMI_LDC1(%[ftmp3], %[
coeff], 0x00)
 
  504         MMI_LDC1(%[ftmp4], %[
coeff], 0x08)
 
  505         MMI_LDC1(%[ftmp5], %[
coeff], 0x10)
 
  506         MMI_LDC1(%[ftmp6], %[
coeff], 0x18)
 
  507         "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp3]                    \n\t" 
  508         "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp4]                    \n\t" 
  509         "paddw      %[ftmp9],   %[ftmp7],   %[ftmp8]                    \n\t" 
  510         "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp5]                    \n\t" 
  511         "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp6]                    \n\t" 
  512         "paddw      %[ftmp10],  %[ftmp7],   %[ftmp8]                    \n\t" 
  513         "punpcklwd  %[ftmp7],   %[ftmp9],   %[ftmp10]                   \n\t" 
  514         "punpckhwd  %[ftmp8],   %[ftmp9],   %[ftmp10]                   \n\t" 
  515         "paddw      %[ftmp11],  %[ftmp7],   %[ftmp8]                    \n\t" 
  516         "paddw      %[ftmp11],  %[ftmp11],  %[ff_pw_4]                  \n\t" 
  519         MMI_LDC1(%[ftmp3], %[
coeff], 0x20)
 
  520         MMI_LDC1(%[ftmp4], %[
coeff], 0x28)
 
  521         MMI_LDC1(%[ftmp5], %[
coeff], 0x30)
 
  522         MMI_LDC1(%[ftmp6], %[
coeff], 0x38)
 
  523         "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp3]                    \n\t" 
  524         "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp4]                    \n\t" 
  525         "paddw      %[ftmp9],   %[ftmp7],   %[ftmp8]                    \n\t" 
  526         "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp5]                    \n\t" 
  527         "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp6]                    \n\t" 
  528         "paddw      %[ftmp10],  %[ftmp7],   %[ftmp8]                    \n\t" 
  529         "punpcklwd  %[ftmp7],   %[ftmp9],   %[ftmp10]                   \n\t" 
  530         "punpckhwd  %[ftmp8],   %[ftmp9],   %[ftmp10]                   \n\t" 
  531         "paddw      %[ftmp12],  %[ftmp7],   %[ftmp8]                    \n\t" 
  532         "paddw      %[ftmp12],  %[ftmp12],  %[ff_pw_4]                  \n\t" 
  535         MMI_LDC1(%[ftmp3], %[
coeff], 0x40)
 
  536         MMI_LDC1(%[ftmp4], %[
coeff], 0x48)
 
  537         MMI_LDC1(%[ftmp5], %[
coeff], 0x50)
 
  538         MMI_LDC1(%[ftmp6], %[
coeff], 0x58)
 
  539         "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp3]                    \n\t" 
  540         "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp4]                    \n\t" 
  541         "paddw      %[ftmp9],   %[ftmp7],   %[ftmp8]                    \n\t" 
  542         "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp5]                    \n\t" 
  543         "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp6]                    \n\t" 
  544         "paddw      %[ftmp10],  %[ftmp7],   %[ftmp8]                    \n\t" 
  545         "punpcklwd  %[ftmp7],   %[ftmp9],   %[ftmp10]                   \n\t" 
  546         "punpckhwd  %[ftmp8],   %[ftmp9],   %[ftmp10]                   \n\t" 
  547         "paddw      %[ftmp13],  %[ftmp7],   %[ftmp8]                    \n\t" 
  548         "paddw      %[ftmp13],  %[ftmp13],  %[ff_pw_4]                  \n\t" 
  551         MMI_LDC1(%[ftmp3], %[
coeff], 0x60)
 
  552         MMI_LDC1(%[ftmp4], %[
coeff], 0x68)
 
  553         MMI_LDC1(%[ftmp5], %[
coeff], 0x70)
 
  554         MMI_LDC1(%[ftmp6], %[
coeff], 0x78)
 
  555         "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp3]                    \n\t" 
  556         "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp4]                    \n\t" 
  557         "paddw      %[ftmp9],   %[ftmp7],   %[ftmp8]                    \n\t" 
  558         "pmaddhw    %[ftmp7],   %[ftmp1],   %[ftmp5]                    \n\t" 
  559         "pmaddhw    %[ftmp8],   %[ftmp2],   %[ftmp6]                    \n\t" 
  560         "paddw      %[ftmp10],  %[ftmp7],   %[ftmp8]                    \n\t" 
  561         "punpcklwd  %[ftmp7],   %[ftmp9],   %[ftmp10]                   \n\t" 
  562         "punpckhwd  %[ftmp8],   %[ftmp9],   %[ftmp10]                   \n\t" 
  563         "paddw      %[ftmp14],  %[ftmp7],   %[ftmp8]                    \n\t" 
  564         "paddw      %[ftmp14],  %[ftmp14],  %[ff_pw_4]                  \n\t" 
  567         "psraw      %[ftmp11],  %[ftmp11],  %[ftmp0]                    \n\t" 
  568         "psraw      %[ftmp12],  %[ftmp12],  %[ftmp0]                    \n\t" 
  569         "psraw      %[ftmp13],  %[ftmp13],  %[ftmp0]                    \n\t" 
  570         "psraw      %[ftmp14],  %[ftmp14],  %[ftmp0]                    \n\t" 
  571         "punpcklhw  %[ftmp7],   %[ftmp11],  %[ftmp12]                   \n\t" 
  572         "punpckhhw  %[ftmp8],   %[ftmp11],  %[ftmp12]                   \n\t" 
  573         "punpcklhw  %[ftmp9],   %[ftmp7],   %[ftmp8]                    \n\t" 
  574         "punpcklhw  %[ftmp7],   %[ftmp13],  %[ftmp14]                   \n\t" 
  575         "punpckhhw  %[ftmp8],   %[ftmp13],  %[ftmp14]                   \n\t" 
  576         "punpcklhw  %[ftmp10],  %[ftmp7],   %[ftmp8]                    \n\t" 
  577         MMI_SDC1(%[ftmp9], %[
dst], 0x00)
 
  578         MMI_SDC1(%[ftmp10], %[
dst], 0x08)
 
  582         "addiu      %[count],   %[count],   -0x01                       \n\t" 
  583         "bnez       %[count],   1b                                      \n\t" 
  584         : [ftmp0]
"=&f"(ftmp[0]),        [ftmp1]
"=&f"(ftmp[1]),
 
  585           [ftmp2]
"=&f"(ftmp[2]),        [ftmp3]
"=&f"(ftmp[3]),
 
  586           [ftmp4]
"=&f"(ftmp[4]),        [ftmp5]
"=&f"(ftmp[5]),
 
  587           [ftmp6]
"=&f"(ftmp[6]),        [ftmp7]
"=&f"(ftmp[7]),
 
  588           [ftmp8]
"=&f"(ftmp[8]),        [ftmp9]
"=&f"(ftmp[9]),
 
  589           [ftmp10]
"=&f"(ftmp[10]),      [ftmp11]
"=&f"(ftmp[11]),
 
  590           [ftmp12]
"=&f"(ftmp[12]),      [ftmp13]
"=&f"(ftmp[13]),
 
  591           [ftmp14]
"=&f"(ftmp[14]),      [tmp0]
"=&r"(
tmp[0]),
 
  601         "li         %[tmp0],    0x44                                    \n\t" 
  602         "mtc1       %[tmp0],    %[ftmp15]                               \n\t" 
  605         "li         %[tmp0],    0x07                                    \n\t" 
  606         "mtc1       %[tmp0],    %[ftmp0]                                \n\t" 
  607         MMI_LDC1(%[ftmp1], %[
src], 0x00)
 
  608         MMI_LDC1(%[ftmp2], %[
src], 0x10)
 
  609         MMI_LDC1(%[ftmp3], %[
src], 0x20)
 
  610         MMI_LDC1(%[ftmp4], %[
src], 0x30)
 
  611         "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t" 
  612         "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t" 
  613         "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t" 
  614         "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t" 
  617         "li         %[tmp0],    0x00160011                              \n\t" 
  618         "mtc1       %[tmp0],    %[ftmp3]                                \n\t" 
  619         "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t" 
  620         "li         %[tmp0],    0x000a0011                              \n\t" 
  621         "mtc1       %[tmp0],    %[ftmp4]                                \n\t" 
  622         "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t" 
  623         "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t" 
  624         "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t" 
  625         "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t" 
  626         "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t" 
  627         "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t" 
  628         "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t" 
  629         "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t" 
  630         "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t" 
  631         "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t" 
  632         "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t" 
  633         "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t" 
  634         "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t" 
  635         "punpcklhw  %[ftmp11],  %[ftmp1],   %[ftmp2]                    \n\t" 
  638         "li         %[tmp0],    0x000a0011                              \n\t" 
  639         "mtc1       %[tmp0],    %[ftmp3]                                \n\t" 
  640         "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t" 
  641         "li         %[tmp0],    0xffeaffef                              \n\t" 
  642         "mtc1       %[tmp0],    %[ftmp4]                                \n\t" 
  643         "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t" 
  644         "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t" 
  645         "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t" 
  646         "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t" 
  647         "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t" 
  648         "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t" 
  649         "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t" 
  650         "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t" 
  651         "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t" 
  652         "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t" 
  653         "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t" 
  654         "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t" 
  655         "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t" 
  656         "punpcklhw  %[ftmp12],  %[ftmp1],   %[ftmp2]                    \n\t" 
  659         "li         %[tmp0],    0xfff60011                              \n\t" 
  660         "mtc1       %[tmp0],    %[ftmp3]                                \n\t" 
  661         "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t" 
  662         "li         %[tmp0],    0x0016ffef                              \n\t" 
  663         "mtc1       %[tmp0],    %[ftmp4]                                \n\t" 
  664         "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t" 
  665         "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t" 
  666         "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t" 
  667         "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t" 
  668         "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t" 
  669         "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t" 
  670         "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t" 
  671         "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t" 
  672         "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t" 
  673         "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t" 
  674         "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t" 
  675         "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t" 
  676         "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t" 
  677         "punpcklhw  %[ftmp13],  %[ftmp1],   %[ftmp2]                    \n\t" 
  680         "li         %[tmp0],    0xffea0011                              \n\t" 
  681         "mtc1       %[tmp0],    %[ftmp3]                                \n\t" 
  682         "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t" 
  683         "li         %[tmp0],    0xfff60011                              \n\t" 
  684         "mtc1       %[tmp0],    %[ftmp4]                                \n\t" 
  685         "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t" 
  686         "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t" 
  687         "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t" 
  688         "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t" 
  689         "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t" 
  690         "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t" 
  691         "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t" 
  692         "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t" 
  693         "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t" 
  694         "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t" 
  695         "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t" 
  696         "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t" 
  697         "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t" 
  698         "punpcklhw  %[ftmp14],  %[ftmp1],   %[ftmp2]                    \n\t" 
  700         MMI_LWC1(%[ftmp1], %[dest], 0x00)
 
  701         PTR_ADDU    "%[tmp0],   %[dest],    %[linesize]                 \n\t" 
  702         MMI_LWC1(%[ftmp2], %[tmp0], 0x00)
 
  703         PTR_ADDU    "%[tmp0],   %[tmp0],    %[linesize]                 \n\t" 
  704         MMI_LWC1(%[ftmp3], %[tmp0], 0x00)
 
  705         PTR_ADDU    "%[tmp0],   %[tmp0],    %[linesize]                 \n\t" 
  706         MMI_LWC1(%[ftmp4], %[tmp0], 0x00)
 
  707         "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t" 
  708         "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t" 
  709         "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t" 
  710         "punpcklbh  %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t" 
  711         "punpcklbh  %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t" 
  712         "paddh      %[ftmp1],   %[ftmp1],   %[ftmp11]                   \n\t" 
  713         "paddh      %[ftmp2],   %[ftmp2],   %[ftmp12]                   \n\t" 
  714         "paddh      %[ftmp3],   %[ftmp3],   %[ftmp13]                   \n\t" 
  715         "paddh      %[ftmp4],   %[ftmp4],   %[ftmp14]                   \n\t" 
  716         "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t" 
  717         "packushb   %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t" 
  718         "packushb   %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t" 
  719         "packushb   %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t" 
  720         MMI_SWC1(%[ftmp1], %[dest], 0x00)
 
  721         PTR_ADDU   "%[tmp0],    %[dest],    %[linesize]                 \n\t" 
  722         MMI_SWC1(%[ftmp2], %[tmp0], 0x00)
 
  723         PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t" 
  724         MMI_SWC1(%[ftmp3], %[tmp0], 0x00)
 
  725         PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t" 
  726         MMI_SWC1(%[ftmp4], %[tmp0], 0x00)
 
  729         "li         %[tmp0],    0x07                                    \n\t" 
  730         "mtc1       %[tmp0],    %[ftmp0]                                \n\t" 
  731         MMI_LDC1(%[ftmp1], %[
src], 0x08)
 
  732         MMI_LDC1(%[ftmp2], %[
src], 0x18)
 
  733         MMI_LDC1(%[ftmp3], %[
src], 0x28)
 
  734         MMI_LDC1(%[ftmp4], %[
src], 0x38)
 
  735         "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t" 
  736         "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t" 
  737         "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t" 
  738         "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t" 
  741         "li         %[tmp0],    0x00160011                              \n\t" 
  742         "mtc1       %[tmp0],    %[ftmp3]                                \n\t" 
  743         "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t" 
  744         "li         %[tmp0],    0x000a0011                              \n\t" 
  745         "mtc1       %[tmp0],    %[ftmp4]                                \n\t" 
  746         "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t" 
  747         "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t" 
  748         "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t" 
  749         "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t" 
  750         "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t" 
  751         "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t" 
  752         "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t" 
  753         "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t" 
  754         "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t" 
  755         "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t" 
  756         "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t" 
  757         "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t" 
  758         "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t" 
  759         "punpcklhw  %[ftmp11],  %[ftmp1],   %[ftmp2]                    \n\t" 
  762         "li         %[tmp0],    0x000a0011                              \n\t" 
  763         "mtc1       %[tmp0],    %[ftmp3]                                \n\t" 
  764         "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t" 
  765         "li         %[tmp0],    0xffeaffef                              \n\t" 
  766         "mtc1       %[tmp0],    %[ftmp4]                                \n\t" 
  767         "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t" 
  768         "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t" 
  769         "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t" 
  770         "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t" 
  771         "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t" 
  772         "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t" 
  773         "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t" 
  774         "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t" 
  775         "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t" 
  776         "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t" 
  777         "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t" 
  778         "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t" 
  779         "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t" 
  780         "punpcklhw  %[ftmp12],  %[ftmp1],   %[ftmp2]                    \n\t" 
  783         "li         %[tmp0],    0xfff60011                              \n\t" 
  784         "mtc1       %[tmp0],    %[ftmp3]                                \n\t" 
  785         "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t" 
  786         "li         %[tmp0],    0x0016ffef                              \n\t" 
  787         "mtc1       %[tmp0],    %[ftmp4]                                \n\t" 
  788         "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t" 
  789         "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t" 
  790         "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t" 
  791         "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t" 
  792         "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t" 
  793         "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t" 
  794         "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t" 
  795         "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t" 
  796         "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t" 
  797         "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t" 
  798         "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t" 
  799         "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t" 
  800         "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t" 
  801         "punpcklhw  %[ftmp13],  %[ftmp1],   %[ftmp2]                    \n\t" 
  804         "li         %[tmp0],    0xffea0011                              \n\t" 
  805         "mtc1       %[tmp0],    %[ftmp3]                                \n\t" 
  806         "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t" 
  807         "li         %[tmp0],    0xfff60011                              \n\t" 
  808         "mtc1       %[tmp0],    %[ftmp4]                                \n\t" 
  809         "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t" 
  810         "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t" 
  811         "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t" 
  812         "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t" 
  813         "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t" 
  814         "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t" 
  815         "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t" 
  816         "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t" 
  817         "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t" 
  818         "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t" 
  819         "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t" 
  820         "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t" 
  821         "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t" 
  822         "punpcklhw  %[ftmp14],  %[ftmp1],   %[ftmp2]                    \n\t" 
  824         MMI_LWC1(%[ftmp1], %[dest], 0x04)
 
  825         PTR_ADDU    "%[tmp0],   %[dest],    %[linesize]                 \n\t" 
  826         MMI_LWC1(%[ftmp2], %[tmp0], 0x04)
 
  827         PTR_ADDU    "%[tmp0],   %[tmp0],    %[linesize]                 \n\t" 
  828         MMI_LWC1(%[ftmp3], %[tmp0], 0x04)
 
  829         PTR_ADDU    "%[tmp0],   %[tmp0],    %[linesize]                 \n\t" 
  830         MMI_LWC1(%[ftmp4], %[tmp0], 0x04)
 
  831         "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t" 
  832         "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t" 
  833         "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t" 
  834         "punpcklbh  %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t" 
  835         "punpcklbh  %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t" 
  836         "paddh      %[ftmp1],   %[ftmp1],   %[ftmp11]                   \n\t" 
  837         "paddh      %[ftmp2],   %[ftmp2],   %[ftmp12]                   \n\t" 
  838         "paddh      %[ftmp3],   %[ftmp3],   %[ftmp13]                   \n\t" 
  839         "paddh      %[ftmp4],   %[ftmp4],   %[ftmp14]                   \n\t" 
  840         "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t" 
  841         "packushb   %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t" 
  842         "packushb   %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t" 
  843         "packushb   %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t" 
  844         MMI_SWC1(%[ftmp1], %[dest], 0x04)
 
  845         PTR_ADDU   "%[tmp0],    %[dest],    %[linesize]                 \n\t" 
  846         MMI_SWC1(%[ftmp2], %[tmp0], 0x04)
 
  847         PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t" 
  848         MMI_SWC1(%[ftmp3], %[tmp0], 0x04)
 
  849         PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t" 
  850         MMI_SWC1(%[ftmp4], %[tmp0], 0x04)
 
  852         : [ftmp0]
"=&f"(ftmp[0]),        [ftmp1]
"=&f"(ftmp[1]),
 
  853           [ftmp2]
"=&f"(ftmp[2]),        [ftmp3]
"=&f"(ftmp[3]),
 
  854           [ftmp4]
"=&f"(ftmp[4]),        [ftmp5]
"=&f"(ftmp[5]),
 
  855           [ftmp6]
"=&f"(ftmp[6]),        [ftmp7]
"=&f"(ftmp[7]),
 
  856           [ftmp8]
"=&f"(ftmp[8]),        [ftmp9]
"=&f"(ftmp[9]),
 
  857           [ftmp10]
"=&f"(ftmp[10]),      [ftmp11]
"=&f"(ftmp[11]),
 
  858           [ftmp12]
"=&f"(ftmp[12]),      [ftmp13]
"=&f"(ftmp[13]),
 
  859           [ftmp14]
"=&f"(ftmp[14]),      [ftmp15]
"=&f"(ftmp[15]),
 
  862           [
src]
"r"(
src), [dest]
"r"(dest), [linesize]
"r"(linesize)
 
  /*
   * DC-only inverse transform fragment covering eight rows
   * (dest0..dest7 = dest + k*linesize, see the input operands below).
   * The DC term is scaled in two rounding multiply/shift stages and then
   * added to the pixels loaded from each row.
   * NOTE(review): the enclosing function header, the asm statement opener
   * and the [dc] operand binding are not visible in this chunk.
   */
  /* Stage 1: multiply by 17, add 4 for rounding, shift right by 3. */
  876     dc = (17 * 
dc +  4) >> 3;
 
  /* Stage 2: multiply by 12, add 64 for rounding, shift right by 7. */
  877     dc = (12 * 
dc + 64) >> 7;
 
  /* ftmp0 = 0; it serves as the pshufh selector, as the zero source for
   * the byte->halfword unpack and as the second operand of packushb. */
  881         "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t" 
  /* With an all-zero selector every halfword lane of dc receives lane 0,
   * i.e. the DC value is replicated across the register. */
  882         "pshufh     %[dc],      %[dc],          %[ftmp0]                \n\t" 
  /* Fetch the current pixels of the eight rows.
   * NOTE(review): MMI_LWC1 presumably loads one 32-bit word (4 pixels) -
   * confirm against the macro definition. */
  884         MMI_LWC1(%[ftmp1], %[dest0], 0x00)
 
  885         MMI_LWC1(%[ftmp2], %[dest1], 0x00)
 
  886         MMI_LWC1(%[ftmp3], %[dest2], 0x00)
 
  887         MMI_LWC1(%[ftmp4], %[dest3], 0x00)
 
  888         MMI_LWC1(%[ftmp5], %[dest4], 0x00)
 
  889         MMI_LWC1(%[ftmp6], %[dest5], 0x00)
 
  890         MMI_LWC1(%[ftmp7], %[dest6], 0x00)
 
  891         MMI_LWC1(%[ftmp8], %[dest7], 0x00)
 
  /* Widen the low bytes of each row to 16-bit lanes by interleaving with
   * the zero register. */
  893         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t" 
  894         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t" 
  895         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t" 
  896         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t" 
  897         "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t" 
  898         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t" 
  899         "punpcklbh  %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t" 
  900         "punpcklbh  %[ftmp8],   %[ftmp8],       %[ftmp0]                \n\t" 
  /* Add the replicated DC value with signed 16-bit saturation. */
  902         "paddsh     %[ftmp1],   %[ftmp1],       %[dc]                   \n\t" 
  903         "paddsh     %[ftmp2],   %[ftmp2],       %[dc]                   \n\t" 
  904         "paddsh     %[ftmp3],   %[ftmp3],       %[dc]                   \n\t" 
  905         "paddsh     %[ftmp4],   %[ftmp4],       %[dc]                   \n\t" 
  906         "paddsh     %[ftmp5],   %[ftmp5],       %[dc]                   \n\t" 
  907         "paddsh     %[ftmp6],   %[ftmp6],       %[dc]                   \n\t" 
  908         "paddsh     %[ftmp7],   %[ftmp7],       %[dc]                   \n\t" 
  909         "paddsh     %[ftmp8],   %[ftmp8],       %[dc]                   \n\t" 
  /* Narrow back to bytes with unsigned saturation, which clamps each
   * result to the 0..255 pixel range. */
  911         "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t" 
  912         "packushb   %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t" 
  913         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t" 
  914         "packushb   %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t" 
  915         "packushb   %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t" 
  916         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t" 
  917         "packushb   %[ftmp7],   %[ftmp7],       %[ftmp0]                \n\t" 
  918         "packushb   %[ftmp8],   %[ftmp8],       %[ftmp0]                \n\t" 
  /* Write the updated pixels back to the same eight rows. */
  920         MMI_SWC1(%[ftmp1], %[dest0], 0x00)
 
  921         MMI_SWC1(%[ftmp2], %[dest1], 0x00)
 
  922         MMI_SWC1(%[ftmp3], %[dest2], 0x00)
 
  923         MMI_SWC1(%[ftmp4], %[dest3], 0x00)
 
  924         MMI_SWC1(%[ftmp5], %[dest4], 0x00)
 
  925         MMI_SWC1(%[ftmp6], %[dest5], 0x00)
 
  926         MMI_SWC1(%[ftmp7], %[dest6], 0x00)
 
  927         MMI_SWC1(%[ftmp8], %[dest7], 0x00)
 
  /* Outputs: nine early-clobbered floating-point scratch registers. */
  928         : [ftmp0]
"=&f"(ftmp[0]),        [ftmp1]
"=&f"(ftmp[1]),
 
  929           [ftmp2]
"=&f"(ftmp[2]),        [ftmp3]
"=&f"(ftmp[3]),
 
  930           [ftmp4]
"=&f"(ftmp[4]),        [ftmp5]
"=&f"(ftmp[5]),
 
  931           [ftmp6]
"=&f"(ftmp[6]),        [ftmp7]
"=&f"(ftmp[7]),
 
  933           [ftmp8]
"=&f"(ftmp[8])
 
  /* Inputs: one precomputed row address per destination row. */
  934         : [dest0]
"r"(dest+0*linesize),  [dest1]
"r"(dest+1*linesize),
 
  935           [dest2]
"r"(dest+2*linesize),  [dest3]
"r"(dest+3*linesize),
 
  936           [dest4]
"r"(dest+4*linesize),  [dest5]
"r"(dest+5*linesize),
 
  937           [dest6]
"r"(dest+6*linesize),  [dest7]
"r"(dest+7*linesize),
 
  /*
   * Row-pass fragment: each loop iteration multiplies one group of input
   * values by a 4x4 coefficient table, rounds, shifts right by 3 and stores
   * four 16-bit results to dst. Compiled only when the ABI is not O32.
   * NOTE(review): the function header, the remaining declarations, the
   * asm statement opener, local label 1 and the src/dst pointer updates
   * are not visible in this chunk.
   */
  943 #if _MIPS_SIM != _ABIO32 
  /* count is the loop counter (8 iterations); tmp[0] backs the %[tmp0]
   * scratch general-purpose register. */
  949     uint64_t count = 8, 
tmp[1];
 
  /* Sixteen 16-bit coefficients, read below as four groups of four
   * (byte offsets 0x00, 0x08, 0x10, 0x18). Only the first group of the
   * initializer is visible in this chunk. */
  950     int16_t 
coeff[16] = {17, 22, 17, 10,
 
  /* ftmp0 = 3: shift count for the psraw instructions below. */
  958         "li         %[tmp0],    0x03                                    \n\t" 
  959         "mtc1       %[tmp0],    %[ftmp0]                                \n\t" 
  /* Load the four coefficient groups into ftmp2..ftmp5. */
  961         MMI_LDC1(%[ftmp2], %[
coeff], 0x00)
 
  962         MMI_LDC1(%[ftmp3], %[
coeff], 0x08)
 
  963         MMI_LDC1(%[ftmp4], %[
coeff], 0x10)
 
  964         MMI_LDC1(%[ftmp5], %[
coeff], 0x18)
 
  /* Load the input values for this iteration.
   * NOTE(review): MMI_LDC1 presumably loads 64 bits (four int16) -
   * confirm against the macro definition. */
  967         MMI_LDC1(%[ftmp1], %[
src], 0x00)
 
  /* Multiply the input by each coefficient group; every pmaddhw leaves
   * two 32-bit partial sums (products of lanes 0+1 and of lanes 2+3). */
  968         "pmaddhw    %[ftmp6],   %[ftmp2],   %[ftmp1]                    \n\t" 
  969         "pmaddhw    %[ftmp7],   %[ftmp3],   %[ftmp1]                    \n\t" 
  970         "pmaddhw    %[ftmp8],   %[ftmp4],   %[ftmp1]                    \n\t" 
  971         "pmaddhw    %[ftmp9],   %[ftmp5],   %[ftmp1]                    \n\t" 
  /* Regroup the partial sums so that matching halves line up, then add
   * them: ftmp8 = full sums for groups 0 and 1, ftmp9 = groups 2 and 3. */
  972         "punpcklwd  %[ftmp10],  %[ftmp6],   %[ftmp7]                    \n\t" 
  973         "punpckhwd  %[ftmp11],  %[ftmp6],   %[ftmp7]                    \n\t" 
  974         "punpcklwd  %[ftmp6],   %[ftmp8],   %[ftmp9]                    \n\t" 
  975         "punpckhwd  %[ftmp7],   %[ftmp8],   %[ftmp9]                    \n\t" 
  976         "paddw      %[ftmp8],   %[ftmp10],  %[ftmp11]                   \n\t" 
  977         "paddw      %[ftmp9],   %[ftmp6],   %[ftmp7]                    \n\t" 
  /* Rounding: add ff_pw_4, then arithmetic shift right by ftmp0 (3).
   * NOTE(review): ff_pw_4 presumably holds 4 in each 32-bit lane - its
   * definition is not visible here. */
  978         "paddw      %[ftmp8],   %[ftmp8],   %[ff_pw_4]                  \n\t" 
  979         "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_4]                  \n\t" 
  980         "psraw      %[ftmp8],   %[ftmp8],   %[ftmp0]                    \n\t" 
  981         "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t" 
  /* Pack the low 16 bits of the four 32-bit results into ftmp8, in group
   * order 0,1,2,3, and store them to dst. */
  982         "punpcklhw  %[ftmp6],   %[ftmp8],   %[ftmp9]                    \n\t" 
  983         "punpckhhw  %[ftmp7],   %[ftmp8],   %[ftmp9]                    \n\t" 
  984         "punpcklhw  %[ftmp8],   %[ftmp6],   %[ftmp7]                    \n\t" 
  985         MMI_SDC1(%[ftmp8], %[
dst], 0x00)
 
  /* Loop control: decrement count and branch back to local label 1 while
   * it is non-zero. */
  989         "addiu      %[count],   %[count],   -0x01                       \n\t" 
  990         "bnez       %[count],   1b                                      \n\t" 
  /* Outputs: twelve early-clobbered FP scratch registers, the GPR
   * scratch and the read-write loop counter. */
  991         : [ftmp0]
"=&f"(ftmp[0]),        [ftmp1]
"=&f"(ftmp[1]),
 
  992           [ftmp2]
"=&f"(ftmp[2]),        [ftmp3]
"=&f"(ftmp[3]),
 
  993           [ftmp4]
"=&f"(ftmp[4]),        [ftmp5]
"=&f"(ftmp[5]),
 
  994           [ftmp6]
"=&f"(ftmp[6]),        [ftmp7]
"=&f"(ftmp[7]),
 
  995           [ftmp8]
"=&f"(ftmp[8]),        [ftmp9]
"=&f"(ftmp[9]),
 
  996           [ftmp10]
"=&f"(ftmp[10]),      [ftmp11]
"=&f"(ftmp[11]),
 
  997           [tmp0]
"=&r"(
tmp[0]),          [count]
"+&r"(count),
 
  /*
   * Second-pass fragment: gathers 16-bit values from src, and adds the
   * results held in ftmp15..ftmp22 to eight rows of dest with clamping.
   * NOTE(review): the asm statement opener and the listing lines between
   * the gather step and the dest update (where ftmp15..ftmp22 are
   * presumably computed) are not visible in this chunk.
   */
  /* ftmp0 = 7: presumably the shift count for the lines not visible here;
   * it is overwritten with zero further down. */
  1007         "li         %[tmp0],    0x07                                    \n\t" 
  1008         "mtc1       %[tmp0],    %[ftmp0]                                \n\t" 
  /* Load the 64-bit groups at src offsets 0x00/0x20/0x40/0x60 and
   * interleave their 16-bit elements pairwise into ftmp5..ftmp8. */
  1010         MMI_LDC1(%[ftmp1], %[
src], 0x00)
 
  1011         MMI_LDC1(%[ftmp2], %[
src], 0x20)
 
  1012         MMI_LDC1(%[ftmp3], %[
src], 0x40)
 
  1013         MMI_LDC1(%[ftmp4], %[
src], 0x60)
 
  1014         "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t" 
  1015         "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t" 
  1016         "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t" 
  1017         "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t" 
  /* Same for the groups at src offsets 0x10/0x30/0x50/0x70, interleaved
   * into ftmp9..ftmp12. */
  1019         MMI_LDC1(%[ftmp1], %[
src], 0x10)
 
  1020         MMI_LDC1(%[ftmp2], %[
src], 0x30)
 
  1021         MMI_LDC1(%[ftmp3], %[
src], 0x50)
 
  1022         MMI_LDC1(%[ftmp4], %[
src], 0x70)
 
  1023         "punpcklhw  %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t" 
  1024         "punpckhhw  %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t" 
  1025         "punpcklhw  %[ftmp11],  %[ftmp3],   %[ftmp4]                    \n\t" 
  1026         "punpckhhw  %[ftmp12],  %[ftmp3],   %[ftmp4]                    \n\t" 
  /* Load the current pixels of eight consecutive dest rows; tmp0 walks
   * down the rows by adding linesize each time. */
  1044         MMI_LWC1(%[ftmp1], %[dest], 0x00)
 
  1045         PTR_ADDU  "%[tmp0],   %[dest],    %[linesize]                 \n\t" 
  1046         MMI_LWC1(%[ftmp2], %[tmp0], 0x00)
 
  1047         PTR_ADDU  "%[tmp0],   %[tmp0],    %[linesize]                 \n\t" 
  1048         MMI_LWC1(%[ftmp3], %[tmp0], 0x00)
 
  1049         PTR_ADDU  "%[tmp0],   %[tmp0],    %[linesize]                 \n\t" 
  1050         MMI_LWC1(%[ftmp4], %[tmp0], 0x00)
 
  1051         PTR_ADDU  "%[tmp0],   %[tmp0],    %[linesize]                 \n\t" 
  1052         MMI_LWC1(%[ftmp5], %[tmp0], 0x00)
 
  1053         PTR_ADDU  "%[tmp0],   %[tmp0],    %[linesize]                 \n\t" 
  1054         MMI_LWC1(%[ftmp6], %[tmp0], 0x00)
 
  1055         PTR_ADDU  "%[tmp0],   %[tmp0],    %[linesize]                 \n\t" 
  1056         MMI_LWC1(%[ftmp7], %[tmp0], 0x00)
 
  1057         PTR_ADDU  "%[tmp0],   %[tmp0],    %[linesize]                 \n\t" 
  1058         MMI_LWC1(%[ftmp8], %[tmp0], 0x00)
 
  /* ftmp0 = 0, then widen the loaded bytes to 16-bit lanes by
   * interleaving with zero. */
  1059         "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t" 
  1060         "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t" 
  1061         "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t" 
  1062         "punpcklbh  %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t" 
  1063         "punpcklbh  %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t" 
  1064         "punpcklbh  %[ftmp5],   %[ftmp5],   %[ftmp0]                    \n\t" 
  1065         "punpcklbh  %[ftmp6],   %[ftmp6],   %[ftmp0]                    \n\t" 
  1066         "punpcklbh  %[ftmp7],   %[ftmp7],   %[ftmp0]                    \n\t" 
  1067         "punpcklbh  %[ftmp8],   %[ftmp8],   %[ftmp0]                    \n\t" 
  /* Add one result register (ftmp15..ftmp22) to each row; paddh is the
   * wrapping (non-saturating) halfword add. */
  1069         "paddh      %[ftmp1],   %[ftmp1],   %[ftmp15]                   \n\t" 
  1070         "paddh      %[ftmp2],   %[ftmp2],   %[ftmp16]                   \n\t" 
  1071         "paddh      %[ftmp3],   %[ftmp3],   %[ftmp17]                   \n\t" 
  1072         "paddh      %[ftmp4],   %[ftmp4],   %[ftmp18]                   \n\t" 
  1073         "paddh      %[ftmp5],   %[ftmp5],   %[ftmp19]                   \n\t" 
  1074         "paddh      %[ftmp6],   %[ftmp6],   %[ftmp20]                   \n\t" 
  1075         "paddh      %[ftmp7],   %[ftmp7],   %[ftmp21]                   \n\t" 
  1076         "paddh      %[ftmp8],   %[ftmp8],   %[ftmp22]                   \n\t" 
  /* Narrow back to bytes with unsigned saturation (clamp to 0..255). */
  1078         "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t" 
  1079         "packushb   %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t" 
  1080         "packushb   %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t" 
  1081         "packushb   %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t" 
  1082         "packushb   %[ftmp5],   %[ftmp5],   %[ftmp0]                    \n\t" 
  1083         "packushb   %[ftmp6],   %[ftmp6],   %[ftmp0]                    \n\t" 
  1084         "packushb   %[ftmp7],   %[ftmp7],   %[ftmp0]                    \n\t" 
  1085         "packushb   %[ftmp8],   %[ftmp8],   %[ftmp0]                    \n\t" 
  /* Store the eight rows back, walking dest by linesize again. */
  1087         MMI_SWC1(%[ftmp1], %[dest], 0x00)
 
  1088         PTR_ADDU   "%[tmp0],    %[dest],    %[linesize]                 \n\t" 
  1089         MMI_SWC1(%[ftmp2], %[tmp0], 0x00)
 
  1090         PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t" 
  1091         MMI_SWC1(%[ftmp3], %[tmp0], 0x00)
 
  1092         PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t" 
  1093         MMI_SWC1(%[ftmp4], %[tmp0], 0x00)
 
  1094         PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t" 
  1095         MMI_SWC1(%[ftmp5], %[tmp0], 0x00)
 
  1096         PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t" 
  1097         MMI_SWC1(%[ftmp6], %[tmp0], 0x00)
 
  1098         PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t" 
  1099         MMI_SWC1(%[ftmp7], %[tmp0], 0x00)
 
  1100         PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t" 
  1101         MMI_SWC1(%[ftmp8], %[tmp0], 0x00)
 
  /* Outputs: 23 early-clobbered floating-point scratch registers. */
  1103         : [ftmp0]
"=&f"(ftmp[0]),        [ftmp1]
"=&f"(ftmp[1]),
 
  1104           [ftmp2]
"=&f"(ftmp[2]),        [ftmp3]
"=&f"(ftmp[3]),
 
  1105           [ftmp4]
"=&f"(ftmp[4]),        [ftmp5]
"=&f"(ftmp[5]),
 
  1106           [ftmp6]
"=&f"(ftmp[6]),        [ftmp7]
"=&f"(ftmp[7]),
 
  1107           [ftmp8]
"=&f"(ftmp[8]),        [ftmp9]
"=&f"(ftmp[9]),
 
  1108           [ftmp10]
"=&f"(ftmp[10]),      [ftmp11]
"=&f"(ftmp[11]),
 
  1109           [ftmp12]
"=&f"(ftmp[12]),      [ftmp13]
"=&f"(ftmp[13]),
 
  1110           [ftmp14]
"=&f"(ftmp[14]),      [ftmp15]
"=&f"(ftmp[15]),
 
  1111           [ftmp16]
"=&f"(ftmp[16]),      [ftmp17]
"=&f"(ftmp[17]),
 
  1112           [ftmp18]
"=&f"(ftmp[18]),      [ftmp19]
"=&f"(ftmp[19]),
 
  1113           [ftmp20]
"=&f"(ftmp[20]),      [ftmp21]
"=&f"(ftmp[21]),
 
  1114           [ftmp22]
"=&f"(ftmp[22]),
 
  /* Inputs visible here: coefficient source, destination base pointer and
   * row stride. */
  1117           [
src]
"r"(
src), [dest]
"r"(dest), [linesize]
"r"(linesize)
 
  /*
   * DC-only inverse transform fragment covering four rows
   * (dest0..dest3 = dest + k*linesize, see the input operands below).
   * The DC term is scaled in two rounding multiply/shift stages and then
   * added to the pixels loaded from each row.
   * NOTE(review): the enclosing function header, the asm statement opener
   * and the [dc] operand binding are not visible in this chunk.
   */
  /* Stage 1: multiply by 17, add 4 for rounding, shift right by 3. */
  1131     dc = (17 * 
dc +  4) >> 3;
 
  /* Stage 2: multiply by 17, add 64 for rounding, shift right by 7. */
  1132     dc = (17 * 
dc + 64) >> 7;
 
  /* ftmp0 = 0; it serves as the pshufh selector, as the zero source for
   * the byte->halfword unpack and as the second operand of packushb. */
  1136         "pxor       %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t" 
  /* With an all-zero selector every halfword lane of dc receives lane 0,
   * i.e. the DC value is replicated across the register. */
  1137         "pshufh     %[dc],      %[dc],          %[ftmp0]                \n\t" 
  /* Fetch the current pixels of the four rows.
   * NOTE(review): MMI_LWC1 presumably loads one 32-bit word (4 pixels) -
   * confirm against the macro definition. */
  1139         MMI_LWC1(%[ftmp1], %[dest0], 0x00)
 
  1140         MMI_LWC1(%[ftmp2], %[dest1], 0x00)
 
  1141         MMI_LWC1(%[ftmp3], %[dest2], 0x00)
 
  1142         MMI_LWC1(%[ftmp4], %[dest3], 0x00)
 
  /* Widen the low bytes of each row to 16-bit lanes by interleaving with
   * the zero register. */
  1144         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t" 
  1145         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t" 
  1146         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t" 
  1147         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t" 
  /* Add the replicated DC value with signed 16-bit saturation. */
  1149         "paddsh     %[ftmp1],   %[ftmp1],       %[dc]                   \n\t" 
  1150         "paddsh     %[ftmp2],   %[ftmp2],       %[dc]                   \n\t" 
  1151         "paddsh     %[ftmp3],   %[ftmp3],       %[dc]                   \n\t" 
  1152         "paddsh     %[ftmp4],   %[ftmp4],       %[dc]                   \n\t" 
  /* Narrow back to bytes with unsigned saturation, which clamps each
   * result to the 0..255 pixel range. */
  1154         "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t" 
  1155         "packushb   %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t" 
  1156         "packushb   %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t" 
  1157         "packushb   %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t" 
  /* Write the updated pixels back to the same four rows. */
  1159         MMI_SWC1(%[ftmp1], %[dest0], 0x00)
 
  1160         MMI_SWC1(%[ftmp2], %[dest1], 0x00)
 
  1161         MMI_SWC1(%[ftmp3], %[dest2], 0x00)
 
  1162         MMI_SWC1(%[ftmp4], %[dest3], 0x00)
 
  /* Outputs: five early-clobbered floating-point scratch registers. */
  1163         : [ftmp0]
"=&f"(ftmp[0]),        [ftmp1]
"=&f"(ftmp[1]),
 
  1164           [ftmp2]
"=&f"(ftmp[2]),        [ftmp3]
"=&f"(ftmp[3]),
 
  1166           [ftmp4]
"=&f"(ftmp[4])
 
  /* Inputs: one precomputed row address per destination row. */
  1167         : [dest0]
"r"(dest+0*linesize),  [dest1]
"r"(dest+1*linesize),
 
  1168           [dest2]
"r"(dest+2*linesize),  [dest3]
"r"(dest+3*linesize),
 
 1179     uint32_t count = 4, 
tmp[1];
 
 1180     int16_t 
coeff[16] = {17, 22, 17, 10,
 
 1187         "li         %[tmp0],    0x03                                    \n\t" 
 1188         "mtc1       %[tmp0],    %[ftmp0]                                \n\t" 
 1189         MMI_LDC1(%[ftmp2], %[
coeff], 0x00)
 
 1190         MMI_LDC1(%[ftmp3], %[
coeff], 0x08)
 
 1191         MMI_LDC1(%[ftmp4], %[
coeff], 0x10)
 
 1192         MMI_LDC1(%[ftmp5], %[
coeff], 0x18)
 
 1195         MMI_LDC1(%[ftmp1], %[
src], 0x00)
 
 1196         "pmaddhw    %[ftmp6],   %[ftmp2],   %[ftmp1]                    \n\t" 
 1197         "pmaddhw    %[ftmp7],   %[ftmp3],   %[ftmp1]                    \n\t" 
 1198         "pmaddhw    %[ftmp8],   %[ftmp4],   %[ftmp1]                    \n\t" 
 1199         "pmaddhw    %[ftmp9],   %[ftmp5],   %[ftmp1]                    \n\t" 
 1200         "punpcklwd  %[ftmp10],  %[ftmp6],   %[ftmp7]                    \n\t" 
 1201         "punpckhwd  %[ftmp11],  %[ftmp6],   %[ftmp7]                    \n\t" 
 1202         "punpcklwd  %[ftmp6],   %[ftmp8],   %[ftmp9]                    \n\t" 
 1203         "punpckhwd  %[ftmp7],   %[ftmp8],   %[ftmp9]                    \n\t" 
 1204         "paddw      %[ftmp8],   %[ftmp10],  %[ftmp11]                   \n\t" 
 1205         "paddw      %[ftmp9],   %[ftmp6],   %[ftmp7]                    \n\t" 
 1206         "paddw      %[ftmp8],   %[ftmp8],   %[ff_pw_4]                  \n\t" 
 1207         "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_4]                  \n\t" 
 1208         "psraw      %[ftmp8],   %[ftmp8],   %[ftmp0]                    \n\t" 
 1209         "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t" 
 1210         "punpcklhw  %[ftmp6],   %[ftmp8],   %[ftmp9]                    \n\t" 
 1211         "punpckhhw  %[ftmp7],   %[ftmp8],   %[ftmp9]                    \n\t" 
 1212         "punpcklhw  %[ftmp8],   %[ftmp6],   %[ftmp7]                    \n\t" 
 1213         MMI_SDC1(%[ftmp8], %[
dst], 0x00)
 
 1217         "addiu      %[count],   %[count],   -0x01                       \n\t" 
 1218         "bnez       %[count],   1b                                      \n\t" 
 1219         : [ftmp0]
"=&f"(ftmp[0]),        [ftmp1]
"=&f"(ftmp[1]),
 
 1220           [ftmp2]
"=&f"(ftmp[2]),        [ftmp3]
"=&f"(ftmp[3]),
 
 1221           [ftmp4]
"=&f"(ftmp[4]),        [ftmp5]
"=&f"(ftmp[5]),
 
 1222           [ftmp6]
"=&f"(ftmp[6]),        [ftmp7]
"=&f"(ftmp[7]),
 
 1223           [ftmp8]
"=&f"(ftmp[8]),        [ftmp9]
"=&f"(ftmp[9]),
 
 1224           [ftmp10]
"=&f"(ftmp[10]),      [ftmp11]
"=&f"(ftmp[11]),
 
 1225           [tmp0]
"=&r"(
tmp[0]),          [count]
"+&r"(count),
 
 1235         "li         %[tmp0],    0x07                                    \n\t" 
 1236         "mtc1       %[tmp0],    %[ftmp0]                                \n\t" 
 1237         "li         %[tmp0],    0x44                                    \n\t" 
 1238         "mtc1       %[tmp0],    %[ftmp15]                               \n\t" 
 1240         MMI_LDC1(%[ftmp1], %[
src], 0x00)
 
 1241         MMI_LDC1(%[ftmp2], %[
src], 0x10)
 
 1242         MMI_LDC1(%[ftmp3], %[
src], 0x20)
 
 1243         MMI_LDC1(%[ftmp4], %[
src], 0x30)
 
 1244         "punpcklhw  %[ftmp5],   %[ftmp1],   %[ftmp2]                    \n\t" 
 1245         "punpckhhw  %[ftmp6],   %[ftmp1],   %[ftmp2]                    \n\t" 
 1246         "punpcklhw  %[ftmp7],   %[ftmp3],   %[ftmp4]                    \n\t" 
 1247         "punpckhhw  %[ftmp8],   %[ftmp3],   %[ftmp4]                    \n\t" 
 1250         "li         %[tmp0],    0x00160011                              \n\t" 
 1251         "mtc1       %[tmp0],    %[ftmp3]                                \n\t" 
 1252         "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t" 
 1253         "li         %[tmp0],    0x000a0011                              \n\t" 
 1254         "mtc1       %[tmp0],    %[ftmp4]                                \n\t" 
 1255         "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t" 
 1256         "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t" 
 1257         "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t" 
 1258         "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t" 
 1259         "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t" 
 1260         "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t" 
 1261         "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t" 
 1262         "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t" 
 1263         "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t" 
 1264         "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t" 
 1265         "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t" 
 1266         "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t" 
 1267         "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t" 
 1268         "punpcklhw  %[ftmp11],  %[ftmp1],   %[ftmp2]                    \n\t" 
 1271         "li         %[tmp0],    0x000a0011                              \n\t" 
 1272         "mtc1       %[tmp0],    %[ftmp3]                                \n\t" 
 1273         "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t" 
 1274         "li         %[tmp0],    0xffeaffef                              \n\t" 
 1275         "mtc1       %[tmp0],    %[ftmp4]                                \n\t" 
 1276         "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t" 
 1277         "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t" 
 1278         "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t" 
 1279         "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t" 
 1280         "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t" 
 1281         "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t" 
 1282         "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t" 
 1283         "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t" 
 1284         "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t" 
 1285         "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t" 
 1286         "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t" 
 1287         "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t" 
 1288         "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t" 
 1289         "punpcklhw  %[ftmp12],  %[ftmp1],   %[ftmp2]                    \n\t" 
 1292         "li         %[tmp0],    0xfff60011                              \n\t" 
 1293         "mtc1       %[tmp0],    %[ftmp3]                                \n\t" 
 1294         "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t" 
 1295         "li         %[tmp0],    0x0016ffef                              \n\t" 
 1296         "mtc1       %[tmp0],    %[ftmp4]                                \n\t" 
 1297         "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t" 
 1298         "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t" 
 1299         "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t" 
 1300         "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t" 
 1301         "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t" 
 1302         "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t" 
 1303         "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t" 
 1304         "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t" 
 1305         "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t" 
 1306         "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t" 
 1307         "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t" 
 1308         "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t" 
 1309         "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t" 
 1310         "punpcklhw  %[ftmp13],  %[ftmp1],   %[ftmp2]                    \n\t" 
 1313         "li         %[tmp0],    0xffea0011                              \n\t" 
 1314         "mtc1       %[tmp0],    %[ftmp3]                                \n\t" 
 1315         "pshufh     %[ftmp3],   %[ftmp3],   %[ftmp15]                   \n\t" 
 1316         "li         %[tmp0],    0xfff60011                              \n\t" 
 1317         "mtc1       %[tmp0],    %[ftmp4]                                \n\t" 
 1318         "pshufh     %[ftmp4],   %[ftmp4],   %[ftmp15]                   \n\t" 
 1319         "pmaddhw    %[ftmp1],   %[ftmp5],   %[ftmp3]                    \n\t" 
 1320         "pmaddhw    %[ftmp2],   %[ftmp7],   %[ftmp4]                    \n\t" 
 1321         "paddw      %[ftmp9],   %[ftmp1],   %[ftmp2]                    \n\t" 
 1322         "pmaddhw    %[ftmp1],   %[ftmp6],   %[ftmp3]                    \n\t" 
 1323         "pmaddhw    %[ftmp2],   %[ftmp8],   %[ftmp4]                    \n\t" 
 1324         "paddw      %[ftmp10],  %[ftmp1],   %[ftmp2]                    \n\t" 
 1325         "paddw      %[ftmp9],   %[ftmp9],   %[ff_pw_64]                 \n\t" 
 1326         "paddw      %[ftmp10],  %[ftmp10],  %[ff_pw_64]                 \n\t" 
 1327         "psraw      %[ftmp9],   %[ftmp9],   %[ftmp0]                    \n\t" 
 1328         "psraw      %[ftmp10],  %[ftmp10],  %[ftmp0]                    \n\t" 
 1329         "punpcklhw  %[ftmp1],   %[ftmp9],   %[ftmp10]                   \n\t" 
 1330         "punpckhhw  %[ftmp2],   %[ftmp9],   %[ftmp10]                   \n\t" 
 1331         "punpcklhw  %[ftmp14],  %[ftmp1],   %[ftmp2]                    \n\t" 
 1333         MMI_LWC1(%[ftmp1], %[dest], 0x00)
 
 1334         PTR_ADDU    "%[tmp0],   %[dest],    %[linesize]                 \n\t" 
 1335         MMI_LWC1(%[ftmp2], %[tmp0], 0x00)
 
 1336         PTR_ADDU    "%[tmp0],   %[tmp0],    %[linesize]                 \n\t" 
 1337         MMI_LWC1(%[ftmp3], %[tmp0], 0x00)
 
 1338         PTR_ADDU    "%[tmp0],   %[tmp0],    %[linesize]                 \n\t" 
 1339         MMI_LWC1(%[ftmp4], %[tmp0], 0x00)
 
 1340         "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t" 
 1341         "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t" 
 1342         "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t" 
 1343         "punpcklbh  %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t" 
 1344         "punpcklbh  %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t" 
 1345         "paddh      %[ftmp1],   %[ftmp1],   %[ftmp11]                   \n\t" 
 1346         "paddh      %[ftmp2],   %[ftmp2],   %[ftmp12]                   \n\t" 
 1347         "paddh      %[ftmp3],   %[ftmp3],   %[ftmp13]                   \n\t" 
 1348         "paddh      %[ftmp4],   %[ftmp4],   %[ftmp14]                   \n\t" 
 1349         "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]                    \n\t" 
 1350         "packushb   %[ftmp2],   %[ftmp2],   %[ftmp0]                    \n\t" 
 1351         "packushb   %[ftmp3],   %[ftmp3],   %[ftmp0]                    \n\t" 
 1352         "packushb   %[ftmp4],   %[ftmp4],   %[ftmp0]                    \n\t" 
 1354         MMI_SWC1(%[ftmp1], %[dest], 0x00)
 
 1355         PTR_ADDU   "%[tmp0],    %[dest],    %[linesize]                 \n\t" 
 1356         MMI_SWC1(%[ftmp2], %[tmp0], 0x00)
 
 1357         PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t" 
 1358         MMI_SWC1(%[ftmp3], %[tmp0], 0x00)
 
 1359         PTR_ADDU   "%[tmp0],    %[tmp0],    %[linesize]                 \n\t" 
 1360         MMI_SWC1(%[ftmp4], %[tmp0], 0x00)
 
 1362         : [ftmp0]
"=&f"(ftmp[0]),        [ftmp1]
"=&f"(ftmp[1]),
 
 1363           [ftmp2]
"=&f"(ftmp[2]),        [ftmp3]
"=&f"(ftmp[3]),
 
 1364           [ftmp4]
"=&f"(ftmp[4]),        [ftmp5]
"=&f"(ftmp[5]),
 
 1365           [ftmp6]
"=&f"(ftmp[6]),        [ftmp7]
"=&f"(ftmp[7]),
 
 1366           [ftmp8]
"=&f"(ftmp[8]),        [ftmp9]
"=&f"(ftmp[9]),
 
 1367           [ftmp10]
"=&f"(ftmp[10]),      [ftmp11]
"=&f"(ftmp[11]),
 
 1368           [ftmp12]
"=&f"(ftmp[12]),      [ftmp13]
"=&f"(ftmp[13]),
 
 1369           [ftmp14]
"=&f"(ftmp[14]),      [ftmp15]
"=&f"(ftmp[15]),
 
 1372           [
src]
"r"(
src), [dest]
"r"(dest), [linesize]
"r"(linesize)
 
 1384     for (
i = 0; 
i < 8; 
i++) {
 
 1389         d1 = (
a - d + 3 + 
rnd) >> 3;
 
 1390         d2 = (
a - d + 
b - 
c + 4 - 
rnd) >> 3;
 
 1406     int rnd1 = 
flags & 2 ? 3 : 4;
 
 1407     int rnd2 = 7 - rnd1;
 
 1408     for (
i = 0; 
i < 8; 
i++) {
 
 1416         left[6]  = ((
a << 3) - d1 + rnd1) >> 3;
 
 1417         left[7]  = ((
b << 3) - d2 + rnd2) >> 3;
 
 1418         right[0] = ((c << 3) + d2 + rnd1) >> 3;
 
 1419         right[1] = ((d << 3) + d1 + rnd2) >> 3;
 
 1421         right += right_stride;
 
 1422         left  += left_stride;
 
 1437     for (
i = 0; 
i < 8; 
i++) {
 
 1442         d1 = (
a - d + 3 + 
rnd) >> 3;
 
 1443         d2 = (
a - d + 
b - 
c + 4 - 
rnd) >> 3;
 
 1459     int rnd1 = 4, rnd2 = 3;
 
 1460     for (
i = 0; 
i < 8; 
i++) {
 
 1468         top[48]   = ((
a << 3) - d1 + rnd1) >> 3;
 
 1469         top[56]   = ((
b << 3) - d2 + rnd2) >> 3;
 
 1470         bottom[0] = ((c << 3) + d2 + rnd1) >> 3;
 
 1471         bottom[8] = ((d << 3) + d1 + rnd2) >> 3;
 
 1492     int a0_sign = 
a0 >> 31;        
 
 1494     a0 = (
a0 ^ a0_sign) - a0_sign; 
 
 1502             int clip_sign = 
clip >> 31;
 
 1504             clip = ((
clip ^ clip_sign) - clip_sign) >> 1;
 
 1507                 int d      = 5 * (
a3 - 
a0);
 
 1508                 int d_sign = (d >> 31);
 
 1510                 d       = ((d ^ d_sign) - d_sign) >> 3;
 
 1513                 if (d_sign ^ clip_sign)
 
 1517                     d = (d ^ d_sign) - d_sign; 
 
 1543     for (
i = 0; 
i < 
len; 
i += 4) {
 
 /* "Put" variant of the OP(S, D) hook used by the TRANSFER_* macros:
  * expands to nothing, so the computed value is stored unchanged. */
 1605 #define OP_PUT(S, D) 
 /* "Avg" variant of the OP(S, D) hook: loads 64 bits from memory operand S
  * into $f16, then combines it into register D with pavgb (packed byte
  * average, per the mnemonic). S and D are stringified with '#', so they are
  * passed as bare tokens. Overwrites $f16. */
 1606 #define OP_AVG(S, D)                                                        \ 
 1607     "ldc1       $f16,   "#S"                        \n\t"                   \ 
 1608     "pavgb      "#D",   "#D",   $f16                \n\t" 
 /* Adds the halfword rounder in $f14 to $f6 and $f8, then shifts both right
  * arithmetically (psrah) by SHIFT.
  * SHIFT is spliced between string literals without '#', so it must itself
  * be a string literal naming the register operand that holds the shift
  * count (callers in this file pass "$f16" or "%[shift]").
  * $f14 is expected to have been set up beforehand, e.g. by
  * LOAD_ROUNDER_MMI. */
 1611 #define NORMALIZE_MMI(SHIFT)                                                \ 
 1612     "paddh      $f6,    $f6,    $f14                \n\t"      \ 
 1613     "paddh      $f8,    $f8,    $f14                \n\t"      \ 
 1614     "psrah      $f6,    $f6,    "SHIFT"             \n\t"                   \ 
 1615     "psrah      $f8,    $f8,    "SHIFT"             \n\t" 
 1617 #define TRANSFER_DO_PACK(OP)                                                \ 
 1618     "packushb   $f6,    $f6,    $f8                 \n\t"                   \ 
 1620     "sdc1       $f6,    0x00(%[dst])                \n\t" 
 /* Writes $f6 and $f8 to the destination without packing them to bytes.
  * OP is applied first to each (memory operand, register) pair, i.e.
  * OP(0(%[dst]), $f6) and OP(8(%[dst]), $f8); it is one of the OP_* hooks
  * (OP_PUT expands to nothing, OP_AVG averages with the existing data).
  * Then $f6 is stored as 64 bits at dst+0 and $f8 at dst+8 with sdc1. */
 1622 #define TRANSFER_DONT_PACK(OP)                                              \ 
 1623      OP(0(%[dst]), $f6)                                                     \ 
 1624      OP(8(%[dst]), $f8)                                                     \ 
 1625      "sdc1      $f6,    0x00(%[dst])                \n\t"                   \ 
 1626      "sdc1      $f8,    0x08(%[dst])                \n\t" 
 /* Unpack hook: interleaves the low bytes of `reg` with $f0 via punpcklbh,
  * writing the result back to `reg`.
  * `reg` is spliced between string literals without '#', so it must be a
  * string literal naming the register.
  * NOTE(review): this widens bytes to halfwords only if $f0 is zero; the
  * asm blocks visible in this file clear $f0 with pxor before use. */
 1629 #define DO_UNPACK(reg)                                                      \ 
 1630     "punpcklbh  "reg",  "reg",  $f0                 \n\t" 
 /* No-op counterpart of DO_UNPACK: expands to nothing, leaving `reg`
  * untouched. */
 1631 #define DONT_UNPACK(reg) 
 /* Loads the rounding constant into $f14.
  * Reads 32 bits from memory operand ROUND with lwc1, then applies
  * punpcklhw and punpcklwd with $f14 as both sources, which replicates the
  * low 16-bit halfword across the register.
  * ROUND is spliced between string literals without '#', so it must be a
  * string literal (callers in this file pass "%[rnd]", bound to an "m"
  * constraint). Overwrites $f14. */
 1634 #define LOAD_ROUNDER_MMI(ROUND)                                             \ 
 1635     "lwc1       $f14,   "ROUND"                     \n\t"                   \ 
 1636     "punpcklhw  $f14,   $f14,   $f14                \n\t"                   \ 
 1637     "punpcklwd  $f14,   $f14,   $f14                \n\t" 
 /* Emits the asm for one output line of the vertical shift2 filter.
  *
  * Parameters (all stringified with '#', so passed as bare tokens):
  *   OFF            byte offset from %[dst] at which the result is stored
  *   R0, R1, R2, R3 floating-point registers forming the sliding window
  *
  * Sequence, as written below:
  *   R1  = R1 + R2
  *   R0  = 32-bit unaligned load from src + stride1, unpacked against $f0
  *   R1  = R1 * $f6                  (multiplier held in $f6 by the caller)
  *   R3  = 32-bit unaligned load from src + stride, unpacked against $f0
  *   R1  = (R1 - R0 + $f14 - R3) >> %[shift]   (arithmetic halfword shift)
  *   store R1 as 64 bits at dst + OFF
  *   src += stride
  *
  * Uses $9 as an address scratch register and overwrites R0, R1 and R3.
  * Expects the operands %[src], %[dst], %[stride], %[stride1] and %[shift]
  * to be provided by the enclosing asm statement.
  * NOTE(review): presumably $f0 is zero and $f14 holds the rounder at this
  * point -- confirm against the enclosing asm block. */
 1640 #define SHIFT2_LINE(OFF, R0, R1, R2, R3)                                    \ 
 1641     "paddh      "#R1",      "#R1",  "#R2"           \n\t"                   \ 
 1642     PTR_ADDU    "$9,        %[src], %[stride1]      \n\t"                   \ 
 1643     MMI_ULWC1(R0, $9, 0x00)                                                 \ 
 1644     "pmullh     "#R1",      "#R1",  $f6             \n\t"                   \ 
 1645     "punpcklbh  "#R0",      "#R0",  $f0             \n\t"                   \ 
 1646     PTR_ADDU    "$9,        %[src], %[stride]       \n\t"                   \ 
 1647     MMI_ULWC1(R3, $9, 0x00)                                                 \ 
 1648     "psubh      "#R1",      "#R1",  "#R0"           \n\t"                   \ 
 1649     "punpcklbh  "#R3",      "#R3",  $f0             \n\t"                   \ 
 1650     "paddh      "#R1",      "#R1",  $f14            \n\t"                   \ 
 1651     "psubh      "#R1",      "#R1",  "#R3"           \n\t"                   \ 
 1652     "psrah      "#R1",      "#R1",  %[shift]        \n\t"                   \ 
 1653     MMI_SDC1(R1, %[dst], OFF)                                               \ 
 1654     PTR_ADDU    "%[src],    %[src], %[stride]       \n\t" 
 1667         "pxor       $f0,    $f0,    $f0             \n\t" 
 1671         MMI_ULWC1($f4, %[
src], 0x00)
 
 1672         PTR_ADDU   "%[src], %[src], %[stride]       \n\t" 
 1673         MMI_ULWC1($f6, %[
src], 0x00)
 
 1674         "punpcklbh  $f4,    $f4,    $f0             \n\t" 
 1675         "punpcklbh  $f6,    $f6,    $f0             \n\t" 
 1684         PTR_SUBU   "%[src], %[src], %[stride2]      \n\t" 
 1686         "addiu      $8,     $8,    -0x01            \n\t" 
 1693         : 
"$8", 
"$9", 
"$f0", 
"$f2", 
"$f4", 
"$f6", 
"$f8", 
"$f10",
 
 1694           "$f14", 
"$f16", 
"memory" 
 1702 #define VC1_HOR_16B_SHIFT2(OP, OPNAME)                                      \ 
 1703 static void OPNAME ## vc1_hor_16b_shift2_mmi(uint8_t *dst, mips_reg stride, \ 
 1704                                              const int16_t *src, int rnd)   \ 
 1707     DECLARE_VAR_ALL64;                                                      \ 
 1708     DECLARE_VAR_ADDRT;                                                      \ 
 1711     rnd -= (-1+9+9-1)*1024;                             \ 
 1714         LOAD_ROUNDER_MMI("%[rnd]")                                          \ 
 1716         MMI_ULDC1($f2, %[src], 0x00)                                        \ 
 1717         MMI_ULDC1($f4, %[src], 0x08)                                        \ 
 1718         MMI_ULDC1($f6, %[src], 0x02)                                        \ 
 1719         MMI_ULDC1($f8, %[src], 0x0a)                                        \ 
 1720         MMI_ULDC1($f0, %[src], 0x06)                                        \ 
 1721         "paddh      $f2,    $f2,    $f0             \n\t"                   \ 
 1722         MMI_ULDC1($f0, %[src], 0x0e)                                        \ 
 1723         "paddh      $f4,    $f4,    $f0             \n\t"                   \ 
 1724         MMI_ULDC1($f0, %[src], 0x04)                                        \ 
 1725         "paddh      $f6,    $f6,    $f0             \n\t"                   \ 
 1726         MMI_ULDC1($f0, %[src], 0x0b)                                        \ 
 1727         "paddh      $f8,    $f8,    $f0             \n\t"                   \ 
 1728         "pmullh     $f6,    $f6,    %[ff_pw_9]      \n\t"                   \ 
 1729         "pmullh     $f8,    $f8,    %[ff_pw_9]      \n\t"                   \ 
 1730         "psubh      $f6,    $f6,    $f2             \n\t"                   \ 
 1731         "psubh      $f8,    $f8,    $f4             \n\t"                   \ 
 1732         "li         $8,     0x07                    \n\t"                   \ 
 1733         "mtc1       $8,     $f16                    \n\t"                   \ 
 1734         NORMALIZE_MMI("$f16")                                               \ 
 1736         "paddh      $f6,    $f6,    %[ff_pw_128]    \n\t"                   \ 
 1737         "paddh      $f8,    $f8,    %[ff_pw_128]    \n\t"                   \ 
 1738         TRANSFER_DO_PACK(OP)                                                \ 
 1739         "addiu      %[h],   %[h],  -0x01            \n\t"                   \ 
 1740         PTR_ADDIU  "%[src], %[src], 0x18            \n\t"                   \ 
 1741         PTR_ADDU   "%[dst], %[dst], %[stride]       \n\t"                   \ 
 1742         "bnez       %[h],   1b                      \n\t"                   \ 
 1743         : RESTRICT_ASM_ALL64            RESTRICT_ASM_ADDRT                  \ 
 1745           [src]"+r"(src),               [dst]"+r"(dst)                      \ 
 1746         : [stride]"r"(stride),          [rnd]"m"(rnd),                      \ 
 1747           [ff_pw_9]"f"(ff_pw_9.f),      [ff_pw_128]"f"(ff_pw_128.f)         \ 
 1748         : "$8", "$f0", "$f2", "$f4", "$f6", "$f8", "$f14",                  \ 
 1760 #define VC1_SHIFT2(OP, OPNAME)\ 
 1761 static void OPNAME ## vc1_shift2_mmi(uint8_t *dst, const uint8_t *src,      \ 
 1762                                      mips_reg stride, int rnd,              \ 
 1765     DECLARE_VAR_LOW32;                                                      \ 
 1766     DECLARE_VAR_ADDRT;                                                      \ 
 1771         "pxor       $f0,    $f0,    $f0             \n\t"                   \ 
 1772         "li         $10,    0x08                    \n\t"                   \ 
 1773         LOAD_ROUNDER_MMI("%[rnd]")                                          \ 
 1775         MMI_ULWC1($f6, %[src], 0x00)                                        \ 
 1776         MMI_ULWC1($f8, %[src], 0x04)                                        \ 
 1777         PTR_ADDU   "$9,     %[src], %[offset]       \n\t"                   \ 
 1778         MMI_ULWC1($f2, $9, 0x00)                                            \ 
 1779         MMI_ULWC1($f4, $9, 0x04)                                            \ 
 1780         PTR_ADDU   "%[src], %[src], %[offset]       \n\t"                   \ 
 1781         "punpcklbh  $f6,    $f6,    $f0             \n\t"                   \ 
 1782         "punpcklbh  $f8,    $f8,    $f0             \n\t"                   \ 
 1783         "punpcklbh  $f2,    $f2,    $f0             \n\t"                   \ 
 1784         "punpcklbh  $f4,    $f4,    $f0             \n\t"                   \ 
 1785         "paddh      $f6,    $f6,    $f2             \n\t"                   \ 
 1786         "paddh      $f8,    $f8,    $f4             \n\t"                   \ 
 1787         PTR_ADDU   "$9,     %[src], %[offset_x2n]   \n\t"                   \ 
 1788         MMI_ULWC1($f2, $9, 0x00)                                            \ 
 1789         MMI_ULWC1($f4, $9, 0x04)                                            \ 
 1790         "pmullh     $f6,    $f6,    %[ff_pw_9]      \n\t"       \ 
 1791         "pmullh     $f8,    $f8,    %[ff_pw_9]      \n\t"       \ 
 1792         "punpcklbh  $f2,    $f2,    $f0             \n\t"                   \ 
 1793         "punpcklbh  $f4,    $f4,    $f0             \n\t"                   \ 
 1794         "psubh      $f6,    $f6,    $f2             \n\t"       \ 
 1795         "psubh      $f8,    $f8,    $f4             \n\t"       \ 
 1796         PTR_ADDU   "$9,     %[src], %[offset]       \n\t"                   \ 
 1797         MMI_ULWC1($f2, $9, 0x00)                                            \ 
 1798         MMI_ULWC1($f4, $9, 0x04)                                            \ 
 1799         "punpcklbh  $f2,    $f2,    $f0             \n\t"                   \ 
 1800         "punpcklbh  $f4,    $f4,    $f0             \n\t"                   \ 
 1801         "psubh      $f6,    $f6,    $f2             \n\t"      \ 
 1802         "psubh      $f8,    $f8,    $f4             \n\t"      \ 
 1803         "li         $8,     0x04                    \n\t"                   \ 
 1804         "mtc1       $8,     $f16                    \n\t"                   \ 
 1805         NORMALIZE_MMI("$f16")                                               \ 
 1806         "packushb   $f6,    $f6,    $f8             \n\t"                   \ 
 1808         "sdc1       $f6,    0x00(%[dst])            \n\t"                   \ 
 1809         "addiu      $10,    $10,   -0x01            \n\t"                   \ 
 1810         PTR_ADDU   "%[src], %[src], %[stride1]      \n\t"                   \ 
 1811         PTR_ADDU   "%[dst], %[dst], %[stride]       \n\t"                   \ 
 1812         "bnez       $10,    1b                      \n\t"                   \ 
 1813         : RESTRICT_ASM_LOW32            RESTRICT_ASM_ADDRT                  \ 
 1814           [src]"+r"(src),               [dst]"+r"(dst)                      \ 
 1815         : [offset]"r"(offset),          [offset_x2n]"r"(-2*offset),         \ 
 1816           [stride]"r"(stride),          [rnd]"m"(rnd),                      \ 
 1817           [stride1]"r"(stride-offset),                                      \ 
 1818           [ff_pw_9]"f"(ff_pw_9.f)                                           \ 
 1819         : "$8", "$9", "$10", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10",     \ 
 1820           "$f14", "$f16", "memory"                                          \ 
 1838 #define MSPEL_FILTER13_CORE(UNPACK, LOAD, M, A1, A2, A3, A4)                \ 
 1839     PTR_ADDU   "$9,     %[src], "#A1"           \n\t"                       \ 
 1840     LOAD($f2, $9, M*0)                                                      \ 
 1841     LOAD($f4, $9, M*4)                                                      \ 
 1844     "pmullh     $f2,    $f2,    %[ff_pw_3]      \n\t"                       \ 
 1845     "pmullh     $f4,    $f4,    %[ff_pw_3]      \n\t"                       \ 
 1846     PTR_ADDU   "$9,     %[src], "#A2"           \n\t"                       \ 
 1847     LOAD($f6, $9, M*0)                                                      \ 
 1848     LOAD($f8, $9, M*4)                                                      \ 
 1851     "pmullh     $f6,    $f6,    %[ff_pw_18]     \n\t"              \ 
 1852     "pmullh     $f8,    $f8,    %[ff_pw_18]     \n\t"              \ 
 1853     "psubh      $f6,    $f6,    $f2             \n\t"          \ 
 1854     "psubh      $f8,    $f8,    $f4             \n\t"          \ 
 1855     PTR_ADDU   "$9,     %[src], "#A4"           \n\t"                       \ 
 1856     LOAD($f2, $9, M*0)                                                      \ 
 1857     LOAD($f4, $9, M*4)                                                      \ 
 1860     "li         $8,     0x02                    \n\t"                       \ 
 1861     "mtc1       $8,     $f16                    \n\t"                       \ 
 1862     "psllh      $f2,    $f2,    $f16            \n\t"               \ 
 1863     "psllh      $f4,    $f4,    $f16            \n\t"               \ 
 1864     "psubh      $f6,    $f6,    $f2             \n\t"         \ 
 1865     "psubh      $f8,    $f8,    $f4             \n\t"         \ 
 1866     PTR_ADDU   "$9,     %[src], "#A3"           \n\t"                       \ 
 1867     LOAD($f2, $9, M*0)                                                      \ 
 1868     LOAD($f4, $9, M*4)                                                      \ 
 1871     "pmullh     $f2,    $f2,    %[ff_pw_53]     \n\t"              \ 
 1872     "pmullh     $f4,    $f4,    %[ff_pw_53]     \n\t"              \ 
 1873     "paddh      $f6,    $f6,    $f2             \n\t"       \ 
 1874     "paddh      $f8,    $f8,    $f4             \n\t"  
 1884 #define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4)                        \ 
 1886 vc1_put_ver_16b_ ## NAME ## _mmi(int16_t *dst, const uint8_t *src,          \ 
 1887                                  mips_reg src_stride,                       \ 
 1888                                  int rnd, int64_t shift)                    \ 
 1891     union mmi_intfloat64 shift_u;                                           \ 
 1892     DECLARE_VAR_LOW32;                                                      \ 
 1893     DECLARE_VAR_ADDRT;                                                      \ 
 1894     shift_u.i = shift;                                                      \ 
 1896     src -= src_stride;                                                      \ 
 1899         "pxor       $f0,    $f0,    $f0             \n\t"                   \ 
 1900         LOAD_ROUNDER_MMI("%[rnd]")                                          \ 
 1903         MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4)        \ 
 1904         NORMALIZE_MMI("%[shift]")                                           \ 
 1905         TRANSFER_DONT_PACK(OP_PUT)                                          \ 
 1907         PTR_ADDU   "$9,     %[src], "#A1"           \n\t"                   \ 
 1908         MMI_ULWC1($f2, $9, 0x08)                                            \ 
 1910         "mov.d      $f6,    $f2                     \n\t"                   \ 
 1911         "paddh      $f2,    $f2,    $f2             \n\t"                   \ 
 1912         "paddh      $f2,    $f2,    $f6             \n\t"           \ 
 1913         PTR_ADDU   "$9,     %[src], "#A2"           \n\t"                   \ 
 1914         MMI_ULWC1($f6, $9, 0x08)                                            \ 
 1916         "pmullh     $f6,    $f6,    %[ff_pw_18]     \n\t"          \ 
 1917         "psubh      $f6,    $f6,    $f2             \n\t"       \ 
 1918         PTR_ADDU   "$9,     %[src], "#A3"           \n\t"                   \ 
 1919         MMI_ULWC1($f2, $9, 0x08)                                            \ 
 1921         "pmullh     $f2,    $f2,    %[ff_pw_53]     \n\t"          \ 
 1922         "paddh      $f6,    $f6,    $f2             \n\t"    \ 
 1923         PTR_ADDU   "$9,     %[src], "#A4"           \n\t"                   \ 
 1924         MMI_ULWC1($f2, $9, 0x08)                                            \ 
 1926         "li         $8,     0x02                    \n\t"                   \ 
 1927         "mtc1       $8,     $f16                    \n\t"                   \ 
 1928         "psllh      $f2,    $f2,    $f16            \n\t"           \ 
 1929         "psubh      $f6,    $f6,    $f2             \n\t"                   \ 
 1930         "paddh      $f6,    $f6,    $f14            \n\t"                   \ 
 1931         "li         $8,     0x06                    \n\t"                   \ 
 1932         "mtc1       $8,     $f16                    \n\t"                   \ 
 1933         "psrah      $f6,    $f6,    $f16            \n\t"                   \ 
 1934         "sdc1       $f6,    0x10(%[dst])            \n\t"                   \ 
 1935         "addiu      %[h],   %[h],  -0x01            \n\t"                   \ 
 1936         PTR_ADDU   "%[src], %[src], %[stride_x1]    \n\t"                   \ 
 1937         PTR_ADDIU  "%[dst], %[dst], 0x18            \n\t"                   \ 
 1938         "bnez       %[h],   1b                      \n\t"                   \ 
 1939         : RESTRICT_ASM_LOW32            RESTRICT_ASM_ADDRT                  \ 
 1941           [src]"+r"(src),               [dst]"+r"(dst)                      \ 
 1942         : [stride_x1]"r"(src_stride),   [stride_x2]"r"(2*src_stride),       \ 
 1943           [stride_x3]"r"(3*src_stride),                                     \ 
 1944           [rnd]"m"(rnd),                [shift]"f"(shift_u.f),              \ 
 1945           [ff_pw_53]"f"(ff_pw_53.f),    [ff_pw_18]"f"(ff_pw_18.f),          \ 
 1946           [ff_pw_3]"f"(ff_pw_3.f)                                           \ 
 1947         : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8",                    \ 
 1948           "$f14", "$f16", "memory"                                          \ 
 /*
  * Macro to build the horizontal 16bits version of vc1_put_shift[13].
  *
  * Expands to a function named OPNAME##vc1_hor_16b_##NAME##_mmi taking
  * (uint8_t *dst, mips_reg stride, const int16_t *src, int rnd).
  *
  * What the visible body does, in order:
  *   - subtracts (-4+58+13-3)*256 from rnd before entering the asm
  *     (NOTE(review): presumably removes a bias carried by the 16-bit
  *     intermediate data -- confirm against the vertical pass);
  *   - clears $f0 and loads the rounder from rnd via LOAD_ROUNDER_MMI;
  *   - runs MSPEL_FILTER13_CORE with DONT_UNPACK / MMI_ULDC1, a step of 2
  *     and the coefficients A1..A4;
  *   - loads 0x07 into $f16 and hands it to NORMALIZE_MMI;
  *   - adds ff_pw_128 to $f6 and $f8, then stores through
  *     TRANSFER_DO_PACK(OP);
  *   - per iteration: h is decremented, src advances by 0x18 bytes, dst
  *     advances by stride, and the loop branches back to label 1 while
  *     h is non-zero.
  *
  * Registers $8, $9, $f0-$f8 (even), $f14, $f16 and memory are listed as
  * clobbered.
  *
  * NOTE(review): h and the "1:" loop label are referenced by the asm but
  * their definitions are not among the lines shown here -- confirm.
  */
 1959 #define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4, OP, OPNAME)            \ 
 1961 OPNAME ## vc1_hor_16b_ ## NAME ## _mmi(uint8_t *dst, mips_reg stride,       \ 
 1962                                        const int16_t *src, int rnd)         \ 
 1965     DECLARE_VAR_ALL64;                                                      \ 
 1966     DECLARE_VAR_ADDRT;                                                      \ 
 1969     rnd -= (-4+58+13-3)*256;                             \ 
 1972         "pxor       $f0,    $f0,    $f0             \n\t"                   \ 
 1973         LOAD_ROUNDER_MMI("%[rnd]")                                          \ 
 1976         MSPEL_FILTER13_CORE(DONT_UNPACK, MMI_ULDC1, 2, A1, A2, A3, A4)      \ 
 1977         "li         $8,     0x07                    \n\t"                   \ 
 1978         "mtc1       $8,     $f16                    \n\t"                   \ 
 1979         NORMALIZE_MMI("$f16")                                               \ 
 1981         "paddh      $f6,    $f6,    %[ff_pw_128]    \n\t"                   \ 
 1982         "paddh      $f8,    $f8,    %[ff_pw_128]    \n\t"                   \ 
 1983         TRANSFER_DO_PACK(OP)                                                \ 
 1984         "addiu      %[h],   %[h],  -0x01            \n\t"                   \ 
 1985         PTR_ADDU   "%[src], %[src], 0x18            \n\t"                   \ 
 1986         PTR_ADDU   "%[dst], %[dst], %[stride]       \n\t"                   \ 
 1987         "bnez       %[h],   1b                      \n\t"                   \ 
 1988         : RESTRICT_ASM_ALL64            RESTRICT_ASM_ADDRT                  \ 
 1990           [src]"+r"(src),               [dst]"+r"(dst)                      \ 
 1991         : [stride]"r"(stride),          [rnd]"m"(rnd),                      \ 
 1992           [ff_pw_53]"f"(ff_pw_53.f),    [ff_pw_18]"f"(ff_pw_18.f),          \ 
 1993           [ff_pw_3]"f"(ff_pw_3.f),      [ff_pw_128]"f"(ff_pw_128.f)         \ 
 1994         : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8",                    \ 
 1995           "$f14", "$f16", "memory"                                          \ 
 /*
  * Macro to build the 8bits, any direction, version of vc1_put_shift[13].
  *
  * Expands to a function named OPNAME##vc1_##NAME##_mmi taking
  * (uint8_t *dst, const uint8_t *src, mips_reg stride, int rnd,
  *  mips_reg offset).
  *
  * offset is the distance between filter taps; it is passed to the asm as
  * offset_x1, offset_x2 (2*offset) and offset_x3 (3*offset).  VC1_MSPEL_MC
  * in this file calls these functions with offset == stride for the
  * vertical case and offset == 1 for the horizontal case.
  *
  * What the visible body does, in order:
  *   - clears $f0 and loads the rounder from rnd via LOAD_ROUNDER_MMI;
  *   - runs MSPEL_FILTER13_CORE with DO_UNPACK / MMI_ULWC1, a step of 1
  *     and the coefficients A1..A4;
  *   - loads 0x06 into $f16 and hands it to NORMALIZE_MMI;
  *   - stores through TRANSFER_DO_PACK(OP);
  *   - per iteration: h is decremented, src and dst both advance by
  *     stride, and the loop branches back to label 1 while h is non-zero.
  *
  * Registers $8, $9, $f0-$f8 (even), $f14, $f16 and memory are listed as
  * clobbered.
  *
  * NOTE(review): h, the [rnd] operand and the "1:" loop label are used by
  * the asm but are not among the lines shown here -- confirm.
  */
 2007 #define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4, OP, OPNAME)                 \ 
 2009 OPNAME ## vc1_## NAME ## _mmi(uint8_t *dst, const uint8_t *src,             \ 
 2010                               mips_reg stride, int rnd, mips_reg offset)    \ 
 2013     DECLARE_VAR_LOW32;                                                      \ 
 2014     DECLARE_VAR_ADDRT;                                                      \ 
 2019     __asm__ volatile (                                                      \ 
 2020         "pxor       $f0,    $f0,    $f0             \n\t"                   \ 
 2021         LOAD_ROUNDER_MMI("%[rnd]")                                          \ 
 2024         MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4)        \ 
 2025         "li         $8,     0x06                    \n\t"                   \ 
 2026         "mtc1       $8,     $f16                    \n\t"                   \ 
 2027         NORMALIZE_MMI("$f16")                                               \ 
 2028         TRANSFER_DO_PACK(OP)                                                \ 
 2029         "addiu      %[h],   %[h],      -0x01        \n\t"                   \ 
 2030         PTR_ADDU   "%[src], %[src],     %[stride]   \n\t"                   \ 
 2031         PTR_ADDU   "%[dst], %[dst],     %[stride]   \n\t"                   \ 
 2032         "bnez       %[h],   1b                      \n\t"                   \ 
 2033         : RESTRICT_ASM_LOW32            RESTRICT_ASM_ADDRT                  \ 
 2035           [src]"+r"(src),               [dst]"+r"(dst)                      \ 
 2036         : [offset_x1]"r"(offset),       [offset_x2]"r"(2*offset),           \ 
 2037           [offset_x3]"r"(3*offset),     [stride]"r"(stride),                \ 
 2039           [ff_pw_53]"f"(ff_pw_53.f),    [ff_pw_18]"f"(ff_pw_18.f),          \ 
 2040           [ff_pw_3]"f"(ff_pw_3.f)                                           \ 
 2041         : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8",                    \ 
 2042           "$f14", "$f16", "memory"                                          \ 
 /*
  * Interpolate fractional pel values by applying proper vertical then
  * horizontal filter.
  *
  * Expands to two static functions:
  *   OP##vc1_mspel_mc(dst, src, stride, hmode, vmode, rnd)
  *   OP##vc1_mspel_mc_16(dst, src, stride, hmode, vmode, rnd)
  *
  * Three dispatch tables are indexed by the filter mode; entry 0 is NULL
  * and entries 1..3 select the shift1/shift2/shift3 kernels.  The vertical
  * 16-bit table always uses the vc1_put_* kernels; only the horizontal
  * 16-bit and the 8-bit tables depend on OP.
  *
  * Visible call sites:
  *   - two-pass path: shift is derived from shift_value[hmode] and
  *     shift_value[vmode]; the vertical kernel writes int16_t data into
  *     the 12*8 element scratch buffer tmp, reading from src-1 with
  *     rounder r = (1<<(shift-1)) + rnd-1; the horizontal kernel then
  *     reads from tmp+1 with rounder 64-rnd and writes dst;
  *   - single-pass vertical: 8-bit kernel selected by vmode, rounder
  *     1-rnd, tap offset stride;
  *   - single-pass horizontal: 8-bit kernel selected by hmode, rounder
  *     rnd, tap offset 1.
  * NOTE(review): the conditions choosing between these paths and the
  * declaration of r are not among the lines shown here -- confirm.
  *
  * The _16 variant calls the first function four times: at dst/src + 0
  * and + 8, then again after advancing both pointers by 8*stride.
  */
 2081 #define VC1_MSPEL_MC(OP)                                                    \ 
 2082 static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\ 
 2083                                int hmode, int vmode, int rnd)               \ 
 2085     static const vc1_mspel_mc_filter_ver_16bits vc1_put_shift_ver_16bits[] =\ 
 2086          { NULL, vc1_put_ver_16b_shift1_mmi,                                \ 
 2087                  vc1_put_ver_16b_shift2_mmi,                                \ 
 2088                  vc1_put_ver_16b_shift3_mmi };                              \ 
 2089     static const vc1_mspel_mc_filter_hor_16bits vc1_put_shift_hor_16bits[] =\ 
 2090          { NULL, OP ## vc1_hor_16b_shift1_mmi,                              \ 
 2091                  OP ## vc1_hor_16b_shift2_mmi,                              \ 
 2092                  OP ## vc1_hor_16b_shift3_mmi };                            \ 
 2093     static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] =          \ 
 2094          { NULL, OP ## vc1_shift1_mmi,                                      \ 
 2095                  OP ## vc1_shift2_mmi,                                      \ 
 2096                  OP ## vc1_shift3_mmi };                                    \ 
 2100             static const int shift_value[] = { 0, 5, 1, 5 };                \ 
 2101             int    shift = (shift_value[hmode]+shift_value[vmode])>>1;      \ 
 2103             LOCAL_ALIGNED(16, int16_t, tmp, [12*8]);                        \ 
 2105             r = (1<<(shift-1)) + rnd-1;                                     \ 
 2106             vc1_put_shift_ver_16bits[vmode](tmp, src-1, stride, r, shift);  \ 
 2108             vc1_put_shift_hor_16bits[hmode](dst, stride, tmp+1, 64-rnd);    \ 
 2112             vc1_put_shift_8bits[vmode](dst, src, stride, 1-rnd, stride);    \ 
 2118     vc1_put_shift_8bits[hmode](dst, src, stride, rnd, 1);                   \ 
 2120 static void OP ## vc1_mspel_mc_16(uint8_t *dst, const uint8_t *src,         \ 
 2121                                   int stride, int hmode, int vmode, int rnd)\ 
 2123     OP ## vc1_mspel_mc(dst + 0, src + 0, stride, hmode, vmode, rnd);        \ 
 2124     OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd);        \ 
 2125     dst += 8*stride; src += 8*stride;                                       \ 
 2126     OP ## vc1_mspel_mc(dst + 0, src + 0, stride, hmode, vmode, rnd);        \ 
 2127     OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd);        \ 
 /*
  * Macro to ease bicubic filter interpolation functions declarations.
  *
  * For a mode pair (a, b) this expands to four public entry points:
  *   ff_put_vc1_mspel_mc<a><b>_mmi     -> put_vc1_mspel_mc
  *   ff_avg_vc1_mspel_mc<a><b>_mmi     -> avg_vc1_mspel_mc
  *   ff_put_vc1_mspel_mc<a><b>_16_mmi  -> put_vc1_mspel_mc_16
  *   ff_avg_vc1_mspel_mc<a><b>_16_mmi  -> avg_vc1_mspel_mc_16
  * Each wrapper forwards dst, src, stride and rnd unchanged and passes a
  * and b in the hmode and vmode argument positions respectively.
  *
  * NOTE(review): the tail of each parameter list (stride, rnd) and the
  * braces are not among the lines shown here -- confirm.
  */
 2134 #define DECLARE_FUNCTION(a, b)                                              \ 
 2135 void ff_put_vc1_mspel_mc ## a ## b ## _mmi(uint8_t *dst,                    \ 
 2136                                            const uint8_t *src,              \ 
 2140      put_vc1_mspel_mc(dst, src, stride, a, b, rnd);                         \ 
 2142 void ff_avg_vc1_mspel_mc ## a ## b ## _mmi(uint8_t *dst,                    \ 
 2143                                            const uint8_t *src,              \ 
 2147      avg_vc1_mspel_mc(dst, src, stride, a, b, rnd);                         \ 
 2149 void ff_put_vc1_mspel_mc ## a ## b ## _16_mmi(uint8_t *dst,                 \ 
 2150                                               const uint8_t *src,           \ 
 2154      put_vc1_mspel_mc_16(dst, src, stride, a, b, rnd);                      \ 
 2156 void ff_avg_vc1_mspel_mc ## a ## b ## _16_mmi(uint8_t *dst,                 \ 
 2157                                               const uint8_t *src,           \ 
 2161      avg_vc1_mspel_mc_16(dst, src, stride, a, b, rnd);                      \ 
 /*
  * Asm fragment: weighted sum of four 8-byte inputs for one output row.
  *
  * Register contract, as used by the callers visible in this file:
  *   ftmp1..ftmp4  the four source rows/columns (bytes), loaded by caller
  *   ftmp0         cleared with pxor by the caller; used as the second
  *                 operand of every punpck*bh here
  *   ftmp9         shift count, loaded with 0x06 by the caller
  *   A, B, C, D    per-halfword weights
  *   ff_pw_28      constant added before the shift
  *   ftmp5..ftmp8  scratch, overwritten here
  *
  * Steps:
  *   1. punpckhbh/punpcklbh split each of ftmp1..ftmp4 against ftmp0:
  *      high parts go to ftmp5..ftmp8, low parts stay in ftmp1..ftmp4.
  *   2. pmullh multiplies the ftmp1/5 pair by A, ftmp2/6 by B, ftmp3/7
  *      by C and ftmp4/8 by D.
  *   3. paddh sums the four low products into ftmp1 and the four high
  *      products into ftmp5, adding ff_pw_28 to each.
  *   4. psrlh shifts both sums right by ftmp9; packushb combines ftmp1
  *      and ftmp5 into ftmp1, which holds the result.
  */
 2183 #define CHROMA_MC_8_MMI                                                     \ 
 2184         "punpckhbh  %[ftmp5],   %[ftmp1],   %[ftmp0]                \n\t"   \ 
 2185         "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]                \n\t"   \ 
 2186         "punpckhbh  %[ftmp6],   %[ftmp2],   %[ftmp0]                \n\t"   \ 
 2187         "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]                \n\t"   \ 
 2188         "punpckhbh  %[ftmp7],   %[ftmp3],   %[ftmp0]                \n\t"   \ 
 2189         "punpcklbh  %[ftmp3],   %[ftmp3],   %[ftmp0]                \n\t"   \ 
 2190         "punpckhbh  %[ftmp8],   %[ftmp4],   %[ftmp0]                \n\t"   \ 
 2191         "punpcklbh  %[ftmp4],   %[ftmp4],   %[ftmp0]                \n\t"   \ 
 2193         "pmullh     %[ftmp1],   %[ftmp1],   %[A]                    \n\t"   \ 
 2194         "pmullh     %[ftmp5],   %[ftmp5],   %[A]                    \n\t"   \ 
 2195         "pmullh     %[ftmp2],   %[ftmp2],   %[B]                    \n\t"   \ 
 2196         "pmullh     %[ftmp6],   %[ftmp6],   %[B]                    \n\t"   \ 
 2197         "pmullh     %[ftmp3],   %[ftmp3],   %[C]                    \n\t"   \ 
 2198         "pmullh     %[ftmp7],   %[ftmp7],   %[C]                    \n\t"   \ 
 2199         "pmullh     %[ftmp4],   %[ftmp4],   %[D]                    \n\t"   \ 
 2200         "pmullh     %[ftmp8],   %[ftmp8],   %[D]                    \n\t"   \ 
 2202         "paddh      %[ftmp1],   %[ftmp1],   %[ftmp2]                \n\t"   \ 
 2203         "paddh      %[ftmp3],   %[ftmp3],   %[ftmp4]                \n\t"   \ 
 2204         "paddh      %[ftmp1],   %[ftmp1],   %[ftmp3]                \n\t"   \ 
 2205         "paddh      %[ftmp1],   %[ftmp1],   %[ff_pw_28]             \n\t"   \ 
 2207         "paddh      %[ftmp5],   %[ftmp5],   %[ftmp6]                \n\t"   \ 
 2208         "paddh      %[ftmp7],   %[ftmp7],   %[ftmp8]                \n\t"   \ 
 2209         "paddh      %[ftmp5],   %[ftmp5],   %[ftmp7]                \n\t"   \ 
 2210         "paddh      %[ftmp5],   %[ftmp5],   %[ff_pw_28]             \n\t"   \ 
 2212         "psrlh      %[ftmp1],   %[ftmp1],   %[ftmp9]                \n\t"   \ 
 2213         "psrlh      %[ftmp5],   %[ftmp5],   %[ftmp9]                \n\t"   \ 
 2214         "packushb   %[ftmp1],   %[ftmp1],   %[ftmp5]                \n\t" 
 /*
  * Asm fragment: 4-pixel counterpart of CHROMA_MC_8_MMI.
  *
  * Register contract, as used by the callers visible in this file:
  *   ftmp1..ftmp4  the four source words (bytes), loaded by the caller
  *   ftmp0         cleared with pxor by the caller; used as the second
  *                 operand of punpcklbh and of the final packushb
  *   ftmp5         shift count, loaded with 0x06 by the caller
  *   A, B, C, D    per-halfword weights
  *   ff_pw_28      constant added before the shift
  *
  * Steps: punpcklbh widens the low part of each input against ftmp0,
  * pmullh applies A/B/C/D, paddh sums the four products plus ff_pw_28
  * into ftmp1, psrlh shifts right by ftmp5, and packushb packs ftmp1
  * with ftmp0, leaving the result in ftmp1.
  */
 2217 #define CHROMA_MC_4_MMI                                                     \ 
 2218         "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]                \n\t"   \ 
 2219         "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]                \n\t"   \ 
 2220         "punpcklbh  %[ftmp3],   %[ftmp3],   %[ftmp0]                \n\t"   \ 
 2221         "punpcklbh  %[ftmp4],   %[ftmp4],   %[ftmp0]                \n\t"   \ 
 2223         "pmullh     %[ftmp1],   %[ftmp1],   %[A]                    \n\t"   \ 
 2224         "pmullh     %[ftmp2],   %[ftmp2],   %[B]                    \n\t"   \ 
 2225         "pmullh     %[ftmp3],   %[ftmp3],   %[C]                    \n\t"   \ 
 2226         "pmullh     %[ftmp4],   %[ftmp4],   %[D]                    \n\t"   \ 
 2228         "paddh      %[ftmp1],   %[ftmp1],   %[ftmp2]                \n\t"   \ 
 2229         "paddh      %[ftmp3],   %[ftmp3],   %[ftmp4]                \n\t"   \ 
 2230         "paddh      %[ftmp1],   %[ftmp1],   %[ftmp3]                \n\t"   \ 
 2231         "paddh      %[ftmp1],   %[ftmp1],   %[ff_pw_28]             \n\t"   \ 
 2233         "psrlh      %[ftmp1],   %[ftmp1],   %[ftmp5]                \n\t"   \ 
 2234         "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]                \n\t" 
 2238                                       const uint8_t *
src ,
 
 2239                                       ptrdiff_t 
stride, 
int h, 
int x, 
int y)
 
 2246     A.i = (8 - x) * (8 - y);
 
 2247     B.i =     (x) * (8 - y);
 
 2248     C.i = (8 - x) *     (y);
 
 2251     av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
 
 2254         "li         %[tmp0],    0x06                                    \n\t" 
 2255         "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t" 
 2256         "mtc1       %[tmp0],    %[ftmp9]                                \n\t" 
 2257         "pshufh     %[A],       %[A],       %[ftmp0]                    \n\t" 
 2258         "pshufh     %[B],       %[B],       %[ftmp0]                    \n\t" 
 2259         "pshufh     %[C],       %[C],       %[ftmp0]                    \n\t" 
 2260         "pshufh     %[D],       %[D],       %[ftmp0]                    \n\t" 
 2263         MMI_ULDC1(%[ftmp1], %[
src], 0x00)
 
 2264         MMI_ULDC1(%[ftmp2], %[
src], 0x01)
 
 2265         PTR_ADDU   "%[src],     %[src],     %[stride]                   \n\t" 
 2266         MMI_ULDC1(%[ftmp3], %[
src], 0x00)
 
 2267         MMI_ULDC1(%[ftmp4], %[
src], 0x01)
 
 2271         MMI_SDC1(%[ftmp1], %[
dst], 0x00)
 
 2272         "addiu      %[h],       %[h],      -0x01                        \n\t" 
 2273         PTR_ADDU   "%[dst],     %[dst],     %[stride]                   \n\t" 
 2274         "bnez       %[h],       1b                                      \n\t" 
 2275         : [ftmp0]
"=&f"(ftmp[0]),        [ftmp1]
"=&f"(ftmp[1]),
 
 2276           [ftmp2]
"=&f"(ftmp[2]),        [ftmp3]
"=&f"(ftmp[3]),
 
 2277           [ftmp4]
"=&f"(ftmp[4]),        [ftmp5]
"=&f"(ftmp[5]),
 
 2278           [ftmp6]
"=&f"(ftmp[6]),        [ftmp7]
"=&f"(ftmp[7]),
 
 2279           [ftmp8]
"=&f"(ftmp[8]),        [ftmp9]
"=&f"(ftmp[9]),
 
 2282           [tmp0]
"=&r"(
tmp[0]),
 
 2286           [
A]
"f"(
A.f),                  [
B]
"f"(
B.f),
 
 2287           [
C]
"f"(
C.f),                  [
D]
"f"(
D.f),
 
 2294                                       const uint8_t *
src ,
 
 2295                                       ptrdiff_t 
stride, 
int h, 
int x, 
int y)
 
 2302     A.i = (8 - x) * (8 - y);
 
 2303     B.i =     (x) * (8 - y);
 
 2304     C.i = (8 - x) *     (y);
 
 2307     av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
 
 2310         "li         %[tmp0],    0x06                                    \n\t" 
 2311         "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t" 
 2312         "mtc1       %[tmp0],    %[ftmp5]                                \n\t" 
 2313         "pshufh     %[A],       %[A],       %[ftmp0]                    \n\t" 
 2314         "pshufh     %[B],       %[B],       %[ftmp0]                    \n\t" 
 2315         "pshufh     %[C],       %[C],       %[ftmp0]                    \n\t" 
 2316         "pshufh     %[D],       %[D],       %[ftmp0]                    \n\t" 
 2319         MMI_ULWC1(%[ftmp1], %[
src], 0x00)
 
 2320         MMI_ULWC1(%[ftmp2], %[
src], 0x01)
 
 2321         PTR_ADDU   "%[src],     %[src],     %[stride]                   \n\t" 
 2322         MMI_ULWC1(%[ftmp3], %[
src], 0x00)
 
 2323         MMI_ULWC1(%[ftmp4], %[
src], 0x01)
 
 2327         MMI_SWC1(%[ftmp1], %[
dst], 0x00)
 
 2328         "addiu      %[h],       %[h],      -0x01                        \n\t" 
 2329         PTR_ADDU   "%[dst],     %[dst],     %[stride]                   \n\t" 
 2330         "bnez       %[h],       1b                                      \n\t" 
 2331         : [ftmp0]
"=&f"(ftmp[0]),        [ftmp1]
"=&f"(ftmp[1]),
 
 2332           [ftmp2]
"=&f"(ftmp[2]),        [ftmp3]
"=&f"(ftmp[3]),
 
 2333           [ftmp4]
"=&f"(ftmp[4]),        [ftmp5]
"=&f"(ftmp[5]),
 
 2334           [tmp0]
"=&r"(
tmp[0]),
 
 2340           [
A]
"f"(
A.f),                  [
B]
"f"(
B.f),
 
 2341           [
C]
"f"(
C.f),                  [
D]
"f"(
D.f),
 
 2348                                       const uint8_t *
src ,
 
 2349                                       ptrdiff_t 
stride, 
int h, 
int x, 
int y)
 
 2356     A.i = (8 - x) * (8 - y);
 
 2357     B.i =     (x) * (8 - y);
 
 2358     C.i = (8 - x) *     (y);
 
 2361     av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
 
 2364         "li         %[tmp0],    0x06                                    \n\t" 
 2365         "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t" 
 2366         "mtc1       %[tmp0],    %[ftmp9]                                \n\t" 
 2367         "pshufh     %[A],       %[A],       %[ftmp0]                    \n\t" 
 2368         "pshufh     %[B],       %[B],       %[ftmp0]                    \n\t" 
 2369         "pshufh     %[C],       %[C],       %[ftmp0]                    \n\t" 
 2370         "pshufh     %[D],       %[D],       %[ftmp0]                    \n\t" 
 2373         MMI_ULDC1(%[ftmp1], %[
src], 0x00)
 
 2374         MMI_ULDC1(%[ftmp2], %[
src], 0x01)
 
 2375         PTR_ADDU   "%[src],     %[src],     %[stride]                   \n\t" 
 2376         MMI_ULDC1(%[ftmp3], %[
src], 0x00)
 
 2377         MMI_ULDC1(%[ftmp4], %[
src], 0x01)
 
 2381         MMI_LDC1(%[ftmp2], %[
dst], 0x00)
 
 2382         "pavgb      %[ftmp1],   %[ftmp1],   %[ftmp2]                    \n\t" 
 2384         MMI_SDC1(%[ftmp1], %[
dst], 0x00)
 
 2385         "addiu      %[h],       %[h],      -0x01                        \n\t" 
 2386         PTR_ADDU   "%[dst],     %[dst],     %[stride]                   \n\t" 
 2387         "bnez       %[h],       1b                                      \n\t" 
 2388         : [ftmp0]
"=&f"(ftmp[0]),        [ftmp1]
"=&f"(ftmp[1]),
 
 2389           [ftmp2]
"=&f"(ftmp[2]),        [ftmp3]
"=&f"(ftmp[3]),
 
 2390           [ftmp4]
"=&f"(ftmp[4]),        [ftmp5]
"=&f"(ftmp[5]),
 
 2391           [ftmp6]
"=&f"(ftmp[6]),        [ftmp7]
"=&f"(ftmp[7]),
 
 2392           [ftmp8]
"=&f"(ftmp[8]),        [ftmp9]
"=&f"(ftmp[9]),
 
 2393           [tmp0]
"=&r"(
tmp[0]),
 
 2399           [
A]
"f"(
A.f),                 [
B]
"f"(
B.f),
 
 2400           [
C]
"f"(
C.f),                 [
D]
"f"(
D.f),
 
 2407                                       const uint8_t *
src ,
 
 2408                                       ptrdiff_t 
stride, 
int h, 
int x, 
int y)
 
 2415     A.i = (8 - x) * (8 - y);
 
 2416     B.i = (x) * (8 - y);
 
 2417     C.i = (8 - x) * (y);
 
 2420     av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
 
 2423         "li         %[tmp0],    0x06                                    \n\t" 
 2424         "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]                    \n\t" 
 2425         "mtc1       %[tmp0],    %[ftmp5]                                \n\t" 
 2426         "pshufh     %[A],       %[A],       %[ftmp0]                    \n\t" 
 2427         "pshufh     %[B],       %[B],       %[ftmp0]                    \n\t" 
 2428         "pshufh     %[C],       %[C],       %[ftmp0]                    \n\t" 
 2429         "pshufh     %[D],       %[D],       %[ftmp0]                    \n\t" 
 2432         MMI_ULWC1(%[ftmp1], %[
src], 0x00)
 
 2433         MMI_ULWC1(%[ftmp2], %[
src], 0x01)
 
 2434         PTR_ADDU   "%[src],     %[src],     %[stride]                   \n\t" 
 2435         MMI_ULWC1(%[ftmp3], %[
src], 0x00)
 
 2436         MMI_ULWC1(%[ftmp4], %[
src], 0x01)
 
 2440         MMI_LWC1(%[ftmp2], %[
dst], 0x00)
 
 2441         "pavgb      %[ftmp1],   %[ftmp1],   %[ftmp2]                    \n\t" 
 2443         MMI_SWC1(%[ftmp1], %[
dst], 0x00)
 
 2444         "addiu      %[h],       %[h],      -0x01                        \n\t" 
 2445         PTR_ADDU   "%[dst],     %[dst],     %[stride]                   \n\t" 
 2446         "bnez       %[h],       1b                                      \n\t" 
 2447         : [ftmp0]
"=&f"(ftmp[0]),        [ftmp1]
"=&f"(ftmp[1]),
 
 2448           [ftmp2]
"=&f"(ftmp[2]),        [ftmp3]
"=&f"(ftmp[3]),
 
 2449           [ftmp4]
"=&f"(ftmp[4]),        [ftmp5]
"=&f"(ftmp[5]),
 
 2450           [tmp0]
"=&r"(
tmp[0]),
 
 2456           [
A]
"f"(
A.f),                  [
B]
"f"(
B.f),
 
 2457           [
C]
"f"(
C.f),                  [
D]
"f"(
D.f),
 
  
static av_always_inline int vc1_filter_line(uint8_t *src, int stride, int pq)
VC-1 in-loop deblocking filter for one line.
void ff_vc1_h_loop_filter4_mmi(uint8_t *src, ptrdiff_t stride, int pq)
void ff_vc1_v_s_overlap_mmi(int16_t *top, int16_t *bottom)
const union av_intfloat64 ff_pw_4
#define DECLARE_VAR_LOW32
void ff_vc1_inv_trans_8x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
void ff_avg_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h, int x, int y)
void ff_put_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h, int x, int y)
void ff_vc1_inv_trans_8x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
const union av_intfloat64 ff_pw_1
void ff_avg_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
#define DECLARE_FUNCTION(a, b)
Macro to ease bicubic filter interpolation functions declarations.
const union av_intfloat64 ff_pw_64
#define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4)
Macro to build the vertical 16bits version of vc1_put_shift[13].
#define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4, OP, OPNAME)
Macro to build the horizontal 16bits version of vc1_put_shift[13].
#define TRANSPOSE_4H(fr_i0, fr_i1, fr_i2, fr_i3, fr_t0, fr_t1, fr_t2, fr_t3)
Transpose 4x4 halfword packed data.
void ff_vc1_v_loop_filter8_mmi(uint8_t *src, ptrdiff_t stride, int pq)
static double a2(void *priv, double x, double y)
void(* vc1_mspel_mc_filter_ver_16bits)(int16_t *dst, const uint8_t *src, mips_reg src_stride, int rnd, int64_t shift)
1/4 shift bicubic interpolation
void ff_vc1_inv_trans_4x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
s EdgeDetect Foobar g libavfilter vf_edgedetect c libavfilter vf_foobar c edit libavfilter and add an entry for foobar following the pattern of the other filters edit libavfilter allfilters and add an entry for foobar following the pattern of the other filters configure make j< whatever > ffmpeg ffmpeg i you should get a foobar png with Lena edge detected That s your new playground is ready Some little details about what s going which in turn will define variables for the build system and the C
void ff_put_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
void ff_vc1_v_loop_filter4_mmi(uint8_t *src, ptrdiff_t stride, int pq)
void ff_vc1_inv_trans_4x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
void(* vc1_mspel_mc_filter_8bits)(uint8_t *dst, const uint8_t *src, mips_reg stride, int rnd, mips_reg offset)
void(* vc1_mspel_mc_filter_hor_16bits)(uint8_t *dst, mips_reg dst_stride, const int16_t *src, int rnd)
void ff_vc1_h_s_overlap_mmi(int16_t *left, int16_t *right, ptrdiff_t left_stride, ptrdiff_t right_stride, int flags)
const union av_intfloat64 ff_pw_32_1
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
void ff_put_vc1_mspel_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int rnd)
static double a3(void *priv, double x, double y)
void ff_vc1_inv_trans_8x8_mmi(int16_t block[64])
void ff_vc1_h_overlap_mmi(uint8_t *src, ptrdiff_t stride)
#define SHIFT2_LINE(OFF, R0, R1, R2, R3)
#define VC1_INV_TRANCS_8_TYPE2(o1, o2, r1, r2, r3, r4, c0, c1)
#define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4, OP, OPNAME)
Macro to build the 8bits, any direction, version of vc1_put_shift[13].
void ff_vc1_inv_trans_4x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
#define VC1_MSPEL_MC(OP)
Interpolate fractional pel values by applying proper vertical then horizontal filter.
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
#define DECLARE_ALIGNED(n, t, v)
static int shift(int a, int b)
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
void ff_vc1_v_overlap_mmi(uint8_t *src, ptrdiff_t stride)
const union av_intfloat64 ff_pw_32_64
const union av_intfloat64 ff_pw_32_4
#define VC1_HOR_16B_SHIFT2(OP, OPNAME)
Data is already unpacked, so some operations can directly be made from memory.
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
static double a0(void *priv, double x, double y)
#define DECLARE_VAR_ALL64
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
void ff_put_vc1_mspel_mc00_16_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int rnd)
const union av_intfloat64 ff_pw_28
#define av_assert2(cond)
assert() equivalent for checks that lie in speed-critical code.
#define i(width, name, range_min, range_max)
void ff_avg_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h, int x, int y)
void ff_vc1_h_loop_filter8_mmi(uint8_t *src, ptrdiff_t stride, int pq)
void ff_vc1_inv_trans_4x8_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
static void vc1_loop_filter(uint8_t *src, int step, int stride, int len, int pq)
VC-1 in-loop deblocking filter.
void ff_vc1_h_loop_filter16_mmi(uint8_t *src, ptrdiff_t stride, int pq)
void ff_avg_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
__asm__(".macro        parse_r var r\n\t" "\\var        = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt        \\var\n\t" ".error        \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
#define VC1_INV_TRANCS_8_TYPE1(o1, o2, r1, r2, r3, r4, c0)
#define VC1_SHIFT2(OP, OPNAME)
Purely vertical or horizontal 1/2 shift interpolation.
void ff_put_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h, int x, int y)
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled left
void ff_vc1_inv_trans_8x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
#define RESTRICT_ASM_LOW32
#define DECLARE_VAR_ADDRT
void ff_avg_vc1_mspel_mc00_16_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int rnd)
#define LOAD_ROUNDER_MMI(ROUND)
Computes the rounder (32-r or 8-r) and unpacks it to $f14.
void ff_put_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
void ff_vc1_v_loop_filter16_mmi(uint8_t *src, ptrdiff_t stride, int pq)
static const double coeff[2][5]
The exact code depends on how similar the blocks are and how related they are to the block
static double a1(void *priv, double x, double y)
#define RESTRICT_ASM_ADDRT
void ff_avg_vc1_mspel_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int rnd)
static const uint8_t shift1[6]
#define RESTRICT_ASM_ALL64
static void vc1_put_ver_16b_shift2_mmi(int16_t *dst, const uint8_t *src, mips_reg stride, int rnd, int64_t shift)
Sacrificing $f12 makes it possible to pipeline loads from src.