FFmpeg
h264pred_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized h264pred
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 #include "h264pred_mips.h"
28 #include "constants.h"
29 
31 {
32  double ftmp[2];
33  uint64_t tmp[1];
34  DECLARE_VAR_ALL64;
35 
36  __asm__ volatile (
37  "dli %[tmp0], 0x08 \n\t"
38  MMI_LDC1(%[ftmp0], %[srcA], 0x00)
39  MMI_LDC1(%[ftmp1], %[srcA], 0x08)
40 
41  "1: \n\t"
42  MMI_SDC1(%[ftmp0], %[src], 0x00)
43  MMI_SDC1(%[ftmp1], %[src], 0x08)
44  PTR_ADDU "%[src], %[src], %[stride] \n\t"
45  MMI_SDC1(%[ftmp0], %[src], 0x00)
46  MMI_SDC1(%[ftmp1], %[src], 0x08)
47 
48  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
49  PTR_ADDU "%[src], %[src], %[stride] \n\t"
50  "bnez %[tmp0], 1b \n\t"
51  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
52  [tmp0]"=&r"(tmp[0]),
53  RESTRICT_ASM_ALL64
54  [src]"+&r"(src)
55  : [stride]"r"((mips_reg)stride), [srcA]"r"((mips_reg)(src-stride))
56  : "memory"
57  );
58 }
59 
61 {
62  uint64_t tmp[3];
63  mips_reg addr[2];
64 
65  __asm__ volatile (
66  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
67  PTR_ADDU "%[addr1], %[src], $0 \n\t"
68  "dli %[tmp2], 0x08 \n\t"
69  "1: \n\t"
70  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
71  "dmul %[tmp1], %[tmp0], %[ff_pb_1] \n\t"
72  "swl %[tmp1], 0x07(%[addr1]) \n\t"
73  "swr %[tmp1], 0x00(%[addr1]) \n\t"
74  "swl %[tmp1], 0x0f(%[addr1]) \n\t"
75  "swr %[tmp1], 0x08(%[addr1]) \n\t"
76  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
77  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
78  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
79  "dmul %[tmp1], %[tmp0], %[ff_pb_1] \n\t"
80  "swl %[tmp1], 0x07(%[addr1]) \n\t"
81  "swr %[tmp1], 0x00(%[addr1]) \n\t"
82  "swl %[tmp1], 0x0f(%[addr1]) \n\t"
83  "swr %[tmp1], 0x08(%[addr1]) \n\t"
84  "daddi %[tmp2], %[tmp2], -0x01 \n\t"
85  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
86  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
87  "bnez %[tmp2], 1b \n\t"
88  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
89  [tmp2]"=&r"(tmp[2]),
90  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
91  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
92  [ff_pb_1]"r"(ff_pb_1)
93  : "memory"
94  );
95 }
96 
98 {
99  uint64_t tmp[4];
100  mips_reg addr[2];
101 
102  __asm__ volatile (
103  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
104  "dli %[tmp0], 0x08 \n\t"
105  "xor %[tmp3], %[tmp3], %[tmp3] \n\t"
106  "1: \n\t"
107  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
108  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
109  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
110  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
111  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
112  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
113  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
114  "bnez %[tmp0], 1b \n\t"
115 
116  "dli %[tmp0], 0x08 \n\t"
117  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
118  "2: \n\t"
119  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
120  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
121  PTR_ADDIU "%[addr0], %[addr0], 0x01 \n\t"
122  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
123  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
124  "daddu %[tmp3], %[tmp3], %[tmp1] \n\t"
125  PTR_ADDIU "%[addr0], %[addr0], 0x01 \n\t"
126  "bnez %[tmp0], 2b \n\t"
127 
128  "daddiu %[tmp3], %[tmp3], 0x10 \n\t"
129  "dsra %[tmp3], 0x05 \n\t"
130  "dmul %[tmp2], %[tmp3], %[ff_pb_1] \n\t"
131  PTR_ADDU "%[addr0], %[src], $0 \n\t"
132  "dli %[tmp0], 0x08 \n\t"
133  "3: \n\t"
134  "swl %[tmp2], 0x07(%[addr0]) \n\t"
135  "swr %[tmp2], 0x00(%[addr0]) \n\t"
136  "swl %[tmp2], 0x0f(%[addr0]) \n\t"
137  "swr %[tmp2], 0x08(%[addr0]) \n\t"
138  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
139  "swl %[tmp2], 0x07(%[addr0]) \n\t"
140  "swr %[tmp2], 0x00(%[addr0]) \n\t"
141  "swl %[tmp2], 0x0f(%[addr0]) \n\t"
142  "swr %[tmp2], 0x08(%[addr0]) \n\t"
143  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
144  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
145  "bnez %[tmp0], 3b \n\t"
146  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
147  [tmp2]"=&r"(tmp[2]), [tmp3]"=&r"(tmp[3]),
148  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
149  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
150  [ff_pb_1]"r"(ff_pb_1)
151  : "memory"
152  );
153 }
154 
/**
 * 8x8 luma intra prediction, DC computed from the row above the block
 * (8-bit samples, Loongson MMI inline assembly).
 *
 * The first asm statement low-pass filters the top row with
 * (left + 2*cur + right + 2) >> 2, sums the eight filtered bytes,
 * computes (sum + 4) >> 3 and multiplies the result by ff_pb_1 into dc.
 * The second asm statement writes dc, duplicated to 8 bytes, to 8 rows
 * starting at src.
 *
 * @param src          first sample of the block; the block is overwritten
 * @param has_topleft  when zero, halfword 0 of the "left neighbour" vector
 *                     is replaced from the unshifted top row (pinsrh_0)
 * @param has_topright when zero, halfword 3 of the upper "right neighbour"
 *                     vector is replaced from the unshifted top row (pinsrh_3)
 * @param stride       byte distance between two rows
 */
155 void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
156  int has_topright, ptrdiff_t stride)
157 {
158  uint32_t dc;
159  double ftmp[11];
160  mips_reg tmp[3];
161  DECLARE_VAR_ALL64;
162  DECLARE_VAR_ADDRT;
163 
 /* Stage 1: filter the top row and reduce it to a single DC value. */
164  __asm__ volatile (
165  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* srcA/src0/src1 are the top row shifted by -1/0/+1 byte (see inputs). */
166  MMI_ULDC1(%[ftmp10], %[srcA], 0x00)
167  MMI_ULDC1(%[ftmp9], %[src0], 0x00)
168  MMI_ULDC1(%[ftmp8], %[src1], 0x00)
169 
 /* Widen every byte to 16 bits by interleaving with the zero register. */
170  "punpcklbh %[ftmp7], %[ftmp10], %[ftmp0] \n\t"
171  "punpckhbh %[ftmp6], %[ftmp10], %[ftmp0] \n\t"
172  "punpcklbh %[ftmp5], %[ftmp9], %[ftmp0] \n\t"
173  "punpckhbh %[ftmp4], %[ftmp9], %[ftmp0] \n\t"
174  "punpcklbh %[ftmp3], %[ftmp8], %[ftmp0] \n\t"
175  "punpckhbh %[ftmp2], %[ftmp8], %[ftmp0] \n\t"
 /* Missing top-left neighbour: substitute from the unshifted row. */
176  "bnez %[has_topleft], 1f \n\t"
177  "pinsrh_0 %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
178 
179  "1: \n\t"
 /* Missing top-right neighbour: substitute from the unshifted row.
  * NOTE(review): ff_pred8x8l_dc_8_mmi shuffles the source with pshufh
  * before its pinsrh_3; here ftmp4 is used unshuffled -- confirm which
  * halfword of ftmp4 pinsrh_3 inserts. */
180  "bnez %[has_topright], 2f \n\t"
181  "pinsrh_3 %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
182 
183  "2: \n\t"
 /* ftmp1 = shift count 2; then (left + 2*cur + right + 2) >> 2. */
184  "dli %[tmp0], 0x02 \n\t"
185  "mtc1 %[tmp0], %[ftmp1] \n\t"
186  "pmullh %[ftmp5], %[ftmp5], %[ff_pw_2] \n\t"
187  "pmullh %[ftmp4], %[ftmp4], %[ff_pw_2] \n\t"
188  "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
189  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
190  "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
191  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
192  "paddh %[ftmp7], %[ftmp7], %[ff_pw_2] \n\t"
193  "paddh %[ftmp6], %[ftmp6], %[ff_pw_2] \n\t"
194  "psrah %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
195  "psrah %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
 /* Pack back to bytes, sum the 8 bytes, then dc = (sum + 4) >> 3. */
196  "packushb %[ftmp9], %[ftmp7], %[ftmp6] \n\t"
197  "biadd %[ftmp10], %[ftmp9] \n\t"
198  "mfc1 %[tmp1], %[ftmp10] \n\t"
199  "addiu %[tmp1], %[tmp1], 0x04 \n\t"
200  "srl %[tmp1], %[tmp1], 0x03 \n\t"
 /* Multiply by ff_pb_1 (presumably 0x01 in every byte; defined elsewhere). */
201  "mul %[dc], %[tmp1], %[ff_pb_1] \n\t"
202  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
203  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
204  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
205  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
206  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
207  [ftmp10]"=&f"(ftmp[10]),
208  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
209  RESTRICT_ASM_ALL64
210  [dc]"=r"(dc)
211  : [srcA]"r"((mips_reg)(src-stride-1)),
212  [src0]"r"((mips_reg)(src-stride)),
213  [src1]"r"((mips_reg)(src-stride+1)),
214  [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright),
215  [ff_pb_1]"r"(ff_pb_1), [ff_pw_2]"f"(ff_pw_2)
216  : "memory"
217  );
218 
 /* Stage 2: fill the block. Two loop iterations, four rows each. */
219  __asm__ volatile (
220  "dli %[tmp0], 0x02 \n\t"
 /* Duplicate the low 32 bits of dc into both words of ftmp0. */
221  "punpcklwd %[ftmp0], %[dc], %[dc] \n\t"
222 
223  "1: \n\t"
 /* Rows at src and src+stride (MMI_SDXC1 is presumably an indexed store). */
224  MMI_SDC1(%[ftmp0], %[src], 0x00)
225  MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00)
226  PTR_ADDU "%[src], %[src], %[stride] \n\t"
227  PTR_ADDU "%[src], %[src], %[stride] \n\t"
228  MMI_SDC1(%[ftmp0], %[src], 0x00)
229  MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00)
230 
231  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
232  PTR_ADDU "%[src], %[src], %[stride] \n\t"
233  PTR_ADDU "%[src], %[src], %[stride] \n\t"
234  "bnez %[tmp0], 1b \n\t"
235  : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
236  RESTRICT_ASM_ALL64
237  RESTRICT_ASM_ADDRT
238  [src]"+&r"(src)
239  : [dc]"f"(dc), [stride]"r"((mips_reg)stride)
240  : "memory"
241  );
242 }
243 
/**
 * 8x8 luma intra prediction, DC computed from both the left column and the
 * top row (8-bit samples, Loongson MMI inline assembly).
 *
 * The left column is filtered in plain C (l0..l7). The top row is filtered
 * and summed by the first asm statement (dc2). The two sums are combined as
 * ((dc1 + dc2 + 8) >> 4) and replicated to four bytes, and the second asm
 * statement writes that value, duplicated to 8 bytes, to 8 rows.
 *
 * @param src          first sample of the block; the block is overwritten
 * @param has_topleft  when zero, the top-left neighbour is replaced by
 *                     src[-1] for l0 and via pinsrh_0 for the top row
 * @param has_topright when zero, the top-right neighbour is replaced via
 *                     pshufh + pinsrh_3 from the unshifted top row
 * @param stride       byte distance between two rows
 */
244 void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright,
245  ptrdiff_t stride)
246 {
247  uint32_t dc, dc1, dc2;
248  double ftmp[14];
249  mips_reg tmp[1];
250 
 /* Filtered left column: (above + 2*cur + below + 2) >> 2. The last row
  * uses src[-1+7*stride] for both "cur" and "below". */
251  const int l0 = ((has_topleft ? src[-1+-1*stride] : src[-1+0*stride]) + 2*src[-1+0*stride] + src[-1+1*stride] + 2) >> 2;
252  const int l1 = (src[-1+0*stride] + 2*src[-1+1*stride] + src[-1+2*stride] + 2) >> 2;
253  const int l2 = (src[-1+1*stride] + 2*src[-1+2*stride] + src[-1+3*stride] + 2) >> 2;
254  const int l3 = (src[-1+2*stride] + 2*src[-1+3*stride] + src[-1+4*stride] + 2) >> 2;
255  const int l4 = (src[-1+3*stride] + 2*src[-1+4*stride] + src[-1+5*stride] + 2) >> 2;
256  const int l5 = (src[-1+4*stride] + 2*src[-1+5*stride] + src[-1+6*stride] + 2) >> 2;
257  const int l6 = (src[-1+5*stride] + 2*src[-1+6*stride] + src[-1+7*stride] + 2) >> 2;
258  const int l7 = (src[-1+6*stride] + 2*src[-1+7*stride] + src[-1+7*stride] + 2) >> 2;
259 
260  DECLARE_VAR_ALL64;
261  DECLARE_VAR_ADDRT;
262 
 /* Stage 1: filter the top row and sum the 8 filtered bytes into dc2. */
263  __asm__ volatile (
 /* srcA/src0/src1 are the top row shifted by -1/0/+1 byte (see inputs). */
264  MMI_ULDC1(%[ftmp4], %[srcA], 0x00)
265  MMI_ULDC1(%[ftmp5], %[src0], 0x00)
266  MMI_ULDC1(%[ftmp6], %[src1], 0x00)
267  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* ftmp1 = 3 is the pshufh selector used for the halfword swaps below. */
268  "dli %[tmp0], 0x03 \n\t"
269  "punpcklbh %[ftmp7], %[ftmp4], %[ftmp0] \n\t"
270  "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t"
271  "mtc1 %[tmp0], %[ftmp1] \n\t"
272  "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
273  "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
274  "punpcklbh %[ftmp11], %[ftmp6], %[ftmp0] \n\t"
275  "punpckhbh %[ftmp12], %[ftmp6], %[ftmp0] \n\t"
 /* Exchange halfword 3 between ftmp8 and ftmp12; both are later added
  * into the same sum, so the per-lane total is unchanged by the swap. */
276  "pshufh %[ftmp3], %[ftmp8], %[ftmp1] \n\t"
277  "pshufh %[ftmp13], %[ftmp12], %[ftmp1] \n\t"
278  "pinsrh_3 %[ftmp8], %[ftmp8], %[ftmp13] \n\t"
279  "pinsrh_3 %[ftmp12], %[ftmp12], %[ftmp3] \n\t"
 /* Missing top-left neighbour: substitute from the unshifted row. */
280  "bnez %[has_topleft], 1f \n\t"
281  "pinsrh_0 %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
282 
283  "1: \n\t"
 /* Missing top-right neighbour: substitute from the unshifted row. */
284  "bnez %[has_topright], 2f \n\t"
285  "pshufh %[ftmp13], %[ftmp10], %[ftmp1] \n\t"
286  "pinsrh_3 %[ftmp8], %[ftmp8], %[ftmp13] \n\t"
287 
288  "2: \n\t"
 /* ftmp1 = shift count 2, ftmp2 = 2 in every halfword;
  * then (left + 2*cur + right + 2) >> 2. */
289  "dli %[tmp0], 0x02 \n\t"
290  "mtc1 %[tmp0], %[ftmp1] \n\t"
291  "pshufh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
292  "pmullh %[ftmp9], %[ftmp9], %[ftmp2] \n\t"
293  "pmullh %[ftmp10], %[ftmp10], %[ftmp2] \n\t"
294  "paddh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
295  "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
296  "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
297  "paddh %[ftmp8], %[ftmp8], %[ftmp12] \n\t"
298  "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
299  "paddh %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
300  "psrah %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
301  "psrah %[ftmp8], %[ftmp8], %[ftmp1] \n\t"
 /* Pack back to bytes and sum the 8 bytes into dc2. */
302  "packushb %[ftmp5], %[ftmp7], %[ftmp8] \n\t"
303  "biadd %[ftmp4], %[ftmp5] \n\t"
304  "mfc1 %[dc2], %[ftmp4] \n\t"
305  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
306  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
307  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
308  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
309  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
310  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
311  [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
312  [tmp0]"=&r"(tmp[0]),
313  RESTRICT_ASM_ALL64
314  [dc2]"=r"(dc2)
315  : [srcA]"r"((mips_reg)(src-stride-1)),
316  [src0]"r"((mips_reg)(src-stride)),
317  [src1]"r"((mips_reg)(src-stride+1)),
318  [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright)
319  : "memory"
320  );
321 
 /* Average of the 16 filtered neighbours, replicated into four bytes. */
322  dc1 = l0+l1+l2+l3+l4+l5+l6+l7;
323  dc = ((dc1+dc2+8)>>4)*0x01010101U;
324 
 /* Stage 2: fill the block. Two loop iterations, four rows each. */
325  __asm__ volatile (
326  "dli %[tmp0], 0x02 \n\t"
 /* Duplicate the low 32 bits of dc into both words of ftmp0. */
327  "punpcklwd %[ftmp0], %[dc], %[dc] \n\t"
328 
329  "1: \n\t"
330  MMI_SDC1(%[ftmp0], %[src], 0x00)
331  MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00)
332  PTR_ADDU "%[src], %[src], %[stride] \n\t"
333  PTR_ADDU "%[src], %[src], %[stride] \n\t"
334  MMI_SDC1(%[ftmp0], %[src], 0x00)
335  MMI_SDXC1(%[ftmp0], %[src], %[stride], 0x00)
336 
337  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
338  PTR_ADDU "%[src], %[src], %[stride] \n\t"
339  PTR_ADDU "%[src], %[src], %[stride] \n\t"
340  "bnez %[tmp0], 1b \n\t"
341  : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
342  RESTRICT_ASM_ALL64
343  RESTRICT_ASM_ADDRT
344  [src]"+&r"(src)
345  : [dc]"f"(dc), [stride]"r"((mips_reg)stride)
346  : "memory"
347  );
348 }
349 
350 void ff_pred8x8l_vertical_8_mmi(uint8_t *src, int has_topleft,
351  int has_topright, ptrdiff_t stride)
352 {
353  double ftmp[12];
354  mips_reg tmp[1];
355  DECLARE_VAR_ALL64;
356 
357  __asm__ volatile (
358  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
359  MMI_LDC1(%[ftmp3], %[srcA], 0x00)
360  MMI_LDC1(%[ftmp4], %[src0], 0x00)
361  MMI_LDC1(%[ftmp5], %[src1], 0x00)
362  "punpcklbh %[ftmp6], %[ftmp3], %[ftmp0] \n\t"
363  "punpckhbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t"
364  "punpcklbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t"
365  "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
366  "punpcklbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
367  "punpckhbh %[ftmp11], %[ftmp5], %[ftmp0] \n\t"
368  "bnez %[has_topleft], 1f \n\t"
369  "pinsrh_0 %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
370 
371  "1: \n\t"
372  "bnez %[has_topright], 2f \n\t"
373  "pinsrh_3 %[ftmp11], %[ftmp11], %[ftmp9] \n\t"
374 
375  "2: \n\t"
376  "dli %[tmp0], 0x02 \n\t"
377  "mtc1 %[tmp0], %[ftmp1] \n\t"
378  "pshufh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
379  "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
380  "pmullh %[ftmp9], %[ftmp9], %[ftmp2] \n\t"
381  "paddh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
382  "paddh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
383  "paddh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
384  "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
385  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
386  "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
387  "psrah %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
388  "psrah %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
389  "packushb %[ftmp4], %[ftmp6], %[ftmp7] \n\t"
390  MMI_SDC1(%[ftmp4], %[src], 0x00)
391  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
392  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
393  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
394  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
395  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
396  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
397  [tmp0]"=&r"(tmp[0]),
398  RESTRICT_ASM_ALL64
399  [src]"=r"(src)
400  : [srcA]"r"((mips_reg)(src-stride-1)),
401  [src0]"r"((mips_reg)(src-stride)),
402  [src1]"r"((mips_reg)(src-stride+1)),
403  [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright)
404  : "memory"
405  );
406 
407  __asm__ volatile (
408  "dli %[tmp0], 0x02 \n\t"
409 
410  "1: \n\t"
411  MMI_SDC1(%[ftmp0], %[src], 0x00)
412  PTR_ADDU "%[src], %[src], %[stride] \n\t"
413  MMI_SDC1(%[ftmp0], %[src], 0x00)
414  PTR_ADDU "%[src], %[src], %[stride] \n\t"
415  MMI_SDC1(%[ftmp0], %[src], 0x00)
416  PTR_ADDU "%[src], %[src], %[stride] \n\t"
417  MMI_SDC1(%[ftmp0], %[src], 0x00)
418 
419  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
420  PTR_ADDU "%[src], %[src], %[stride] \n\t"
421  "bnez %[tmp0], 1b \n\t"
422  : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
423  RESTRICT_ASM_ALL64
424  [src]"+&r"(src)
425  : [stride]"r"((mips_reg)stride)
426  : "memory"
427  );
428 }
429 
/**
 * 4x4 intra prediction, DC mode (8-bit samples, Loongson MMI inline
 * assembly).
 *
 * The DC value is the rounded average of the four samples above the block
 * and the four samples to its left: (sum + 4) >> 3. It is multiplied by
 * ff_pb_1 and one 32-bit word of the product is stored on each of 4 rows.
 *
 * @param src      first sample of the block; the block is overwritten
 * @param topright not referenced by this function
 * @param stride   byte distance between two rows
 */
430 void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright,
431  ptrdiff_t stride)
432 {
 /* Four top neighbours plus four left neighbours, rounded average. */
433  const int dc = (src[-stride] + src[1-stride] + src[2-stride]
434  + src[3-stride] + src[-1+0*stride] + src[-1+1*stride]
435  + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
436  uint64_t tmp[2];
437  mips_reg addr[1];
438  DECLARE_VAR_ADDRT;
439 
440  __asm__ volatile (
 /* tmp1 = dc * ff_pb_1 (ff_pb_1 is presumably 0x01 in every byte). */
441  PTR_ADDU "%[tmp0], %[dc], $0 \n\t"
442  "dmul %[tmp1], %[tmp0], %[ff_pb_1] \n\t"
 /* addr0 is the running row offset: 0, stride, 2*stride, 3*stride.
  * MMI_SWX is presumably a word store to src + addr0 (defined elsewhere). */
443  "xor %[addr0], %[addr0], %[addr0] \n\t"
444  MMI_SWX(%[tmp1], %[src], %[addr0], 0x00)
445  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
446  MMI_SWX(%[tmp1], %[src], %[addr0], 0x00)
447  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
448  MMI_SWX(%[tmp1], %[src], %[addr0], 0x00)
449  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
450  MMI_SWX(%[tmp1], %[src], %[addr0], 0x00)
451  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
452  RESTRICT_ASM_ADDRT
453  [addr0]"=&r"(addr[0])
454  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
455  [dc]"r"(dc), [ff_pb_1]"r"(ff_pb_1)
456  : "memory"
457  );
458 }
459 
461 {
462  uint64_t tmp[2];
463  mips_reg addr[2];
464 
465  __asm__ volatile (
466  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
467  PTR_ADDU "%[addr1], %[src], $0 \n\t"
468  "ldl %[tmp0], 0x07(%[addr0]) \n\t"
469  "ldr %[tmp0], 0x00(%[addr0]) \n\t"
470  "dli %[tmp1], 0x04 \n\t"
471  "1: \n\t"
472  "sdl %[tmp0], 0x07(%[addr1]) \n\t"
473  "sdr %[tmp0], 0x00(%[addr1]) \n\t"
474  PTR_ADDU "%[addr1], %[stride] \n\t"
475  "sdl %[tmp0], 0x07(%[addr1]) \n\t"
476  "sdr %[tmp0], 0x00(%[addr1]) \n\t"
477  "daddi %[tmp1], -0x01 \n\t"
478  PTR_ADDU "%[addr1], %[stride] \n\t"
479  "bnez %[tmp1], 1b \n\t"
480  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
481  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
482  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride)
483  : "memory"
484  );
485 }
486 
488 {
489  uint64_t tmp[3];
490  mips_reg addr[2];
491 
492  __asm__ volatile (
493  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
494  PTR_ADDU "%[addr1], %[src], $0 \n\t"
495  "dli %[tmp0], 0x04 \n\t"
496  "1: \n\t"
497  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
498  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
499  "swl %[tmp2], 0x07(%[addr1]) \n\t"
500  "swr %[tmp2], 0x00(%[addr1]) \n\t"
501  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
502  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
503  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
504  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
505  "swl %[tmp2], 0x07(%[addr1]) \n\t"
506  "swr %[tmp2], 0x00(%[addr1]) \n\t"
507  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
508  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
509  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
510  "bnez %[tmp0], 1b \n\t"
511  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
512  [tmp2]"=&r"(tmp[2]),
513  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
514  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
515  [ff_pb_1]"r"(ff_pb_1)
516  : "memory"
517  );
518 }
519 
521 {
522  double ftmp[4];
523  uint64_t tmp[1];
524  mips_reg addr[1];
525  DECLARE_VAR_ALL64;
526 
527  __asm__ volatile (
528  "dli %[tmp0], 0x02 \n\t"
529  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
530  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
531  MMI_LDC1(%[ftmp1], %[addr0], 0x00)
532  "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
533  "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
534  "biadd %[ftmp2], %[ftmp2] \n\t"
535  "biadd %[ftmp3], %[ftmp3] \n\t"
536  "mtc1 %[tmp0], %[ftmp1] \n\t"
537  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
538  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
539  "pshufh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
540  "paddush %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
541  "paddush %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
542  "mtc1 %[tmp0], %[ftmp1] \n\t"
543  "psrlh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
544  "psrlh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
545  "packushb %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
546  MMI_SDC1(%[ftmp1], %[src], 0x00)
547  PTR_ADDU "%[src], %[src], %[stride] \n\t"
548  MMI_SDC1(%[ftmp1], %[src], 0x00)
549  PTR_ADDU "%[src], %[src], %[stride] \n\t"
550  MMI_SDC1(%[ftmp1], %[src], 0x00)
551  PTR_ADDU "%[src], %[src], %[stride] \n\t"
552  MMI_SDC1(%[ftmp1], %[src], 0x00)
553  PTR_ADDU "%[src], %[src], %[stride] \n\t"
554  MMI_SDC1(%[ftmp1], %[src], 0x00)
555  PTR_ADDU "%[src], %[src], %[stride] \n\t"
556  MMI_SDC1(%[ftmp1], %[src], 0x00)
557  PTR_ADDU "%[src], %[src], %[stride] \n\t"
558  MMI_SDC1(%[ftmp1], %[src], 0x00)
559  PTR_ADDU "%[src], %[src], %[stride] \n\t"
560  MMI_SDC1(%[ftmp1], %[src], 0x00)
561  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
562  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
563  [tmp0]"=&r"(tmp[0]),
564  RESTRICT_ASM_ALL64
565  [addr0]"=&r"(addr[0]),
566  [src]"+&r"(src)
567  : [stride]"r"((mips_reg)stride)
568  : "memory"
569  );
570 }
571 
573 {
574  double ftmp[5];
575  mips_reg addr[7];
576 
577  __asm__ volatile (
578  "negu %[addr0], %[stride] \n\t"
579  PTR_ADDU "%[addr0], %[addr0], %[src] \n\t"
580  PTR_ADDIU "%[addr1], %[addr0], 0x04 \n\t"
581  "lbu %[addr2], 0x00(%[addr0]) \n\t"
582  PTR_ADDU "%[addr3], $0, %[addr2] \n\t"
583  PTR_ADDIU "%[addr0], 0x01 \n\t"
584  "lbu %[addr2], 0x00(%[addr1]) \n\t"
585  PTR_ADDU "%[addr4], $0, %[addr2] \n\t"
586  PTR_ADDIU "%[addr1], 0x01 \n\t"
587  "lbu %[addr2], 0x00(%[addr0]) \n\t"
588  PTR_ADDU "%[addr3], %[addr3], %[addr2] \n\t"
589  PTR_ADDIU "%[addr0], 0x01 \n\t"
590  "lbu %[addr2], 0x00(%[addr1]) \n\t"
591  PTR_ADDU "%[addr4], %[addr4], %[addr2] \n\t"
592  PTR_ADDIU "%[addr1], 0x01 \n\t"
593  "lbu %[addr2], 0x00(%[addr0]) \n\t"
594  PTR_ADDU "%[addr3], %[addr3], %[addr2] \n\t"
595  PTR_ADDIU "%[addr0], 0x01 \n\t"
596  "lbu %[addr2], 0x00(%[addr1]) \n\t"
597  PTR_ADDU "%[addr4], %[addr4], %[addr2] \n\t"
598  PTR_ADDIU "%[addr1], 0x01 \n\t"
599  "lbu %[addr2], 0x00(%[addr0]) \n\t"
600  PTR_ADDU "%[addr3], %[addr3], %[addr2] \n\t"
601  PTR_ADDIU "%[addr0], 0x01 \n\t"
602  "lbu %[addr2], 0x00(%[addr1]) \n\t"
603  PTR_ADDU "%[addr4], %[addr4], %[addr2] \n\t"
604  PTR_ADDIU "%[addr1], 0x01 \n\t"
605  "dli %[addr2], -0x01 \n\t"
606  PTR_ADDU "%[addr2], %[addr2], %[src] \n\t"
607  "lbu %[addr1], 0x00(%[addr2]) \n\t"
608  PTR_ADDU "%[addr5], $0, %[addr1] \n\t"
609  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
610  "lbu %[addr1], 0x00(%[addr2]) \n\t"
611  PTR_ADDU "%[addr5], %[addr5], %[addr1] \n\t"
612  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
613  "lbu %[addr1], 0x00(%[addr2]) \n\t"
614  PTR_ADDU "%[addr5], %[addr5], %[addr1] \n\t"
615  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
616  "lbu %[addr1], 0x00(%[addr2]) \n\t"
617  PTR_ADDU "%[addr5], %[addr5], %[addr1] \n\t"
618  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
619  "lbu %[addr1], 0x00(%[addr2]) \n\t"
620  PTR_ADDU "%[addr6], $0, %[addr1] \n\t"
621  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
622  "lbu %[addr1], 0x00(%[addr2]) \n\t"
623  PTR_ADDU "%[addr6], %[addr6], %[addr1] \n\t"
624  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
625  "lbu %[addr1], 0x00(%[addr2]) \n\t"
626  PTR_ADDU "%[addr6], %[addr6], %[addr1] \n\t"
627  PTR_ADDU "%[addr2], %[addr2], %[stride] \n\t"
628  "lbu %[addr1], 0x00(%[addr2]) \n\t"
629  PTR_ADDU "%[addr6], %[addr6], %[addr1] \n\t"
630  PTR_ADDU "%[addr3], %[addr3], %[addr5] \n\t"
631  PTR_ADDIU "%[addr3], %[addr3], 0x04 \n\t"
632  PTR_ADDIU "%[addr4], %[addr4], 0x02 \n\t"
633  PTR_ADDIU "%[addr1], %[addr6], 0x02 \n\t"
634  PTR_ADDU "%[addr2], %[addr4], %[addr1] \n\t"
635  PTR_SRL "%[addr3], 0x03 \n\t"
636  PTR_SRL "%[addr4], 0x02 \n\t"
637  PTR_SRL "%[addr1], 0x02 \n\t"
638  PTR_SRL "%[addr2], 0x03 \n\t"
639  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
640  "dmtc1 %[addr3], %[ftmp1] \n\t"
641  "pshufh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
642  "dmtc1 %[addr4], %[ftmp2] \n\t"
643  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
644  "dmtc1 %[addr1], %[ftmp3] \n\t"
645  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
646  "dmtc1 %[addr2], %[ftmp4] \n\t"
647  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
648  "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
649  "packushb %[ftmp2], %[ftmp3], %[ftmp4] \n\t"
650  PTR_ADDU "%[addr0], $0, %[src] \n\t"
651  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
652  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
653  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
654  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
655  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
656  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
657  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
658  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
659  MMI_SDC1(%[ftmp2], %[addr0], 0x00)
660  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
661  MMI_SDC1(%[ftmp2], %[addr0], 0x00)
662  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
663  MMI_SDC1(%[ftmp2], %[addr0], 0x00)
664  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
665  MMI_SDC1(%[ftmp2], %[addr0], 0x00)
666  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
667  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
668  [ftmp4]"=&f"(ftmp[4]),
669  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
670  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
671  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
672  [addr6]"=&r"(addr[6])
673  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride)
674  : "memory"
675  );
676 }
677 
679 {
680  double ftmp[1];
681  uint64_t tmp[1];
682  DECLARE_VAR_ALL64;
683 
684  __asm__ volatile (
685  MMI_LDC1(%[ftmp0], %[srcA], 0x00)
686  "dli %[tmp0], 0x04 \n\t"
687 
688  "1: \n\t"
689  MMI_SDC1(%[ftmp0], %[src], 0x00)
690  PTR_ADDU "%[src], %[src], %[stride] \n\t"
691  MMI_SDC1(%[ftmp0], %[src], 0x00)
692  PTR_ADDU "%[src], %[src], %[stride] \n\t"
693  MMI_SDC1(%[ftmp0], %[src], 0x00)
694  PTR_ADDU "%[src], %[src], %[stride] \n\t"
695  MMI_SDC1(%[ftmp0], %[src], 0x00)
696 
697  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
698  PTR_ADDU "%[src], %[src], %[stride] \n\t"
699  "bnez %[tmp0], 1b \n\t"
700  : [ftmp0]"=&f"(ftmp[0]),
701  [tmp0]"=&r"(tmp[0]),
702  RESTRICT_ASM_ALL64
703  [src]"+&r"(src)
704  : [stride]"r"((mips_reg)stride), [srcA]"r"((mips_reg)(src-stride))
705  : "memory"
706  );
707 }
708 
710 {
711  uint64_t tmp[3];
712  mips_reg addr[2];
713 
714  __asm__ volatile (
715  PTR_ADDI "%[addr0], %[src], -0x01 \n\t"
716  PTR_ADDU "%[addr1], %[src], $0 \n\t"
717  "dli %[tmp0], 0x08 \n\t"
718  "1: \n\t"
719  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
720  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
721  "swl %[tmp2], 0x07(%[addr1]) \n\t"
722  "swr %[tmp2], 0x00(%[addr1]) \n\t"
723  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
724  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
725  "lbu %[tmp1], 0x00(%[addr0]) \n\t"
726  "dmul %[tmp2], %[tmp1], %[ff_pb_1] \n\t"
727  "swl %[tmp2], 0x07(%[addr1]) \n\t"
728  "swr %[tmp2], 0x00(%[addr1]) \n\t"
729  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
730  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
731  PTR_ADDU "%[addr1], %[addr1], %[stride] \n\t"
732  "bnez %[tmp0], 1b \n\t"
733  : [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
734  [tmp2]"=&r"(tmp[2]),
735  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
736  : [src]"r"((mips_reg)src), [stride]"r"((mips_reg)stride),
737  [ff_pb_1]"r"(ff_pb_1)
738  : "memory"
739  );
740 }
741 
/**
 * 16x16 intra "plane" prediction shared by the H.264, SVQ3 and RV40
 * variants (8-bit samples, Loongson MMI inline assembly).
 *
 * Outline of the single asm statement:
 *  1. weighted sum over the row above the block            -> H
 *  2. weighted sum over the column left of the block       -> V
 *     and tmp5 = (top[15] + left[15] + 1) << 4
 *  3. scale H and V according to svq3 / rv40 / default
 *  4. tmp5 -= 7 * (H + V); build per-column ramps from H
 *  5. for 16 rows: out = saturate_u8((ramp + tmp5) >> 5), tmp5 += V
 *
 * @param src    first sample of the block; the block is overwritten
 * @param stride byte distance between two rows
 * @param svq3   non-zero selects the SVQ3 scaling (also swaps H and V)
 * @param rv40   non-zero selects the RV40 scaling (checked when svq3 is zero)
 *
 * NOTE(review): in this extract the input-operand list jumps from source
 * line 963 to 968, so the operands for ff_pw_m8tom5, ff_pw_m4tom1,
 * ff_pw_1to4, ff_pw_5to8, ff_pw_0to3, ff_pw_4to7, ff_pw_8tob and ff_pw_ctof
 * referenced by the asm template are not visible here -- restore them from
 * the upstream file before building.
 */
742 static inline void pred16x16_plane_compat_mmi(uint8_t *src, int stride,
743  const int svq3, const int rv40)
744 {
745  double ftmp[11];
746  uint64_t tmp[6];
747  mips_reg addr[1];
748  DECLARE_VAR_ALL64;
749 
750  __asm__ volatile(
 /* --- 1. horizontal gradient from the row above (addr0 = src - stride).
  * Loads bytes [-1..6] and [8..15] of that row, splits each load into two
  * groups of four bytes, widens them to halfwords and multiplies by the
  * four weight vectors (names suggest -8..-5, -4..-1, 1..4, 5..8). */
751  PTR_SUBU "%[addr0], %[src], %[stride] \n\t"
752  "dli %[tmp0], 0x20 \n\t"
753  "dmtc1 %[tmp0], %[ftmp4] \n\t"
754  MMI_ULDC1(%[ftmp0], %[addr0], -0x01)
755  MMI_ULDC1(%[ftmp2], %[addr0], 0x08)
756  "dsrl %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
757  "dsrl %[ftmp3], %[ftmp2], %[ftmp4] \n\t"
758  "xor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
759  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
760  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
761  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
762  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
763  "pmullh %[ftmp0], %[ftmp0], %[ff_pw_m8tom5] \n\t"
764  "pmullh %[ftmp1], %[ftmp1], %[ff_pw_m4tom1] \n\t"
765  "pmullh %[ftmp2], %[ftmp2], %[ff_pw_1to4] \n\t"
766  "pmullh %[ftmp3], %[ftmp3], %[ff_pw_5to8] \n\t"
767  "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
768  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
769  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
 /* Two shuffle+add steps fold the four halfword lanes; the low halfword
  * of ftmp5 then holds the total (H). */
770  "dli %[tmp0], 0x0e \n\t"
771  "dmtc1 %[tmp0], %[ftmp4] \n\t"
772  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
773  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
774  "dli %[tmp0], 0x01 \n\t"
775  "dmtc1 %[tmp0], %[ftmp4] \n\t"
776  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
777  "paddsh %[ftmp5], %[ftmp0], %[ftmp1] \n\t"
778 
 /* --- 2. vertical gradient from the column left of the block.
  * addr0 starts at src - 1 - stride and walks down one row per PTR_ADDU.
  * Each group of four bytes is packed as four halfwords into one FP
  * register. tmp5 first receives the byte at offset 0x10 of the top row. */
779  PTR_ADDIU "%[addr0], %[src], -0x01 \n\t"
780  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
781  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
782  "lbu %[tmp5], 0x10(%[addr0]) \n\t"
783  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
784  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
785  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
786  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
787  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
788  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
789  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
790  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
791  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
792  "or %[tmp4], %[tmp4], %[tmp0] \n\t"
793  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
794  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
795  "dmtc1 %[tmp2], %[ftmp0] \n\t"
796 
 /* Second group of four left-column samples. */
797  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
798  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
799  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
800  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
801  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
802  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
803  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
804  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
805  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
806  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
807  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
808  "or %[tmp4], %[tmp4], %[tmp0] \n\t"
809  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
810  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
811  "dmtc1 %[tmp2], %[ftmp1] \n\t"
812 
 /* Two row advances in a row: one left-column sample is skipped here
  * (the top row likewise has no load covering offset 7). */
813  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
814  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
815  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
816  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
817  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
818  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
819  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
820  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
821  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
822  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
823  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
824  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
825  "or %[tmp4], %[tmp4], %[tmp0] \n\t"
826  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
827  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
828  "dmtc1 %[tmp2], %[ftmp2] \n\t"
829 
 /* Last group of four left-column samples. */
830  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
831  "lbu %[tmp2], 0x00(%[addr0]) \n\t"
832  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
833  "lbu %[tmp3], 0x00(%[addr0]) \n\t"
834  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
835  "lbu %[tmp4], 0x00(%[addr0]) \n\t"
836  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
837  "lbu %[tmp0], 0x00(%[addr0]) \n\t"
 /* tmp5 = (top-row byte at 0x10 + last left-column byte + 1) << 4. */
838  "daddu %[tmp5], %[tmp5], %[tmp0] \n\t"
839  "daddiu %[tmp5], %[tmp5], 0x01 \n\t"
840  "dsll %[tmp5], %[tmp5], 0x04 \n\t"
841 
842  "dsll %[tmp3], %[tmp3], 0x10 \n\t"
843  "dsll %[tmp4], %[tmp4], 0x20 \n\t"
844  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
845  "or %[tmp4], %[tmp4], %[tmp0] \n\t"
846  "or %[tmp2], %[tmp2], %[tmp3] \n\t"
847  "or %[tmp2], %[tmp2], %[tmp4] \n\t"
848  "dmtc1 %[tmp2], %[ftmp3] \n\t"
849 
 /* Same weights and lane folding as for the top row; total (V) ends up
  * in the low halfword of ftmp6. */
850  "pmullh %[ftmp0], %[ftmp0], %[ff_pw_m8tom5] \n\t"
851  "pmullh %[ftmp1], %[ftmp1], %[ff_pw_m4tom1] \n\t"
852  "pmullh %[ftmp2], %[ftmp2], %[ff_pw_1to4] \n\t"
853  "pmullh %[ftmp3], %[ftmp3], %[ff_pw_5to8] \n\t"
854  "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
855  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
856  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
857  "dli %[tmp0], 0x0e \n\t"
858  "dmtc1 %[tmp0], %[ftmp4] \n\t"
859  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
860  "paddsh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
861 
862  "dli %[tmp0], 0x01 \n\t"
863  "dmtc1 %[tmp0], %[ftmp4] \n\t"
864  "pshufh %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
865  "paddsh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
866 
 /* Move H and V to integer registers; shift left then arithmetic right
  * by 48 sign-extends the low 16 bits. tmp0 = H, tmp1 = V. */
867  "dmfc1 %[tmp0], %[ftmp5] \n\t"
868  "dsll %[tmp0], %[tmp0], 0x30 \n\t"
869  "dsra %[tmp0], %[tmp0], 0x30 \n\t"
870  "dmfc1 %[tmp1], %[ftmp6] \n\t"
871  "dsll %[tmp1], %[tmp1], 0x30 \n\t"
872  "dsra %[tmp1], %[tmp1], 0x30 \n\t"
873 
 /* --- 3a. SVQ3 scaling: x = ((x / 4) * 5) / 16, then swap H and V. */
874  "beqz %[svq3], 1f \n\t"
875  "dli %[tmp2], 0x04 \n\t"
876  "ddiv %[tmp0], %[tmp0], %[tmp2] \n\t"
877  "ddiv %[tmp1], %[tmp1], %[tmp2] \n\t"
878  "dli %[tmp2], 0x05 \n\t"
879  "dmul %[tmp0], %[tmp0], %[tmp2] \n\t"
880  "dmul %[tmp1], %[tmp1], %[tmp2] \n\t"
881  "dli %[tmp2], 0x10 \n\t"
882  "ddiv %[tmp0], %[tmp0], %[tmp2] \n\t"
883  "ddiv %[tmp1], %[tmp1], %[tmp2] \n\t"
884  "daddu %[tmp2], %[tmp0], $0 \n\t"
885  "daddu %[tmp0], %[tmp1], $0 \n\t"
886  "daddu %[tmp1], %[tmp2], $0 \n\t"
887  "b 2f \n\t"
888 
 /* --- 3b. RV40 scaling: x = (x + (x >> 2)) >> 4. */
889  "1: \n\t"
890  "beqz %[rv40], 1f \n\t"
891  "dsra %[tmp2], %[tmp0], 0x02 \n\t"
892  "daddu %[tmp0], %[tmp0], %[tmp2] \n\t"
893  "dsra %[tmp2], %[tmp1], 0x02 \n\t"
894  "daddu %[tmp1], %[tmp1], %[tmp2] \n\t"
895  "dsra %[tmp0], %[tmp0], 0x04 \n\t"
896  "dsra %[tmp1], %[tmp1], 0x04 \n\t"
897  "b 2f \n\t"
898 
 /* --- 3c. default scaling: x = (5 * x + 32) >> 6. */
899  "1: \n\t"
900  "dli %[tmp2], 0x05 \n\t"
901  "dmul %[tmp0], %[tmp0], %[tmp2] \n\t"
902  "dmul %[tmp1], %[tmp1], %[tmp2] \n\t"
903  "daddiu %[tmp0], %[tmp0], 0x20 \n\t"
904  "daddiu %[tmp1], %[tmp1], 0x20 \n\t"
905  "dsra %[tmp0], %[tmp0], 0x06 \n\t"
906  "dsra %[tmp1], %[tmp1], 0x06 \n\t"
907 
 /* --- 4. row-0 base value: tmp5 -= 7 * (H + V). */
908  "2: \n\t"
909  "daddu %[tmp3], %[tmp0], %[tmp1] \n\t"
910  "dli %[tmp2], 0x07 \n\t"
911  "dmul %[tmp3], %[tmp3], %[tmp2] \n\t"
912  "dsubu %[tmp5], %[tmp5], %[tmp3] \n\t"
913 
 /* Broadcast H (ftmp0), V (ftmp5) and the base value (ftmp6) to all four
  * halfwords; ftmp7 = 5 is the final shift count. ftmp1..ftmp4 become the
  * per-column ramps: column weights (names suggest 0..3, 4..7, 8..11,
  * 12..15) multiplied by H. */
914  "xor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
915  "dmtc1 %[tmp0], %[ftmp0] \n\t"
916  "pshufh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
917  "dmtc1 %[tmp1], %[ftmp5] \n\t"
918  "pshufh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
919  "dmtc1 %[tmp5], %[ftmp6] \n\t"
920  "pshufh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
921  "dli %[tmp0], 0x05 \n\t"
922  "dmtc1 %[tmp0], %[ftmp7] \n\t"
923  "pmullh %[ftmp1], %[ff_pw_0to3], %[ftmp0] \n\t"
924  "dmtc1 %[ff_pw_4to7], %[ftmp2] \n\t"
925  "pmullh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
926  "dmtc1 %[ff_pw_8tob], %[ftmp3] \n\t"
927  "pmullh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
928  "dmtc1 %[ff_pw_ctof], %[ftmp4] \n\t"
929  "pmullh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
930 
 /* --- 5. emit 16 rows; each row is (ramp + base) >> 5 packed with
  * unsigned saturation, then the base advances by V. */
931  "dli %[tmp0], 0x10 \n\t"
932  PTR_ADDU "%[addr0], %[src], $0 \n\t"
933  "1: \n\t"
 /* Columns 0..7 of the current row. */
934  "paddsh %[ftmp8], %[ftmp1], %[ftmp6] \n\t"
935  "psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
936  "paddsh %[ftmp9], %[ftmp2], %[ftmp6] \n\t"
937  "psrah %[ftmp9], %[ftmp9], %[ftmp7] \n\t"
938  "packushb %[ftmp0], %[ftmp8], %[ftmp9] \n\t"
939  MMI_SDC1(%[ftmp0], %[addr0], 0x00)
940 
 /* Columns 8..15 of the current row. */
941  "paddsh %[ftmp8], %[ftmp3], %[ftmp6] \n\t"
942  "psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
943  "paddsh %[ftmp9], %[ftmp4], %[ftmp6] \n\t"
944  "psrah %[ftmp9], %[ftmp9], %[ftmp7] \n\t"
945  "packushb %[ftmp0], %[ftmp8], %[ftmp9] \n\t"
946  MMI_SDC1(%[ftmp0], %[addr0], 0x08)
947 
948  "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
949  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
950  "daddiu %[tmp0], %[tmp0], -0x01 \n\t"
951  "bnez %[tmp0], 1b \n\t"
952  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
953  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
954  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
955  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
956  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
957  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
958  [tmp2]"=&r"(tmp[2]), [tmp3]"=&r"(tmp[3]),
959  [tmp4]"=&r"(tmp[4]), [tmp5]"=&r"(tmp[5]),
960  RESTRICT_ASM_ALL64
961  [addr0]"=&r"(addr[0])
962  : [src]"r"(src), [stride]"r"((mips_reg)stride),
963  [svq3]"r"(svq3), [rv40]"r"(rv40),
968  : "memory"
969  );
970 }
971 
973 {
975 }
976 
978 {
980 }
981 
983 {
985 }
stride
int stride
Definition: mace.c:144
ff_pred8x16_vertical_8_mmi
void ff_pred8x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:678
ff_pw_2
const uint64_t ff_pw_2
Definition: constants.c:27
ff_pw_m8tom5
const uint64_t ff_pw_m8tom5
Definition: constants.c:48
ff_pw_4to7
const uint64_t ff_pw_4to7
Definition: constants.c:53
ff_pw_m4tom1
const uint64_t ff_pw_m4tom1
Definition: constants.c:49
ff_pred8x16_horizontal_8_mmi
void ff_pred8x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:709
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:26
ff_pred8x8_vertical_8_mmi
void ff_pred8x8_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:460
ff_pred16x16_plane_svq3_8_mmi
void ff_pred16x16_plane_svq3_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:977
mips_reg
#define mips_reg
Definition: asmdefs.h:44
ff_pred8x8_horizontal_8_mmi
void ff_pred8x8_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:487
ff_pred16x16_dc_8_mmi
void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:97
PTR_ADDI
#define PTR_ADDI
Definition: asmdefs.h:49
ff_pred8x8l_top_dc_8_mmi
void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:155
U
#define U(x)
Definition: vp56_arith.h:37
ff_pred8x8l_dc_8_mmi
void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:244
constants.h
pred16x16_plane_compat_mmi
static void pred16x16_plane_compat_mmi(uint8_t *src, int stride, const int svq3, const int rv40)
Definition: h264pred_mmi.c:742
mmiutils.h
ff_pred16x16_horizontal_8_mmi
void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:60
h264pred_mips.h
ff_pred16x16_plane_h264_8_mmi
void ff_pred16x16_plane_h264_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:972
ff_pw_5to8
const uint64_t ff_pw_5to8
Definition: constants.c:51
ff_pred4x4_dc_8_mmi
void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:430
src
#define src
Definition: vp8dsp.c:254
ff_pred8x8l_vertical_8_mmi
void ff_pred8x8l_vertical_8_mmi(uint8_t *src, int has_topleft, int has_topright, ptrdiff_t stride)
Definition: h264pred_mmi.c:350
bit_depth_template.c
dc
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
ff_pw_0to3
const uint64_t ff_pw_0to3
Definition: constants.c:52
ff_pw_ctof
const uint64_t ff_pw_ctof
Definition: constants.c:55
PTR_SUBU
#define PTR_SUBU
Definition: asmdefs.h:50
src0
#define src0
Definition: h264pred.c:138
src1
#define src1
Definition: h264pred.c:139
ff_pred8x8_top_dc_8_mmi
void ff_pred8x8_top_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:520
uint8_t
uint8_t
Definition: audio_convert.c:194
PTR_ADDU
#define PTR_ADDU
Definition: asmdefs.h:47
ff_pred16x16_vertical_8_mmi
void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:30
ff_pw_1to4
const uint64_t ff_pw_1to4
Definition: constants.c:50
PTR_ADDIU
#define PTR_ADDIU
Definition: asmdefs.h:48
ff_pb_1
const uint64_t ff_pb_1
Definition: constants.c:57
ff_pred16x16_plane_rv40_8_mmi
void ff_pred16x16_plane_rv40_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:982
ff_pw_8tob
const uint64_t ff_pw_8tob
Definition: constants.c:54
ff_pred8x8_dc_8_mmi
void ff_pred8x8_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_mmi.c:572
PTR_SRL
#define PTR_SRL
Definition: asmdefs.h:54