FFmpeg
hpeldsp_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized hpeldsp
3  *
4  * Copyright (c) 2016 Loongson Technology Corporation Limited
5  * Copyright (c) 2016 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "hpeldsp_mips.h"
27 #include "constants.h"
28 
30  ptrdiff_t line_size, int h)
31 {
32  double ftmp[4];
33  DECLARE_VAR_LOW32;
34 
35  __asm__ volatile (
36  "1: \n\t"
37  MMI_ULWC1(%[ftmp0], %[pixels], 0x00)
38  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
39  MMI_ULWC1(%[ftmp1], %[pixels], 0x00)
40  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
41 
42  PTR_ADDI "%[h], %[h], -0x02 \n\t"
43 
44  MMI_SWC1(%[ftmp0], %[block], 0x00)
45  PTR_ADDU "%[block], %[block], %[line_size] \n\t"
46  MMI_SWC1(%[ftmp1], %[block], 0x00)
47  PTR_ADDU "%[block], %[block], %[line_size] \n\t"
48 
49  "bnez %[h], 1b \n\t"
50  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
51  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
52  RESTRICT_ASM_LOW32
53  [block]"+&r"(block), [pixels]"+&r"(pixels),
54  [h]"+&r"(h)
55  : [line_size]"r"((mips_reg)line_size)
56  : "memory"
57  );
58 }
59 
61  ptrdiff_t line_size, int h)
62 {
63  double ftmp[4];
64  DECLARE_VAR_ALL64;
65 
66  __asm__ volatile (
67  "1: \n\t"
68  MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
69  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
70  MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
71  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
72  MMI_ULDC1(%[ftmp2], %[pixels], 0x00)
73  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
74  MMI_ULDC1(%[ftmp3], %[pixels], 0x00)
75  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
76 
77  PTR_ADDI "%[h], %[h], -0x04 \n\t"
78 
79  MMI_SDC1(%[ftmp0], %[block], 0x00)
80  PTR_ADDU "%[block], %[block], %[line_size] \n\t"
81  MMI_SDC1(%[ftmp1], %[block], 0x00)
82  PTR_ADDU "%[block], %[block], %[line_size] \n\t"
83  MMI_SDC1(%[ftmp2], %[block], 0x00)
84  PTR_ADDU "%[block], %[block], %[line_size] \n\t"
85  MMI_SDC1(%[ftmp3], %[block], 0x00)
86  PTR_ADDU "%[block], %[block], %[line_size] \n\t"
87 
88  "bnez %[h], 1b \n\t"
89  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
90  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
91  RESTRICT_ASM_ALL64
92  [block]"+&r"(block), [pixels]"+&r"(pixels),
93  [h]"+&r"(h)
94  : [line_size]"r"((mips_reg)line_size)
95  : "memory"
96  );
97 }
98 
100  ptrdiff_t line_size, int h)
101 {
102  double ftmp[8];
103  DECLARE_VAR_ALL64;
104 
105  __asm__ volatile (
106  "1: \n\t"
107  MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
108  MMI_ULDC1(%[ftmp2], %[pixels], 0x08)
109  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
110  MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
111  MMI_ULDC1(%[ftmp3], %[pixels], 0x08)
112  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
113  MMI_ULDC1(%[ftmp4], %[pixels], 0x00)
114  MMI_ULDC1(%[ftmp6], %[pixels], 0x08)
115  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
116  MMI_ULDC1(%[ftmp5], %[pixels], 0x00)
117  MMI_ULDC1(%[ftmp7], %[pixels], 0x08)
118  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
119 
120  PTR_ADDI "%[h], %[h], -0x04 \n\t"
121 
122  MMI_SDC1(%[ftmp0], %[block], 0x00)
123  MMI_SDC1(%[ftmp2], %[block], 0x08)
124  PTR_ADDU "%[block], %[block], %[line_size] \n\t"
125  MMI_SDC1(%[ftmp1], %[block], 0x00)
126  MMI_SDC1(%[ftmp3], %[block], 0x08)
127  PTR_ADDU "%[block], %[block], %[line_size] \n\t"
128  MMI_SDC1(%[ftmp4], %[block], 0x00)
129  MMI_SDC1(%[ftmp6], %[block], 0x08)
130  PTR_ADDU "%[block], %[block], %[line_size] \n\t"
131  MMI_SDC1(%[ftmp5], %[block], 0x00)
132  MMI_SDC1(%[ftmp7], %[block], 0x08)
133  PTR_ADDU "%[block], %[block], %[line_size] \n\t"
134 
135  "bnez %[h], 1b \n\t"
136  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
137  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
138  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
139  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
140  RESTRICT_ASM_ALL64
141  [block]"+&r"(block), [pixels]"+&r"(pixels),
142  [h]"+&r"(h)
143  : [line_size]"r"((mips_reg)line_size)
144  : "memory"
145  );
146 }
147 
149  ptrdiff_t line_size, int h)
150 {
151  double ftmp[4];
152  mips_reg addr[2];
153  DECLARE_VAR_LOW32;
154 
155  __asm__ volatile (
156  "1: \n\t"
157  PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
158  MMI_ULWC1(%[ftmp0], %[pixels], 0x00)
159  MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
160  PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
161  MMI_ULWC1(%[ftmp2], %[block], 0x00)
162  MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
163 
164  PTR_ADDI "%[h], %[h], -0x02 \n\t"
165 
166  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
167  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
168  MMI_SWC1(%[ftmp0], %[block], 0x00)
169  MMI_SWC1(%[ftmp1], %[addr1], 0x00)
170  PTR_ADDU "%[pixels], %[addr0], %[line_size] \n\t"
171  PTR_ADDU "%[block], %[addr1], %[line_size] \n\t"
172 
173  "bnez %[h], 1b \n\t"
174  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
175  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
176  RESTRICT_ASM_LOW32
177  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
178  [block]"+&r"(block), [pixels]"+&r"(pixels),
179  [h]"+&r"(h)
180  : [line_size]"r"((mips_reg)line_size)
181  : "memory"
182  );
183 }
184 
186  ptrdiff_t line_size, int h)
187 {
188  double ftmp[4];
189  mips_reg addr[3];
190  DECLARE_VAR_ALL64;
191  DECLARE_VAR_ADDRT;
192 
193  __asm__ volatile (
194  PTR_ADDU "%[addr2], %[line_size], %[line_size] \n\t"
195  "1: \n\t"
196  MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
197  PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
198  MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
199  PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
200  MMI_ULDC1(%[ftmp2], %[block], 0x00)
201  MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
202  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
203  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
204  MMI_SDC1(%[ftmp0], %[block], 0x00)
205  MMI_SDXC1(%[ftmp1], %[block], %[line_size], 0x00)
206  PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
207  PTR_ADDU "%[block], %[block], %[addr2] \n\t"
208 
209  MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
210  PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
211  MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
212  PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
213  MMI_ULDC1(%[ftmp2], %[block], 0x00)
214  MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
215  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
216  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
217  MMI_SDC1(%[ftmp0], %[block], 0x00)
218  MMI_SDXC1(%[ftmp1], %[block], %[line_size], 0x00)
219  PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
220  PTR_ADDU "%[block], %[block], %[addr2] \n\t"
221 
222  PTR_ADDI "%[h], %[h], -0x04 \n\t"
223  "bnez %[h], 1b \n\t"
224  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
225  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
226  RESTRICT_ASM_ALL64
227  RESTRICT_ASM_ADDRT
228  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
229  [addr2]"=&r"(addr[2]),
230  [block]"+&r"(block), [pixels]"+&r"(pixels),
231  [h]"+&r"(h)
232  : [line_size]"r"((mips_reg)line_size)
233  : "memory"
234  );
235 }
236 
238  ptrdiff_t line_size, int h)
239 {
240  double ftmp[8];
241  mips_reg addr[1];
242  DECLARE_VAR_ALL64;
243 
244  __asm__ volatile (
245  "1: \n\t"
246  PTR_ADDI "%[h], %[h], -0x04 \n\t"
247  MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
248  MMI_ULDC1(%[ftmp4], %[pixels], 0x08)
249  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
250  MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
251  MMI_ULDC1(%[ftmp5], %[pixels], 0x08)
252  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
253  MMI_ULDC1(%[ftmp2], %[block], 0x00)
254  MMI_ULDC1(%[ftmp6], %[block], 0x08)
255  PTR_ADDU "%[addr0], %[block], %[line_size] \n\t"
256  MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
257  MMI_ULDC1(%[ftmp7], %[addr0], 0x08)
258  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
259  "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
260  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
261  "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
262  MMI_SDC1(%[ftmp0], %[block], 0x00)
263  MMI_SDC1(%[ftmp4], %[block], 0x08)
264  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
265  MMI_SDC1(%[ftmp5], %[addr0], 0x08)
266  PTR_ADDU "%[block], %[addr0], %[line_size] \n\t"
267 
268  MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
269  MMI_ULDC1(%[ftmp4], %[pixels], 0x08)
270  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
271  MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
272  MMI_ULDC1(%[ftmp5], %[pixels], 0x08)
273  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
274  MMI_ULDC1(%[ftmp2], %[block], 0x00)
275  MMI_ULDC1(%[ftmp6], %[block], 0x08)
276  PTR_ADDU "%[addr0], %[block], %[line_size] \n\t"
277  MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
278  MMI_ULDC1(%[ftmp7], %[addr0], 0x08)
279  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
280  "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
281  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
282  "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
283  MMI_SDC1(%[ftmp0], %[block], 0x00)
284  MMI_SDC1(%[ftmp4], %[block], 0x08)
285  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
286  MMI_SDC1(%[ftmp5], %[addr0], 0x08)
287  PTR_ADDU "%[block], %[addr0], %[line_size] \n\t"
288 
289  "bnez %[h], 1b \n\t"
290  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
291  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
292  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
293  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
294  RESTRICT_ASM_ALL64
295  [addr0]"=&r"(addr[0]),
296  [block]"+&r"(block), [pixels]"+&r"(pixels),
297  [h]"+&r"(h)
298  : [line_size]"r"((mips_reg)line_size)
299  : "memory"
300  );
301 }
302 
303 inline void ff_put_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
304  const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
305  int h)
306 {
307  double ftmp[4];
308  mips_reg addr[5];
309  DECLARE_VAR_LOW32;
310 
311  __asm__ volatile (
312  "1: \n\t"
313  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
314  MMI_ULWC1(%[ftmp0], %[src1], 0x00)
315  MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
316  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
317  MMI_ULWC1(%[ftmp2], %[src2], 0x00)
318  MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
319  PTR_ADDU "%[src1], %[addr0], %[src_stride1] \n\t"
320  PTR_ADDU "%[src2], %[addr1], %[src_stride2] \n\t"
321 
322  PTR_ADDI "%[h], %[h], -0x02 \n\t"
323 
324  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
325  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
326  MMI_SWC1(%[ftmp0], %[dst], 0x00)
327  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
328  MMI_SWC1(%[ftmp1], %[dst], 0x00)
329  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
330 
331  "bnez %[h], 1b \n\t"
332  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
333  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
334  RESTRICT_ASM_LOW32
335  RESTRICT_ASM_ADDRT
336  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
337  [dst]"+&r"(dst), [src1]"+&r"(src1),
338  [src2]"+&r"(src2), [h]"+&r"(h)
339  : [dst_stride]"r"((mips_reg)dst_stride),
340  [src_stride1]"r"((mips_reg)src_stride1),
341  [src_stride2]"r"((mips_reg)src_stride2)
342  : "memory"
343  );
344 }
345 
/*
 * Combine src1 and src2 with pavgb, 8 bytes per row, and store to dst.
 *
 * The asm loop is unrolled to four rows per pass (two pairs of rows) and
 * subtracts 4 from h before the bnez test, so it only terminates when h
 * reaches exactly 0: h has to be a positive multiple of 4.
 * dst, src1 and src2 are stepped by their own strides; the MMI_*, PTR_*,
 * DECLARE_VAR_* and RESTRICT_ASM_* macros are defined outside this file.
 */
346 inline void ff_put_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
347  const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
348  int h)
349 {
350  double ftmp[4];
351  mips_reg addr[5];
352  DECLARE_VAR_ALL64;
353  DECLARE_VAR_ADDRT;
354 
355  __asm__ volatile (
/* addr2/addr3/addr4 = twice src_stride1/src_stride2/dst_stride: the two-row step */
356  PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
357  PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
358  PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
359 
360  "1: \n\t"
/* first row pair: ftmp0/ftmp1 come from src1, ftmp2/ftmp3 from src2 */
361  MMI_ULDC1(%[ftmp0], %[src1], 0x00)
362  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
363  MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
364  MMI_ULDC1(%[ftmp2], %[src2], 0x00)
365  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
366  MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
367  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
368  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
369  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
/* second row of the pair goes through MMI_SDXC1 with dst_stride as index
 * (presumably dst + dst_stride -- confirm against the macro definition) */
370  MMI_SDC1(%[ftmp0], %[dst], 0x00)
371  MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
372  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
373  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
374 
/* second row pair: identical sequence on the advanced pointers */
375  MMI_ULDC1(%[ftmp0], %[src1], 0x00)
376  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
377  MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
378  MMI_ULDC1(%[ftmp2], %[src2], 0x00)
379  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
380  MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
381  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
382  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
383  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
384  MMI_SDC1(%[ftmp0], %[dst], 0x00)
385  MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
386  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
387  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
388 
/* four rows were written in this pass */
389  PTR_ADDI "%[h], %[h], -0x04 \n\t"
390  "bnez %[h], 1b \n\t"
391  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
392  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
393  RESTRICT_ASM_ALL64
394  RESTRICT_ASM_ADDRT
395  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
396  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
397  [addr4]"=&r"(addr[4]),
398  [dst]"+&r"(dst), [src1]"+&r"(src1),
399  [src2]"+&r"(src2), [h]"+&r"(h)
400  : [dst_stride]"r"((mips_reg)dst_stride),
401  [src_stride1]"r"((mips_reg)src_stride1),
402  [src_stride2]"r"((mips_reg)src_stride2)
403  : "memory"
404  );
405 }
406 
/*
 * Combine src1 and src2 with pavgb, 16 bytes per row, and store to dst.
 *
 * Every row is handled as two 8-byte halves at offsets 0x00 and 0x08.
 * The asm loop covers four rows per pass (two pairs of rows) and subtracts
 * 4 from h before the bnez test, so h has to be a positive multiple of 4.
 * The MMI_*, PTR_*, DECLARE_VAR_* and RESTRICT_ASM_* macros are defined
 * outside this file.
 */
407 inline void ff_put_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
408  const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
409  int h)
410 {
411  double ftmp[8];
412  mips_reg addr[5];
413  DECLARE_VAR_ALL64;
414  DECLARE_VAR_ADDRT;
415 
416  __asm__ volatile (
/* addr2/addr3/addr4 = twice src_stride1/src_stride2/dst_stride: the two-row step */
417  PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
418  PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
419  PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
420 
421  "1: \n\t"
/* first row pair.  Register use: ftmp0/ftmp4 = src1 row (low/high half),
 * ftmp1/ftmp5 = next src1 row, ftmp2/ftmp6 = src2 row, ftmp3/ftmp7 = next
 * src2 row */
422  MMI_ULDC1(%[ftmp0], %[src1], 0x00)
423  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
424  MMI_ULDC1(%[ftmp4], %[src1], 0x08)
425  MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
426  MMI_ULDC1(%[ftmp5], %[addr0], 0x08)
427  MMI_ULDC1(%[ftmp2], %[src2], 0x00)
428  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
429  MMI_ULDC1(%[ftmp6], %[src2], 0x08)
430  MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
431  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
432  MMI_ULDC1(%[ftmp7], %[addr1], 0x08)
433  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
434  "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
435  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
436  "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
/* the second row of the pair goes through MMI_SDXC1 with dst_stride as
 * index (presumably dst + dst_stride -- confirm against the macro) */
437  MMI_SDC1(%[ftmp0], %[dst], 0x00)
438  MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
439  MMI_SDC1(%[ftmp4], %[dst], 0x08)
440  MMI_SDXC1(%[ftmp5], %[dst], %[dst_stride], 0x08)
441  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
442  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
443 
/* second row pair: identical sequence on the advanced pointers */
444  MMI_ULDC1(%[ftmp0], %[src1], 0x00)
445  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
446  MMI_ULDC1(%[ftmp4], %[src1], 0x08)
447  MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
448  MMI_ULDC1(%[ftmp5], %[addr0], 0x08)
449  MMI_ULDC1(%[ftmp2], %[src2], 0x00)
450  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
451  MMI_ULDC1(%[ftmp6], %[src2], 0x08)
452  MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
453  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
454  MMI_ULDC1(%[ftmp7], %[addr1], 0x08)
455  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
456  "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
457  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
458  "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
459  MMI_SDC1(%[ftmp0], %[dst], 0x00)
460  MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
461  MMI_SDC1(%[ftmp4], %[dst], 0x08)
462  MMI_SDXC1(%[ftmp5], %[dst], %[dst_stride], 0x08)
463  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
464  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
465 
/* four rows were written in this pass */
466  PTR_ADDI "%[h], %[h], -0x04 \n\t"
467  "bnez %[h], 1b \n\t"
468  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
469  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
470  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
471  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
472  RESTRICT_ASM_ALL64
473  RESTRICT_ASM_ADDRT
474  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
475  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
476  [addr4]"=&r"(addr[4]),
477  [dst]"+&r"(dst), [src1]"+&r"(src1),
478  [src2]"+&r"(src2), [h]"+&r"(h)
479  : [dst_stride]"r"((mips_reg)dst_stride),
480  [src_stride1]"r"((mips_reg)src_stride1),
481  [src_stride2]"r"((mips_reg)src_stride2)
482  : "memory"
483  );
484 }
485 
486 inline void ff_avg_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
487  const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
488  int h)
489 {
490  double ftmp[6];
491  mips_reg addr[6];
492  DECLARE_VAR_LOW32;
493 
494  __asm__ volatile (
495  "1: \n\t"
496  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
497  MMI_ULWC1(%[ftmp0], %[src1], 0x00)
498  MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
499  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
500  MMI_ULWC1(%[ftmp2], %[src2], 0x00)
501  MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
502  PTR_ADDU "%[src1], %[addr0], %[src_stride1] \n\t"
503  PTR_ADDU "%[src2], %[addr1], %[src_stride2] \n\t"
504  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
505  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
506  PTR_ADDU "%[addr2], %[dst], %[dst_stride] \n\t"
507  MMI_ULWC1(%[ftmp4], %[dst], 0x00)
508  MMI_ULWC1(%[ftmp5], %[addr2], 0x00)
509  PTR_ADDI "%[h], %[h], -0x02 \n\t"
510  "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
511  "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
512  MMI_SWC1(%[ftmp0], %[dst], 0x00)
513  MMI_SWC1(%[ftmp1], %[addr2], 0x00)
514  PTR_ADDU "%[dst], %[addr2], %[dst_stride] \n\t"
515 
516  "bnez %[h], 1b \n\t"
517  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
518  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
519  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
520  RESTRICT_ASM_LOW32
521  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
522  [addr2]"=&r"(addr[2]),
523  [dst]"+&r"(dst), [src1]"+&r"(src1),
524  [src2]"+&r"(src2), [h]"+&r"(h)
525  : [dst_stride]"r"((mips_reg)dst_stride),
526  [src_stride1]"r"((mips_reg)src_stride1),
527  [src_stride2]"r"((mips_reg)src_stride2)
528  : "memory"
529  );
530 }
531 
/*
 * Combine src1 and src2 with pavgb, 8 bytes per row, then pavgb that result
 * with the bytes already stored in dst and write it back to dst.
 *
 * The asm loop covers four rows per pass (two pairs of rows) and subtracts
 * 4 from h before the bnez test, so h has to be a positive multiple of 4.
 * The MMI_*, PTR_*, DECLARE_VAR_* and RESTRICT_ASM_* macros are defined
 * outside this file.
 */
532 inline void ff_avg_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
533  const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
534  int h)
535 {
536  double ftmp[6];
537  mips_reg addr[6];
538  DECLARE_VAR_ALL64;
539  DECLARE_VAR_ADDRT;
540 
541  __asm__ volatile (
/* addr2/addr3/addr4 = twice src_stride1/src_stride2/dst_stride: the two-row step */
542  PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
543  PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
544  PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
545 
546  "1: \n\t"
/* first row pair, stage 1: src1 rows (ftmp0/ftmp1) with src2 rows (ftmp2/ftmp3) */
547  MMI_ULDC1(%[ftmp0], %[src1], 0x00)
548  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
549  MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
550  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
551  MMI_ULDC1(%[ftmp2], %[src2], 0x00)
552  MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
553  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
554  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
555  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
/* stage 2: fold in the two rows currently held by dst (ftmp4/ftmp5);
 * addr5 points at the second of those rows */
556  PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
557  MMI_ULDC1(%[ftmp4], %[dst], 0x00)
558  MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
559  "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
560  "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
561  MMI_SDC1(%[ftmp0], %[dst], 0x00)
562  MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
563  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
564  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
565 
/* second row pair: identical sequence on the advanced pointers */
566  MMI_ULDC1(%[ftmp0], %[src1], 0x00)
567  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
568  MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
569  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
570  MMI_ULDC1(%[ftmp2], %[src2], 0x00)
571  MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
572  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
573  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
574  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
575  PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
576  MMI_ULDC1(%[ftmp4], %[dst], 0x00)
577  MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
578  "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
579  "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
580  MMI_SDC1(%[ftmp0], %[dst], 0x00)
581  MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
582  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
583  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
584 
/* four rows were written in this pass */
585  PTR_ADDI "%[h], %[h], -0x04 \n\t"
586  "bnez %[h], 1b \n\t"
587  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
588  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
589  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
590  RESTRICT_ASM_ALL64
591  RESTRICT_ASM_ADDRT
592  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
593  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
594  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
595  [dst]"+&r"(dst), [src1]"+&r"(src1),
596  [src2]"+&r"(src2), [h]"+&r"(h)
597  : [dst_stride]"r"((mips_reg)dst_stride),
598  [src_stride1]"r"((mips_reg)src_stride1),
599  [src_stride2]"r"((mips_reg)src_stride2)
600  : "memory"
601  );
602 }
603 
/**
 * 16-byte-wide counterpart of ff_avg_pixels8_l2_8_mmi().
 *
 * The rows are processed as two independent 8-byte columns, first the one
 * at byte offset 0 and then the one at byte offset 8.  Each column is
 * forwarded to ff_avg_pixels8_l2_8_mmi() with the strides and row count
 * passed through unchanged.
 */
inline void ff_avg_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
    const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
    int h)
{
    int col;

    for (col = 0; col < 16; col += 8) {
        ff_avg_pixels8_l2_8_mmi(dst + col, src1 + col, src2 + col,
                                dst_stride, src_stride1, src_stride2, h);
    }
}
613 
615  ptrdiff_t line_size, int h)
616 {
617  ff_put_pixels4_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
618  line_size, h);
619 }
620 
622  ptrdiff_t line_size, int h)
623 {
624  ff_put_pixels8_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
625  line_size, h);
626 }
627 
629  ptrdiff_t line_size, int h)
630 {
631  ff_put_pixels16_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
632  line_size, h);
633 }
634 
636  ptrdiff_t line_size, int h)
637 {
638  ff_avg_pixels4_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
639  line_size, h);
640 }
641 
643  ptrdiff_t line_size, int h)
644 {
645  ff_avg_pixels8_l2_8_mmi(block, pixels, pixels + 1, line_size, line_size,
646  line_size, h);
647 }
648 
650  ptrdiff_t line_size, int h)
651 {
652  ff_avg_pixels8_x2_8_mmi(block, pixels, line_size, h);
653  ff_avg_pixels8_x2_8_mmi(block + 8, pixels + 8, line_size, h);
654 }
655 
657  const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
658  int h)
659 {
660  double ftmp[5];
661  mips_reg addr[5];
662  DECLARE_VAR_ALL64;
663  DECLARE_VAR_ADDRT;
664 
665  __asm__ volatile (
666  "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
667  PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
668  PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
669  PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
670 
671  "1: \n\t"
672  MMI_ULDC1(%[ftmp0], %[src1], 0x00)
673  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
674  MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
675  MMI_ULDC1(%[ftmp2], %[src2], 0x00)
676  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
677  MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
678  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
679  "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
680  "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
681  "xor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
682  "xor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
683  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
684  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
685  "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
686  "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
687  MMI_SDC1(%[ftmp0], %[dst], 0x00)
688  MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
689  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
690  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
691 
692  MMI_ULDC1(%[ftmp0], %[src1], 0x00)
693  PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
694  MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
695  MMI_ULDC1(%[ftmp2], %[src2], 0x00)
696  PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
697  MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
698  PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
699  "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
700  "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
701  "xor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
702  "xor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
703  "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
704  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
705  "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
706  "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
707  MMI_SDC1(%[ftmp0], %[dst], 0x00)
708  MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
709  PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
710  PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
711 
712  PTR_ADDI "%[h], %[h], -0x04 \n\t"
713  "bnez %[h], 1b \n\t"
714  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
715  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
716  [ftmp4]"=&f"(ftmp[4]),
717  RESTRICT_ASM_ALL64
718  RESTRICT_ASM_ADDRT
719  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
720  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
721  [addr4]"=&r"(addr[4]),
722  [dst]"+&r"(dst), [src1]"+&r"(src1),
723  [src2]"+&r"(src2), [h]"+&r"(h)
724  : [dst_stride]"r"((mips_reg)dst_stride),
725  [src_stride1]"r"((mips_reg)src_stride1),
726  [src_stride2]"r"((mips_reg)src_stride2)
727  : "memory"
728  );
729 }
730 
732  ptrdiff_t line_size, int h)
733 {
734  ff_put_no_rnd_pixels8_l2_8_mmi(block, pixels, pixels + 1, line_size,
735  line_size, line_size, h);
736 }
737 
739  ptrdiff_t line_size, int h)
740 {
741  ff_put_no_rnd_pixels8_x2_8_mmi(block, pixels, line_size, h);
742  ff_put_no_rnd_pixels8_x2_8_mmi(block + 8, pixels + 8, line_size, h);
743 }
744 
746  ptrdiff_t line_size, int h)
747 {
748  ff_put_pixels4_l2_8_mmi(block, pixels, pixels + line_size, line_size,
749  line_size, line_size, h);
750 }
751 
753  ptrdiff_t line_size, int h)
754 {
755  ff_put_pixels8_l2_8_mmi(block, pixels, pixels + line_size, line_size,
756  line_size, line_size, h);
757 }
758 
760  ptrdiff_t line_size, int h)
761 {
762  ff_put_pixels16_l2_8_mmi(block, pixels, pixels + line_size, line_size,
763  line_size, line_size, h);
764 }
765 
767  ptrdiff_t line_size, int h)
768 {
769  ff_avg_pixels4_l2_8_mmi(block, pixels, pixels + line_size, line_size,
770  line_size, line_size, h);
771 }
772 
774  ptrdiff_t line_size, int h)
775 {
776  ff_avg_pixels8_l2_8_mmi(block, pixels, pixels + line_size, line_size,
777  line_size, line_size, h);
778 }
779 
781  ptrdiff_t line_size, int h)
782 {
783  ff_avg_pixels8_y2_8_mmi(block, pixels, line_size, h);
784  ff_avg_pixels8_y2_8_mmi(block + 8, pixels + 8, line_size, h);
785 }
786 
788  ptrdiff_t line_size, int h)
789 {
790  ff_put_no_rnd_pixels8_l2_8_mmi(block, pixels, pixels + line_size,
791  line_size, line_size, line_size, h);
792 }
793 
795  ptrdiff_t line_size, int h)
796 {
797  ff_put_no_rnd_pixels8_y2_8_mmi(block, pixels, line_size, h);
798  ff_put_no_rnd_pixels8_y2_8_mmi(block + 8 , pixels + 8, line_size, h);
799 }
800 
802  ptrdiff_t line_size, int h)
803 {
804  /* FIXME HIGH BIT DEPTH */
805  int i;
806  const uint32_t a = AV_RN32(pixels);
807  const uint32_t b = AV_RN32(pixels + 1);
808  uint32_t l0 = (a & 0x03030303UL) +
809  (b & 0x03030303UL) +
810  0x02020202UL;
811  uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +
812  ((b & 0xFCFCFCFCUL) >> 2);
813  uint32_t l1, h1;
814 
815  pixels += line_size;
816  for (i = 0; i < h; i += 2) {
817  uint32_t a = AV_RN32(pixels);
818  uint32_t b = AV_RN32(pixels + 1);
819  l1 = (a & 0x03030303UL) +
820  (b & 0x03030303UL);
821  h1 = ((a & 0xFCFCFCFCUL) >> 2) +
822  ((b & 0xFCFCFCFCUL) >> 2);
823  *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
824  pixels += line_size;
825  block += line_size;
826  a = AV_RN32(pixels);
827  b = AV_RN32(pixels + 1);
828  l0 = (a & 0x03030303UL) +
829  (b & 0x03030303UL) +
830  0x02020202UL;
831  h0 = ((a & 0xFCFCFCFCUL) >> 2) +
832  ((b & 0xFCFCFCFCUL) >> 2);
833  *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
834  pixels += line_size;
835  block += line_size;
836  }
837 }
838 
840  ptrdiff_t line_size, int h)
841 {
/*
 * 8-byte-wide kernel that combines each pixel with its right neighbour
 * (loads at offsets 0x00 and 0x01) and with the same two pixels of the
 * following row.  The disabled #else branch below spells out the arithmetic
 * in C on packed 32-bit words: the four bytes are summed, 2 is added, and
 * the total is shifted right by 2.
 *
 * The asm widens bytes to halfwords against the zeroed ftmp7, keeps the
 * horizontal sums of the previous row in registers, and produces two output
 * rows per loop pass.  h is decremented by 2 before the bnez test, so it
 * has to be a positive multiple of 2.  block is an input-only operand; the
 * stores go through MMI_SDXC1 with addr0 as the running row offset.
 * The MMI_*, PTR_*, DECLARE_VAR_* and RESTRICT_ASM_* macros are defined
 * outside this file.
 */
842 #if 1
843  double ftmp[10];
844  mips_reg addr[2];
845  DECLARE_VAR_ALL64;
846  DECLARE_VAR_ADDRT;
847 
848  __asm__ volatile (
/* ftmp7 = 0.  ftmp6: all-ones, halfwords shifted right by 15 then left by 1
 * -- presumably 0x0002 per halfword, matching the +2 of the C branch */
849  "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
850  "dli %[addr0], 0x0f \n\t"
851  "pcmpeqw %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
852  "dmtc1 %[addr0], %[ftmp8] \n\t"
853  "dli %[addr0], 0x01 \n\t"
854  "psrlh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
855  "dmtc1 %[addr0], %[ftmp8] \n\t"
856  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
857 
/* ftmp9 = 2, the shift count used by the psrlh instructions in the loop */
858  "dli %[addr0], 0x02 \n\t"
859  "dmtc1 %[addr0], %[ftmp9] \n\t"
/* prologue: horizontal sums of the first row end up in ftmp4 (low half)
 * and ftmp5 (high half) */
860  MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
861  MMI_ULDC1(%[ftmp4], %[pixels], 0x01)
862  "mov.d %[ftmp1], %[ftmp0] \n\t"
863  "mov.d %[ftmp5], %[ftmp4] \n\t"
864  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
865  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
866  "punpckhbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
867  "punpckhbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
868  "paddush %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
869  "paddush %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
/* addr0 = 0: row offset shared by the pixels loads and the block stores */
870  "xor %[addr0], %[addr0], %[addr0] \n\t"
871  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
872  ".p2align 3 \n\t"
873 
874  "1: \n\t"
/* first output row of the pass: new row sums go to ftmp0/ftmp1 and are
 * combined with the previous row's sums held in ftmp4/ftmp5 */
875  PTR_ADDU "%[addr1], %[pixels], %[addr0] \n\t"
876  MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
877  MMI_ULDC1(%[ftmp2], %[addr1], 0x01)
878  "mov.d %[ftmp1], %[ftmp0] \n\t"
879  "mov.d %[ftmp3], %[ftmp2] \n\t"
880  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
881  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
882  "punpckhbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
883  "punpckhbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
884  "paddush %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
885  "paddush %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
886  "paddush %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
887  "paddush %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
888  "paddush %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
889  "paddush %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
890  "psrlh %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
891  "psrlh %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
892  "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
893  MMI_SDXC1(%[ftmp4], %[block], %[addr0], 0x00)
/* second output row: roles swap -- new row sums go to ftmp4/ftmp5 and are
 * combined with ftmp0/ftmp1, leaving ftmp4/ftmp5 ready for the next pass */
894  PTR_ADDU "%[addr0], %[addr0], %[line_size] \n\t"
895  PTR_ADDU "%[addr1], %[pixels], %[addr0] \n\t"
896  MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
897  MMI_ULDC1(%[ftmp4], %[addr1], 0x01)
898  "mov.d %[ftmp3], %[ftmp2] \n\t"
899  "mov.d %[ftmp5], %[ftmp4] \n\t"
900  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
901  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
902  "punpckhbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
903  "punpckhbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
904  "paddush %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
905  "paddush %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
906  "paddush %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
907  "paddush %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
908  "paddush %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
909  "paddush %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
910  "psrlh %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
911  "psrlh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
912  "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
913  MMI_SDXC1(%[ftmp0], %[block], %[addr0], 0x00)
914  PTR_ADDU "%[addr0], %[addr0], %[line_size] \n\t"
/* NOTE(review): the other loops in this file update h with PTR_ADDI when the
 * operand is an immediate; here an immediate is handed to PTR_ADDU.  This
 * relies on the assembler accepting that form -- consider PTR_ADDI. */
915  PTR_ADDU "%[h], %[h], -0x02 \n\t"
916  "bnez %[h], 1b \n\t"
917  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
918  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
919  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
920  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
921  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
922  RESTRICT_ASM_ALL64
923  RESTRICT_ASM_ADDRT
924  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
925  [h]"+&r"(h), [pixels]"+&r"(pixels)
926  : [block]"r"(block), [line_size]"r"((mips_reg)line_size)
927  : "memory"
928  );
929 #else
/* Disabled C fallback: handles the 8-byte width as two 4-byte columns
 * (j = 0, 1), rewinding pixels/block and stepping 4 bytes right between them. */
930  /* FIXME HIGH BIT DEPTH */
931  int j;
932 
933  for (j = 0; j < 2; j++) {
934  int i;
935  const uint32_t a = AV_RN32(pixels);
936  const uint32_t b = AV_RN32(pixels + 1);
/* l0/l1 accumulate the low 2 bits of every byte (l0 also carries the +2),
 * h0/h1 the upper 6 bits pre-shifted by 2, so byte lanes do not overflow
 * into each other */
937  uint32_t l0 = (a & 0x03030303UL) +
938  (b & 0x03030303UL) +
939  0x02020202UL;
940  uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +
941  ((b & 0xFCFCFCFCUL) >> 2);
942  uint32_t l1, h1;
943 
944  pixels += line_size;
945  for (i = 0; i < h; i += 2) {
946  uint32_t a = AV_RN32(pixels);
947  uint32_t b = AV_RN32(pixels + 1);
948  l1 = (a & 0x03030303UL) +
949  (b & 0x03030303UL);
950  h1 = ((a & 0xFCFCFCFCUL) >> 2) +
951  ((b & 0xFCFCFCFCUL) >> 2);
952  *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
953  pixels += line_size;
954  block += line_size;
955  a = AV_RN32(pixels);
956  b = AV_RN32(pixels + 1);
957  l0 = (a & 0x03030303UL) +
958  (b & 0x03030303UL) +
959  0x02020202UL;
960  h0 = ((a & 0xFCFCFCFCUL) >> 2) +
961  ((b & 0xFCFCFCFCUL) >> 2);
962  *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
963  pixels += line_size;
964  block += line_size;
965  }
/* rewind to the top row and move 4 bytes right for the second column */
966  pixels += 4 - line_size * (h + 1);
967  block += 4 - line_size * h;
968  }
969 #endif
970 }
971 
973  ptrdiff_t line_size, int h)
974 {
     /*
      * Body of ff_put_pixels16_xy2_8_mmi (the name-bearing signature line,
      * listing line 972, is not reproduced in this listing).
      *
      * The work is delegated to ff_put_pixels8_xy2_8_mmi, called twice with
      * the same line_size and h: once at the given block/pixels pointers and
      * once with both pointers advanced by 8 bytes.
      */
975  ff_put_pixels8_xy2_8_mmi(block, pixels, line_size, h);
976  ff_put_pixels8_xy2_8_mmi(block + 8, pixels + 8, line_size, h);
977 }
978 
980  ptrdiff_t line_size, int h)
981 {
     /*
      * Body of ff_avg_pixels4_xy2_8_mmi (the name-bearing signature line,
      * listing line 979, is not reproduced in this listing).
      *
      * Plain C, four bytes per row packed in one 32-bit word. For every
      * output byte the value
      *     (p[y][x] + p[y][x+1] + p[y+1][x] + p[y+1][x+1] + 2) >> 2
      * is computed and then merged with the word already stored in block
      * through rnd_avg32() (defined in rnd_avg.h; presumably a per-byte
      * rounding average -- confirm there).
      *
      * To add four bytes per lane without carries leaking into the next
      * lane, each source word is split:
      *   - lX: the low 2 bits of every byte (mask 0x03030303),
      *   - hX: the high 6 bits of every byte, already shifted right by 2.
      * l0/h0 and l1/h1 each hold the horizontal pair sum (pixels and
      * pixels + 1) of one source row. Only l0 carries the +2 rounding bias,
      * so every l0 + l1 sum contains the bias exactly once.
      *
      * AV_RN32 comes from intreadwrite.h; presumably a native-endian 32-bit
      * read with no alignment requirement -- confirm there.
      */
982  /* FIXME HIGH BIT DEPTH */
983  int i;
     /* Horizontal pair sum of the first source row, seeded with the bias. */
984  const uint32_t a = AV_RN32(pixels);
985  const uint32_t b = AV_RN32(pixels + 1);
986  uint32_t l0 = (a & 0x03030303UL) +
987  (b & 0x03030303UL) +
988  0x02020202UL;
989  uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +
990  ((b & 0xFCFCFCFCUL) >> 2);
991  uint32_t l1, h1;
992 
993  pixels += line_size;
     /*
      * Two destination rows are written per iteration, so the row count is
      * effectively rounded up to an even number. Each iteration reads two
      * further source rows.
      */
994  for (i = 0; i < h; i += 2) {
     /* These locals shadow the outer const a/b on purpose: they are
      * reassigned for the second row further down. */
995  uint32_t a = AV_RN32(pixels);
996  uint32_t b = AV_RN32(pixels + 1);
     /* Pair sum of the next row, without bias (l0 already has it). */
997  l1 = (a & 0x03030303UL) +
998  (b & 0x03030303UL);
999  h1 = ((a & 0xFCFCFCFCUL) >> 2) +
1000  ((b & 0xFCFCFCFCUL) >> 2);
     /* (l0 + l1) is at most 14 per byte; after >> 2 the 0x0F mask drops the
      * bits shifted in from the neighbouring byte lane. */
1001  *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1002  pixels += line_size;
1003  block += line_size;
     /* Refresh l0/h0 with the following row; l1/h1 now act as the upper
      * row of the next 2x2 window. */
1004  a = AV_RN32(pixels);
1005  b = AV_RN32(pixels + 1);
1006  l0 = (a & 0x03030303UL) +
1007  (b & 0x03030303UL) +
1008  0x02020202UL;
1009  h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1010  ((b & 0xFCFCFCFCUL) >> 2);
1011  *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1012  pixels += line_size;
1013  block += line_size;
1014  }
1015 }
1016 
1018  ptrdiff_t line_size, int h)
1019 {
     /*
      * Body of ff_avg_pixels8_xy2_8_mmi (the name-bearing signature line,
      * listing line 1017, is not reproduced in this listing).
      *
      * Plain C. The 8-byte-wide area is handled as two independent 4-byte
      * columns (outer j loop). Within a column, each output byte is
      *     (p[y][x] + p[y][x+1] + p[y+1][x] + p[y+1][x+1] + 2) >> 2
      * which is then merged with the word already stored in block through
      * rnd_avg32() (defined in rnd_avg.h; presumably a per-byte rounding
      * average -- confirm there).
      *
      * Each 32-bit source word is split into the low 2 bits of every byte
      * (lX) and the high 6 bits pre-shifted by 2 (hX), so four bytes can be
      * summed per lane without carries crossing into the next lane. Only l0
      * carries the +2 rounding bias, so each l0 + l1 sum has it exactly once.
      */
1020  /* FIXME HIGH BIT DEPTH */
1021  int j;
1022 
1023  for (j = 0; j < 2; j++) {
1024  int i;
     /* Horizontal pair sum of the column's first source row, with bias. */
1025  const uint32_t a = AV_RN32(pixels);
1026  const uint32_t b = AV_RN32(pixels + 1);
1027  uint32_t l0 = (a & 0x03030303UL) +
1028  (b & 0x03030303UL) +
1029  0x02020202UL;
1030  uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1031  ((b & 0xFCFCFCFCUL) >> 2);
1032  uint32_t l1, h1;
1033 
1034  pixels += line_size;
     /* Two destination rows per iteration; each iteration reads two more
      * source rows. */
1035  for (i = 0; i < h; i += 2) {
     /* Shadows the outer const a/b; reassigned below for the second row. */
1036  uint32_t a = AV_RN32(pixels);
1037  uint32_t b = AV_RN32(pixels + 1);
1038  l1 = (a & 0x03030303UL) +
1039  (b & 0x03030303UL);
1040  h1 = ((a & 0xFCFCFCFCUL) >> 2) +
1041  ((b & 0xFCFCFCFCUL) >> 2);
     /* (l0 + l1) <= 14 per byte; the 0x0F mask after >> 2 removes bits
      * shifted in from the neighbouring byte lane. */
1042  *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1043  pixels += line_size;
1044  block += line_size;
     /* Refresh l0/h0 with the following row; l1/h1 become the upper row. */
1045  a = AV_RN32(pixels);
1046  b = AV_RN32(pixels + 1);
1047  l0 = (a & 0x03030303UL) +
1048  (b & 0x03030303UL) +
1049  0x02020202UL;
1050  h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1051  ((b & 0xFCFCFCFCUL) >> 2);
1052  *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1053  pixels += line_size;
1054  block += line_size;
1055  }
     /* Rewind both pointers to the top row and step 4 bytes right for the
      * second column. pixels advanced one row more than block (the seed
      * row). The rewind is exact when the loop advanced h rows, i.e. when
      * h is even. */
1056  pixels += 4 - line_size * (h + 1);
1057  block += 4 - line_size * h;
1058  }
1059 }
1060 
1062  ptrdiff_t line_size, int h)
1063 {
     /*
      * Body of ff_avg_pixels16_xy2_8_mmi (the name-bearing signature line,
      * listing line 1061, is not reproduced in this listing).
      *
      * The work is delegated to ff_avg_pixels8_xy2_8_mmi, called twice with
      * the same line_size and h: once at the given block/pixels pointers and
      * once with both pointers advanced by 8 bytes.
      */
1064  ff_avg_pixels8_xy2_8_mmi(block, pixels, line_size, h);
1065  ff_avg_pixels8_xy2_8_mmi(block + 8, pixels + 8, line_size, h);
1066 }
1067 
1069  ptrdiff_t line_size, int h)
1070 {
     /*
      * Body of ff_put_no_rnd_pixels8_xy2_8_mmi (the name-bearing signature
      * line, listing line 1068, is not reproduced in this listing).
      *
      * Plain C. The 8-byte-wide area is handled as two independent 4-byte
      * columns (outer j loop). Each output byte is
      *     (p[y][x] + p[y][x+1] + p[y+1][x] + p[y+1][x+1] + 1) >> 2
      * and is stored straight into block; the previous destination content
      * is not read. The bias here is 1 per byte (0x01010101).
      *
      * Each 32-bit source word is split into the low 2 bits of every byte
      * (lX) and the high 6 bits pre-shifted by 2 (hX), so four bytes can be
      * summed per lane without carries crossing into the next lane. Only l0
      * carries the bias, so each l0 + l1 sum has it exactly once.
      */
1071  /* FIXME HIGH BIT DEPTH */
1072  int j;
1073 
1074  for (j = 0; j < 2; j++) {
1075  int i;
     /* Horizontal pair sum of the column's first source row, with bias. */
1076  const uint32_t a = AV_RN32(pixels);
1077  const uint32_t b = AV_RN32(pixels + 1);
1078  uint32_t l0 = (a & 0x03030303UL) +
1079  (b & 0x03030303UL) +
1080  0x01010101UL;
1081  uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1082  ((b & 0xFCFCFCFCUL) >> 2);
1083  uint32_t l1, h1;
1084 
1085  pixels += line_size;
     /* Two destination rows per iteration; each iteration reads two more
      * source rows. */
1086  for (i = 0; i < h; i += 2) {
     /* Shadows the outer const a/b; reassigned below for the second row. */
1087  uint32_t a = AV_RN32(pixels);
1088  uint32_t b = AV_RN32(pixels + 1);
1089  l1 = (a & 0x03030303UL) +
1090  (b & 0x03030303UL);
1091  h1 = ((a & 0xFCFCFCFCUL) >> 2) +
1092  ((b & 0xFCFCFCFCUL) >> 2);
     /* (l0 + l1) <= 13 per byte; the 0x0F mask after >> 2 removes bits
      * shifted in from the neighbouring byte lane. */
1093  *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1094  pixels += line_size;
1095  block += line_size;
     /* Refresh l0/h0 with the following row; l1/h1 become the upper row. */
1096  a = AV_RN32(pixels);
1097  b = AV_RN32(pixels + 1);
1098  l0 = (a & 0x03030303UL) +
1099  (b & 0x03030303UL) +
1100  0x01010101UL;
1101  h0 = ((a & 0xFCFCFCFCUL) >> 2) +
1102  ((b & 0xFCFCFCFCUL) >> 2);
1103  *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1104  pixels += line_size;
1105  block += line_size;
1106  }
     /* Rewind both pointers to the top row and step 4 bytes right for the
      * second column. pixels advanced one row more than block (the seed
      * row). The rewind is exact when the loop advanced h rows, i.e. when
      * h is even. */
1107  pixels += 4 - line_size * (h + 1);
1108  block += 4 - line_size * h;
1109  }
1110 }
1111 
1113  ptrdiff_t line_size, int h)
1114 {
     /*
      * Body of ff_put_no_rnd_pixels16_xy2_8_mmi (the name-bearing signature
      * line, listing line 1112, is not reproduced in this listing).
      *
      * The work is delegated to ff_put_no_rnd_pixels8_xy2_8_mmi, called
      * twice with the same line_size and h: once at the given block/pixels
      * pointers and once with both pointers advanced by 8 bytes.
      */
1115  ff_put_no_rnd_pixels8_xy2_8_mmi(block, pixels, line_size, h);
1116  ff_put_no_rnd_pixels8_xy2_8_mmi(block + 8, pixels + 8, line_size, h);
1117 }
ff_put_pixels4_x2_8_mmi
void ff_put_pixels4_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:614
ff_put_no_rnd_pixels8_l2_8_mmi
void ff_put_no_rnd_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:656
ff_put_no_rnd_pixels16_xy2_8_mmi
void ff_put_no_rnd_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:1112
ff_avg_pixels4_x2_8_mmi
void ff_avg_pixels4_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:635
rnd_avg32
static uint32_t rnd_avg32(uint32_t a, uint32_t b)
Definition: rnd_avg.h:31
ff_put_pixels4_8_mmi
void ff_put_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:29
ff_put_pixels4_xy2_8_mmi
void ff_put_pixels4_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:801
b
#define b
Definition: input.c:41
ff_put_no_rnd_pixels8_y2_8_mmi
void ff_put_no_rnd_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:787
mips_reg
#define mips_reg
Definition: asmdefs.h:44
ff_put_pixels8_y2_8_mmi
void ff_put_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:752
ff_avg_pixels8_x2_8_mmi
void ff_avg_pixels8_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:642
ff_put_pixels16_8_mmi
void ff_put_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:99
PTR_ADDI
#define PTR_ADDI
Definition: asmdefs.h:49
ff_avg_pixels8_y2_8_mmi
void ff_avg_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:773
constants.h
ff_put_no_rnd_pixels16_y2_8_mmi
void ff_put_no_rnd_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:794
mmiutils.h
ff_avg_pixels16_x2_8_mmi
void ff_avg_pixels16_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:649
ff_put_pixels8_l2_8_mmi
void ff_put_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:346
ff_put_pixels16_xy2_8_mmi
void ff_put_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:972
ff_avg_pixels4_xy2_8_mmi
void ff_avg_pixels4_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:979
ff_avg_pixels4_l2_8_mmi
void ff_avg_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:486
ff_put_no_rnd_pixels8_xy2_8_mmi
void ff_put_no_rnd_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:1068
AV_RN32
#define AV_RN32(p)
Definition: intreadwrite.h:364
bit_depth_template.c
ff_put_pixels16_x2_8_mmi
void ff_put_pixels16_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:628
ff_put_pixels8_x2_8_mmi
void ff_put_pixels8_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:621
ff_avg_pixels16_y2_8_mmi
void ff_avg_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:780
hpeldsp_mips.h
ff_put_no_rnd_pixels16_x2_8_mmi
void ff_put_no_rnd_pixels16_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:738
ff_avg_pixels8_8_mmi
void ff_avg_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:185
ff_avg_pixels8_xy2_8_mmi
void ff_avg_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:1017
a
The reader does not expect b to be semantically -5 here, and if the code is changed by maybe adding a cast, a division or other, the signedness will almost certainly be mistaken. To avoid this confusion a new type was introduced: "SUINT" is the C "unsigned" type but it holds a signed "int". To use the same example: SUINT a
Definition: undefined.txt:41
ff_avg_pixels16_l2_8_mmi
void ff_avg_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:604
src1
#define src1
Definition: h264pred.c:140
i
int i
Definition: input.c:407
ff_avg_pixels4_8_mmi
void ff_avg_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:148
ff_put_pixels8_8_mmi
void ff_put_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:60
uint8_t
uint8_t
Definition: audio_convert.c:194
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
ff_put_pixels4_l2_8_mmi
void ff_put_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:303
PTR_ADDU
#define PTR_ADDU
Definition: asmdefs.h:47
ff_avg_pixels16_8_mmi
void ff_avg_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:237
ff_avg_pixels4_y2_8_mmi
void ff_avg_pixels4_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:766
ff_put_pixels8_xy2_8_mmi
void ff_put_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:839
ff_put_pixels16_l2_8_mmi
void ff_put_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:407
ff_put_pixels4_y2_8_mmi
void ff_put_pixels4_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:745
ff_avg_pixels8_l2_8_mmi
void ff_avg_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:532
ff_put_pixels16_y2_8_mmi
void ff_put_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:759
ff_avg_pixels16_xy2_8_mmi
void ff_avg_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:1061
ff_put_no_rnd_pixels8_x2_8_mmi
void ff_put_no_rnd_pixels8_x2_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
Definition: hpeldsp_mmi.c:731
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
h
h
Definition: vp9dsp_template.c:2038