30 ptrdiff_t line_size,
int h)
37 MMI_ULWC1(%[ftmp0], %[pixels], 0x00)
38 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
39 MMI_ULWC1(%[ftmp1], %[pixels], 0x00)
40 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
44 MMI_SWC1(%[ftmp0], %[
block], 0x00)
45 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
46 MMI_SWC1(%[ftmp1], %[
block], 0x00)
47 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
50 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
51 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
55 : [line_size]
"r"((
mips_reg)line_size)
61 ptrdiff_t line_size,
int h)
68 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
69 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
70 MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
71 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
72 MMI_ULDC1(%[ftmp2], %[pixels], 0x00)
73 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
74 MMI_ULDC1(%[ftmp3], %[pixels], 0x00)
75 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
79 MMI_SDC1(%[ftmp0], %[
block], 0x00)
80 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
81 MMI_SDC1(%[ftmp1], %[
block], 0x00)
82 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
83 MMI_SDC1(%[ftmp2], %[
block], 0x00)
84 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
85 MMI_SDC1(%[ftmp3], %[
block], 0x00)
86 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
89 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
90 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
94 : [line_size]
"r"((
mips_reg)line_size)
100 ptrdiff_t line_size,
int h)
107 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
108 MMI_ULDC1(%[ftmp2], %[pixels], 0x08)
109 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
110 MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
111 MMI_ULDC1(%[ftmp3], %[pixels], 0x08)
112 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
113 MMI_ULDC1(%[ftmp4], %[pixels], 0x00)
114 MMI_ULDC1(%[ftmp6], %[pixels], 0x08)
115 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
116 MMI_ULDC1(%[ftmp5], %[pixels], 0x00)
117 MMI_ULDC1(%[ftmp7], %[pixels], 0x08)
118 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
122 MMI_SDC1(%[ftmp0], %[
block], 0x00)
123 MMI_SDC1(%[ftmp2], %[
block], 0x08)
124 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
125 MMI_SDC1(%[ftmp1], %[
block], 0x00)
126 MMI_SDC1(%[ftmp3], %[
block], 0x08)
127 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
128 MMI_SDC1(%[ftmp4], %[
block], 0x00)
129 MMI_SDC1(%[ftmp6], %[
block], 0x08)
130 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
131 MMI_SDC1(%[ftmp5], %[
block], 0x00)
132 MMI_SDC1(%[ftmp7], %[
block], 0x08)
133 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
136 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
137 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
138 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
139 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
143 : [line_size]
"r"((
mips_reg)line_size)
149 ptrdiff_t line_size,
int h)
157 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
158 MMI_ULWC1(%[ftmp0], %[pixels], 0x00)
159 MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
160 PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
161 MMI_ULWC1(%[ftmp2], %[
block], 0x00)
162 MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
166 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
167 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
168 MMI_SWC1(%[ftmp0], %[
block], 0x00)
169 MMI_SWC1(%[ftmp1], %[addr1], 0x00)
170 PTR_ADDU "%[pixels], %[addr0], %[line_size] \n\t"
171 PTR_ADDU "%[block], %[addr1], %[line_size] \n\t"
174 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
175 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
177 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
180 : [line_size]
"r"((
mips_reg)line_size)
186 ptrdiff_t line_size,
int h)
194 PTR_ADDU "%[addr2], %[line_size], %[line_size] \n\t"
196 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
197 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
198 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
199 PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
200 MMI_ULDC1(%[ftmp2], %[
block], 0x00)
201 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
202 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
203 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
204 MMI_SDC1(%[ftmp0], %[
block], 0x00)
205 MMI_SDXC1(%[ftmp1], %[
block], %[line_size], 0x00)
206 PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
207 PTR_ADDU "%[block], %[block], %[addr2] \n\t"
209 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
210 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
211 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
212 PTR_ADDU "%[addr1], %[block], %[line_size] \n\t"
213 MMI_ULDC1(%[ftmp2], %[
block], 0x00)
214 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
215 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
216 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
217 MMI_SDC1(%[ftmp0], %[
block], 0x00)
218 MMI_SDXC1(%[ftmp1], %[
block], %[line_size], 0x00)
219 PTR_ADDU "%[pixels], %[pixels], %[addr2] \n\t"
220 PTR_ADDU "%[block], %[block], %[addr2] \n\t"
224 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
225 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
228 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
229 [addr2]
"=&r"(addr[2]),
232 : [line_size]
"r"((
mips_reg)line_size)
238 ptrdiff_t line_size,
int h)
247 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
248 MMI_ULDC1(%[ftmp4], %[pixels], 0x08)
249 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
250 MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
251 MMI_ULDC1(%[ftmp5], %[pixels], 0x08)
252 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
253 MMI_ULDC1(%[ftmp2], %[
block], 0x00)
254 MMI_ULDC1(%[ftmp6], %[
block], 0x08)
255 PTR_ADDU "%[addr0], %[block], %[line_size] \n\t"
256 MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
257 MMI_ULDC1(%[ftmp7], %[addr0], 0x08)
258 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
259 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
260 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
261 "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
262 MMI_SDC1(%[ftmp0], %[
block], 0x00)
263 MMI_SDC1(%[ftmp4], %[
block], 0x08)
264 MMI_SDC1(%[ftmp1], %[addr0], 0x00)
265 MMI_SDC1(%[ftmp5], %[addr0], 0x08)
266 PTR_ADDU "%[block], %[addr0], %[line_size] \n\t"
268 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
269 MMI_ULDC1(%[ftmp4], %[pixels], 0x08)
270 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
271 MMI_ULDC1(%[ftmp1], %[pixels], 0x00)
272 MMI_ULDC1(%[ftmp5], %[pixels], 0x08)
273 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
274 MMI_ULDC1(%[ftmp2], %[
block], 0x00)
275 MMI_ULDC1(%[ftmp6], %[
block], 0x08)
276 PTR_ADDU "%[addr0], %[block], %[line_size] \n\t"
277 MMI_ULDC1(%[ftmp3], %[addr0], 0x00)
278 MMI_ULDC1(%[ftmp7], %[addr0], 0x08)
279 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
280 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
281 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
282 "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
283 MMI_SDC1(%[ftmp0], %[
block], 0x00)
284 MMI_SDC1(%[ftmp4], %[
block], 0x08)
285 MMI_SDC1(%[ftmp1], %[addr0], 0x00)
286 MMI_SDC1(%[ftmp5], %[addr0], 0x08)
287 PTR_ADDU "%[block], %[addr0], %[line_size] \n\t"
290 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
291 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
292 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
293 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
295 [addr0]
"=&r"(addr[0]),
298 : [line_size]
"r"((
mips_reg)line_size)
304 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
313 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
314 MMI_ULWC1(%[ftmp0], %[
src1], 0x00)
315 MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
316 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
317 MMI_ULWC1(%[ftmp2], %[src2], 0x00)
318 MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
319 PTR_ADDU "%[src1], %[addr0], %[src_stride1] \n\t"
320 PTR_ADDU "%[src2], %[addr1], %[src_stride2] \n\t"
324 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
325 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
326 MMI_SWC1(%[ftmp0], %[dst], 0x00)
327 PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
328 MMI_SWC1(%[ftmp1], %[dst], 0x00)
329 PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
332 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
333 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
336 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
338 [src2]
"+&r"(src2), [
h]
"+&r"(
h)
339 : [dst_stride]
"r"((
mips_reg)dst_stride),
340 [src_stride1]
"r"((
mips_reg)src_stride1),
341 [src_stride2]
"r"((
mips_reg)src_stride2)
347 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
356 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
357 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
358 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
361 MMI_ULDC1(%[ftmp0], %[
src1], 0x00)
362 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
363 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
364 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
365 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
366 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
367 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
368 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
369 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
370 MMI_SDC1(%[ftmp0], %[dst], 0x00)
371 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
372 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
373 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
375 MMI_ULDC1(%[ftmp0], %[
src1], 0x00)
376 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
377 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
378 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
379 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
380 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
381 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
382 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
383 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
384 MMI_SDC1(%[ftmp0], %[dst], 0x00)
385 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
386 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
387 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
391 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
392 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
395 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
396 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
397 [addr4]
"=&r"(addr[4]),
399 [src2]
"+&r"(src2), [
h]
"+&r"(
h)
400 : [dst_stride]
"r"((
mips_reg)dst_stride),
401 [src_stride1]
"r"((
mips_reg)src_stride1),
402 [src_stride2]
"r"((
mips_reg)src_stride2)
408 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
417 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
418 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
419 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
422 MMI_ULDC1(%[ftmp0], %[
src1], 0x00)
423 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
424 MMI_ULDC1(%[ftmp4], %[
src1], 0x08)
425 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
426 MMI_ULDC1(%[ftmp5], %[addr0], 0x08)
427 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
428 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
429 MMI_ULDC1(%[ftmp6], %[src2], 0x08)
430 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
431 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
432 MMI_ULDC1(%[ftmp7], %[addr1], 0x08)
433 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
434 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
435 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
436 "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
437 MMI_SDC1(%[ftmp0], %[dst], 0x00)
438 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
439 MMI_SDC1(%[ftmp4], %[dst], 0x08)
440 MMI_SDXC1(%[ftmp5], %[dst], %[dst_stride], 0x08)
441 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
442 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
444 MMI_ULDC1(%[ftmp0], %[
src1], 0x00)
445 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
446 MMI_ULDC1(%[ftmp4], %[
src1], 0x08)
447 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
448 MMI_ULDC1(%[ftmp5], %[addr0], 0x08)
449 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
450 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
451 MMI_ULDC1(%[ftmp6], %[src2], 0x08)
452 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
453 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
454 MMI_ULDC1(%[ftmp7], %[addr1], 0x08)
455 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
456 "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
457 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
458 "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
459 MMI_SDC1(%[ftmp0], %[dst], 0x00)
460 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
461 MMI_SDC1(%[ftmp4], %[dst], 0x08)
462 MMI_SDXC1(%[ftmp5], %[dst], %[dst_stride], 0x08)
463 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
464 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
468 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
469 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
470 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
471 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
474 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
475 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
476 [addr4]
"=&r"(addr[4]),
478 [src2]
"+&r"(src2), [
h]
"+&r"(
h)
479 : [dst_stride]
"r"((
mips_reg)dst_stride),
480 [src_stride1]
"r"((
mips_reg)src_stride1),
481 [src_stride2]
"r"((
mips_reg)src_stride2)
487 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
496 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
497 MMI_ULWC1(%[ftmp0], %[
src1], 0x00)
498 MMI_ULWC1(%[ftmp1], %[addr0], 0x00)
499 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
500 MMI_ULWC1(%[ftmp2], %[src2], 0x00)
501 MMI_ULWC1(%[ftmp3], %[addr1], 0x00)
502 PTR_ADDU "%[src1], %[addr0], %[src_stride1] \n\t"
503 PTR_ADDU "%[src2], %[addr1], %[src_stride2] \n\t"
504 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
505 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
506 PTR_ADDU "%[addr2], %[dst], %[dst_stride] \n\t"
507 MMI_ULWC1(%[ftmp4], %[dst], 0x00)
508 MMI_ULWC1(%[ftmp5], %[addr2], 0x00)
510 "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
511 "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
512 MMI_SWC1(%[ftmp0], %[dst], 0x00)
513 MMI_SWC1(%[ftmp1], %[addr2], 0x00)
514 PTR_ADDU "%[dst], %[addr2], %[dst_stride] \n\t"
517 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
518 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
519 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
521 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
522 [addr2]
"=&r"(addr[2]),
524 [src2]
"+&r"(src2), [
h]
"+&r"(
h)
525 : [dst_stride]
"r"((
mips_reg)dst_stride),
526 [src_stride1]
"r"((
mips_reg)src_stride1),
527 [src_stride2]
"r"((
mips_reg)src_stride2)
533 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
542 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
543 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
544 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
547 MMI_ULDC1(%[ftmp0], %[
src1], 0x00)
548 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
549 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
550 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
551 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
552 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
553 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
554 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
555 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
556 PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
557 MMI_ULDC1(%[ftmp4], %[dst], 0x00)
558 MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
559 "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
560 "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
561 MMI_SDC1(%[ftmp0], %[dst], 0x00)
562 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
563 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
564 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
566 MMI_ULDC1(%[ftmp0], %[
src1], 0x00)
567 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
568 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
569 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
570 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
571 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
572 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
573 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
574 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
575 PTR_ADDU "%[addr5], %[dst], %[dst_stride] \n\t"
576 MMI_ULDC1(%[ftmp4], %[dst], 0x00)
577 MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
578 "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
579 "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
580 MMI_SDC1(%[ftmp0], %[dst], 0x00)
581 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
582 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
583 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
587 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
588 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
589 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
592 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
593 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
594 [addr4]
"=&r"(addr[4]), [addr5]
"=&r"(addr[5]),
596 [src2]
"+&r"(src2), [
h]
"+&r"(
h)
597 : [dst_stride]
"r"((
mips_reg)dst_stride),
598 [src_stride1]
"r"((
mips_reg)src_stride1),
599 [src_stride2]
"r"((
mips_reg)src_stride2)
605 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
611 src_stride1, src_stride2,
h);
615 ptrdiff_t line_size,
int h)
622 ptrdiff_t line_size,
int h)
629 ptrdiff_t line_size,
int h)
636 ptrdiff_t line_size,
int h)
643 ptrdiff_t line_size,
int h)
650 ptrdiff_t line_size,
int h)
657 const uint8_t *src2,
int dst_stride,
int src_stride1,
int src_stride2,
666 "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
667 PTR_ADDU "%[addr2], %[src_stride1], %[src_stride1] \n\t"
668 PTR_ADDU "%[addr3], %[src_stride2], %[src_stride2] \n\t"
669 PTR_ADDU "%[addr4], %[dst_stride], %[dst_stride] \n\t"
672 MMI_ULDC1(%[ftmp0], %[
src1], 0x00)
673 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
674 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
675 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
676 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
677 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
678 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
679 "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
680 "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
681 "xor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
682 "xor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
683 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
684 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
685 "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
686 "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
687 MMI_SDC1(%[ftmp0], %[dst], 0x00)
688 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
689 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
690 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
692 MMI_ULDC1(%[ftmp0], %[
src1], 0x00)
693 PTR_ADDU "%[addr0], %[src1], %[src_stride1] \n\t"
694 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
695 MMI_ULDC1(%[ftmp2], %[src2], 0x00)
696 PTR_ADDU "%[addr1], %[src2], %[src_stride2] \n\t"
697 MMI_ULDC1(%[ftmp3], %[addr1], 0x00)
698 PTR_ADDU "%[src1], %[src1], %[addr2] \n\t"
699 "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
700 "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
701 "xor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
702 "xor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
703 "pavgb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
704 "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
705 "xor %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
706 "xor %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
707 MMI_SDC1(%[ftmp0], %[dst], 0x00)
708 MMI_SDXC1(%[ftmp1], %[dst], %[dst_stride], 0x00)
709 PTR_ADDU "%[src2], %[src2], %[addr3] \n\t"
710 PTR_ADDU "%[dst], %[dst], %[addr4] \n\t"
714 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
715 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
716 [ftmp4]
"=&f"(ftmp[4]),
719 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
720 [addr2]
"=&r"(addr[2]), [addr3]
"=&r"(addr[3]),
721 [addr4]
"=&r"(addr[4]),
723 [src2]
"+&r"(src2), [
h]
"+&r"(
h)
724 : [dst_stride]
"r"((
mips_reg)dst_stride),
725 [src_stride1]
"r"((
mips_reg)src_stride1),
726 [src_stride2]
"r"((
mips_reg)src_stride2)
732 ptrdiff_t line_size,
int h)
735 line_size, line_size,
h);
739 ptrdiff_t line_size,
int h)
746 ptrdiff_t line_size,
int h)
749 line_size, line_size,
h);
753 ptrdiff_t line_size,
int h)
756 line_size, line_size,
h);
760 ptrdiff_t line_size,
int h)
763 line_size, line_size,
h);
767 ptrdiff_t line_size,
int h)
770 line_size, line_size,
h);
774 ptrdiff_t line_size,
int h)
777 line_size, line_size,
h);
781 ptrdiff_t line_size,
int h)
788 ptrdiff_t line_size,
int h)
791 line_size, line_size, line_size,
h);
795 ptrdiff_t line_size,
int h)
802 ptrdiff_t line_size,
int h)
807 const uint32_t
b =
AV_RN32(pixels + 1);
808 uint32_t l0 = (
a & 0x03030303UL) +
811 uint32_t h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
812 ((
b & 0xFCFCFCFCUL) >> 2);
816 for (
i = 0;
i <
h;
i += 2) {
819 l1 = (
a & 0x03030303UL) +
821 h1 = ((
a & 0xFCFCFCFCUL) >> 2) +
822 ((
b & 0xFCFCFCFCUL) >> 2);
823 *((uint32_t *)
block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
828 l0 = (
a & 0x03030303UL) +
831 h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
832 ((
b & 0xFCFCFCFCUL) >> 2);
833 *((uint32_t *)
block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
840 ptrdiff_t line_size,
int h)
849 "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
850 "dli %[addr0], 0x0f \n\t"
851 "pcmpeqw %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
852 "dmtc1 %[addr0], %[ftmp8] \n\t"
853 "dli %[addr0], 0x01 \n\t"
854 "psrlh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
855 "dmtc1 %[addr0], %[ftmp8] \n\t"
856 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
858 "dli %[addr0], 0x02 \n\t"
859 "dmtc1 %[addr0], %[ftmp9] \n\t"
860 MMI_ULDC1(%[ftmp0], %[pixels], 0x00)
861 MMI_ULDC1(%[ftmp4], %[pixels], 0x01)
862 "mov.d %[ftmp1], %[ftmp0] \n\t"
863 "mov.d %[ftmp5], %[ftmp4] \n\t"
864 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
865 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
866 "punpckhbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
867 "punpckhbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
868 "paddush %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
869 "paddush %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
870 "xor %[addr0], %[addr0], %[addr0] \n\t"
871 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
875 PTR_ADDU "%[addr1], %[pixels], %[addr0] \n\t"
876 MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
877 MMI_ULDC1(%[ftmp2], %[addr1], 0x01)
878 "mov.d %[ftmp1], %[ftmp0] \n\t"
879 "mov.d %[ftmp3], %[ftmp2] \n\t"
880 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
881 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
882 "punpckhbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
883 "punpckhbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
884 "paddush %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
885 "paddush %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
886 "paddush %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
887 "paddush %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
888 "paddush %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
889 "paddush %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
890 "psrlh %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
891 "psrlh %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
892 "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
893 MMI_SDXC1(%[ftmp4], %[
block], %[addr0], 0x00)
894 PTR_ADDU "%[addr0], %[addr0], %[line_size] \n\t"
895 PTR_ADDU "%[addr1], %[pixels], %[addr0] \n\t"
896 MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
897 MMI_ULDC1(%[ftmp4], %[addr1], 0x01)
898 "mov.d %[ftmp3], %[ftmp2] \n\t"
899 "mov.d %[ftmp5], %[ftmp4] \n\t"
900 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
901 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
902 "punpckhbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
903 "punpckhbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
904 "paddush %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
905 "paddush %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
906 "paddush %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
907 "paddush %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
908 "paddush %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
909 "paddush %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
910 "psrlh %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
911 "psrlh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
912 "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
913 MMI_SDXC1(%[ftmp0], %[
block], %[addr0], 0x00)
914 PTR_ADDU "%[addr0], %[addr0], %[line_size] \n\t"
917 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
918 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
919 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
920 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
921 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
924 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
925 [
h]
"+&r"(
h), [pixels]
"+&r"(pixels)
933 for (j = 0; j < 2; j++) {
936 const uint32_t
b =
AV_RN32(pixels + 1);
937 uint32_t l0 = (
a & 0x03030303UL) +
940 uint32_t h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
941 ((
b & 0xFCFCFCFCUL) >> 2);
945 for (
i = 0;
i <
h;
i += 2) {
948 l1 = (
a & 0x03030303UL) +
950 h1 = ((
a & 0xFCFCFCFCUL) >> 2) +
951 ((
b & 0xFCFCFCFCUL) >> 2);
952 *((uint32_t *)
block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
957 l0 = (
a & 0x03030303UL) +
960 h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
961 ((
b & 0xFCFCFCFCUL) >> 2);
962 *((uint32_t *)
block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
966 pixels += 4 - line_size * (
h + 1);
967 block += 4 - line_size *
h;
973 ptrdiff_t line_size,
int h)
980 ptrdiff_t line_size,
int h)
985 const uint32_t
b =
AV_RN32(pixels + 1);
986 uint32_t l0 = (
a & 0x03030303UL) +
989 uint32_t h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
990 ((
b & 0xFCFCFCFCUL) >> 2);
994 for (
i = 0;
i <
h;
i += 2) {
997 l1 = (
a & 0x03030303UL) +
999 h1 = ((
a & 0xFCFCFCFCUL) >> 2) +
1000 ((
b & 0xFCFCFCFCUL) >> 2);
1001 *((uint32_t *)
block) =
rnd_avg32(*((uint32_t *)
block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1002 pixels += line_size;
1006 l0 = (
a & 0x03030303UL) +
1007 (
b & 0x03030303UL) +
1009 h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
1010 ((
b & 0xFCFCFCFCUL) >> 2);
1011 *((uint32_t *)
block) =
rnd_avg32(*((uint32_t *)
block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1012 pixels += line_size;
1018 ptrdiff_t line_size,
int h)
1023 for (j = 0; j < 2; j++) {
1025 const uint32_t
a =
AV_RN32(pixels);
1026 const uint32_t
b =
AV_RN32(pixels + 1);
1027 uint32_t l0 = (
a & 0x03030303UL) +
1028 (
b & 0x03030303UL) +
1030 uint32_t h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
1031 ((
b & 0xFCFCFCFCUL) >> 2);
1034 pixels += line_size;
1035 for (
i = 0;
i <
h;
i += 2) {
1038 l1 = (
a & 0x03030303UL) +
1040 h1 = ((
a & 0xFCFCFCFCUL) >> 2) +
1041 ((
b & 0xFCFCFCFCUL) >> 2);
1042 *((uint32_t *)
block) =
rnd_avg32(*((uint32_t *)
block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1043 pixels += line_size;
1047 l0 = (
a & 0x03030303UL) +
1048 (
b & 0x03030303UL) +
1050 h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
1051 ((
b & 0xFCFCFCFCUL) >> 2);
1052 *((uint32_t *)
block) =
rnd_avg32(*((uint32_t *)
block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1053 pixels += line_size;
1056 pixels += 4 - line_size * (
h + 1);
1057 block += 4 - line_size *
h;
1062 ptrdiff_t line_size,
int h)
1069 ptrdiff_t line_size,
int h)
1074 for (j = 0; j < 2; j++) {
1076 const uint32_t
a =
AV_RN32(pixels);
1077 const uint32_t
b =
AV_RN32(pixels + 1);
1078 uint32_t l0 = (
a & 0x03030303UL) +
1079 (
b & 0x03030303UL) +
1081 uint32_t h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
1082 ((
b & 0xFCFCFCFCUL) >> 2);
1085 pixels += line_size;
1086 for (
i = 0;
i <
h;
i += 2) {
1089 l1 = (
a & 0x03030303UL) +
1091 h1 = ((
a & 0xFCFCFCFCUL) >> 2) +
1092 ((
b & 0xFCFCFCFCUL) >> 2);
1093 *((uint32_t *)
block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1094 pixels += line_size;
1098 l0 = (
a & 0x03030303UL) +
1099 (
b & 0x03030303UL) +
1101 h0 = ((
a & 0xFCFCFCFCUL) >> 2) +
1102 ((
b & 0xFCFCFCFCUL) >> 2);
1103 *((uint32_t *)
block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
1104 pixels += line_size;
1107 pixels += 4 - line_size * (
h + 1);
1108 block += 4 - line_size *
h;
1113 ptrdiff_t line_size,
int h)