29 #define DECLARE_DOUBLE_1 double db_1
30 #define DECLARE_DOUBLE_2 double db_2
31 #define DECLARE_UINT32_T uint32_t it_1
32 #define RESTRICT_ASM_DOUBLE_1 [db_1]"=&f"(db_1)
33 #define RESTRICT_ASM_DOUBLE_2 [db_2]"=&f"(db_2)
34 #define RESTRICT_ASM_UINT32_T [it_1]"=&r"(it_1)
36 #define MMI_PCMPGTUB(dst, src1, src2) \
37 "pcmpeqb %[db_1], "#src1", "#src2" \n\t" \
38 "pmaxub %[db_2], "#src1", "#src2" \n\t" \
39 "pcmpeqb %[db_2], %[db_2], "#src1" \n\t" \
40 "xor "#dst", %[db_2], %[db_1] \n\t"
42 #define MMI_BTOH(dst_l, dst_r, src) \
43 "xor %[db_1], %[db_1], %[db_1] \n\t" \
44 "pcmpgtb %[db_2], %[db_1], "#src" \n\t" \
45 "punpcklbh "#dst_r", "#src", %[db_2] \n\t" \
46 "punpckhbh "#dst_l", "#src", %[db_2] \n\t"
48 #define MMI_VP8_LOOP_FILTER \
50 "dmtc1 %[thresh], %[ftmp3] \n\t" \
51 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
52 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
53 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
54 "pasubub %[ftmp0], %[p1], %[p0] \n\t" \
55 "pasubub %[ftmp1], %[q1], %[q0] \n\t" \
56 "pmaxub %[ftmp0], %[ftmp0], %[ftmp1] \n\t" \
57 MMI_PCMPGTUB(%[hev], %[ftmp0], %[ftmp3]) \
59 "pasubub %[ftmp1], %[p0], %[q0] \n\t" \
60 "paddusb %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \
61 "pasubub %[ftmp2], %[p1], %[q1] \n\t" \
62 "li %[tmp0], 0x09 \n\t" \
63 "dmtc1 %[tmp0], %[ftmp3] \n\t" \
64 PSRLB_MMI(%[ftmp2], %[ftmp3], %[ftmp4], %[ftmp5], %[ftmp2]) \
65 "paddusb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
66 "dmtc1 %[e], %[ftmp3] \n\t" \
67 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
68 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
69 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
70 MMI_PCMPGTUB(%[mask], %[ftmp1], %[ftmp3]) \
71 "pmaxub %[mask], %[mask], %[ftmp0] \n\t" \
72 "pasubub %[ftmp1], %[p3], %[p2] \n\t" \
73 "pasubub %[ftmp2], %[p2], %[p1] \n\t" \
74 "pmaxub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
75 "pmaxub %[mask], %[mask], %[ftmp1] \n\t" \
76 "pasubub %[ftmp1], %[q3], %[q2] \n\t" \
77 "pasubub %[ftmp2], %[q2], %[q1] \n\t" \
78 "pmaxub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
79 "pmaxub %[mask], %[mask], %[ftmp1] \n\t" \
80 "dmtc1 %[i], %[ftmp3] \n\t" \
81 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
82 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
83 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
84 MMI_PCMPGTUB(%[mask], %[mask], %[ftmp3]) \
85 "pcmpeqw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
86 "xor %[mask], %[mask], %[ftmp3] \n\t" \
88 "li %[tmp0], 0x80808080 \n\t" \
89 "dmtc1 %[tmp0], %[ftmp7] \n\t" \
90 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t" \
91 "xor %[p2], %[p2], %[ftmp7] \n\t" \
92 "xor %[p1], %[p1], %[ftmp7] \n\t" \
93 "xor %[p0], %[p0], %[ftmp7] \n\t" \
94 "xor %[q0], %[q0], %[ftmp7] \n\t" \
95 "xor %[q1], %[q1], %[ftmp7] \n\t" \
96 "xor %[q2], %[q2], %[ftmp7] \n\t" \
97 "psubsb %[ftmp4], %[p1], %[q1] \n\t" \
98 "psubb %[ftmp5], %[q0], %[p0] \n\t" \
99 MMI_BTOH(%[ftmp1], %[ftmp0], %[ftmp5]) \
100 MMI_BTOH(%[ftmp3], %[ftmp2], %[ftmp4]) \
102 "paddh %[ftmp5], %[ftmp0], %[ftmp0] \n\t" \
103 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t" \
104 "paddh %[ftmp0], %[ftmp2], %[ftmp0] \n\t" \
106 "paddh %[ftmp5], %[ftmp1], %[ftmp1] \n\t" \
107 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" \
108 "paddh %[ftmp1], %[ftmp3], %[ftmp1] \n\t" \
110 "packsshb %[ftmp1], %[ftmp0], %[ftmp1] \n\t" \
111 "and %[ftmp1], %[ftmp1], %[mask] \n\t" \
112 "and %[ftmp2], %[ftmp1], %[hev] \n\t" \
113 "li %[tmp0], 0x04040404 \n\t" \
114 "dmtc1 %[tmp0], %[ftmp0] \n\t" \
115 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
116 "paddsb %[ftmp3], %[ftmp2], %[ftmp0] \n\t" \
117 "li %[tmp0], 0x0B \n\t" \
118 "dmtc1 %[tmp0], %[ftmp4] \n\t" \
119 PSRAB_MMI(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6], %[ftmp3]) \
120 "li %[tmp0], 0x03030303 \n\t" \
121 "dmtc1 %[tmp0], %[ftmp0] \n\t" \
122 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
123 "paddsb %[ftmp4], %[ftmp2], %[ftmp0] \n\t" \
124 "li %[tmp0], 0x0B \n\t" \
125 "dmtc1 %[tmp0], %[ftmp2] \n\t" \
126 PSRAB_MMI(%[ftmp4], %[ftmp2], %[ftmp5], %[ftmp6], %[ftmp4]) \
127 "psubsb %[q0], %[q0], %[ftmp3] \n\t" \
128 "paddsb %[p0], %[p0], %[ftmp4] \n\t" \
130 "pcmpeqw %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
131 "xor %[hev], %[hev], %[ftmp0] \n\t" \
132 "and %[ftmp1], %[ftmp1], %[hev] \n\t" \
133 MMI_BTOH(%[ftmp5], %[ftmp6], %[ftmp1]) \
134 "li %[tmp0], 0x07 \n\t" \
135 "dmtc1 %[tmp0], %[ftmp2] \n\t" \
136 "li %[tmp0], 0x001b001b \n\t" \
137 "dmtc1 %[tmp0], %[ftmp1] \n\t" \
138 "punpcklwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \
139 "li %[tmp0], 0x003f003f \n\t" \
140 "dmtc1 %[tmp0], %[ftmp0] \n\t" \
141 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
143 "pmullh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \
144 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
145 "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
147 "pmullh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \
148 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \
149 "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \
151 "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \
152 "psubsb %[q0], %[q0], %[ftmp4] \n\t" \
153 "xor %[q0], %[q0], %[ftmp7] \n\t" \
154 "paddsb %[p0], %[p0], %[ftmp4] \n\t" \
155 "xor %[p0], %[p0], %[ftmp7] \n\t" \
156 "li %[tmp0], 0x00120012 \n\t" \
157 "dmtc1 %[tmp0], %[ftmp1] \n\t" \
158 "punpcklwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \
160 "pmullh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \
161 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
162 "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
164 "pmullh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \
165 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \
166 "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \
168 "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \
169 "psubsb %[q1], %[q1], %[ftmp4] \n\t" \
170 "xor %[q1], %[q1], %[ftmp7] \n\t" \
171 "paddsb %[p1], %[p1], %[ftmp4] \n\t" \
172 "xor %[p1], %[p1], %[ftmp7] \n\t" \
173 "li %[tmp0], 0x03 \n\t" \
174 "dmtc1 %[tmp0], %[ftmp1] \n\t" \
176 "psllh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \
177 "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t" \
178 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
179 "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
181 "psllh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \
182 "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \
183 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \
184 "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \
186 "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \
187 "psubsb %[q2], %[q2], %[ftmp4] \n\t" \
188 "xor %[q2], %[q2], %[ftmp7] \n\t" \
189 "paddsb %[p2], %[p2], %[ftmp4] \n\t" \
190 "xor %[p2], %[p2], %[ftmp7] \n\t"
192 #define PUT_VP8_EPEL4_H6_MMI(src, dst) \
193 MMI_ULWC1(%[ftmp1], src, 0x00) \
194 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
195 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
197 MMI_ULWC1(%[ftmp1], src, -0x01) \
198 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
199 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
200 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
202 MMI_ULWC1(%[ftmp1], src, -0x02) \
203 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
204 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
205 "paddsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
207 MMI_ULWC1(%[ftmp1], src, 0x01) \
208 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
209 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
211 MMI_ULWC1(%[ftmp1], src, 0x02) \
212 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
213 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
214 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
216 MMI_ULWC1(%[ftmp1], src, 0x03) \
217 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
218 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
219 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
221 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
222 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
223 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
224 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
226 MMI_SWC1(%[ftmp1], dst, 0x00)
229 #define PUT_VP8_EPEL4_H4_MMI(src, dst) \
230 MMI_ULWC1(%[ftmp1], src, 0x00) \
231 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
232 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
234 MMI_ULWC1(%[ftmp1], src, -0x01) \
235 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
236 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
237 "psubsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
239 MMI_ULWC1(%[ftmp1], src, 0x01) \
240 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
241 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
243 MMI_ULWC1(%[ftmp1], src, 0x02) \
244 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
245 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
246 "psubh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
248 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
250 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
251 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
253 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
254 MMI_SWC1(%[ftmp1], dst, 0x00)
257 #define PUT_VP8_EPEL4_V6_MMI(src, src1, dst, srcstride) \
258 MMI_ULWC1(%[ftmp1], src, 0x00) \
259 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
260 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
262 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
263 MMI_ULWC1(%[ftmp1], src1, 0x00) \
264 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
265 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
266 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
268 PTR_SUBU ""#src1", "#src1", "#srcstride" \n\t" \
269 MMI_ULWC1(%[ftmp1], src1, 0x00) \
270 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
271 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
272 "paddsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
274 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
275 MMI_ULWC1(%[ftmp1], src1, 0x00) \
276 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
277 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
279 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
280 MMI_ULWC1(%[ftmp1], src1, 0x00) \
281 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
282 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
283 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
285 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
286 MMI_ULWC1(%[ftmp1], src1, 0x00) \
287 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
288 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
289 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
291 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
293 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
294 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
295 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
297 MMI_SWC1(%[ftmp1], dst, 0x00)
300 #define PUT_VP8_EPEL4_V4_MMI(src, src1, dst, srcstride) \
301 MMI_ULWC1(%[ftmp1], src, 0x00) \
302 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
303 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
305 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
306 MMI_ULWC1(%[ftmp1], src1, 0x00) \
307 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
308 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
309 "psubsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
311 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
312 MMI_ULWC1(%[ftmp1], src1, 0x00) \
313 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
314 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
316 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
317 MMI_ULWC1(%[ftmp1], src1, 0x00) \
318 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
319 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
320 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
322 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
324 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
325 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
326 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
328 MMI_SWC1(%[ftmp1], dst, 0x00)
331 #define PUT_VP8_EPEL8_H6_MMI(src, dst) \
332 MMI_ULDC1(%[ftmp1], src, 0x00) \
333 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
334 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
335 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
336 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
338 MMI_ULDC1(%[ftmp1], src, -0x01) \
339 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
340 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
341 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
342 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
343 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
344 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
346 MMI_ULDC1(%[ftmp1], src, -0x02) \
347 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
348 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
349 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
350 "pmullh %[ftmp3], %[ftmp3], %[filter0] \n\t" \
351 "paddsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
352 "paddsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
354 MMI_ULDC1(%[ftmp1], src, 0x01) \
355 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
356 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
357 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
358 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
360 MMI_ULDC1(%[ftmp1], src, 0x02) \
361 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
362 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
363 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
364 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
365 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
366 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
368 MMI_ULDC1(%[ftmp1], src, 0x03) \
369 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
370 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
371 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
372 "pmullh %[ftmp3], %[ftmp3], %[filter5] \n\t" \
373 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
374 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
376 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
377 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
379 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
380 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
381 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
382 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
383 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
385 MMI_SDC1(%[ftmp1], dst, 0x00)
388 #define PUT_VP8_EPEL8_H4_MMI(src, dst) \
389 MMI_ULDC1(%[ftmp1], src, 0x00) \
390 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
391 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
392 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
393 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
395 MMI_ULDC1(%[ftmp1], src, -0x01) \
396 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
397 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
398 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
399 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
400 "psubsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
401 "psubsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
403 MMI_ULDC1(%[ftmp1], src, 0x01) \
404 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
405 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
406 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
407 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
409 MMI_ULDC1(%[ftmp1], src, 0x02) \
410 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
411 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
412 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
413 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
414 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
415 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
417 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
418 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
420 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
421 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
422 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
423 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
425 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
426 MMI_SDC1(%[ftmp1], dst, 0x00)
429 #define PUT_VP8_EPEL8_V6_MMI(src, src1, dst, srcstride) \
430 MMI_ULDC1(%[ftmp1], src, 0x00) \
431 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
432 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
433 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
434 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
436 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
437 MMI_ULDC1(%[ftmp1], src1, 0x00) \
438 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
439 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
440 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
441 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
442 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
443 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
445 PTR_SUBU ""#src1", "#src1", "#srcstride" \n\t" \
446 MMI_ULDC1(%[ftmp1], src1, 0x00) \
447 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
448 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
449 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
450 "pmullh %[ftmp3], %[ftmp3], %[filter0] \n\t" \
451 "paddsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
452 "paddsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
454 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
455 MMI_ULDC1(%[ftmp1], src1, 0x00) \
456 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
457 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
458 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
459 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
461 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
462 MMI_ULDC1(%[ftmp1], src1, 0x00) \
463 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
464 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
465 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
466 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
467 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
468 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
470 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
471 MMI_ULDC1(%[ftmp1], src1, 0x00) \
472 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
473 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
474 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
475 "pmullh %[ftmp3], %[ftmp3], %[filter5] \n\t" \
476 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
477 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
479 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
480 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
482 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
483 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
484 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
485 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
486 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
488 MMI_SDC1(%[ftmp1], dst, 0x00)
491 #define PUT_VP8_EPEL8_V4_MMI(src, src1, dst, srcstride) \
492 MMI_ULDC1(%[ftmp1], src, 0x00) \
493 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
494 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
495 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
496 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
498 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
499 MMI_ULDC1(%[ftmp1], src1, 0x00) \
500 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
501 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
502 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
503 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
504 "psubsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
505 "psubsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
507 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
508 MMI_ULDC1(%[ftmp1], src1, 0x00) \
509 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
510 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
511 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
512 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
514 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
515 MMI_ULDC1(%[ftmp1], src1, 0x00) \
516 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
517 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
518 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
519 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
520 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
521 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
523 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
524 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
526 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
527 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
528 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
529 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
530 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
532 MMI_SDC1(%[ftmp1], dst, 0x00)
535 #define PUT_VP8_BILINEAR8_H_MMI(src, dst) \
536 MMI_ULDC1(%[ftmp1], src, 0x00) \
537 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
538 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
539 "pmullh %[ftmp5], %[ftmp2], %[a] \n\t" \
540 "pmullh %[ftmp6], %[ftmp3], %[a] \n\t" \
542 MMI_ULDC1(%[ftmp1], src, 0x01) \
543 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
544 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
545 "pmullh %[ftmp2], %[ftmp2], %[b] \n\t" \
546 "pmullh %[ftmp3], %[ftmp3], %[b] \n\t" \
547 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
548 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
550 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_4] \n\t" \
551 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_4] \n\t" \
552 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
553 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
555 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
556 MMI_SDC1(%[ftmp1], dst, 0x00)
559 #define PUT_VP8_BILINEAR4_H_MMI(src, dst) \
560 MMI_ULWC1(%[ftmp1], src, 0x00) \
561 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
562 "pmullh %[ftmp3], %[ftmp2], %[a] \n\t" \
564 MMI_ULWC1(%[ftmp1], src, 0x01) \
565 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
566 "pmullh %[ftmp2], %[ftmp2], %[b] \n\t" \
567 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
569 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t" \
570 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
572 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
573 MMI_SWC1(%[ftmp1], dst, 0x00)
576 #define PUT_VP8_BILINEAR8_V_MMI(src, src1, dst, sstride) \
577 MMI_ULDC1(%[ftmp1], src, 0x00) \
578 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
579 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
580 "pmullh %[ftmp5], %[ftmp2], %[c] \n\t" \
581 "pmullh %[ftmp6], %[ftmp3], %[c] \n\t" \
583 PTR_ADDU ""#src1", "#src", "#sstride" \n\t" \
584 MMI_ULDC1(%[ftmp1], src1, 0x00) \
585 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
586 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
587 "pmullh %[ftmp2], %[ftmp2], %[d] \n\t" \
588 "pmullh %[ftmp3], %[ftmp3], %[d] \n\t" \
589 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
590 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
592 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_4] \n\t" \
593 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_4] \n\t" \
594 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
595 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
597 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
598 MMI_SDC1(%[ftmp1], dst, 0x00)
601 #define PUT_VP8_BILINEAR4_V_MMI(src, src1, dst, sstride) \
602 MMI_ULWC1(%[ftmp1], src, 0x00) \
603 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
604 "pmullh %[ftmp3], %[ftmp2], %[c] \n\t" \
606 PTR_ADDU ""#src1", "#src", "#sstride" \n\t" \
607 MMI_ULWC1(%[ftmp1], src1, 0x00) \
608 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
609 "pmullh %[ftmp2], %[ftmp2], %[d] \n\t" \
610 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
612 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t" \
613 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
615 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
616 MMI_SWC1(%[ftmp1], dst, 0x00)
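The BILINEAR macros implement the VP8 bilinear sub-pixel predictor: the H variants blend each pixel with its right neighbour using the weights a = 8 - mx and b = mx, the V variants blend vertically with c = 8 - my and d = my, and both round with ff_pw_4 before the shift by 3 held in ftmp4. The hv case chains a horizontal pass into a temporary buffer and a vertical pass over it, just as the plain-C fallbacks later in the file do. A compact scalar sketch of the 8-wide hv case follows; the function and variable names are illustrative, not the file's.

#include <stddef.h>
#include <stdint.h>

/* Scalar sketch of the 8-wide bilinear hv predictor (illustration only). */
static void put_bilinear8_hv_sketch(uint8_t *dst, ptrdiff_t dstride,
                                    const uint8_t *src, ptrdiff_t sstride,
                                    int h, int mx, int my)
{
    uint8_t tmp[8 * (8 + 1)];          /* h + 1 rows from the horizontal pass */
    int a = 8 - mx, b = mx;
    int c = 8 - my, d = my;

    for (int y = 0; y < h + 1; y++)    /* horizontal pass, one extra row */
        for (int x = 0; x < 8; x++)
            tmp[y * 8 + x] = (a * src[y * sstride + x] +
                              b * src[y * sstride + x + 1] + 4) >> 3;

    for (int y = 0; y < h; y++)        /* vertical pass over the temp rows */
        for (int x = 0; x < 8; x++)
            dst[y * dstride + x] = (c * tmp[y * 8 + x] +
                                    d * tmp[y * 8 + x + 8] + 4) >> 3;
}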
620 {0x0000000000000000, 0x0006000600060006, 0x007b007b007b007b,
621 0x000c000c000c000c, 0x0001000100010001, 0x0000000000000000},
623 {0x0002000200020002, 0x000b000b000b000b, 0x006c006c006c006c,
624 0x0024002400240024, 0x0008000800080008, 0x0001000100010001},
626 {0x0000000000000000, 0x0009000900090009, 0x005d005d005d005d,
627 0x0032003200320032, 0x0006000600060006, 0x0000000000000000},
629 {0x0003000300030003, 0x0010001000100010, 0x004d004d004d004d,
630 0x004d004d004d004d, 0x0010001000100010, 0x0003000300030003},
632 {0x0000000000000000, 0x0006000600060006, 0x0032003200320032,
633 0x005d005d005d005d, 0x0009000900090009, 0x0000000000000000},
635 {0x0001000100010001, 0x0008000800080008, 0x0024002400240024,
636 0x006c006c006c006c, 0x000b000b000b000b, 0x0002000200020002},
638 {0x0000000000000000, 0x0001000100010001, 0x000c000c000c000c,
639 0x007b007b007b007b, 0x0006000600060006, 0x0000000000000000}
643 #define FILTER_6TAP(src, F, stride) \
644 cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \
645 F[0] * src[x - 2 * stride] + F[3] * src[x + 1 * stride] - \
646 F[4] * src[x + 2 * stride] + F[5] * src[x + 3 * stride] + 64) >> 7]
648 #define FILTER_4TAP(src, F, stride) \
649 cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \
650 F[3] * src[x + 1 * stride] - F[4] * src[x + 2 * stride] + 64) >> 7]
653 { 0, 6, 123, 12, 1, 0 },
654 { 2, 11, 108, 36, 8, 1 },
655 { 0, 9, 93, 50, 6, 0 },
656 { 3, 16, 77, 77, 16, 3 },
657 { 0, 6, 50, 93, 9, 0 },
658 { 1, 8, 36, 108, 11, 2 },
659 { 0, 1, 12, 123, 6, 0 },
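The 64-bit constants above hold the same six-tap coefficients as this decimal subpel_filters table, with each 16-bit tap replicated four times so pmullh can multiply four pixels at once (0x007b = 123, 0x006c = 108, and so on). FILTER_6TAP and FILTER_4TAP then form the usual VP8 prediction: a signed weighted sum, rounded by 64, shifted right by 7 and clamped through the cm[] crop table. A standalone scalar sketch of one six-tap output pixel follows; the clamp helper stands in for cm[] and is not part of the file.

#include <stddef.h>
#include <stdint.h>

static uint8_t clamp_u8_sketch(int v)     /* stand-in for the cm[] crop table */
{
    return v < 0 ? 0 : v > 255 ? 255 : v;
}

/* One filtered pixel: F is one row of subpel_filters; stride is 1 for
 * horizontal filtering or the line stride for vertical filtering. */
static uint8_t filter6_sketch(const uint8_t *src, const uint8_t F[6],
                              ptrdiff_t stride)
{
    int sum = F[2] * src[0 * stride] - F[1] * src[-1 * stride] +
              F[0] * src[-2 * stride] + F[3] * src[ 1 * stride] -
              F[4] * src[ 2 * stride] + F[5] * src[ 3 * stride];
    return clamp_u8_sketch((sum + 64) >> 7);
}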
662 #define MUL_20091(a) ((((a) * 20091) >> 16) + (a))
663 #define MUL_35468(a) (((a) * 35468) >> 16)
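MUL_20091 and MUL_35468 are the fixed-point multipliers of the VP8 inverse DCT: 20091/65536 is approximately sqrt(2)*cos(pi/8) - 1 (the macro adds a back, so MUL_20091(a) is roughly 1.3066*a) and 35468/65536 is approximately sqrt(2)*sin(pi/8), about 0.5412. For example, MUL_20091(100) = ((100 * 20091) >> 16) + 100 = 130 and MUL_35468(100) = 54. They drive the butterfly of the C fallback further down (source lines 1282-1283); a sketch of one 1-D butterfly is shown below, relying on the two macros above, with illustrative names.

#include <stdint.h>

/* One column butterfly of the VP8 inverse transform (sketch, i in 0..3). */
static void vp8_idct_butterfly_sketch(const int16_t v[16], int i,
                                      int *o0, int *o1, int *o2, int *o3)
{
    int t0 = v[0 + i] + v[8 + i];
    int t1 = v[0 + i] - v[8 + i];
    int t2 = MUL_35468(v[4 + i]) - MUL_20091(v[12 + i]);
    int t3 = MUL_20091(v[4 + i]) + MUL_35468(v[12 + i]);

    *o0 = t0 + t3;
    *o1 = t1 + t2;
    *o2 = t1 - t2;
    *o3 = t0 - t3;
}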
666 #define clip_int8(n) (cm[(n) + 0x80] - 0x80)
683 f1 = FFMIN(a + 4, 127) >> 3;
684 f2 = FFMIN(a + 3, 127) >> 3;
707 f1 = FFMIN(a + 4, 127) >> 3;
708 f2 = FFMIN(a + 3, 127) >> 3;
755 a0 = (27 * w + 63) >> 7;
756 a1 = (18 * w + 63) >> 7;
757 a2 = (9 * w + 63) >> 7;
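The f1/f2 lines above belong to the scalar common-filter helpers and the a0/a1/a2 lines to the macroblock-edge filter; they use the same 27/18/9 weights with +63 rounding and >>7 that MMI_VP8_LOOP_FILTER loads as 0x001b, 0x0012 and (w << 3) + w. A hedged sketch of the common filter step on one pixel pair follows; it restates the usual FFmpeg formulation, uses av_clip_int8/av_clip_uint8 in place of the file's cm[]-based clip_int8, and the function name is not from this file.

#include "libavutil/common.h"

/* Sketch of the VP8 common loop-filter step (illustrative only). */
static void vp8_filter_common_sketch(uint8_t *p1, uint8_t *p0,
                                     uint8_t *q0, uint8_t *q1, int is4tap)
{
    int a = 3 * (*q0 - *p0);
    int f1, f2;

    if (is4tap)
        a += av_clip_int8(*p1 - *q1);
    a = av_clip_int8(a);

    f1 = FFMIN(a + 4, 127) >> 3;
    f2 = FFMIN(a + 3, 127) >> 3;

    *q0 = av_clip_uint8(*q0 - f1);
    *p0 = av_clip_uint8(*p0 + f2);

    if (!is4tap) {                    /* spread half of f1 to the outer pair */
        int f = (f1 + 1) >> 1;
        *q1 = av_clip_uint8(*q1 - f);
        *p1 = av_clip_uint8(*p1 + f);
    }
}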
786 ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
795 "gsldlc1 %[q0], 0x07(%[dst]) \n\t"
796 "gsldrc1 %[q0], 0x00(%[dst]) \n\t"
797 PTR_SUBU "%[tmp0], %[dst], %[stride] \n\t"
798 "gsldlc1 %[p0], 0x07(%[tmp0]) \n\t"
799 "gsldrc1 %[p0], 0x00(%[tmp0]) \n\t"
800 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
801 "gsldlc1 %[p1], 0x07(%[tmp0]) \n\t"
802 "gsldrc1 %[p1], 0x00(%[tmp0]) \n\t"
803 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
804 "gsldlc1 %[p2], 0x07(%[tmp0]) \n\t"
805 "gsldrc1 %[p2], 0x00(%[tmp0]) \n\t"
806 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
807 "gsldlc1 %[p3], 0x07(%[tmp0]) \n\t"
808 "gsldrc1 %[p3], 0x00(%[tmp0]) \n\t"
809 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t"
810 "gsldlc1 %[q1], 0x07(%[tmp0]) \n\t"
811 "gsldrc1 %[q1], 0x00(%[tmp0]) \n\t"
812 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
813 "gsldlc1 %[q2], 0x07(%[tmp0]) \n\t"
814 "gsldrc1 %[q2], 0x00(%[tmp0]) \n\t"
815 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
816 "gsldlc1 %[q3], 0x07(%[tmp0]) \n\t"
817 "gsldrc1 %[q3], 0x00(%[tmp0]) \n\t"
820 "gssdlc1 %[q0], 0x07(%[dst]) \n\t"
821 "gssdrc1 %[q0], 0x00(%[dst]) \n\t"
822 PTR_SUBU "%[tmp0], %[dst], %[stride] \n\t"
823 "gssdlc1 %[p0], 0x07(%[tmp0]) \n\t"
824 "gssdrc1 %[p0], 0x00(%[tmp0]) \n\t"
825 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
826 "gssdlc1 %[p1], 0x07(%[tmp0]) \n\t"
827 "gssdrc1 %[p1], 0x00(%[tmp0]) \n\t"
828 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
829 "gssdlc1 %[p2], 0x07(%[tmp0]) \n\t"
830 "gssdrc1 %[p2], 0x00(%[tmp0]) \n\t"
831 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t"
832 "gssdlc1 %[q1], 0x07(%[tmp0]) \n\t"
833 "gssdrc1 %[q1], 0x00(%[tmp0]) \n\t"
834 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
835 "gssdlc1 %[q2], 0x07(%[tmp0]) \n\t"
836 "gssdrc1 %[q2], 0x00(%[tmp0]) \n\t"
837 : [p3]"=&f"(ftmp[0]), [p2]"=&f"(ftmp[1]),
838 [p1]"=&f"(ftmp[2]), [p0]"=&f"(ftmp[3]),
839 [q0]"=&f"(ftmp[4]), [q1]"=&f"(ftmp[5]),
840 [q2]"=&f"(ftmp[6]), [q3]"=&f"(ftmp[7]),
841 [ftmp0]"=&f"(ftmp[8]), [ftmp1]"=&f"(ftmp[9]),
842 [ftmp2]"=&f"(ftmp[10]), [ftmp3]"=&f"(ftmp[11]),
843 [hev]"=&f"(ftmp[12]), [mask]"=&f"(ftmp[13]),
844 [ftmp4]"=&f"(ftmp[14]), [ftmp5]"=&f"(ftmp[15]),
845 [ftmp6]"=&f"(ftmp[16]), [ftmp7]"=&f"(ftmp[17]),
846 [dst]"+&r"(dst), [tmp0]"=&r"(tmp[0]),
856 ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
860 for (i = 0; i < 8; i++)
862 int hv = hev(dst + i * 1, stride, hev_thresh);
871 ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
880 "gsldlc1 %[p3], 0x03(%[dst]) \n\t"
881 "gsldrc1 %[p3], -0x04(%[dst]) \n\t"
882 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t"
883 "gsldlc1 %[p2], 0x03(%[tmp0]) \n\t"
884 "gsldrc1 %[p2], -0x04(%[tmp0]) \n\t"
885 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
886 "gsldlc1 %[p1], 0x03(%[tmp0]) \n\t"
887 "gsldrc1 %[p1], -0x04(%[tmp0]) \n\t"
888 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
889 "gsldlc1 %[p0], 0x03(%[tmp0]) \n\t"
890 "gsldrc1 %[p0], -0x04(%[tmp0]) \n\t"
891 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
892 "gsldlc1 %[q0], 0x03(%[tmp0]) \n\t"
893 "gsldrc1 %[q0], -0x04(%[tmp0]) \n\t"
894 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
895 "gsldlc1 %[q1], 0x03(%[tmp0]) \n\t"
896 "gsldrc1 %[q1], -0x04(%[tmp0]) \n\t"
897 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
898 "gsldlc1 %[q2], 0x03(%[tmp0]) \n\t"
899 "gsldrc1 %[q2], -0x04(%[tmp0]) \n\t"
900 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
901 "gsldlc1 %[q3], 0x03(%[tmp0]) \n\t"
902 "gsldrc1 %[q3], -0x04(%[tmp0]) \n\t"
905 %[q0], %[q1], %[q2], %[q3],
906 %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
910 %[q0], %[q1], %[q2], %[q3],
911 %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
913 "gssdlc1 %[p3], 0x03(%[dst]) \n\t"
914 "gssdrc1 %[p3], -0x04(%[dst]) \n\t"
915 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
916 "gssdlc1 %[p2], 0x03(%[dst]) \n\t"
917 "gssdrc1 %[p2], -0x04(%[dst]) \n\t"
918 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
919 "gssdlc1 %[p1], 0x03(%[dst]) \n\t"
920 "gssdrc1 %[p1], -0x04(%[dst]) \n\t"
921 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
922 "gssdlc1 %[p0], 0x03(%[dst]) \n\t"
923 "gssdrc1 %[p0], -0x04(%[dst]) \n\t"
924 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
925 "gssdlc1 %[q0], 0x03(%[dst]) \n\t"
926 "gssdrc1 %[q0], -0x04(%[dst]) \n\t"
927 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
928 "gssdlc1 %[q1], 0x03(%[dst]) \n\t"
929 "gssdrc1 %[q1], -0x04(%[dst]) \n\t"
930 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
931 "gssdlc1 %[q2], 0x03(%[dst]) \n\t"
932 "gssdrc1 %[q2], -0x04(%[dst]) \n\t"
933 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
934 "gssdlc1 %[q3], 0x03(%[dst]) \n\t"
935 "gssdrc1 %[q3], -0x04(%[dst]) \n\t"
936 : [p3]"=&f"(ftmp[0]), [p2]"=&f"(ftmp[1]),
937 [p1]"=&f"(ftmp[2]), [p0]"=&f"(ftmp[3]),
938 [q0]"=&f"(ftmp[4]), [q1]"=&f"(ftmp[5]),
939 [q2]"=&f"(ftmp[6]), [q3]"=&f"(ftmp[7]),
940 [ftmp0]"=&f"(ftmp[8]), [ftmp1]"=&f"(ftmp[9]),
941 [ftmp2]"=&f"(ftmp[10]), [ftmp3]"=&f"(ftmp[11]),
942 [hev]"=&f"(ftmp[12]), [mask]"=&f"(ftmp[13]),
943 [ftmp4]"=&f"(ftmp[14]), [ftmp5]"=&f"(ftmp[15]),
944 [ftmp6]"=&f"(ftmp[16]), [ftmp7]"=&f"(ftmp[17]),
945 [dst]"+&r"(dst), [tmp0]"=&r"(tmp[0]),
955 ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
959 for (i = 0; i < 8; i++)
961 int hv = hev(dst + i * stride, 1, hev_thresh);
976 MMI_LDC1(%[ftmp0], %[dc], 0x00)
977 MMI_LDC1(%[ftmp1], %[dc], 0x08)
978 MMI_LDC1(%[ftmp2], %[dc], 0x10)
979 MMI_LDC1(%[ftmp3], %[dc], 0x18)
980 "paddsh %[ftmp4], %[ftmp0], %[ftmp3] \n\t"
981 "psubsh %[ftmp5], %[ftmp0], %[ftmp3] \n\t"
982 "paddsh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
983 "psubsh %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
984 "paddsh %[ftmp0], %[ftmp4], %[ftmp6] \n\t"
985 "paddsh %[ftmp1], %[ftmp5], %[ftmp7] \n\t"
986 "psubsh %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
987 "psubsh %[ftmp3], %[ftmp5], %[ftmp7] \n\t"
988 MMI_SDC1(%[ftmp0], %[dc], 0x00)
989 MMI_SDC1(%[ftmp1], %[dc], 0x08)
990 MMI_SDC1(%[ftmp2], %[dc], 0x10)
991 MMI_SDC1(%[ftmp3], %[dc], 0x18)
992 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
993 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
994 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
995 [ftmp6]"=&f"(ftmp[6]),
997 [ftmp7]"=&f"(ftmp[7])
1012 block[2][0][0] = (dc[8] + dc[11] + 3 + dc[9] + dc[10]) >> 3;
1013 block[2][1][0] = (dc[8] - dc[11] + 3 + dc[9] - dc[10]) >> 3;
1014 block[2][2][0] = (dc[8] + dc[11] + 3 - dc[9] - dc[10]) >> 3;
1015 block[2][3][0] = (dc[8] - dc[11] + 3 - dc[9] + dc[10]) >> 3;
1017 block[3][0][0] = (dc[12] + dc[15] + 3 + dc[13] + dc[14]) >> 3;
1018 block[3][1][0] = (dc[12] - dc[15] + 3 + dc[13] - dc[14]) >> 3;
1019 block[3][2][0] = (dc[12] + dc[15] + 3 - dc[13] - dc[14]) >> 3;
1020 block[3][3][0] = (dc[12] - dc[15] + 3 - dc[13] + dc[14]) >> 3;
1023 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1024 MMI_SDC1(%[ftmp0], %[dc], 0x00)
1025 MMI_SDC1(%[ftmp0], %[dc], 0x08)
1026 MMI_SDC1(%[ftmp0], %[dc], 0x10)
1027 MMI_SDC1(%[ftmp0], %[dc], 0x18)
1028 : RESTRICT_ASM_ALL64
1029 [ftmp0]"=&f"(ftmp[0])
1034 int t00, t01, t02, t03, t10, t11, t12, t13, t20, t21, t22, t23, t30, t31, t32, t33;
1036 t00 = dc[0] + dc[12];
1038 t20 = dc[2] + dc[14];
1039 t30 = dc[3] + dc[15];
1041 t03 = dc[0] - dc[12];
1042 t13 = dc[1] - dc[13];
1043 t23 = dc[2] - dc[14];
1044 t33 = dc[3] - dc[15];
1046 t01 = dc[4] + dc[ 8];
1048 t21 = dc[6] + dc[10];
1049 t31 = dc[7] + dc[11];
1051 t02 = dc[4] - dc[ 8];
1053 t22 = dc[6] - dc[10];
1054 t32 = dc[7] - dc[11];
1086 block[2][0][0] = (dc[8] + dc[11] + 3 + dc[9] + dc[10]) >> 3;
1087 block[2][1][0] = (dc[8] - dc[11] + 3 + dc[9] - dc[10]) >> 3;
1088 block[2][2][0] = (dc[8] + dc[11] + 3 - dc[9] - dc[10]) >> 3;
1089 block[2][3][0] = (dc[8] - dc[11] + 3 - dc[9] + dc[10]) >> 3;
1091 block[3][0][0] = (dc[12] + dc[15] + 3 + dc[13] + dc[14]) >> 3;
1092 block[3][1][0] = (dc[12] - dc[15] + 3 + dc[13] - dc[14]) >> 3;
1093 block[3][2][0] = (dc[12] + dc[15] + 3 - dc[13] - dc[14]) >> 3;
1094 block[3][3][0] = (dc[12] - dc[15] + 3 - dc[13] + dc[14]) >> 3;
1105 int val = (dc[0] + 3) >> 3;
1130 DECLARE_ALIGNED(8, const uint64_t, ff_ph_4e7b) = {0x4e7b4e7b4e7b4e7bULL};
1131 DECLARE_ALIGNED(8, const uint64_t, ff_ph_22a3) = {0x22a322a322a322a3ULL};
1138 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1139 MMI_LDC1(%[ftmp1], %[block], 0x00)
1140 MMI_LDC1(%[ftmp2], %[block], 0x08)
1141 MMI_LDC1(%[ftmp3], %[block], 0x10)
1142 MMI_LDC1(%[ftmp4], %[block], 0x18)
1144 "li %[tmp0], 0x02 \n\t"
1145 "mtc1 %[tmp0], %[ftmp11] \n\t"
1148 "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t"
1150 "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t"
1152 "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t"
1153 "pmulhh %[ftmp7], %[ftmp9], %[ff_ph_22a3] \n\t"
1155 "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t"
1156 "pmulhh %[ftmp8], %[ftmp9], %[ff_ph_22a3] \n\t"
1158 "pmulhh %[ftmp9], %[ftmp2], %[ff_ph_4e7b] \n\t"
1159 "paddh %[ftmp9], %[ftmp9], %[ftmp2] \n\t"
1161 "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t"
1162 "paddh %[ftmp10], %[ftmp10], %[ftmp4] \n\t"
1165 "paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t"
1166 "paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
1168 "paddh %[ftmp2], %[ftmp6], %[ftmp8] \n\t"
1169 "psubh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
1171 "psubh %[ftmp3], %[ftmp6], %[ftmp8] \n\t"
1172 "paddh %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
1174 "psubh %[ftmp4], %[ftmp5], %[ftmp7] \n\t"
1175 "psubh %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
1177 MMI_SDC1(%[ftmp0], %[block], 0x00)
1178 MMI_SDC1(%[ftmp0], %[block], 0x08)
1179 MMI_SDC1(%[ftmp0], %[block], 0x10)
1180 MMI_SDC1(%[ftmp0], %[block], 0x18)
1183 %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])
1186 "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t"
1188 "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t"
1190 "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t"
1191 "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t"
1192 "psubh %[ftmp7], %[ftmp9], %[ftmp4] \n\t"
1193 "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t"
1194 "psubh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1196 "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t"
1197 "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t"
1198 "paddh %[ftmp8], %[ftmp9], %[ftmp2] \n\t"
1199 "pmulhh %[ftmp10], %[ftmp2], %[ff_ph_4e7b] \n\t"
1200 "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1202 "li %[tmp0], 0x03 \n\t"
1203 "mtc1 %[tmp0], %[ftmp11] \n\t"
1204 "paddh %[ftmp1], %[ftmp5], %[ftmp8] \n\t"
1205 "paddh %[ftmp1], %[ftmp1], %[ff_pw_4] \n\t"
1206 "psrah %[ftmp1], %[ftmp1], %[ftmp11] \n\t"
1207 "paddh %[ftmp2], %[ftmp6], %[ftmp7] \n\t"
1208 "paddh %[ftmp2], %[ftmp2], %[ff_pw_4] \n\t"
1209 "psrah %[ftmp2], %[ftmp2], %[ftmp11] \n\t"
1210 "psubh %[ftmp3], %[ftmp6], %[ftmp7] \n\t"
1211 "paddh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t"
1212 "psrah %[ftmp3], %[ftmp3], %[ftmp11] \n\t"
1213 "psubh %[ftmp4], %[ftmp5], %[ftmp8] \n\t"
1214 "paddh %[ftmp4], %[ftmp4], %[ff_pw_4] \n\t"
1215 "psrah %[ftmp4], %[ftmp4], %[ftmp11] \n\t"
1218 %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])
1220 MMI_LWC1(%[ftmp5], %[dst0], 0x00)
1221 MMI_LWC1(%[ftmp6], %[dst1], 0x00)
1222 MMI_LWC1(%[ftmp7], %[dst2], 0x00)
1223 MMI_LWC1(%[ftmp8], %[dst3], 0x00)
1225 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1226 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1227 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1228 "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1230 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1231 "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1232 "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1233 "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
1235 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1236 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1237 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1238 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1240 MMI_SWC1(%[ftmp1], %[dst0], 0x00)
1241 MMI_SWC1(%[ftmp2], %[dst1], 0x00)
1242 MMI_SWC1(%[ftmp3], %[dst2], 0x00)
1243 MMI_SWC1(%[ftmp4], %[dst3], 0x00)
1244 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1245 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1246 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1247 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1248 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1249 [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
1253 : [dst0]"r"(dst), [dst1]"r"(dst+stride),
1256 [ff_ph_4e7b]"f"(ff_ph_4e7b), [ff_ph_22a3]"f"(ff_ph_22a3)
1263 for (i = 0; i < 4; i++) {
1279 for (i = 0; i < 4; i++) {
1282 t2 = MUL_35468(tmp[4 + i]) - MUL_20091(tmp[12 + i]);
1283 t3 = MUL_20091(tmp[4 + i]) + MUL_35468(tmp[12 + i]);
1304 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1305 "mtc1 %[dc], %[ftmp5] \n\t"
1306 MMI_LWC1(%[ftmp1], %[dst0], 0x00)
1307 MMI_LWC1(%[ftmp2], %[dst1], 0x00)
1308 MMI_LWC1(%[ftmp3], %[dst2], 0x00)
1309 MMI_LWC1(%[ftmp4], %[dst3], 0x00)
1310 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1311 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1312 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1313 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1314 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1315 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1316 "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1317 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1318 "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1319 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1320 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1321 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1322 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1323 MMI_SWC1(%[ftmp1], %[dst0], 0x00)
1324 MMI_SWC1(%[ftmp2], %[dst1], 0x00)
1325 MMI_SWC1(%[ftmp3], %[dst2], 0x00)
1326 MMI_SWC1(%[ftmp4], %[dst3], 0x00)
1327 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1328 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1329 [ftmp4]"=&f"(ftmp[4]),
1331 [ftmp5]"=&f"(ftmp[5])
1332 : [dst0]"r"(dst), [dst1]"r"(dst+stride),
1342 for (i = 0; i < 4; i++) {
1372 int flim_I, int hev_thresh)
1379 int flim_I, int hev_thresh)
1387 int flim_E, int flim_I, int hev_thresh)
1394 int flim_E, int flim_I, int hev_thresh)
1402 int flim_E, int flim_I, int hev_thresh)
1406 for (i = 0; i < 16; i++)
1408 int hv = hev(dst + i * 1, stride, hev_thresh);
1417 int flim_E, int flim_I, int hev_thresh)
1421 for (i = 0; i < 16; i++)
1423 int hv = hev(dst + i * stride, 1, hev_thresh);
1432 ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
1439 ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
1449 for (i = 0; i < 16; i++)
1458 for (i = 0; i < 16; i++)
1464 ptrdiff_t srcstride, int h, int x, int y)
1474 PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t"
1475 MMI_ULDC1(%[ftmp0], %[src], 0x00)
1476 "ldl %[tmp0], 0x0f(%[src]) \n\t"
1477 "ldr %[tmp0], 0x08(%[src]) \n\t"
1478 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
1479 "ldl %[tmp1], 0x0f(%[addr0]) \n\t"
1480 "ldr %[tmp1], 0x08(%[addr0]) \n\t"
1481 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
1482 MMI_SDC1(%[ftmp0], %[dst], 0x00)
1483 "sdl %[tmp0], 0x0f(%[dst]) \n\t"
1484 "sdr %[tmp0], 0x08(%[dst]) \n\t"
1485 "addiu %[h], %[h], -0x02 \n\t"
1486 MMI_SDC1(%[ftmp1], %[addr1], 0x00)
1487 PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t"
1488 "sdl %[tmp1], 0x0f(%[addr1]) \n\t"
1489 "sdr %[tmp1], 0x08(%[addr1]) \n\t"
1490 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
1491 "bnez %[h], 1b \n\t"
1492 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1493 [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
1495 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
1496 [dst]"+&r"(dst), [src]"+&r"(src),
1498 : [dststride]"r"((mips_reg)dststride),
1499 [srcstride]"r"((mips_reg)srcstride)
1505 for (i = 0; i < h; i++, dst += dststride, src += srcstride)
1506 memcpy(dst, src, 16);
1511 ptrdiff_t srcstride, int h, int x, int y)
1521 PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t"
1522 MMI_ULDC1(%[ftmp0], %[src], 0x00)
1523 "ldl %[tmp0], 0x07(%[addr0]) \n\t"
1524 "ldr %[tmp0], 0x00(%[addr0]) \n\t"
1525 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
1526 MMI_SDC1(%[ftmp0], %[dst], 0x00)
1527 "addiu %[h], %[h], -0x02 \n\t"
1528 "sdl %[tmp0], 0x07(%[addr1]) \n\t"
1529 "sdr %[tmp0], 0x00(%[addr1]) \n\t"
1530 PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t"
1531 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
1532 "bnez %[h], 1b \n\t"
1533 : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
1535 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
1536 [dst]"+&r"(dst), [src]"+&r"(src),
1538 : [dststride]"r"((mips_reg)dststride),
1539 [srcstride]"r"((mips_reg)srcstride)
1545 for (i = 0; i < h; i++, dst += dststride, src += srcstride)
1546 memcpy(dst, src, 8);
1551 ptrdiff_t srcstride, int h, int x, int y)
1561 PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t"
1562 MMI_LWC1(%[ftmp0], %[src], 0x00)
1563 "lwl %[tmp0], 0x03(%[addr0]) \n\t"
1564 "lwr %[tmp0], 0x00(%[addr0]) \n\t"
1565 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
1566 MMI_SWC1(%[ftmp0], %[dst], 0x00)
1567 "addiu %[h], %[h], -0x02 \n\t"
1568 "swl %[tmp0], 0x03(%[addr1]) \n\t"
1569 "swr %[tmp0], 0x00(%[addr1]) \n\t"
1570 PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t"
1571 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
1572 "bnez %[h], 1b \n\t"
1573 : [ftmp0]"=&f"(ftmp[0]), [tmp0]"=&r"(tmp[0]),
1575 [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
1576 [dst]"+&r"(dst), [src]"+&r"(src),
1578 : [dststride]"r"((mips_reg)dststride),
1579 [srcstride]"r"((mips_reg)srcstride)
1585 for (i = 0; i < h; i++, dst += dststride, src += srcstride)
1586 memcpy(dst, src, 4);
1591 ptrdiff_t srcstride, int h, int mx, int my)
1620 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1621 "li %[tmp0], 0x07 \n\t"
1622 "mtc1 %[tmp0], %[ftmp4] \n\t"
1632 "addiu %[h], %[h], -0x01 \n\t"
1633 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1634 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1635 "bnez %[h], 1b \n\t"
1636 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1637 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1638 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1639 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1640 [ftmp8]"=&f"(ftmp[8]),
1641 [tmp0]"=&r"(tmp[0]),
1643 [dst1]"=&r"(dst1), [src1]"=&r"(src1),
1645 [dst]"+&r"(dst), [src]"+&r"(src)
1647 [srcstride]"r"((mips_reg)srcstride),
1648 [dststride]"r"((mips_reg)dststride),
1658 for (y = 0; y < h; y++) {
1659 for (x = 0; x < 16; x++)
1668 ptrdiff_t srcstride, int h, int mx, int my)
1687 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1688 "li %[tmp0], 0x07 \n\t"
1689 "mtc1 %[tmp0], %[ftmp4] \n\t"
1694 "addiu %[h], %[h], -0x01 \n\t"
1695 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1696 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1697 "bnez %[h], 1b \n\t"
1698 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1699 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1700 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1701 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1702 [ftmp8]"=&f"(ftmp[8]),
1703 [tmp0]"=&r"(tmp[0]),
1706 [dst]"+&r"(dst), [src]"+&r"(src)
1708 [srcstride]"r"((mips_reg)srcstride),
1709 [dststride]"r"((mips_reg)dststride),
1719 for (y = 0; y < h; y++) {
1720 for (x = 0; x < 8; x++)
1729 ptrdiff_t srcstride, int h, int mx, int my)
1744 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1745 "li %[tmp0], 0x07 \n\t"
1746 "mtc1 %[tmp0], %[ftmp4] \n\t"
1751 "addiu %[h], %[h], -0x01 \n\t"
1752 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1753 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1754 "bnez %[h], 1b \n\t"
1755 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1756 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1757 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1758 [tmp0]"=&r"(tmp[0]),
1761 [dst]"+&r"(dst), [src]"+&r"(src)
1763 [srcstride]"r"((mips_reg)srcstride),
1764 [dststride]"r"((mips_reg)dststride),
1774 for (y = 0; y < h; y++) {
1775 for (x = 0; x < 4; x++)
1784 ptrdiff_t srcstride, int h, int mx, int my)
1813 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1814 "li %[tmp0], 0x07 \n\t"
1815 "mtc1 %[tmp0], %[ftmp4] \n\t"
1825 "addiu %[h], %[h], -0x01 \n\t"
1826 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1827 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1828 "bnez %[h], 1b \n\t"
1829 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1830 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1831 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1832 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1833 [ftmp8]"=&f"(ftmp[8]),
1834 [tmp0]"=&r"(tmp[0]),
1836 [dst1]"=&r"(dst1), [src1]"=&r"(src1),
1838 [dst]"+&r"(dst), [src]"+&r"(src)
1840 [srcstride]"r"((mips_reg)srcstride),
1841 [dststride]"r"((mips_reg)dststride),
1852 for (y = 0; y < h; y++) {
1853 for (x = 0; x < 16; x++)
1862 ptrdiff_t srcstride, int h, int mx, int my)
1881 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1882 "li %[tmp0], 0x07 \n\t"
1883 "mtc1 %[tmp0], %[ftmp4] \n\t"
1888 "addiu %[h], %[h], -0x01 \n\t"
1889 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1890 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1891 "bnez %[h], 1b \n\t"
1892 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1893 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1894 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1895 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1896 [ftmp8]"=&f"(ftmp[8]),
1897 [tmp0]"=&r"(tmp[0]),
1900 [dst]"+&r"(dst), [src]"+&r"(src)
1902 [srcstride]"r"((mips_reg)srcstride),
1903 [dststride]"r"((mips_reg)dststride),
1914 for (y = 0; y < h; y++) {
1915 for (x = 0; x < 8; x++)
1924 ptrdiff_t srcstride, int h, int mx, int my)
1939 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1940 "li %[tmp0], 0x07 \n\t"
1941 "mtc1 %[tmp0], %[ftmp4] \n\t"
1946 "addiu %[h], %[h], -0x01 \n\t"
1947 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1948 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1949 "bnez %[h], 1b \n\t"
1950 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1951 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1952 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1953 [tmp0]"=&r"(tmp[0]),
1956 [dst]"+&r"(dst), [src]"+&r"(src)
1958 [srcstride]"r"((mips_reg)srcstride),
1959 [dststride]"r"((mips_reg)dststride),
1970 for (y = 0; y < h; y++) {
1971 for (x = 0; x < 4; x++)
1980 ptrdiff_t srcstride, int h, int mx, int my)
2009 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2010 "li %[tmp0], 0x07 \n\t"
2011 "mtc1 %[tmp0], %[ftmp4] \n\t"
2021 "addiu %[h], %[h], -0x01 \n\t"
2022 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2023 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2024 "bnez %[h], 1b \n\t"
2025 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2026 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2027 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2028 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2029 [ftmp8]"=&f"(ftmp[8]),
2030 [tmp0]"=&r"(tmp[0]),
2032 [src0]"=&r"(src0), [dst0]"=&r"(dst0),
2035 [dst]"+&r"(dst), [src]"+&r"(src)
2037 [srcstride]"r"((mips_reg)srcstride),
2038 [dststride]"r"((mips_reg)dststride),
2048 for (y = 0; y < h; y++) {
2049 for (x = 0; x < 16; x++)
2058 ptrdiff_t srcstride, int h, int mx, int my)
2078 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2079 "li %[tmp0], 0x07 \n\t"
2080 "mtc1 %[tmp0], %[ftmp4] \n\t"
2085 "addiu %[h], %[h], -0x01 \n\t"
2086 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2087 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2088 "bnez %[h], 1b \n\t"
2089 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2090 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2091 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2092 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2093 [ftmp8]"=&f"(ftmp[8]),
2094 [tmp0]"=&r"(tmp[0]),
2098 [dst]"+&r"(dst), [src]"+&r"(src)
2100 [srcstride]"r"((mips_reg)srcstride),
2101 [dststride]"r"((mips_reg)dststride),
2111 for (y = 0; y < h; y++) {
2112 for (x = 0; x < 8; x++)
2121 ptrdiff_t srcstride, int h, int mx, int my)
2137 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2138 "li %[tmp0], 0x07 \n\t"
2139 "mtc1 %[tmp0], %[ftmp4] \n\t"
2144 "addiu %[h], %[h], -0x01 \n\t"
2145 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2146 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2147 "bnez %[h], 1b \n\t"
2148 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2149 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2150 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2151 [tmp0]"=&r"(tmp[0]),
2155 [dst]"+&r"(dst), [src]"+&r"(src)
2157 [srcstride]"r"((mips_reg)srcstride),
2158 [dststride]"r"((mips_reg)dststride),
2168 for (y = 0; y < h; y++) {
2169 for (x = 0; x < 4; x++)
2178 ptrdiff_t srcstride, int h, int mx, int my)
2207 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2208 "li %[tmp0], 0x07 \n\t"
2209 "mtc1 %[tmp0], %[ftmp4] \n\t"
2219 "addiu %[h], %[h], -0x01 \n\t"
2220 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2221 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2222 "bnez %[h], 1b \n\t"
2223 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2224 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2225 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2226 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2227 [ftmp8]"=&f"(ftmp[8]),
2228 [tmp0]"=&r"(tmp[0]),
2230 [src0]"=&r"(src0), [dst0]"=&r"(dst0),
2233 [dst]"+&r"(dst), [src]"+&r"(src)
2235 [srcstride]"r"((mips_reg)srcstride),
2236 [dststride]"r"((mips_reg)dststride),
2247 for (y = 0; y < h; y++) {
2248 for (x = 0; x < 16; x++)
2257 ptrdiff_t srcstride, int h, int mx, int my)
2277 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2278 "li %[tmp0], 0x07 \n\t"
2279 "mtc1 %[tmp0], %[ftmp4] \n\t"
2284 "addiu %[h], %[h], -0x01 \n\t"
2285 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2286 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2287 "bnez %[h], 1b \n\t"
2288 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2289 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2290 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2291 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2292 [ftmp8]"=&f"(ftmp[8]),
2293 [tmp0]"=&r"(tmp[0]),
2297 [dst]"+&r"(dst), [src]"+&r"(src)
2299 [srcstride]"r"((mips_reg)srcstride),
2300 [dststride]"r"((mips_reg)dststride),
2311 for (y = 0; y < h; y++) {
2312 for (x = 0; x < 8; x++)
2321 ptrdiff_t srcstride, int h, int mx, int my)
2337 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2338 "li %[tmp0], 0x07 \n\t"
2339 "mtc1 %[tmp0], %[ftmp4] \n\t"
2344 "addiu %[h], %[h], -0x01 \n\t"
2345 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2346 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2347 "bnez %[h], 1b \n\t"
2348 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2349 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2350 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2351 [tmp0]"=&r"(tmp[0]),
2355 [dst]"+&r"(dst), [src]"+&r"(src)
2357 [srcstride]"r"((mips_reg)srcstride),
2358 [dststride]"r"((mips_reg)dststride),
2369 for (y = 0; y < h; y++) {
2370 for (x = 0; x < 4; x++)
2379 ptrdiff_t srcstride, int h, int mx, int my)
2387 tmp = tmp_array + 16;
2398 for (y = 0; y < h + 3; y++) {
2399 for (x = 0; x < 16; x++)
2405 tmp = tmp_array + 16;
2408 for (y = 0; y < h; y++) {
2409 for (x = 0; x < 16; x++)
2418 ptrdiff_t srcstride, int h, int mx, int my)
2426 tmp = tmp_array + 8;
2437 for (y = 0; y < h + 3; y++) {
2438 for (x = 0; x < 8; x++)
2444 tmp = tmp_array + 8;
2447 for (y = 0; y < h; y++) {
2448 for (x = 0; x < 8; x++)
2457 ptrdiff_t srcstride, int h, int mx, int my)
2465 tmp = tmp_array + 4;
2476 for (y = 0; y < h + 3; y++) {
2477 for (x = 0; x < 4; x++)
2482 tmp = tmp_array + 4;
2485 for (y = 0; y < h; y++) {
2486 for (x = 0; x < 4; x++)
2495 ptrdiff_t srcstride, int h, int mx, int my)
2501 src -= 2 * srcstride;
2503 tmp = tmp_array + 32;
2512 src -= 2 * srcstride;
2514 for (y = 0; y < h + 5; y++) {
2515 for (x = 0; x < 16; x++)
2521 tmp = tmp_array + 32;
2524 for (y = 0; y < h; y++) {
2525 for (x = 0; x < 16; x++)
2534 ptrdiff_t srcstride, int h, int mx, int my)
2540 src -= 2 * srcstride;
2542 tmp = tmp_array + 16;
2551 src -= 2 * srcstride;
2553 for (y = 0; y < h + 5; y++) {
2554 for (x = 0; x < 8; x++)
2560 tmp = tmp_array + 16;
2563 for (y = 0; y < h; y++) {
2564 for (x = 0; x < 8; x++)
2573 ptrdiff_t srcstride, int h, int mx, int my)
2579 src -= 2 * srcstride;
2581 tmp = tmp_array + 8;
2590 src -= 2 * srcstride;
2592 for (y = 0; y < h + 5; y++) {
2593 for (x = 0; x < 4; x++)
2599 tmp = tmp_array + 8;
2602 for (y = 0; y < h; y++) {
2603 for (x = 0; x < 4; x++)
2612 ptrdiff_t srcstride, int h, int mx, int my)
2620 tmp = tmp_array + 16;
2631 for (y = 0; y < h + 3; y++) {
2632 for (x = 0; x < 16; x++)
2638 tmp = tmp_array + 16;
2641 for (y = 0; y < h; y++) {
2642 for (x = 0; x < 16; x++)
2651 ptrdiff_t srcstride, int h, int mx, int my)
2659 tmp = tmp_array + 8;
2670 for (y = 0; y < h + 3; y++) {
2671 for (x = 0; x < 8; x++)
2677 tmp = tmp_array + 8;
2680 for (y = 0; y < h; y++) {
2681 for (x = 0; x < 8; x++)
2690 ptrdiff_t srcstride, int h, int mx, int my)
2698 tmp = tmp_array + 4;
2709 for (y = 0; y < h + 3; y++) {
2710 for (x = 0; x < 4; x++)
2716 tmp = tmp_array + 4;
2719 for (y = 0; y < h; y++) {
2720 for (x = 0; x < 4; x++)
2729 ptrdiff_t srcstride, int h, int mx, int my)
2735 src -= 2 * srcstride;
2737 tmp = tmp_array + 32;
2746 src -= 2 * srcstride;
2748 for (y = 0; y < h + 5; y++) {
2749 for (x = 0; x < 16; x++)
2755 tmp = tmp_array + 32;
2758 for (y = 0; y < h; y++) {
2759 for (x = 0; x < 16; x++)
2768 ptrdiff_t srcstride, int h, int mx, int my)
2774 src -= 2 * srcstride;
2776 tmp = tmp_array + 16;
2785 src -= 2 * srcstride;
2787 for (y = 0; y < h + 5; y++) {
2788 for (x = 0; x < 8; x++)
2794 tmp = tmp_array + 16;
2797 for (y = 0; y < h; y++) {
2798 for (x = 0; x < 8; x++)
2807 ptrdiff_t srcstride, int h, int mx, int my)
2813 src -= 2 * srcstride;
2815 tmp = tmp_array + 8;
2824 src -= 2 * srcstride;
2826 for (y = 0; y < h + 5; y++) {
2827 for (x = 0; x < 4; x++)
2833 tmp = tmp_array + 8;
2836 for (y = 0; y < h; y++) {
2837 for (x = 0; x < 4; x++)
2846 ptrdiff_t sstride, int h, int mx, int my)
2849 int a = 8 - mx, b = mx;
2875 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2876 "li %[tmp0], 0x03 \n\t"
2877 "mtc1 %[tmp0], %[ftmp4] \n\t"
2878 "pshufh %[a], %[a], %[ftmp0] \n\t"
2879 "pshufh %[b], %[b], %[ftmp0] \n\t"
2889 "addiu %[h], %[h], -0x01 \n\t"
2890 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
2891 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
2892 "bnez %[h], 1b \n\t"
2893 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2894 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2895 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2896 [ftmp6]"=&f"(ftmp[6]),
2897 [tmp0]"=&r"(tmp[0]),
2899 [dst0]"=&r"(dst0), [src0]"=&r"(src0),
2901 [dst]"+&r"(dst), [src]"+&r"(src),
2902 [a]"+&f"(a), [b]"+&f"(b)
2909 int a = 8 - mx, b = mx;
2912 for (y = 0; y < h; y++) {
2913 for (x = 0; x < 16; x++)
2914 dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
2922 ptrdiff_t sstride, int h, int mx, int my)
2925 int c = 8 - my, d = my;
2942 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2943 "li %[tmp0], 0x03 \n\t"
2944 "mtc1 %[tmp0], %[ftmp4] \n\t"
2945 "pshufh %[c], %[c], %[ftmp0] \n\t"
2946 "pshufh %[d], %[d], %[ftmp0] \n\t"
2956 "addiu %[h], %[h], -0x01 \n\t"
2957 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
2958 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
2959 "bnez %[h], 1b \n\t"
2960 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2961 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2962 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2963 [ftmp6]"=&f"(ftmp[6]),
2964 [tmp0]"=&r"(tmp[0]),
2966 [src0]"=&r"(src0), [dst0]"=&r"(dst0),
2969 [dst]"+&r"(dst), [src]"+&r"(src),
2970 [c]"+&f"(c), [d]"+&f"(d)
2977 int c = 8 - my, d = my;
2980 for (y = 0; y < h; y++) {
2981 for (x = 0; x < 16; x++)
2982 dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3;
2990 ptrdiff_t sstride, int h, int mx, int my)
2999 int a = 8 - mx, b = mx;
3000 int c = 8 - my, d = my;
3005 for (y = 0; y < h + 1; y++) {
3006 for (x = 0; x < 16; x++)
3014 for (y = 0; y < h; y++) {
3015 for (x = 0; x < 16; x++)
3016 dst[x] = (c * tmp[x] + d * tmp[x + 16] + 4) >> 3;
3024 ptrdiff_t sstride, int h, int mx, int my)
3027 int a = 8 - mx, b = mx;
3043 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3044 "li %[tmp0], 0x03 \n\t"
3045 "mtc1 %[tmp0], %[ftmp4] \n\t"
3046 "pshufh %[a], %[a], %[ftmp0] \n\t"
3047 "pshufh %[b], %[b], %[ftmp0] \n\t"
3052 "addiu %[h], %[h], -0x01 \n\t"
3053 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3054 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3055 "bnez %[h], 1b \n\t"
3056 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
3057 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
3058 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
3059 [ftmp6]"=&f"(ftmp[6]),
3060 [tmp0]"=&r"(tmp[0]),
3063 [dst]"+&r"(dst), [src]"+&r"(src),
3064 [a]"+&f"(a), [b]"+&f"(b)
3071 int a = 8 - mx, b = mx;
3074 for (y = 0; y < h; y++) {
3075 for (x = 0; x < 8; x++)
3076 dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
3084 ptrdiff_t sstride, int h, int mx, int my)
3087 int c = 8 - my, d = my;
3104 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3105 "li %[tmp0], 0x03 \n\t"
3106 "mtc1 %[tmp0], %[ftmp4] \n\t"
3107 "pshufh %[c], %[c], %[ftmp0] \n\t"
3108 "pshufh %[d], %[d], %[ftmp0] \n\t"
3113 "addiu %[h], %[h], -0x01 \n\t"
3114 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3115 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3116 "bnez %[h], 1b \n\t"
3117 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
3118 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
3119 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
3120 [ftmp6]"=&f"(ftmp[6]),
3121 [tmp0]"=&r"(tmp[0]),
3125 [dst]"+&r"(dst), [src]"+&r"(src),
3126 [c]"+&f"(c), [d]"+&f"(d)
3133 int c = 8 - my, d = my;
3136 for (y = 0; y < h; y++) {
3137 for (x = 0; x < 8; x++)
3138 dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3;
3146 ptrdiff_t sstride, int h, int mx, int my)
3155 int a = 8 - mx, b = mx;
3156 int c = 8 - my, d = my;
3161 for (y = 0; y < h + 1; y++) {
3162 for (x = 0; x < 8; x++)
3170 for (y = 0; y < h; y++) {
3171 for (x = 0; x < 8; x++)
3172 dst[x] = (c * tmp[x] + d * tmp[x + 8] + 4) >> 3;
3180 ptrdiff_t sstride, int h, int mx, int my)
3183 int a = 8 - mx, b = mx;
3196 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3197 "li %[tmp0], 0x03 \n\t"
3198 "mtc1 %[tmp0], %[ftmp4] \n\t"
3199 "pshufh %[a], %[a], %[ftmp0] \n\t"
3200 "pshufh %[b], %[b], %[ftmp0] \n\t"
3205 "addiu %[h], %[h], -0x01 \n\t"
3206 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3207 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3208 "bnez %[h], 1b \n\t"
3209 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
3210 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
3211 [ftmp4]"=&f"(ftmp[4]),
3212 [tmp0]"=&r"(tmp[0]),
3216 [dst]"+&r"(dst), [src]"+&r"(src),
3217 [a]"+&f"(a), [b]"+&f"(b)
3224 int a = 8 - mx, b = mx;
3227 for (y = 0; y < h; y++) {
3228 for (x = 0; x < 4; x++)
3229 dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
3237 ptrdiff_t sstride, int h, int mx, int my)
3240 int c = 8 - my, d = my;
3254 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3255 "li %[tmp0], 0x03 \n\t"
3256 "mtc1 %[tmp0], %[ftmp4] \n\t"
3257 "pshufh %[c], %[c], %[ftmp0] \n\t"
3258 "pshufh %[d], %[d], %[ftmp0] \n\t"
3263 "addiu %[h], %[h], -0x01 \n\t"
3264 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3265 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3266 "bnez %[h], 1b \n\t"
3267 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
3268 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
3269 [ftmp4]"=&f"(ftmp[4]),
3270 [tmp0]"=&r"(tmp[0]),
3275 [dst]"+&r"(dst), [src]"+&r"(src),
3276 [c]"+&f"(c), [d]"+&f"(d)
3283 int c = 8 - my, d = my;
3286 for (y = 0; y < h; y++) {
3287 for (x = 0; x < 4; x++)
3288 dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3;
3296 ptrdiff_t sstride, int h, int mx, int my)
3305 int a = 8 - mx, b = mx;
3306 int c = 8 - my, d = my;
3311 for (y = 0; y < h + 1; y++) {
3312 for (x = 0; x < 4; x++)
3320 for (y = 0; y < h; y++) {
3321 for (x = 0; x < 4; x++)
3322 dst[x] = (c * tmp[x] + d * tmp[x + 4] + 4) >> 3;
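As a quick check of the rounding used throughout these kernels: with my = 3 the vertical weights are c = 5 and d = 3, and blending the sample values 100 and 140 gives (5 * 100 + 3 * 140 + 4) >> 3 = 924 >> 3 = 115, which matches the exact value 0.625 * 100 + 0.375 * 140 = 115.0.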
#define PUT_VP8_EPEL4_V6_MMI(src, src1, dst, srcstride)
static const uint8_t q1[256]
void ff_put_vp8_epel4_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define FILTER_4TAP(src, F, stride)
static av_always_inline void vp8_filter_common_isnot4tap(uint8_t *p, ptrdiff_t stride)
void ff_vp8_h_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
void ff_vp8_v_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim)
void ff_put_vp8_pixels16_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y)
void ff_put_vp8_bilinear16_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
#define RESTRICT_ASM_DOUBLE_1
void ff_vp8_h_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim)
#define PUT_VP8_EPEL8_V4_MMI(src, src1, dst, srcstride)
void ff_vp8_luma_dc_wht_mmi(int16_t block[4][4][16], int16_t dc[16])
static av_always_inline int vp8_simple_limit(uint8_t *p, ptrdiff_t stride, int flim)
#define PUT_VP8_BILINEAR4_H_MMI(src, dst)
#define TRANSPOSE_4H(fr_i0, fr_i1, fr_i2, fr_i3, fr_t0, fr_t1, fr_t2, fr_t3)
brief: Transpose 4x4 halfword packed data.
void ff_put_vp8_bilinear16_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_bilinear8_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
static av_always_inline void vp8_filter_common_is4tap(uint8_t *p, ptrdiff_t stride)
void ff_put_vp8_epel8_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static double val(void *priv, double ch)
static const uint64_t fourtap_subpel_filters[7][6]
void ff_put_vp8_epel16_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel8_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static const uint16_t mask[17]
static av_always_inline void vp8_v_loop_filter8_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel4_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel16_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_BILINEAR4_V_MMI(src, src1, dst, sstride)
#define PUT_VP8_BILINEAR8_V_MMI(src, src1, dst, sstride)
void ff_vp8_h_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel8_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
void ff_put_vp8_epel4_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static const uint8_t q0[256]
#define FFABS(a)
Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
#define FILTER_6TAP(src, F, stride)
void ff_put_vp8_epel4_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_BILINEAR8_H_MMI(src, dst)
#define RESTRICT_ASM_UINT32_T
#define PUT_VP8_EPEL4_V4_MMI(src, src1, dst, srcstride)
void ff_put_vp8_bilinear4_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_epel8_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel8_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define MMI_VP8_LOOP_FILTER
void ff_put_vp8_epel4_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static av_always_inline void vp8_v_loop_filter8_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel16_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_EPEL4_H4_MMI(src, dst)
void ff_vp8_v_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
#define RESTRICT_ASM_DOUBLE_2
void ff_vp8_v_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
static av_always_inline void vp8_h_loop_filter8_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_idct_dc_add4y_mmi(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
void ff_put_vp8_epel8_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel16_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_bilinear8_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_bilinear4_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_epel8_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_vp8_idct_dc_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
void ff_vp8_idct_dc_add4uv_mmi(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
static av_always_inline int vp8_normal_limit(uint8_t *p, ptrdiff_t stride, int E, int I)
void ff_put_vp8_epel16_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define DECLARE_ALIGNED(n, t, v)
void ff_put_vp8_epel4_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_bilinear16_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_bilinear8_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_vp8_v_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_v_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel16_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_bilinear4_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
#define PUT_VP8_EPEL8_V6_MMI(src, src1, dst, srcstride)
void ff_put_vp8_epel16_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_EPEL8_H4_MMI(src, dst)
void ff_put_vp8_epel8_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
static av_always_inline void vp8_h_loop_filter8_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_h_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_luma_dc_wht_dc_mmi(int16_t block[4][4][16], int16_t dc[16])
void ff_put_vp8_epel16_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_pixels4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y)
#define TRANSPOSE_8B(fr_i0, fr_i1, fr_i2, fr_i3, fr_i4, fr_i5, fr_i6, fr_i7, fr_t0, fr_t1, fr_t2, fr_t3)
brief: Transpose 8x8 byte packed data.
#define PUT_VP8_EPEL4_H6_MMI(src, dst)
void ff_put_vp8_epel4_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride)
static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
void ff_put_vp8_epel4_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_pixels8_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y)
#define PUT_VP8_EPEL8_H6_MMI(src, dst)
void ff_vp8_h_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
static const uint8_t subpel_filters[7][6]