47 a4 =
W0 *
b[0] -
W0 *
b[4];
48 a5 =
W5 *
b[5] +
W3 *
b[3];
49 a6 =
W6 *
b[2] -
W2 *
b[6];
50 a7 =
W7 *
b[1] -
W1 *
b[7];
53 s1 = (181 * (
a1 - a5 + a7 -
a3) + 128) >> 8;
54 s2 = (181 * (
a1 - a5 - a7 +
a3) + 128) >> 8;
57 b[0] = (
a0 +
a2 +
a1 + a5 + 128) >> 8;
58 b[1] = (a4 + a6 + s1 + 128) >> 8;
59 b[2] = (a4 - a6 + s2 + 128) >> 8;
60 b[3] = (
a0 -
a2 + a7 +
a3 + 128) >> 8;
61 b[4] = (
a0 -
a2 - a7 -
a3 + 128) >> 8;
62 b[5] = (a4 - a6 - s2 + 128) >> 8;
63 b[6] = (a4 + a6 - s1 + 128) >> 8;
64 b[7] = (
a0 +
a2 -
a1 - a5 + 128) >> 8;
73 a0 = (
W0 *
b[ 0] +
W0 *
b[32] ) >> 3;
74 a1 = (
W1 *
b[ 8] +
W7 *
b[56] + 4) >> 3;
75 a2 = (
W2 *
b[16] +
W6 *
b[48] + 4) >> 3;
76 a3 = (
W3 *
b[40] -
W5 *
b[24] + 4) >> 3;
77 a4 = (
W0 *
b[ 0] -
W0 *
b[32] ) >> 3;
78 a5 = (
W5 *
b[40] +
W3 *
b[24] + 4) >> 3;
79 a6 = (
W6 *
b[16] -
W2 *
b[48] + 4) >> 3;
80 a7 = (
W7 *
b[ 8] -
W1 *
b[56] + 4) >> 3;
83 s1 = (181 * (
a1 - a5 + a7 -
a3) + 128) >> 8;
84 s2 = (181 * (
a1 - a5 - a7 +
a3) + 128) >> 8;
87 b[ 0] = (
a0 +
a2 +
a1 + a5 + 8192) >> 14;
88 b[ 8] = (a4 + a6 + s1 + 8192) >> 14;
89 b[16] = (a4 - a6 + s2 + 8192) >> 14;
90 b[24] = (
a0 -
a2 + a7 +
a3 + 8192) >> 14;
92 b[32] = (
a0 -
a2 - a7 -
a3 + 8192) >> 14;
93 b[40] = (a4 - a6 - s2 + 8192) >> 14;
94 b[48] = (a4 + a6 - s1 + 8192) >> 14;
95 b[56] = (
a0 +
a2 -
a1 - a5 + 8192) >> 14;
103 for (
i = 0;
i < 64;
i += 8)
105 for (
i = 0;
i < 8;
i++)
109 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
112 MMI_LDC1(%[ftmp1], %[
block], 0x00)
113 MMI_LDC1(%[ftmp2], %[
block], 0x08)
114 MMI_LDC1(%[ftmp3], %[
block], 0x10)
115 MMI_LDC1(%[ftmp4], %[
block], 0x18)
116 MMI_LDC1(%[ftmp5], %[
block], 0x20)
117 MMI_LDC1(%[ftmp6], %[
block], 0x28)
118 MMI_LDC1(%[ftmp7], %[
block], 0x30)
119 MMI_LDC1(%[ftmp8], %[
block], 0x38)
121 MMI_LDC1(%[ftmp9], %[dest], 0x00)
122 "punpckhbh %[ftmp10], %[ftmp9], %[ftmp0] \n\t"
123 "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
124 "paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
125 "paddh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
126 "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
127 MMI_SDC1(%[ftmp1], %[dest], 0x00)
128 PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
130 MMI_LDC1(%[ftmp9], %[dest], 0x00)
131 "punpckhbh %[ftmp10], %[ftmp9], %[ftmp0] \n\t"
132 "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
133 "paddh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
134 "paddh %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
135 "packushb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
136 MMI_SDC1(%[ftmp3], %[dest], 0x00)
137 PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
139 MMI_LDC1(%[ftmp9], %[dest], 0x00)
140 "punpckhbh %[ftmp10], %[ftmp9], %[ftmp0] \n\t"
141 "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
142 "paddh %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
143 "paddh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
144 "packushb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
145 MMI_SDC1(%[ftmp5], %[dest], 0x00)
146 PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
148 MMI_LDC1(%[ftmp9], %[dest], 0x00)
149 "punpckhbh %[ftmp10], %[ftmp9], %[ftmp0] \n\t"
150 "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
151 "paddh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
152 "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
153 "packushb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
154 MMI_SDC1(%[ftmp7], %[dest], 0x00)
156 PTR_ADDIU "%[block], %[block], 0x40 \n\t"
157 PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
160 MMI_LDC1(%[ftmp1], %[
block], 0x00)
161 MMI_LDC1(%[ftmp2], %[
block], 0x08)
162 MMI_LDC1(%[ftmp3], %[
block], 0x10)
163 MMI_LDC1(%[ftmp4], %[
block], 0x18)
164 MMI_LDC1(%[ftmp5], %[
block], 0x20)
165 MMI_LDC1(%[ftmp6], %[
block], 0x28)
166 MMI_LDC1(%[ftmp7], %[
block], 0x30)
167 MMI_LDC1(%[ftmp8], %[
block], 0x38)
169 MMI_LDC1(%[ftmp9], %[dest], 0x00)
170 "punpckhbh %[ftmp10], %[ftmp9], %[ftmp0] \n\t"
171 "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
172 "paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
173 "paddh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
174 "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
175 MMI_SDC1(%[ftmp1], %[dest], 0x00)
176 PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
178 MMI_LDC1(%[ftmp9], %[dest], 0x00)
179 "punpckhbh %[ftmp10], %[ftmp9], %[ftmp0] \n\t"
180 "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
181 "paddh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
182 "paddh %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
183 "packushb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
184 MMI_SDC1(%[ftmp3], %[dest], 0x00)
185 PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
187 MMI_LDC1(%[ftmp9], %[dest], 0x00)
188 "punpckhbh %[ftmp10], %[ftmp9], %[ftmp0] \n\t"
189 "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
190 "paddh %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
191 "paddh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
192 "packushb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
193 MMI_SDC1(%[ftmp5], %[dest], 0x00)
194 PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
196 MMI_LDC1(%[ftmp9], %[dest], 0x00)
197 "punpckhbh %[ftmp10], %[ftmp9], %[ftmp0] \n\t"
198 "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
199 "paddh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
200 "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
201 "packushb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
202 MMI_SDC1(%[ftmp7], %[dest], 0x00)
203 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
204 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
205 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
206 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
207 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
208 [ftmp10]
"=&f"(ftmp[10]),
210 : [line_size]
"r"((
mips_reg)line_size)
220 for (
i = 0;
i < 64;
i += 8)
222 for (
i = 0;
i < 8;
i++)
227 MMI_LDC1(%[ftmp0], %[
block], 0x00)
228 MMI_LDC1(%[ftmp1], %[
block], 0x08)
229 MMI_LDC1(%[ftmp2], %[
block], 0x10)
230 MMI_LDC1(%[ftmp3], %[
block], 0x18)
231 MMI_LDC1(%[ftmp4], %[
block], 0x20)
232 MMI_LDC1(%[ftmp5], %[
block], 0x28)
233 MMI_LDC1(%[ftmp6], %[
block], 0x30)
234 MMI_LDC1(%[ftmp7], %[
block], 0x38)
235 "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
236 "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
237 "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
238 "packushb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
239 MMI_SDC1(%[ftmp0], %[dest], 0x00)
240 PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
241 MMI_SDC1(%[ftmp2], %[dest], 0x00)
242 PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
243 MMI_SDC1(%[ftmp4], %[dest], 0x00)
244 PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
245 MMI_SDC1(%[ftmp6], %[dest], 0x00)
247 PTR_ADDIU "%[block], %[block], 0x40 \n\t"
248 PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
251 MMI_LDC1(%[ftmp0], %[
block], 0x00)
252 MMI_LDC1(%[ftmp1], %[
block], 0x08)
253 MMI_LDC1(%[ftmp2], %[
block], 0x10)
254 MMI_LDC1(%[ftmp3], %[
block], 0x18)
255 MMI_LDC1(%[ftmp4], %[
block], 0x20)
256 MMI_LDC1(%[ftmp5], %[
block], 0x28)
257 MMI_LDC1(%[ftmp6], %[
block], 0x30)
258 MMI_LDC1(%[ftmp7], %[
block], 0x38)
259 "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
260 "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
261 "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
262 "packushb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
263 MMI_SDC1(%[ftmp0], %[dest], 0x00)
264 PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
265 MMI_SDC1(%[ftmp2], %[dest], 0x00)
266 PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
267 MMI_SDC1(%[ftmp4], %[dest], 0x00)
268 PTR_ADDU "%[dest], %[dest], %[line_size] \n\t"
269 MMI_SDC1(%[ftmp6], %[dest], 0x00)
270 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
271 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
272 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
273 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
275 : [line_size]
"r"((
mips_reg)line_size)