00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00064 #include <stdlib.h>
00065 #include <stdio.h>
00066 #include "libavutil/common.h"
00067 #include "dsputil.h"
00068
/*
 * Small compatibility layer for the DCT code below.
 *
 * SHIFT_TEMPS      expands to nothing; it is only a placeholder inside the
 *                  function bodies.
 * DCTSIZE          edge length of the transformed block (8).
 * BITS_IN_JSAMPLE  sample precision selector used to pick PASS1_BITS.
 * GLOBAL(type)     wraps the return type of the exported functions and
 *                  expands to the type unchanged.
 */
#define SHIFT_TEMPS
#define DCTSIZE 8
#define BITS_IN_JSAMPLE 8
#define GLOBAL(x) x

/*
 * Plain right shift. NOTE: for negative signed operands the result of >>
 * is implementation-defined in C; the callers presumably rely on an
 * arithmetic (sign-preserving) shift.
 */
#define RIGHT_SHIFT(x, n) ((x) >> (n))

/* Plain multiplication of a variable by a constant. */
#define MULTIPLY16C16(var, c) ((var) * (c))

/*
 * DESCALE(x, n): drop the n low-order bits of x.
 * The active variant first adds half of the divisor (1 << (n - 1)), so the
 * result is rounded instead of truncated; n must therefore be at least 1.
 */
#if 1 //def USE_ACCURATE_ROUNDING
#define DESCALE(x, n) RIGHT_SHIFT((x) + (1 << ((n) - 1)), n)
#else
#define DESCALE(x, n) RIGHT_SHIFT(x, n)
#endif

/*
 * The butterflies below are written out for exactly eight samples per
 * row/column, so refuse to build with any other block size and say why.
 */
#if DCTSIZE != 8
#error "Sorry, this code only copes with 8x8 DCTs."
#endif
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
/*
 * Fixed-point layout.
 *
 * CONST_BITS  number of fractional bits carried by the FIX_* multipliers.
 * PASS1_BITS  number of extra fractional bits the row pass leaves in its
 *             output; the column pass shifts them out again.
 */
#if BITS_IN_JSAMPLE == 8
#define CONST_BITS  13
#define PASS1_BITS  4
#else
#define CONST_BITS  13
#define PASS1_BITS  1
#endif

/*
 * Convert a real constant to fixed point with CONST_BITS fractional bits,
 * rounding to nearest. Needed by the generic branch below, which would
 * otherwise reference an undefined macro.
 */
#define FIX(x)  ((int32_t) ((x) * (1 << CONST_BITS) + 0.5))

/*
 * Multipliers used by the DCT, named after the real value they encode.
 * For CONST_BITS == 13 the values are spelled out as integer literals
 * (each equals FIX() of the value in its name); any other precision
 * derives them through FIX().
 */
#if CONST_BITS == 13
#define FIX_0_298631336  ((int32_t)  2446)
#define FIX_0_390180644  ((int32_t)  3196)
#define FIX_0_541196100  ((int32_t)  4433)
#define FIX_0_765366865  ((int32_t)  6270)
#define FIX_0_899976223  ((int32_t)  7373)
#define FIX_1_175875602  ((int32_t)  9633)
#define FIX_1_501321110  ((int32_t) 12299)
#define FIX_1_847759065  ((int32_t) 15137)
#define FIX_1_961570560  ((int32_t) 16069)
#define FIX_2_053119869  ((int32_t) 16819)
#define FIX_2_562915447  ((int32_t) 20995)
#define FIX_3_072711026  ((int32_t) 25172)
#else
#define FIX_0_298631336  FIX(0.298631336)
#define FIX_0_390180644  FIX(0.390180644)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_765366865  FIX(0.765366865)
#define FIX_0_899976223  FIX(0.899976223)
#define FIX_1_175875602  FIX(1.175875602)
#define FIX_1_501321110  FIX(1.501321110)
#define FIX_1_847759065  FIX(1.847759065)
#define FIX_1_961570560  FIX(1.961570560)
#define FIX_2_053119869  FIX(2.053119869)
#define FIX_2_562915447  FIX(2.562915447)
#define FIX_3_072711026  FIX(3.072711026)
#endif
00168
00169
00170
00171
00172
00173
00174
00175
00176
/*
 * MULTIPLY(value, factor): product of a variable and one of the FIX_*
 * constants. The MULTIPLY16C16 form is selected only when samples are
 * 8 bits wide, CONST_BITS is at most 13 and PASS1_BITS is at most 2;
 * every other configuration uses the plain product.
 */
#if BITS_IN_JSAMPLE != 8 || CONST_BITS > 13 || PASS1_BITS > 2
#define MULTIPLY(value, factor)  ((value) * (factor))
#else
#define MULTIPLY(value, factor)  MULTIPLY16C16(value, factor)
#endif
00182
00183
00184 static av_always_inline void row_fdct(DCTELEM * data){
00185 int_fast32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
00186 int_fast32_t tmp10, tmp11, tmp12, tmp13;
00187 int_fast32_t z1, z2, z3, z4, z5;
00188 DCTELEM *dataptr;
00189 int ctr;
00190 SHIFT_TEMPS
00191
00192
00193
00194
00195
00196 dataptr = data;
00197 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
00198 tmp0 = dataptr[0] + dataptr[7];
00199 tmp7 = dataptr[0] - dataptr[7];
00200 tmp1 = dataptr[1] + dataptr[6];
00201 tmp6 = dataptr[1] - dataptr[6];
00202 tmp2 = dataptr[2] + dataptr[5];
00203 tmp5 = dataptr[2] - dataptr[5];
00204 tmp3 = dataptr[3] + dataptr[4];
00205 tmp4 = dataptr[3] - dataptr[4];
00206
00207
00208
00209
00210
00211 tmp10 = tmp0 + tmp3;
00212 tmp13 = tmp0 - tmp3;
00213 tmp11 = tmp1 + tmp2;
00214 tmp12 = tmp1 - tmp2;
00215
00216 dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
00217 dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
00218
00219 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
00220 dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
00221 CONST_BITS-PASS1_BITS);
00222 dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
00223 CONST_BITS-PASS1_BITS);
00224
00225
00226
00227
00228
00229
00230 z1 = tmp4 + tmp7;
00231 z2 = tmp5 + tmp6;
00232 z3 = tmp4 + tmp6;
00233 z4 = tmp5 + tmp7;
00234 z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
00235
00236 tmp4 = MULTIPLY(tmp4, FIX_0_298631336);
00237 tmp5 = MULTIPLY(tmp5, FIX_2_053119869);
00238 tmp6 = MULTIPLY(tmp6, FIX_3_072711026);
00239 tmp7 = MULTIPLY(tmp7, FIX_1_501321110);
00240 z1 = MULTIPLY(z1, - FIX_0_899976223);
00241 z2 = MULTIPLY(z2, - FIX_2_562915447);
00242 z3 = MULTIPLY(z3, - FIX_1_961570560);
00243 z4 = MULTIPLY(z4, - FIX_0_390180644);
00244
00245 z3 += z5;
00246 z4 += z5;
00247
00248 dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
00249 dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
00250 dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
00251 dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
00252
00253 dataptr += DCTSIZE;
00254 }
00255 }
00256
00257
00258
00259
00260
00261 GLOBAL(void)
00262 ff_jpeg_fdct_islow (DCTELEM * data)
00263 {
00264 int_fast32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
00265 int_fast32_t tmp10, tmp11, tmp12, tmp13;
00266 int_fast32_t z1, z2, z3, z4, z5;
00267 DCTELEM *dataptr;
00268 int ctr;
00269 SHIFT_TEMPS
00270
00271 row_fdct(data);
00272
00273
00274
00275
00276
00277
00278 dataptr = data;
00279 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
00280 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
00281 tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
00282 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
00283 tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
00284 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
00285 tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
00286 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
00287 tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
00288
00289
00290
00291
00292
00293 tmp10 = tmp0 + tmp3;
00294 tmp13 = tmp0 - tmp3;
00295 tmp11 = tmp1 + tmp2;
00296 tmp12 = tmp1 - tmp2;
00297
00298 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
00299 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
00300
00301 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
00302 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
00303 CONST_BITS+PASS1_BITS);
00304 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
00305 CONST_BITS+PASS1_BITS);
00306
00307
00308
00309
00310
00311
00312 z1 = tmp4 + tmp7;
00313 z2 = tmp5 + tmp6;
00314 z3 = tmp4 + tmp6;
00315 z4 = tmp5 + tmp7;
00316 z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
00317
00318 tmp4 = MULTIPLY(tmp4, FIX_0_298631336);
00319 tmp5 = MULTIPLY(tmp5, FIX_2_053119869);
00320 tmp6 = MULTIPLY(tmp6, FIX_3_072711026);
00321 tmp7 = MULTIPLY(tmp7, FIX_1_501321110);
00322 z1 = MULTIPLY(z1, - FIX_0_899976223);
00323 z2 = MULTIPLY(z2, - FIX_2_562915447);
00324 z3 = MULTIPLY(z3, - FIX_1_961570560);
00325 z4 = MULTIPLY(z4, - FIX_0_390180644);
00326
00327 z3 += z5;
00328 z4 += z5;
00329
00330 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3,
00331 CONST_BITS+PASS1_BITS);
00332 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4,
00333 CONST_BITS+PASS1_BITS);
00334 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3,
00335 CONST_BITS+PASS1_BITS);
00336 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4,
00337 CONST_BITS+PASS1_BITS);
00338
00339 dataptr++;
00340 }
00341 }
00342
00343
00344
00345
00346
00347
00348 GLOBAL(void)
00349 ff_fdct248_islow (DCTELEM * data)
00350 {
00351 int_fast32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
00352 int_fast32_t tmp10, tmp11, tmp12, tmp13;
00353 int_fast32_t z1;
00354 DCTELEM *dataptr;
00355 int ctr;
00356 SHIFT_TEMPS
00357
00358 row_fdct(data);
00359
00360
00361
00362
00363
00364
00365 dataptr = data;
00366 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
00367 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
00368 tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
00369 tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
00370 tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
00371 tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
00372 tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
00373 tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
00374 tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
00375
00376 tmp10 = tmp0 + tmp3;
00377 tmp11 = tmp1 + tmp2;
00378 tmp12 = tmp1 - tmp2;
00379 tmp13 = tmp0 - tmp3;
00380
00381 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
00382 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
00383
00384 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
00385 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
00386 CONST_BITS+PASS1_BITS);
00387 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
00388 CONST_BITS+PASS1_BITS);
00389
00390 tmp10 = tmp4 + tmp7;
00391 tmp11 = tmp5 + tmp6;
00392 tmp12 = tmp5 - tmp6;
00393 tmp13 = tmp4 - tmp7;
00394
00395 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
00396 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
00397
00398 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
00399 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
00400 CONST_BITS+PASS1_BITS);
00401 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
00402 CONST_BITS+PASS1_BITS);
00403
00404 dataptr++;
00405 }
00406 }