FFmpeg
swscale_template.c
1 /*
2  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <stdint.h>
22 
23 #include "libavutil/x86/asm.h"
24 #include "swscale_internal.h"
25 
26 #undef REAL_MOVNTQ
27 #undef MOVNTQ
28 #undef MOVNTQ2
29 #undef PREFETCH
30 
31 
32 #if COMPILE_TEMPLATE_MMXEXT
33 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
34 #define MOVNTQ2 "movntq "
35 #else
36 #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
37 #define MOVNTQ2 "movq "
38 #endif
39 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
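/* Editor's note (illustrative sketch, not part of the upstream file):
 * MOVNTQ expands to the non-temporal store movntq on MMXEXT builds and to a
 * plain movq otherwise.  The same dispatch expressed with intrinsics would
 * look roughly like this; _mm_stream_pi() is the intrinsic behind movntq. */
#include <xmmintrin.h>
static inline void store_q_sketch(__m64 *dst, __m64 v)
{
#if COMPILE_TEMPLATE_MMXEXT
    _mm_stream_pi(dst, v);   /* movntq: write-combining store, bypasses the cache */
#else
    *dst = v;                /* movq: ordinary cached 64-bit store */
#endif
}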
40 
41 #define YSCALEYUV2PACKEDX_UV \
42  __asm__ volatile(\
43  "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\
44  ".p2align 4 \n\t"\
45  "nop \n\t"\
46  "1: \n\t"\
47  "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d" \n\t"\
48  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
49  "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
50  "movq %%mm3, %%mm4 \n\t"\
51  ".p2align 4 \n\t"\
52  "2: \n\t"\
53  "movq 8(%%"FF_REG_d"), %%mm0 \n\t" /* filterCoeff */\
54  "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm2 \n\t" /* UsrcData */\
55  "add %6, %%"FF_REG_S" \n\t" \
56  "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm5 \n\t" /* VsrcData */\
57  "add $16, %%"FF_REG_d" \n\t"\
58  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
59  "pmulhw %%mm0, %%mm2 \n\t"\
60  "pmulhw %%mm0, %%mm5 \n\t"\
61  "paddw %%mm2, %%mm3 \n\t"\
62  "paddw %%mm5, %%mm4 \n\t"\
63  "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
64  " jnz 2b \n\t"\
65 
66 #define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
67  "lea "offset"(%0), %%"FF_REG_d" \n\t"\
68  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
69  "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
70  "movq "#dst1", "#dst2" \n\t"\
71  ".p2align 4 \n\t"\
72  "2: \n\t"\
73  "movq 8(%%"FF_REG_d"), "#coeff" \n\t" /* filterCoeff */\
74  "movq (%%"FF_REG_S", %%"FF_REG_a", 2), "#src1" \n\t" /* Y1srcData */\
75  "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), "#src2" \n\t" /* Y2srcData */\
76  "add $16, %%"FF_REG_d" \n\t"\
77  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
78  "pmulhw "#coeff", "#src1" \n\t"\
79  "pmulhw "#coeff", "#src2" \n\t"\
80  "paddw "#src1", "#dst1" \n\t"\
81  "paddw "#src2", "#dst2" \n\t"\
82  "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
83  " jnz 2b \n\t"\
84 
85 #define YSCALEYUV2PACKEDX \
86  YSCALEYUV2PACKEDX_UV \
87  YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \
88 
89 #define YSCALEYUV2PACKEDX_END \
90  :: "r" (&c->redDither), \
91  "m" (dummy), "m" (dummy), "m" (dummy),\
92  "r" (dest), "m" (dstW_reg), "m"(uv_off) \
93  NAMED_CONSTRAINTS_ADD(bF8,bFC) \
94  : "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_S \
95  );
96 
97 #define YSCALEYUV2PACKEDX_ACCURATE_UV \
98  __asm__ volatile(\
99  "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\
100  ".p2align 4 \n\t"\
101  "nop \n\t"\
102  "1: \n\t"\
103  "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d" \n\t"\
104  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
105  "pxor %%mm4, %%mm4 \n\t"\
106  "pxor %%mm5, %%mm5 \n\t"\
107  "pxor %%mm6, %%mm6 \n\t"\
108  "pxor %%mm7, %%mm7 \n\t"\
109  ".p2align 4 \n\t"\
110  "2: \n\t"\
111  "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm0 \n\t" /* UsrcData */\
112  "add %6, %%"FF_REG_S" \n\t" \
113  "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm2 \n\t" /* VsrcData */\
114  "mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
115  "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm1 \n\t" /* UsrcData */\
116  "movq %%mm0, %%mm3 \n\t"\
117  "punpcklwd %%mm1, %%mm0 \n\t"\
118  "punpckhwd %%mm1, %%mm3 \n\t"\
119  "movq "STR(APCK_COEF)"(%%"FF_REG_d"),%%mm1 \n\t" /* filterCoeff */\
120  "pmaddwd %%mm1, %%mm0 \n\t"\
121  "pmaddwd %%mm1, %%mm3 \n\t"\
122  "paddd %%mm0, %%mm4 \n\t"\
123  "paddd %%mm3, %%mm5 \n\t"\
124  "add %6, %%"FF_REG_S" \n\t" \
125  "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm3 \n\t" /* VsrcData */\
126  "mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
127  "add $"STR(APCK_SIZE)", %%"FF_REG_d" \n\t"\
128  "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
129  "movq %%mm2, %%mm0 \n\t"\
130  "punpcklwd %%mm3, %%mm2 \n\t"\
131  "punpckhwd %%mm3, %%mm0 \n\t"\
132  "pmaddwd %%mm1, %%mm2 \n\t"\
133  "pmaddwd %%mm1, %%mm0 \n\t"\
134  "paddd %%mm2, %%mm6 \n\t"\
135  "paddd %%mm0, %%mm7 \n\t"\
136  " jnz 2b \n\t"\
137  "psrad $16, %%mm4 \n\t"\
138  "psrad $16, %%mm5 \n\t"\
139  "psrad $16, %%mm6 \n\t"\
140  "psrad $16, %%mm7 \n\t"\
141  "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
142  "packssdw %%mm5, %%mm4 \n\t"\
143  "packssdw %%mm7, %%mm6 \n\t"\
144  "paddw %%mm0, %%mm4 \n\t"\
145  "paddw %%mm0, %%mm6 \n\t"\
146  "movq %%mm4, "U_TEMP"(%0) \n\t"\
147  "movq %%mm6, "V_TEMP"(%0) \n\t"\
148 
149 #define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
150  "lea "offset"(%0), %%"FF_REG_d" \n\t"\
151  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
152  "pxor %%mm1, %%mm1 \n\t"\
153  "pxor %%mm5, %%mm5 \n\t"\
154  "pxor %%mm7, %%mm7 \n\t"\
155  "pxor %%mm6, %%mm6 \n\t"\
156  ".p2align 4 \n\t"\
157  "2: \n\t"\
158  "movq (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
159  "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
160  "mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
161  "movq (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
162  "movq %%mm0, %%mm3 \n\t"\
163  "punpcklwd %%mm4, %%mm0 \n\t"\
164  "punpckhwd %%mm4, %%mm3 \n\t"\
165  "movq "STR(APCK_COEF)"(%%"FF_REG_d"), %%mm4 \n\t" /* filterCoeff */\
166  "pmaddwd %%mm4, %%mm0 \n\t"\
167  "pmaddwd %%mm4, %%mm3 \n\t"\
168  "paddd %%mm0, %%mm1 \n\t"\
169  "paddd %%mm3, %%mm5 \n\t"\
170  "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
171  "mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
172  "add $"STR(APCK_SIZE)", %%"FF_REG_d" \n\t"\
173  "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
174  "movq %%mm2, %%mm0 \n\t"\
175  "punpcklwd %%mm3, %%mm2 \n\t"\
176  "punpckhwd %%mm3, %%mm0 \n\t"\
177  "pmaddwd %%mm4, %%mm2 \n\t"\
178  "pmaddwd %%mm4, %%mm0 \n\t"\
179  "paddd %%mm2, %%mm7 \n\t"\
180  "paddd %%mm0, %%mm6 \n\t"\
181  " jnz 2b \n\t"\
182  "psrad $16, %%mm1 \n\t"\
183  "psrad $16, %%mm5 \n\t"\
184  "psrad $16, %%mm7 \n\t"\
185  "psrad $16, %%mm6 \n\t"\
186  "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
187  "packssdw %%mm5, %%mm1 \n\t"\
188  "packssdw %%mm6, %%mm7 \n\t"\
189  "paddw %%mm0, %%mm1 \n\t"\
190  "paddw %%mm0, %%mm7 \n\t"\
191  "movq "U_TEMP"(%0), %%mm3 \n\t"\
192  "movq "V_TEMP"(%0), %%mm4 \n\t"\
193 
194 #define YSCALEYUV2PACKEDX_ACCURATE \
195  YSCALEYUV2PACKEDX_ACCURATE_UV \
196  YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
197 
198 #define YSCALEYUV2RGBX \
199  "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\
200  "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\
201  "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
202  "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
203  "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
204  "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
205  /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
206  "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
207  "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
208  "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\
209  "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\
210  "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
211  "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
212  /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
213  "paddw %%mm3, %%mm4 \n\t"\
214  "movq %%mm2, %%mm0 \n\t"\
215  "movq %%mm5, %%mm6 \n\t"\
216  "movq %%mm4, %%mm3 \n\t"\
217  "punpcklwd %%mm2, %%mm2 \n\t"\
218  "punpcklwd %%mm5, %%mm5 \n\t"\
219  "punpcklwd %%mm4, %%mm4 \n\t"\
220  "paddw %%mm1, %%mm2 \n\t"\
221  "paddw %%mm1, %%mm5 \n\t"\
222  "paddw %%mm1, %%mm4 \n\t"\
223  "punpckhwd %%mm0, %%mm0 \n\t"\
224  "punpckhwd %%mm6, %%mm6 \n\t"\
225  "punpckhwd %%mm3, %%mm3 \n\t"\
226  "paddw %%mm7, %%mm0 \n\t"\
227  "paddw %%mm7, %%mm6 \n\t"\
228  "paddw %%mm7, %%mm3 \n\t"\
229  /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
230  "packuswb %%mm0, %%mm2 \n\t"\
231  "packuswb %%mm6, %%mm5 \n\t"\
232  "packuswb %%mm3, %%mm4 \n\t"\
233 
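/* Editor's note (illustrative scalar sketch, not part of the upstream file):
 * per pixel, YSCALEYUV2RGBX subtracts the U/V/Y offsets, multiplies by the
 * fixed-point coefficients with pmulhw (signed multiply keeping the high 16
 * bits, i.e. ">> 16"), sums the contributions and packs with unsigned
 * saturation.  The coefficient/offset parameters below are a simplification
 * of the SwsContext fields, not their real layout. */
static inline uint8_t sws_clip_u8_sketch(int v)
{
    return v < 0 ? 0 : v > 255 ? 255 : v;            /* packuswb-style saturation */
}

static inline void yuv2rgb_pixel_sketch(int Y, int U, int V,
                                        int y_coeff, int ub, int ug, int vg, int vr,
                                        int y_off, int uv_off,
                                        uint8_t *r, uint8_t *g, uint8_t *b)
{
    int y  = ((Y - y_off) * y_coeff) >> 16;          /* psubw Y_OFFSET, pmulhw Y_COEFF */
    int cu = U - uv_off, cv = V - uv_off;            /* psubw U_OFFSET / V_OFFSET      */
    *b = sws_clip_u8_sketch(y + ((cu * ub) >> 16));
    *g = sws_clip_u8_sketch(y + ((cu * ug) >> 16) + ((cv * vg) >> 16));
    *r = sws_clip_u8_sketch(y + ((cv * vr) >> 16));
}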
234 #define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
235  "movq "#b", "#q2" \n\t" /* B */\
236  "movq "#r", "#t" \n\t" /* R */\
237  "punpcklbw "#g", "#b" \n\t" /* GBGBGBGB 0 */\
238  "punpcklbw "#a", "#r" \n\t" /* ARARARAR 0 */\
239  "punpckhbw "#g", "#q2" \n\t" /* GBGBGBGB 2 */\
240  "punpckhbw "#a", "#t" \n\t" /* ARARARAR 2 */\
241  "movq "#b", "#q0" \n\t" /* GBGBGBGB 0 */\
242  "movq "#q2", "#q3" \n\t" /* GBGBGBGB 2 */\
243  "punpcklwd "#r", "#q0" \n\t" /* ARGBARGB 0 */\
244  "punpckhwd "#r", "#b" \n\t" /* ARGBARGB 1 */\
245  "punpcklwd "#t", "#q2" \n\t" /* ARGBARGB 2 */\
246  "punpckhwd "#t", "#q3" \n\t" /* ARGBARGB 3 */\
247 \
248  MOVNTQ( q0, (dst, index, 4))\
249  MOVNTQ( b, 8(dst, index, 4))\
250  MOVNTQ( q2, 16(dst, index, 4))\
251  MOVNTQ( q3, 24(dst, index, 4))\
252 \
253  "add $8, "#index" \n\t"\
254  "cmp "dstw", "#index" \n\t"\
255  " jb 1b \n\t"
256 #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
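/* Editor's note (illustrative scalar sketch, not part of the upstream file):
 * WRITEBGR32 interleaves the packed B, G, R and A bytes with punpck so that
 * each output pixel occupies 4 bytes in memory order B, G, R, A. */
static inline void write_bgr32_pixel_sketch(uint8_t *dst, int i,
                                            uint8_t b, uint8_t g,
                                            uint8_t r, uint8_t a)
{
    dst[4 * i + 0] = b;
    dst[4 * i + 1] = g;
    dst[4 * i + 2] = r;
    dst[4 * i + 3] = a;
}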
257 
258 static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
259  const int16_t **lumSrc, int lumFilterSize,
260  const int16_t *chrFilter, const int16_t **chrUSrc,
261  const int16_t **chrVSrc,
262  int chrFilterSize, const int16_t **alpSrc,
263  uint8_t *dest, int dstW, int dstY)
264 {
265  x86_reg dummy=0;
266  x86_reg dstW_reg = dstW;
267  x86_reg uv_off = c->uv_offx2;
268 
269  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
270  YSCALEYUV2PACKEDX_ACCURATE
271  YSCALEYUV2RGBX
272  "movq %%mm2, "U_TEMP"(%0) \n\t"
273  "movq %%mm4, "V_TEMP"(%0) \n\t"
274  "movq %%mm5, "Y_TEMP"(%0) \n\t"
275  YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
276  "movq "Y_TEMP"(%0), %%mm5 \n\t"
277  "psraw $3, %%mm1 \n\t"
278  "psraw $3, %%mm7 \n\t"
279  "packuswb %%mm7, %%mm1 \n\t"
280  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
281  YSCALEYUV2PACKEDX_END
282  } else {
283  YSCALEYUV2PACKEDX_ACCURATE
284  YSCALEYUV2RGBX
285  "pcmpeqd %%mm7, %%mm7 \n\t"
286  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
287  YSCALEYUV2PACKEDX_END
288  }
289 }
290 
291 static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
292  const int16_t **lumSrc, int lumFilterSize,
293  const int16_t *chrFilter, const int16_t **chrUSrc,
294  const int16_t **chrVSrc,
295  int chrFilterSize, const int16_t **alpSrc,
296  uint8_t *dest, int dstW, int dstY)
297 {
298  x86_reg dummy=0;
299  x86_reg dstW_reg = dstW;
300  x86_reg uv_off = c->uv_offx2;
301 
302  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
303  YSCALEYUV2PACKEDX
304  YSCALEYUV2RGBX
305  YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
306  "psraw $3, %%mm1 \n\t"
307  "psraw $3, %%mm7 \n\t"
308  "packuswb %%mm7, %%mm1 \n\t"
309  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
310  YSCALEYUV2PACKEDX_END
311  } else {
312  YSCALEYUV2PACKEDX
313  YSCALEYUV2RGBX
314  "pcmpeqd %%mm7, %%mm7 \n\t"
315  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
316  YSCALEYUV2PACKEDX_END
317  }
318 }
319 
320 static void RENAME(yuv2bgr32_X)(SwsContext *c, const int16_t *lumFilter,
321  const int16_t **lumSrc, int lumFilterSize,
322  const int16_t *chrFilter, const int16_t **chrUSrc,
323  const int16_t **chrVSrc,
324  int chrFilterSize, const int16_t **alpSrc,
325  uint8_t *dest, int dstW, int dstY)
326 {
327  x86_reg dummy=0;
328  x86_reg dstW_reg = dstW;
329  x86_reg uv_off = c->uv_offx2;
330 
331  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
332  YSCALEYUV2PACKEDX
333  YSCALEYUV2RGBX
334  YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
335  "psraw $3, %%mm1 \n\t"
336  "psraw $3, %%mm7 \n\t"
337  "packuswb %%mm7, %%mm1 \n\t"
338  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm5, %%mm4, %%mm2, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
339  YSCALEYUV2PACKEDX_END
340  } else {
341  YSCALEYUV2PACKEDX
342  YSCALEYUV2RGBX
343  "pcmpeqd %%mm7, %%mm7 \n\t"
344  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm5, %%mm4, %%mm2, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
345  YSCALEYUV2PACKEDX_END
346  }
347 }
348 
349 #define REAL_WRITERGB16(dst, dstw, index) \
350  "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
351  "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\
352  "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
353  "psrlq $3, %%mm2 \n\t"\
354 \
355  "movq %%mm2, %%mm1 \n\t"\
356  "movq %%mm4, %%mm3 \n\t"\
357 \
358  "punpcklbw %%mm7, %%mm3 \n\t"\
359  "punpcklbw %%mm5, %%mm2 \n\t"\
360  "punpckhbw %%mm7, %%mm4 \n\t"\
361  "punpckhbw %%mm5, %%mm1 \n\t"\
362 \
363  "psllq $3, %%mm3 \n\t"\
364  "psllq $3, %%mm4 \n\t"\
365 \
366  "por %%mm3, %%mm2 \n\t"\
367  "por %%mm4, %%mm1 \n\t"\
368 \
369  MOVNTQ(%%mm2, (dst, index, 2))\
370  MOVNTQ(%%mm1, 8(dst, index, 2))\
371 \
372  "add $8, "#index" \n\t"\
373  "cmp "dstw", "#index" \n\t"\
374  " jb 1b \n\t"
375 #define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index)
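/* Editor's note (illustrative scalar sketch, not part of the upstream file):
 * WRITERGB16 keeps the top 5/6/5 bits of the B, G and R bytes (the bF8/bFC
 * masks) and merges them into a single 16-bit pixel, red in the high bits. */
static inline uint16_t pack_565_sketch(uint8_t r, uint8_t g, uint8_t b)
{
    return (uint16_t)(((r & 0xF8) << 8) | ((g & 0xFC) << 3) | (b >> 3));
}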
376 
377 static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
378  const int16_t **lumSrc, int lumFilterSize,
379  const int16_t *chrFilter, const int16_t **chrUSrc,
380  const int16_t **chrVSrc,
381  int chrFilterSize, const int16_t **alpSrc,
382  uint8_t *dest, int dstW, int dstY)
383 {
384  x86_reg dummy=0;
385  x86_reg dstW_reg = dstW;
386  x86_reg uv_off = c->uv_offx2;
387 
388  YSCALEYUV2PACKEDX_ACCURATE
389  YSCALEYUV2RGBX
390  "pxor %%mm7, %%mm7 \n\t"
391  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
392 #ifdef DITHER1XBPP
393  "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
394  "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
395  "paddusb "RED_DITHER"(%0), %%mm5\n\t"
396 #endif
397  WRITERGB16(%4, "%5", %%FF_REGa)
398  YSCALEYUV2PACKEDX_END
399 }
400 
401 static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
402  const int16_t **lumSrc, int lumFilterSize,
403  const int16_t *chrFilter, const int16_t **chrUSrc,
404  const int16_t **chrVSrc,
405  int chrFilterSize, const int16_t **alpSrc,
406  uint8_t *dest, int dstW, int dstY)
407 {
408  x86_reg dummy=0;
409  x86_reg dstW_reg = dstW;
410  x86_reg uv_off = c->uv_offx2;
411 
412  YSCALEYUV2PACKEDX
413  YSCALEYUV2RGBX
414  "pxor %%mm7, %%mm7 \n\t"
415  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
416 #ifdef DITHER1XBPP
417  "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
418  "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
419  "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
420 #endif
421  WRITERGB16(%4, "%5", %%FF_REGa)
422  YSCALEYUV2PACKEDX_END
423 }
424 
425 #define REAL_WRITERGB15(dst, dstw, index) \
426  "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
427  "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\
428  "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
429  "psrlq $3, %%mm2 \n\t"\
430  "psrlq $1, %%mm5 \n\t"\
431 \
432  "movq %%mm2, %%mm1 \n\t"\
433  "movq %%mm4, %%mm3 \n\t"\
434 \
435  "punpcklbw %%mm7, %%mm3 \n\t"\
436  "punpcklbw %%mm5, %%mm2 \n\t"\
437  "punpckhbw %%mm7, %%mm4 \n\t"\
438  "punpckhbw %%mm5, %%mm1 \n\t"\
439 \
440  "psllq $2, %%mm3 \n\t"\
441  "psllq $2, %%mm4 \n\t"\
442 \
443  "por %%mm3, %%mm2 \n\t"\
444  "por %%mm4, %%mm1 \n\t"\
445 \
446  MOVNTQ(%%mm2, (dst, index, 2))\
447  MOVNTQ(%%mm1, 8(dst, index, 2))\
448 \
449  "add $8, "#index" \n\t"\
450  "cmp "dstw", "#index" \n\t"\
451  " jb 1b \n\t"
452 #define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)
453 
454 static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
455  const int16_t **lumSrc, int lumFilterSize,
456  const int16_t *chrFilter, const int16_t **chrUSrc,
457  const int16_t **chrVSrc,
458  int chrFilterSize, const int16_t **alpSrc,
459  uint8_t *dest, int dstW, int dstY)
460 {
461  x86_reg dummy=0;
462  x86_reg dstW_reg = dstW;
463  x86_reg uv_off = c->uv_offx2;
464 
465  YSCALEYUV2PACKEDX_ACCURATE
466  YSCALEYUV2RGBX
467  "pxor %%mm7, %%mm7 \n\t"
468  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
469 #ifdef DITHER1XBPP
470  "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
471  "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
472  "paddusb "RED_DITHER"(%0), %%mm5\n\t"
473 #endif
474  WRITERGB15(%4, "%5", %%FF_REGa)
475  YSCALEYUV2PACKEDX_END
476 }
477 
478 static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
479  const int16_t **lumSrc, int lumFilterSize,
480  const int16_t *chrFilter, const int16_t **chrUSrc,
481  const int16_t **chrVSrc,
482  int chrFilterSize, const int16_t **alpSrc,
483  uint8_t *dest, int dstW, int dstY)
484 {
485  x86_reg dummy=0;
486  x86_reg dstW_reg = dstW;
487  x86_reg uv_off = c->uv_offx2;
488 
489  YSCALEYUV2PACKEDX
490  YSCALEYUV2RGBX
491  "pxor %%mm7, %%mm7 \n\t"
492  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
493 #ifdef DITHER1XBPP
494  "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
495  "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
496  "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
497 #endif
498  WRITERGB15(%4, "%5", %%FF_REGa)
499  YSCALEYUV2PACKEDX_END
500 }
501 
502 #define WRITEBGR24MMX(dst, dstw, index) \
503  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
504  "movq %%mm2, %%mm1 \n\t" /* B */\
505  "movq %%mm5, %%mm6 \n\t" /* R */\
506  "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
507  "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
508  "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
509  "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
510  "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
511  "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
512  "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
513  "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
514  "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
515  "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
516 \
517  "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
518  "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\
519  "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\
520  "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\
521 \
522  "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\
523  "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\
524  "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\
525  "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\
526 \
527  "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\
528  "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\
529  "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\
530  "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\
531 \
532  "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\
533  "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\
534  "psllq $40, %%mm2 \n\t" /* GB000000 1 */\
535  "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
536  MOVNTQ(%%mm0, (dst))\
537 \
538  "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\
539  "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\
540  "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\
541  "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\
542  MOVNTQ(%%mm6, 8(dst))\
543 \
544  "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\
545  "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\
546  "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\
547  MOVNTQ(%%mm5, 16(dst))\
548 \
549  "add $24, "#dst" \n\t"\
550 \
551  "add $8, "#index" \n\t"\
552  "cmp "dstw", "#index" \n\t"\
553  " jb 1b \n\t"
554 
555 #define WRITEBGR24MMXEXT(dst, dstw, index) \
556  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
557  "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
558  "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
559  "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\
560  "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\
561  "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\
562 \
563  "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\
564  "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\
565  "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\
566 \
567  "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\
568  "por %%mm1, %%mm6 \n\t"\
569  "por %%mm3, %%mm6 \n\t"\
570  MOVNTQ(%%mm6, (dst))\
571 \
572  "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\
573  "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\
574  "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\
575  "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\
576 \
577  "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5 B4 B3 */\
578  "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\
579  "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\
580 \
581  "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\
582  "por %%mm3, %%mm6 \n\t"\
583  MOVNTQ(%%mm6, 8(dst))\
584 \
585  "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B6 B7 */\
586  "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\
587  "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\
588 \
589  "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\
590  "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\
591  "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7 R6 R5 */\
592 \
593  "por %%mm1, %%mm3 \n\t"\
594  "por %%mm3, %%mm6 \n\t"\
595  MOVNTQ(%%mm6, 16(dst))\
596 \
597  "add $24, "#dst" \n\t"\
598 \
599  "add $8, "#index" \n\t"\
600  "cmp "dstw", "#index" \n\t"\
601  " jb 1b \n\t"
602 
603 #if COMPILE_TEMPLATE_MMXEXT
604 #undef WRITEBGR24
605 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMXEXT(dst, dstw, index)
606 #else
607 #undef WRITEBGR24
608 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
609 #endif
610 
611 #if HAVE_6REGS
612 static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
613  const int16_t **lumSrc, int lumFilterSize,
614  const int16_t *chrFilter, const int16_t **chrUSrc,
615  const int16_t **chrVSrc,
616  int chrFilterSize, const int16_t **alpSrc,
617  uint8_t *dest, int dstW, int dstY)
618 {
619  x86_reg dummy=0;
620  x86_reg dstW_reg = dstW;
621  x86_reg uv_off = c->uv_offx2;
622 
623  YSCALEYUV2PACKEDX_ACCURATE
624  YSCALEYUV2RGBX
625  "pxor %%mm7, %%mm7 \n\t"
626  "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c"\n\t" //FIXME optimize
627  "add %4, %%"FF_REG_c" \n\t"
628  WRITEBGR24(%%FF_REGc, "%5", %%FF_REGa)
629  :: "r" (&c->redDither),
630  "m" (dummy), "m" (dummy), "m" (dummy),
631  "r" (dest), "m" (dstW_reg), "m"(uv_off)
632  NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
633  : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S
634  );
635 }
636 
637 static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
638  const int16_t **lumSrc, int lumFilterSize,
639  const int16_t *chrFilter, const int16_t **chrUSrc,
640  const int16_t **chrVSrc,
641  int chrFilterSize, const int16_t **alpSrc,
642  uint8_t *dest, int dstW, int dstY)
643 {
644  x86_reg dummy=0;
645  x86_reg dstW_reg = dstW;
646  x86_reg uv_off = c->uv_offx2;
647 
648  YSCALEYUV2PACKEDX
649  YSCALEYUV2RGBX
650  "pxor %%mm7, %%mm7 \n\t"
651  "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c" \n\t" //FIXME optimize
652  "add %4, %%"FF_REG_c" \n\t"
653  WRITEBGR24(%%FF_REGc, "%5", %%FF_REGa)
654  :: "r" (&c->redDither),
655  "m" (dummy), "m" (dummy), "m" (dummy),
656  "r" (dest), "m" (dstW_reg), "m"(uv_off)
657  NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
658  : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S
659  );
660 }
661 #endif /* HAVE_6REGS */
662 
663 #define REAL_WRITEYUY2(dst, dstw, index) \
664  "packuswb %%mm3, %%mm3 \n\t"\
665  "packuswb %%mm4, %%mm4 \n\t"\
666  "packuswb %%mm7, %%mm1 \n\t"\
667  "punpcklbw %%mm4, %%mm3 \n\t"\
668  "movq %%mm1, %%mm7 \n\t"\
669  "punpcklbw %%mm3, %%mm1 \n\t"\
670  "punpckhbw %%mm3, %%mm7 \n\t"\
671 \
672  MOVNTQ(%%mm1, (dst, index, 2))\
673  MOVNTQ(%%mm7, 8(dst, index, 2))\
674 \
675  "add $8, "#index" \n\t"\
676  "cmp "dstw", "#index" \n\t"\
677  " jb 1b \n\t"
678 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
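/* Editor's note (illustrative scalar sketch, not part of the upstream file):
 * WRITEYUY2 emits packed 4:2:2; two luma samples share one U/V pair and the
 * bytes land in memory as Y0 U Y1 V. */
static inline void write_yuy2_pair_sketch(uint8_t *dst, int i,
                                          uint8_t y0, uint8_t y1,
                                          uint8_t u, uint8_t v)
{
    dst[4 * i + 0] = y0;
    dst[4 * i + 1] = u;
    dst[4 * i + 2] = y1;
    dst[4 * i + 3] = v;
}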
679 
680 static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
681  const int16_t **lumSrc, int lumFilterSize,
682  const int16_t *chrFilter, const int16_t **chrUSrc,
683  const int16_t **chrVSrc,
684  int chrFilterSize, const int16_t **alpSrc,
685  uint8_t *dest, int dstW, int dstY)
686 {
687  x86_reg dummy=0;
688  x86_reg dstW_reg = dstW;
689  x86_reg uv_off = c->uv_offx2;
690 
691  YSCALEYUV2PACKEDX_ACCURATE
692  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
693  "psraw $3, %%mm3 \n\t"
694  "psraw $3, %%mm4 \n\t"
695  "psraw $3, %%mm1 \n\t"
696  "psraw $3, %%mm7 \n\t"
697  WRITEYUY2(%4, "%5", %%FF_REGa)
698  YSCALEYUV2PACKEDX_END
699 }
700 
701 static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
702  const int16_t **lumSrc, int lumFilterSize,
703  const int16_t *chrFilter, const int16_t **chrUSrc,
704  const int16_t **chrVSrc,
705  int chrFilterSize, const int16_t **alpSrc,
706  uint8_t *dest, int dstW, int dstY)
707 {
708  x86_reg dummy=0;
709  x86_reg dstW_reg = dstW;
710  x86_reg uv_off = c->uv_offx2;
711 
712  YSCALEYUV2PACKEDX
713  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
714  "psraw $3, %%mm3 \n\t"
715  "psraw $3, %%mm4 \n\t"
716  "psraw $3, %%mm1 \n\t"
717  "psraw $3, %%mm7 \n\t"
718  WRITEYUY2(%4, "%5", %%FF_REGa)
719  YSCALEYUV2PACKEDX_END
720 }
721 
722 #define REAL_YSCALEYUV2RGB_UV(index, c) \
723  "xor "#index", "#index" \n\t"\
724  ".p2align 4 \n\t"\
725  "1: \n\t"\
726  "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
727  "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
728  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
729  "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
730  "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
731  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
732  "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
733  "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
734  "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
735  "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
736  "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
737  "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
738  "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
739  "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
740  "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
741  "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
742  "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
743  "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
744  "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
745  "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
746  "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
747  /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
748 
749 #define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
750  "movq ("#b1", "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
751  "movq ("#b2", "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
752  "movq 8("#b1", "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
753  "movq 8("#b2", "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
754  "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
755  "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
756  "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
757  "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
758  "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
759  "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
760  "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
761  "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
762 
763 #define REAL_YSCALEYUV2RGB_COEFF(c) \
764  "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
765  "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
766  "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
767  "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
768  "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
769  "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
770  /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
771  "paddw %%mm3, %%mm4 \n\t"\
772  "movq %%mm2, %%mm0 \n\t"\
773  "movq %%mm5, %%mm6 \n\t"\
774  "movq %%mm4, %%mm3 \n\t"\
775  "punpcklwd %%mm2, %%mm2 \n\t"\
776  "punpcklwd %%mm5, %%mm5 \n\t"\
777  "punpcklwd %%mm4, %%mm4 \n\t"\
778  "paddw %%mm1, %%mm2 \n\t"\
779  "paddw %%mm1, %%mm5 \n\t"\
780  "paddw %%mm1, %%mm4 \n\t"\
781  "punpckhwd %%mm0, %%mm0 \n\t"\
782  "punpckhwd %%mm6, %%mm6 \n\t"\
783  "punpckhwd %%mm3, %%mm3 \n\t"\
784  "paddw %%mm7, %%mm0 \n\t"\
785  "paddw %%mm7, %%mm6 \n\t"\
786  "paddw %%mm7, %%mm3 \n\t"\
787  /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
788  "packuswb %%mm0, %%mm2 \n\t"\
789  "packuswb %%mm6, %%mm5 \n\t"\
790  "packuswb %%mm3, %%mm4 \n\t"\
791 
792 #define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
793 
794 #define YSCALEYUV2RGB(index, c) \
795  REAL_YSCALEYUV2RGB_UV(index, c) \
796  REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
797  REAL_YSCALEYUV2RGB_COEFF(c)
798 
799 /**
800  * vertical bilinear scale YV12 to RGB
801  */
802 static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
803  const int16_t *ubuf[2], const int16_t *vbuf[2],
804  const int16_t *abuf[2], uint8_t *dest,
805  int dstW, int yalpha, int uvalpha, int y)
806 {
807  const int16_t *buf0 = buf[0], *buf1 = buf[1],
808  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
809 
810  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
811  const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1];
812 #if ARCH_X86_64
813  __asm__ volatile(
814  YSCALEYUV2RGB(%%r8, %5)
815  YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
816  "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
817  "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
818  "packuswb %%mm7, %%mm1 \n\t"
819  WRITEBGR32(%4, DSTW_OFFSET"(%5)", %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
820  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
821  "a" (&c->redDither),
822  "r" (abuf0), "r" (abuf1)
823  : "%r8"
824  );
825 #else
826  c->u_temp=(intptr_t)abuf0;
827  c->v_temp=(intptr_t)abuf1;
828  __asm__ volatile(
829  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
830  "mov %4, %%"FF_REG_b" \n\t"
831  "push %%"FF_REG_BP" \n\t"
832  YSCALEYUV2RGB(%%FF_REGBP, %5)
833  "push %0 \n\t"
834  "push %1 \n\t"
835  "mov "U_TEMP"(%5), %0 \n\t"
836  "mov "V_TEMP"(%5), %1 \n\t"
837  YSCALEYUV2RGB_YA(%%FF_REGBP, %5, %0, %1)
838  "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
839  "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
840  "packuswb %%mm7, %%mm1 \n\t"
841  "pop %1 \n\t"
842  "pop %0 \n\t"
843  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
844  "pop %%"FF_REG_BP" \n\t"
845  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
846  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
847  "a" (&c->redDither)
848  );
849 #endif
850  } else {
851  __asm__ volatile(
852  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
853  "mov %4, %%"FF_REG_b" \n\t"
854  "push %%"FF_REG_BP" \n\t"
855  YSCALEYUV2RGB(%%FF_REGBP, %5)
856  "pcmpeqd %%mm7, %%mm7 \n\t"
857  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
858  "pop %%"FF_REG_BP" \n\t"
859  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
860  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
861  "a" (&c->redDither)
862  );
863  }
864 }
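/* Editor's note (illustrative scalar sketch, not part of the upstream file):
 * the *_2 output functions blend two source rows vertically.  Per sample, the
 * pmulhw/psraw/paddw sequence in REAL_YSCALEYUV2RGB_YA computes
 * row1>>4 + ((row0 - row1) * alpha >> 16) before the YUV->RGB step above. */
static inline int vblend_sketch(int16_t row0, int16_t row1, int alpha)
{
    return (row1 >> 4) + (((row0 - row1) * alpha) >> 16);
}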
865 
866 static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
867  const int16_t *ubuf[2], const int16_t *vbuf[2],
868  const int16_t *abuf[2], uint8_t *dest,
869  int dstW, int yalpha, int uvalpha, int y)
870 {
871  const int16_t *buf0 = buf[0], *buf1 = buf[1],
872  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
873 
874  __asm__ volatile(
875  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
876  "mov %4, %%"FF_REG_b" \n\t"
877  "push %%"FF_REG_BP" \n\t"
878  YSCALEYUV2RGB(%%FF_REGBP, %5)
879  "pxor %%mm7, %%mm7 \n\t"
880  WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
881  "pop %%"FF_REG_BP" \n\t"
882  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
883  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
884  "a" (&c->redDither)
885  NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
886  );
887 }
888 
889 static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
890  const int16_t *ubuf[2], const int16_t *vbuf[2],
891  const int16_t *abuf[2], uint8_t *dest,
892  int dstW, int yalpha, int uvalpha, int y)
893 {
894  const int16_t *buf0 = buf[0], *buf1 = buf[1],
895  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
896 
897  __asm__ volatile(
898  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
899  "mov %4, %%"FF_REG_b" \n\t"
900  "push %%"FF_REG_BP" \n\t"
901  YSCALEYUV2RGB(%%FF_REGBP, %5)
902  "pxor %%mm7, %%mm7 \n\t"
903  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
904 #ifdef DITHER1XBPP
905  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
906  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
907  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
908 #endif
909  WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
910  "pop %%"FF_REG_BP" \n\t"
911  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
912  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
913  "a" (&c->redDither)
914  NAMED_CONSTRAINTS_ADD(bF8,bFC)
915  );
916 }
917 
918 static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
919  const int16_t *ubuf[2], const int16_t *vbuf[2],
920  const int16_t *abuf[2], uint8_t *dest,
921  int dstW, int yalpha, int uvalpha, int y)
922 {
923  const int16_t *buf0 = buf[0], *buf1 = buf[1],
924  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
925 
926  __asm__ volatile(
927  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
928  "mov %4, %%"FF_REG_b" \n\t"
929  "push %%"FF_REG_BP" \n\t"
930  YSCALEYUV2RGB(%%FF_REGBP, %5)
931  "pxor %%mm7, %%mm7 \n\t"
932  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
933 #ifdef DITHER1XBPP
934  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
935  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
936  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
937 #endif
938  WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
939  "pop %%"FF_REG_BP" \n\t"
940  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
941  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
942  "a" (&c->redDither)
943  NAMED_CONSTRAINTS_ADD(bF8,bFC)
944  );
945 }
946 
947 #define REAL_YSCALEYUV2PACKED(index, c) \
948  "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
949  "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
950  "psraw $3, %%mm0 \n\t"\
951  "psraw $3, %%mm1 \n\t"\
952  "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
953  "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
954  "xor "#index", "#index" \n\t"\
955  ".p2align 4 \n\t"\
956  "1: \n\t"\
957  "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
958  "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
959  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
960  "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
961  "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
962  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
963  "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
964  "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
965  "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
966  "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
967  "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
968  "psraw $7, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
969  "psraw $7, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
970  "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
971  "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
972  "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
973  "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
974  "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
975  "movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
976  "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
977  "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
978  "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
979  "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
980  "psraw $7, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
981  "psraw $7, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
982  "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
983  "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
984 
985 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
986 
987 static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
988  const int16_t *ubuf[2], const int16_t *vbuf[2],
989  const int16_t *abuf[2], uint8_t *dest,
990  int dstW, int yalpha, int uvalpha, int y)
991 {
992  const int16_t *buf0 = buf[0], *buf1 = buf[1],
993  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
994 
995  __asm__ volatile(
996  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
997  "mov %4, %%"FF_REG_b" \n\t"
998  "push %%"FF_REG_BP" \n\t"
999  YSCALEYUV2PACKED(%%FF_REGBP, %5)
1000  WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1001  "pop %%"FF_REG_BP" \n\t"
1002  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1003  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1004  "a" (&c->redDither)
1005  );
1006 }
1007 
1008 #define REAL_YSCALEYUV2RGB1(index, c) \
1009  "xor "#index", "#index" \n\t"\
1010  ".p2align 4 \n\t"\
1011  "1: \n\t"\
1012  "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
1013  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1014  "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
1015  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1016  "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
1017  "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
1018  "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
1019  "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
1020  "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
1021  "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
1022  "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
1023  "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
1024  /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
1025  "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
1026  "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
1027  "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
1028  "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
1029  "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
1030  "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
1031  "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
1032  "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
1033  "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
1034  "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
1035  /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
1036  "paddw %%mm3, %%mm4 \n\t"\
1037  "movq %%mm2, %%mm0 \n\t"\
1038  "movq %%mm5, %%mm6 \n\t"\
1039  "movq %%mm4, %%mm3 \n\t"\
1040  "punpcklwd %%mm2, %%mm2 \n\t"\
1041  "punpcklwd %%mm5, %%mm5 \n\t"\
1042  "punpcklwd %%mm4, %%mm4 \n\t"\
1043  "paddw %%mm1, %%mm2 \n\t"\
1044  "paddw %%mm1, %%mm5 \n\t"\
1045  "paddw %%mm1, %%mm4 \n\t"\
1046  "punpckhwd %%mm0, %%mm0 \n\t"\
1047  "punpckhwd %%mm6, %%mm6 \n\t"\
1048  "punpckhwd %%mm3, %%mm3 \n\t"\
1049  "paddw %%mm7, %%mm0 \n\t"\
1050  "paddw %%mm7, %%mm6 \n\t"\
1051  "paddw %%mm7, %%mm3 \n\t"\
1052  /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
1053  "packuswb %%mm0, %%mm2 \n\t"\
1054  "packuswb %%mm6, %%mm5 \n\t"\
1055  "packuswb %%mm3, %%mm4 \n\t"\
1056 
1057 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
1058 
1059 // do vertical chrominance interpolation
1060 #define REAL_YSCALEYUV2RGB1b(index, c) \
1061  "xor "#index", "#index" \n\t"\
1062  ".p2align 4 \n\t"\
1063  "1: \n\t"\
1064  "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
1065  "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
1066  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1067  "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
1068  "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
1069  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1070  "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
1071  "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
1072  "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
1073  "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\
1074  "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
1075  "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
1076  "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
1077  "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
1078  "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
1079  "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
1080  /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
1081  "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
1082  "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
1083  "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
1084  "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
1085  "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
1086  "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
1087  "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
1088  "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
1089  "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
1090  "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
1091  /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
1092  "paddw %%mm3, %%mm4 \n\t"\
1093  "movq %%mm2, %%mm0 \n\t"\
1094  "movq %%mm5, %%mm6 \n\t"\
1095  "movq %%mm4, %%mm3 \n\t"\
1096  "punpcklwd %%mm2, %%mm2 \n\t"\
1097  "punpcklwd %%mm5, %%mm5 \n\t"\
1098  "punpcklwd %%mm4, %%mm4 \n\t"\
1099  "paddw %%mm1, %%mm2 \n\t"\
1100  "paddw %%mm1, %%mm5 \n\t"\
1101  "paddw %%mm1, %%mm4 \n\t"\
1102  "punpckhwd %%mm0, %%mm0 \n\t"\
1103  "punpckhwd %%mm6, %%mm6 \n\t"\
1104  "punpckhwd %%mm3, %%mm3 \n\t"\
1105  "paddw %%mm7, %%mm0 \n\t"\
1106  "paddw %%mm7, %%mm6 \n\t"\
1107  "paddw %%mm7, %%mm3 \n\t"\
1108  /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
1109  "packuswb %%mm0, %%mm2 \n\t"\
1110  "packuswb %%mm6, %%mm5 \n\t"\
1111  "packuswb %%mm3, %%mm4 \n\t"\
1112 
1113 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
1114 
1115 #define REAL_YSCALEYUV2RGB1_ALPHA(index) \
1116  "movq (%1, "#index", 2), %%mm7 \n\t" /* abuf0[index ] */\
1117  "movq 8(%1, "#index", 2), %%mm1 \n\t" /* abuf0[index+4] */\
1118  "psraw $7, %%mm7 \n\t" /* abuf0[index ] >>7 */\
1119  "psraw $7, %%mm1 \n\t" /* abuf0[index+4] >>7 */\
1120  "packuswb %%mm1, %%mm7 \n\t"
1121 #define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
1122 
1123 /**
1124  * YV12 to RGB without scaling or interpolating
1125  */
1126 static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
1127  const int16_t *ubuf[2], const int16_t *vbuf[2],
1128  const int16_t *abuf0, uint8_t *dest,
1129  int dstW, int uvalpha, int y)
1130 {
1131  const int16_t *ubuf0 = ubuf[0];
1132  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1133 
1134  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1135  const int16_t *ubuf1 = ubuf[0];
1136  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
1137  __asm__ volatile(
1138  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1139  "mov %4, %%"FF_REG_b" \n\t"
1140  "push %%"FF_REG_BP" \n\t"
1141  YSCALEYUV2RGB1(%%FF_REGBP, %5)
1142  YSCALEYUV2RGB1_ALPHA(%%FF_REGBP)
1143  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1144  "pop %%"FF_REG_BP" \n\t"
1145  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1146  :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1147  "a" (&c->redDither)
1148  );
1149  } else {
1150  __asm__ volatile(
1151  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1152  "mov %4, %%"FF_REG_b" \n\t"
1153  "push %%"FF_REG_BP" \n\t"
1154  YSCALEYUV2RGB1(%%FF_REGBP, %5)
1155  "pcmpeqd %%mm7, %%mm7 \n\t"
1156  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1157  "pop %%"FF_REG_BP" \n\t"
1158  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1159  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1160  "a" (&c->redDither)
1161  );
1162  }
1163  } else {
1164  const int16_t *ubuf1 = ubuf[1];
1165  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
1166  __asm__ volatile(
1167  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1168  "mov %4, %%"FF_REG_b" \n\t"
1169  "push %%"FF_REG_BP" \n\t"
1170  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1171  YSCALEYUV2RGB1_ALPHA(%%FF_REGBP)
1172  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1173  "pop %%"FF_REG_BP" \n\t"
1174  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1175  :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1176  "a" (&c->redDither)
1177  );
1178  } else {
1179  __asm__ volatile(
1180  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1181  "mov %4, %%"FF_REG_b" \n\t"
1182  "push %%"FF_REG_BP" \n\t"
1183  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1184  "pcmpeqd %%mm7, %%mm7 \n\t"
1185  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1186  "pop %%"FF_REG_BP" \n\t"
1187  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1188  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1189  "a" (&c->redDither)
1190  );
1191  }
1192  }
1193 }
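/* Editor's note (illustrative scalar sketch, not part of the upstream file):
 * the *_1 functions take a single luma row; for chroma they either reuse one
 * row (uvalpha < 2048, cheaper but shifts chroma by half a line, psraw $4) or
 * average the two rows (psrlw $5 in the YSCALEYUV2RGB1b variant). */
static inline int chroma_1tap_sketch(int16_t u0, int16_t u1, int uvalpha)
{
    if (uvalpha < 2048)
        return u0 >> 4;          /* single-row path   */
    return (u0 + u1) >> 5;       /* averaged-row path */
}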
1194 
1195 static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
1196  const int16_t *ubuf[2], const int16_t *vbuf[2],
1197  const int16_t *abuf0, uint8_t *dest,
1198  int dstW, int uvalpha, int y)
1199 {
1200  const int16_t *ubuf0 = ubuf[0];
1201  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1202 
1203  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1204  const int16_t *ubuf1 = ubuf[0];
1205  __asm__ volatile(
1206  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1207  "mov %4, %%"FF_REG_b" \n\t"
1208  "push %%"FF_REG_BP" \n\t"
1209  YSCALEYUV2RGB1(%%FF_REGBP, %5)
1210  "pxor %%mm7, %%mm7 \n\t"
1211  WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1212  "pop %%"FF_REG_BP" \n\t"
1213  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1214  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1215  "a" (&c->redDither)
1216  NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
1217  );
1218  } else {
1219  const int16_t *ubuf1 = ubuf[1];
1220  __asm__ volatile(
1221  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1222  "mov %4, %%"FF_REG_b" \n\t"
1223  "push %%"FF_REG_BP" \n\t"
1224  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1225  "pxor %%mm7, %%mm7 \n\t"
1226  WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1227  "pop %%"FF_REG_BP" \n\t"
1228  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1229  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1230  "a" (&c->redDither)
1231  NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
1232  );
1233  }
1234 }
1235 
1236 static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
1237  const int16_t *ubuf[2], const int16_t *vbuf[2],
1238  const int16_t *abuf0, uint8_t *dest,
1239  int dstW, int uvalpha, int y)
1240 {
1241  const int16_t *ubuf0 = ubuf[0];
1242  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1243 
1244  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1245  const int16_t *ubuf1 = ubuf[0];
1246  __asm__ volatile(
1247  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1248  "mov %4, %%"FF_REG_b" \n\t"
1249  "push %%"FF_REG_BP" \n\t"
1250  YSCALEYUV2RGB1(%%FF_REGBP, %5)
1251  "pxor %%mm7, %%mm7 \n\t"
1252  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1253 #ifdef DITHER1XBPP
1254  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1255  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1256  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1257 #endif
1258  WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1259  "pop %%"FF_REG_BP" \n\t"
1260  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1261  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1262  "a" (&c->redDither)
1263  NAMED_CONSTRAINTS_ADD(bF8,bFC)
1264  );
1265  } else {
1266  const int16_t *ubuf1 = ubuf[1];
1267  __asm__ volatile(
1268  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1269  "mov %4, %%"FF_REG_b" \n\t"
1270  "push %%"FF_REG_BP" \n\t"
1271  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1272  "pxor %%mm7, %%mm7 \n\t"
1273  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1274 #ifdef DITHER1XBPP
1275  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1276  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1277  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1278 #endif
1279  WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1280  "pop %%"FF_REG_BP" \n\t"
1281  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1282  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1283  "a" (&c->redDither)
1284  NAMED_CONSTRAINTS_ADD(bF8,bFC)
1285  );
1286  }
1287 }
1288 
1289 static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
1290  const int16_t *ubuf[2], const int16_t *vbuf[2],
1291  const int16_t *abuf0, uint8_t *dest,
1292  int dstW, int uvalpha, int y)
1293 {
1294  const int16_t *ubuf0 = ubuf[0];
1295  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1296 
1297  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1298  const int16_t *ubuf1 = ubuf[0];
1299  __asm__ volatile(
1300  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1301  "mov %4, %%"FF_REG_b" \n\t"
1302  "push %%"FF_REG_BP" \n\t"
1303  YSCALEYUV2RGB1(%%FF_REGBP, %5)
1304  "pxor %%mm7, %%mm7 \n\t"
1305  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1306 #ifdef DITHER1XBPP
1307  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1308  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1309  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1310 #endif
1311  WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1312  "pop %%"FF_REG_BP" \n\t"
1313  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1314  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1315  "a" (&c->redDither)
1316  NAMED_CONSTRAINTS_ADD(bF8,bFC)
1317  );
1318  } else {
1319  const int16_t *ubuf1 = ubuf[1];
1320  __asm__ volatile(
1321  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1322  "mov %4, %%"FF_REG_b" \n\t"
1323  "push %%"FF_REG_BP" \n\t"
1324  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1325  "pxor %%mm7, %%mm7 \n\t"
1326  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1327 #ifdef DITHER1XBPP
1328  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1329  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1330  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1331 #endif
1332  WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1333  "pop %%"FF_REG_BP" \n\t"
1334  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1335  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1336  "a" (&c->redDither)
1337  NAMED_CONSTRAINTS_ADD(bF8,bFC)
1338  );
1339  }
1340 }
1341 
1342 #define REAL_YSCALEYUV2PACKED1(index, c) \
1343  "xor "#index", "#index" \n\t"\
1344  ".p2align 4 \n\t"\
1345  "1: \n\t"\
1346  "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
1347  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1348  "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
1349  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1350  "psraw $7, %%mm3 \n\t" \
1351  "psraw $7, %%mm4 \n\t" \
1352  "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
1353  "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
1354  "psraw $7, %%mm1 \n\t" \
1355  "psraw $7, %%mm7 \n\t" \
1356 
1357 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
1358 
1359 #define REAL_YSCALEYUV2PACKED1b(index, c) \
1360  "xor "#index", "#index" \n\t"\
1361  ".p2align 4 \n\t"\
1362  "1: \n\t"\
1363  "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
1364  "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
1365  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1366  "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
1367  "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
1368  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1369  "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
1370  "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
1371  "psrlw $8, %%mm3 \n\t" \
1372  "psrlw $8, %%mm4 \n\t" \
1373  "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
1374  "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
1375  "psraw $7, %%mm1 \n\t" \
1376  "psraw $7, %%mm7 \n\t"
1377 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
1378 
1379 static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
1380  const int16_t *ubuf[2], const int16_t *vbuf[2],
1381  const int16_t *abuf0, uint8_t *dest,
1382  int dstW, int uvalpha, int y)
1383 {
1384  const int16_t *ubuf0 = ubuf[0];
1385  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1386 
1387  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1388  const int16_t *ubuf1 = ubuf[0];
1389  __asm__ volatile(
1390  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1391  "mov %4, %%"FF_REG_b" \n\t"
1392  "push %%"FF_REG_BP" \n\t"
1393  YSCALEYUV2PACKED1(%%FF_REGBP, %5)
1394  WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1395  "pop %%"FF_REG_BP" \n\t"
1396  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1397  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1398  "a" (&c->redDither)
1399  );
1400  } else {
1401  const int16_t *ubuf1 = ubuf[1];
1402  __asm__ volatile(
1403  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1404  "mov %4, %%"FF_REG_b" \n\t"
1405  "push %%"FF_REG_BP" \n\t"
1406  YSCALEYUV2PACKED1b(%%FF_REGBP, %5)
1407  WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1408  "pop %%"FF_REG_BP" \n\t"
1409  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1410  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1411  "a" (&c->redDither)
1412  );
1413  }
1414 }
1415 static av_cold void RENAME(sws_init_swscale)(SwsContext *c)
1416 {
1417  enum AVPixelFormat dstFormat = c->dstFormat;
1418 
1419  c->use_mmx_vfilter= 0;
1420  if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && !isSemiPlanarYUV(dstFormat)
1421  && dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE
1422  && !(c->flags & SWS_BITEXACT)) {
1423  if (c->flags & SWS_ACCURATE_RND) {
1424  if (!(c->flags & SWS_FULL_CHR_H_INT)) {
1425  switch (c->dstFormat) {
1426  case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break;
1427 #if HAVE_6REGS
1428  case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X_ar); break;
1429 #endif
1430  case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X_ar); break;
1431  case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X_ar); break;
1432  case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
1433  default: break;
1434  }
1435  }
1436  } else {
1437  c->use_mmx_vfilter= 1;
1438  if (!(c->flags & SWS_FULL_CHR_H_INT)) {
1439  switch (c->dstFormat) {
1440  case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break;
1441  case AV_PIX_FMT_BGR32: c->yuv2packedX = RENAME(yuv2bgr32_X); break;
1442 #if HAVE_6REGS
1443  case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X); break;
1444 #endif
1445  case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X); break;
1446  case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X); break;
1447  case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
1448  default: break;
1449  }
1450  }
1451  }
1452  if (!(c->flags & SWS_FULL_CHR_H_INT)) {
1453  switch (c->dstFormat) {
1454  case AV_PIX_FMT_RGB32:
1455  c->yuv2packed1 = RENAME(yuv2rgb32_1);
1456  c->yuv2packed2 = RENAME(yuv2rgb32_2);
1457  break;
1458  case AV_PIX_FMT_BGR24:
1459  c->yuv2packed1 = RENAME(yuv2bgr24_1);
1460  c->yuv2packed2 = RENAME(yuv2bgr24_2);
1461  break;
1462  case AV_PIX_FMT_RGB555:
1463  c->yuv2packed1 = RENAME(yuv2rgb555_1);
1464  c->yuv2packed2 = RENAME(yuv2rgb555_2);
1465  break;
1466  case AV_PIX_FMT_RGB565:
1467  c->yuv2packed1 = RENAME(yuv2rgb565_1);
1468  c->yuv2packed2 = RENAME(yuv2rgb565_2);
1469  break;
1470  case AV_PIX_FMT_YUYV422:
1471  c->yuv2packed1 = RENAME(yuv2yuyv422_1);
1472  c->yuv2packed2 = RENAME(yuv2yuyv422_2);
1473  break;
1474  default:
1475  break;
1476  }
1477  }
1478  }
1479 
1480  if (c->srcBpc == 8 && c->dstBpc <= 14) {
1481  // Use the new MMX scaler if the MMXEXT one can't be used (it is faster than the x86 ASM one).
1482 #if COMPILE_TEMPLATE_MMXEXT
1483  if (c->flags & SWS_FAST_BILINEAR && c->canMMXEXTBeUsed) {
1484  c->hyscale_fast = ff_hyscale_fast_mmxext;
1485  c->hcscale_fast = ff_hcscale_fast_mmxext;
1486  } else {
1487 #endif /* COMPILE_TEMPLATE_MMXEXT */
1488  c->hyscale_fast = NULL;
1489  c->hcscale_fast = NULL;
1490 #if COMPILE_TEMPLATE_MMXEXT
1491  }
1492 #endif /* COMPILE_TEMPLATE_MMXEXT */
1493  }
1494 }
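/* Editor's note (illustrative usage sketch, not part of this file): the
 * function pointers selected above are reached through the public swscale
 * API; e.g. a YUV420P -> RGB565 context built as below may end up using the
 * yuv2rgb565_* fast paths on MMX/MMXEXT builds, depending on flags and CPU.
 * The format and flag choices here are arbitrary examples. */
#include "libswscale/swscale.h"

static struct SwsContext *make_rgb565_scaler_sketch(int w, int h)
{
    return sws_getContext(w, h, AV_PIX_FMT_YUV420P,   /* source geometry/format */
                          w, h, AV_PIX_FMT_RGB565,    /* destination            */
                          SWS_BILINEAR, NULL, NULL, NULL);
}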