libavcodec/x86/dsputil_mmx.c File Reference

#include "libavutil/x86_cpu.h"
#include "libavcodec/dsputil.h"
#include "libavcodec/h263.h"
#include "libavcodec/mpegvideo.h"
#include "libavcodec/simple_idct.h"
#include "dsputil_mmx.h"
#include "mmx.h"
#include "vp3dsp_mmx.h"
#include "vp3dsp_sse2.h"
#include "vp6dsp_mmx.h"
#include "vp6dsp_sse2.h"
#include "idct_xvid.h"
#include "dsputil_mmx_rnd_template.c"
#include "dsputil_mmx_avg_template.c"
#include "h264dsp_mmx.c"
#include "rv40dsp_mmx.c"

Go to the source code of this file.

Defines

#define JUMPALIGN()   __asm__ volatile (ASMALIGN(3)::)

#define MOVQ_ZERO(regd)   __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)

#define MOVQ_BFE(regd)

#define MOVQ_BONE(regd)   __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_bone))

#define MOVQ_WTWO(regd)   __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_wtwo))

#define PAVGB_MMX_NO_RND(rega, regb, regr, regfe)

#define PAVGB_MMX(rega, regb, regr, regfe)

#define PAVGBP_MMX_NO_RND(rega, regb, regr,regc, regd, regp)

#define PAVGBP_MMX(rega, regb, regr, regc, regd, regp)

#define DEF(x, y)   x ## _no_rnd_ ## y ##_mmx

#define SET_RND   MOVQ_WONE

#define PAVGBP(a, b, c, d, e, f)   PAVGBP_MMX_NO_RND(a, b, c, d, e, f)

#define PAVGB(a, b, c, e)   PAVGB_MMX_NO_RND(a, b, c, e)

#define DEF(x, y)   x ## _ ## y ##_mmx

#define SET_RND   MOVQ_WTWO

#define PAVGBP(a, b, c, d, e, f)   PAVGBP_MMX(a, b, c, d, e, f)

#define PAVGB(a, b, c, e)   PAVGB_MMX(a, b, c, e)

#define DEF(x)   x ## _3dnow

#define PAVGB   "pavgusb"

#define DEF(x)   x ## _mmx2

#define PAVGB   "pavgb"

#define put_no_rnd_pixels16_mmx   put_pixels16_mmx

#define put_no_rnd_pixels8_mmx   put_pixels8_mmx

#define put_pixels16_mmx2   put_pixels16_mmx

#define put_pixels8_mmx2   put_pixels8_mmx

#define put_pixels4_mmx2   put_pixels4_mmx

#define put_no_rnd_pixels16_mmx2   put_no_rnd_pixels16_mmx

#define put_no_rnd_pixels8_mmx2   put_no_rnd_pixels8_mmx

#define put_pixels16_3dnow   put_pixels16_mmx

#define put_pixels8_3dnow   put_pixels8_mmx

#define put_pixels4_3dnow   put_pixels4_mmx

#define put_no_rnd_pixels16_3dnow   put_no_rnd_pixels16_mmx

#define put_no_rnd_pixels8_3dnow   put_no_rnd_pixels8_mmx

#define CLEAR_BLOCKS(name, n)

#define H263_LOOP_FILTER

#define PAETH(cpu, abs3)

#define ABS3_MMX2

#define ABS3_SSSE3

#define QPEL_V_LOW(m3, m4, m5, m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)

#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW)

#define QPEL_OP(OPNAME, ROUNDER, RND, OP, MMX)

#define PUT_OP(a, b, temp, size)   "mov" #size " " #a ", " #b " \n\t"

#define AVG_3DNOW_OP(a, b, temp, size)

#define AVG_MMX2_OP(a, b, temp, size)

#define QPEL_2TAP_XY(OPNAME, SIZE, MMX, XY, HPEL)

#define QPEL_2TAP_L3(OPNAME, SIZE, MMX, XY, S0, S1, S2)

#define QPEL_2TAP(OPNAME, SIZE, MMX)

#define PREFETCH(name, op)

#define IF1(x)   x

#define IF0(x)

#define MIX5(mono, stereo)

#define MIX_MISC(stereo)

#define ff_float_to_int16_interleave6_sse(a, b, c)   float_to_int16_interleave_misc_sse(a,b,c,6)

#define ff_float_to_int16_interleave6_3dnow(a, b, c)   float_to_int16_interleave_misc_3dnow(a,b,c,6)

#define ff_float_to_int16_interleave6_3dn2(a, b, c)   float_to_int16_interleave_misc_3dnow(a,b,c,6)

#define ff_float_to_int16_interleave6_sse2   ff_float_to_int16_interleave6_sse

#define FLOAT_TO_INT16_INTERLEAVE(cpu, body)

#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU)

#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU)

#define H264_QPEL_FUNCS(x, y, CPU)

Functions

DECLARE_ALIGNED_8 (const uint64_t, ff_bone)=0x0101010101010101ULL

DECLARE_ALIGNED_8 (const uint64_t, ff_wtwo)=0x0002000200020002ULL

DECLARE_ALIGNED_16 (const uint64_t, ff_pdw_80000000[2])

DECLARE_ALIGNED_8 (const uint64_t, ff_pw_3)=0x0003000300030003ULL

DECLARE_ALIGNED_8 (const uint64_t, ff_pw_4)=0x0004000400040004ULL

DECLARE_ALIGNED_16 (const xmm_reg, ff_pw_5)

DECLARE_ALIGNED_16 (const xmm_reg, ff_pw_8)

DECLARE_ALIGNED_8 (const uint64_t, ff_pw_15)=0x000F000F000F000FULL

DECLARE_ALIGNED_16 (const xmm_reg, ff_pw_16)

DECLARE_ALIGNED_8 (const uint64_t, ff_pw_20)=0x0014001400140014ULL

DECLARE_ALIGNED_16 (const xmm_reg, ff_pw_28)

DECLARE_ALIGNED_16 (const xmm_reg, ff_pw_32)

DECLARE_ALIGNED_8 (const uint64_t, ff_pw_42)=0x002A002A002A002AULL

DECLARE_ALIGNED_16 (const xmm_reg, ff_pw_64)

DECLARE_ALIGNED_8 (const uint64_t, ff_pw_96)=0x0060006000600060ULL

DECLARE_ALIGNED_8 (const uint64_t, ff_pw_128)=0x0080008000800080ULL

DECLARE_ALIGNED_8 (const uint64_t, ff_pw_255)=0x00ff00ff00ff00ffULL

DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1)=0x0101010101010101ULL

DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3)=0x0303030303030303ULL

DECLARE_ALIGNED_8 (const uint64_t, ff_pb_7)=0x0707070707070707ULL

DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1F)=0x1F1F1F1F1F1F1F1FULL

DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3F)=0x3F3F3F3F3F3F3F3FULL

DECLARE_ALIGNED_8 (const uint64_t, ff_pb_81)=0x8181818181818181ULL

DECLARE_ALIGNED_8 (const uint64_t, ff_pb_A1)=0xA1A1A1A1A1A1A1A1ULL

DECLARE_ALIGNED_8 (const uint64_t, ff_pb_FC)=0xFCFCFCFCFCFCFCFCULL

DECLARE_ALIGNED_16 (const double, ff_pd_1[2])

DECLARE_ALIGNED_16 (const double, ff_pd_2[2])

void put_pixels_clamped_mmx (const DCTELEM *block, uint8_t *pixels, int line_size)

static DECLARE_ALIGNED_8 (const unsigned char, vector128[8])

void put_signed_pixels_clamped_mmx (const DCTELEM *block, uint8_t *pixels, int line_size)

void add_pixels_clamped_mmx (const DCTELEM *block, uint8_t *pixels, int line_size)

static void put_pixels4_mmx (uint8_t *block, const uint8_t *pixels, int line_size, int h)

static void put_pixels8_mmx (uint8_t *block, const uint8_t *pixels, int line_size, int h)

static void put_pixels16_mmx (uint8_t *block, const uint8_t *pixels, int line_size, int h)

static void put_pixels16_sse2 (uint8_t *block, const uint8_t *pixels, int line_size, int h)

static void avg_pixels16_sse2 (uint8_t *block, const uint8_t *pixels, int line_size, int h)

static void clear_block_sse (DCTELEM *block)

static void add_bytes_mmx (uint8_t *dst, uint8_t *src, int w)

static void add_bytes_l2_mmx (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)

static void h263_v_loop_filter_mmx (uint8_t *src, int stride, int qscale)

static void transpose4x4 (uint8_t *dst, uint8_t *src, int dst_stride, int src_stride)

static void h263_h_loop_filter_mmx (uint8_t *src, int stride, int qscale)

static void draw_edges_mmx (uint8_t *buf, int wrap, int width, int height, int w)

static void gmc_mmx (uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)

void ff_cavsdsp_init_mmx2 (DSPContext *c, AVCodecContext *avctx)

void ff_cavsdsp_init_3dnow (DSPContext *c, AVCodecContext *avctx)

void ff_put_cavs_qpel8_mc00_mmx2 (uint8_t *dst, uint8_t *src, int stride)

void ff_avg_cavs_qpel8_mc00_mmx2 (uint8_t *dst, uint8_t *src, int stride)

void ff_put_cavs_qpel16_mc00_mmx2 (uint8_t *dst, uint8_t *src, int stride)

void ff_avg_cavs_qpel16_mc00_mmx2 (uint8_t *dst, uint8_t *src, int stride)

void ff_vc1dsp_init_mmx (DSPContext *dsp, AVCodecContext *avctx)

void ff_put_vc1_mspel_mc00_mmx (uint8_t *dst, const uint8_t *src, int stride, int rnd)

void ff_mmx_idct (DCTELEM *block)

void ff_mmxext_idct (DCTELEM *block)

static void ff_idct_xvid_mmx_put (uint8_t *dest, int line_size, DCTELEM *block)

static void ff_idct_xvid_mmx_add (uint8_t *dest, int line_size, DCTELEM *block)

static void ff_idct_xvid_mmx2_put (uint8_t *dest, int line_size, DCTELEM *block)

static void ff_idct_xvid_mmx2_add (uint8_t *dest, int line_size, DCTELEM *block)

static void vorbis_inverse_coupling_3dnow (float *mag, float *ang, int blocksize)

static void vorbis_inverse_coupling_sse (float *mag, float *ang, int blocksize)

static void ac3_downmix_sse (float(*samples)[256], float(*matrix)[2], int out_ch, int in_ch, int len)

static void vector_fmul_3dnow (float *dst, const float *src, int len)

static void vector_fmul_sse (float *dst, const float *src, int len)

static void vector_fmul_reverse_3dnow2 (float *dst, const float *src0, const float *src1, int len)

static void vector_fmul_reverse_sse (float *dst, const float *src0, const float *src1, int len)

static void vector_fmul_add_add_3dnow (float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step)

static void vector_fmul_add_add_sse (float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step)

static void vector_fmul_window_3dnow2 (float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len)

static void vector_fmul_window_sse (float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len)

static void int32_to_float_fmul_scalar_sse (float *dst, const int *src, float mul, int len)

static void int32_to_float_fmul_scalar_sse2 (float *dst, const int *src, float mul, int len)

static void float_to_int16_3dnow (int16_t *dst, const float *src, long len)

static void float_to_int16_sse (int16_t *dst, const float *src, long len)

static void float_to_int16_sse2 (int16_t *dst, const float *src, long len)

FLOAT_TO_INT16_INTERLEAVE (3dnow,"1: \n""pf2id (%2,%0), %%mm0 \n""pf2id 8(%2,%0), %%mm1 \n""pf2id (%3,%0), %%mm2 \n""pf2id 8(%3,%0), %%mm3 \n""packssdw %%mm1, %%mm0 \n""packssdw %%mm3, %%mm2 \n""movq %%mm0, %%mm1 \n""punpcklwd %%mm2, %%mm0 \n""punpckhwd %%mm2, %%mm1 \n""movq %%mm0, (%1,%0)\n""movq %%mm1, 8(%1,%0)\n""add $16, %0 \n""js 1b \n""femms \n") FLOAT_TO_INT16_INTERLEAVE(sse

mm0 n cvtps2pi (%2,%0)

mm0 n mm1 n mm2 n mm3 n
packssdw mm0 n packssdw mm2 n
movq mm1 n punpcklwd mm0 n
punpckhwd mm1 n movq n movq n
n js n emms n FLOAT_TO_INT16_INTERLEAVE (sse2,"1: \n""cvtps2dq (%2,%0), %%xmm0 \n""cvtps2dq (%3,%0), %%xmm1 \n""packssdw %%xmm1, %%xmm0 \n""movhlps %%xmm0, %%xmm1 \n""punpcklwd %%xmm1, %%xmm0 \n""movdqa %%xmm0, (%1,%0) \n""add $16, %0 \n""js 1b \n") static void float_to_int16_interleave_3dn2(int16_t *dst

void ff_snow_horizontal_compose97i_mmx (IDWTELEM *b, int width)

void ff_snow_vertical_compose97i_sse2 (IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width)

void ff_snow_vertical_compose97i_mmx (IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width)

void ff_snow_inner_add_yblock_sse2 (const uint8_t *obmc, const int obmc_stride, uint8_t **block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer *sb, int add, uint8_t *dst8)

void ff_snow_inner_add_yblock_mmx (const uint8_t *obmc, const int obmc_stride, uint8_t **block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer *sb, int add, uint8_t *dst8)

static void add_int16_sse2 (int16_t *v1, int16_t *v2, int order)

static void sub_int16_sse2 (int16_t *v1, int16_t *v2, int order)

static int32_t scalarproduct_int16_sse2 (int16_t *v1, int16_t *v2, int order, int shift)

void dsputil_init_mmx (DSPContext *c, AVCodecContext *avctx)

Variables

int mm_flags

__pad0__

mm0 n mm1 n mm2 n mm3 n packssdw mm1

mm0 n mm1 n mm2 n mm3 n
packssdw mm0 n packssdw mm3

mm0 n mm1 n mm2 n mm3 n
packssdw mm0 n packssdw mm2 n
movq mm0

mm0 n mm1 n mm2 n mm3 n
packssdw mm0 n packssdw mm2 n
movq mm1 n punpcklwd mm2

mm0 n mm1 n mm2 n mm3 n
packssdw mm0 n packssdw mm2 n
movq mm1 n punpcklwd mm0 n
punpckhwd mm1 n movq n movq n add

mm0 n mm1 n mm2 n mm3 n
packssdw mm0 n packssdw mm2 n
movq mm1 n punpcklwd mm0 n
punpckhwd mm1 n movq n movq n
n js n emms n const float ** src

mm0 n mm1 n mm2 n mm3 n
packssdw mm0 n packssdw mm2 n
movq mm1 n punpcklwd mm0 n
punpckhwd mm1 n movq n movq n
n js n emms n const float long len

Define Documentation

#define ABS3_MMX2

Value:

"psubw     %%mm5, %%mm7 \n"\
        "pmaxsw    %%mm7, %%mm5 \n"\
        "pxor      %%mm6, %%mm6 \n"\
        "pxor      %%mm7, %%mm7 \n"\
        "psubw     %%mm3, %%mm6 \n"\
        "psubw     %%mm4, %%mm7 \n"\
        "pmaxsw    %%mm6, %%mm3 \n"\
        "pmaxsw    %%mm7, %%mm4 \n"\
        "pxor      %%mm7, %%mm7 \n"

Definition at line 903 of file dsputil_mmx.c.

#define ABS3_SSSE3

Value:

"pabsw     %%mm3, %%mm3 \n"\
        "pabsw     %%mm4, %%mm4 \n"\
        "pabsw     %%mm5, %%mm5 \n"

Definition at line 914 of file dsputil_mmx.c.

#define AVG_3DNOW_OP	(	a,
		b,
		temp,
		size	)

Value:

"mov" #size " " #b ", " #temp "   \n\t"\
"pavgusb " #temp ", " #a "        \n\t"\
"mov" #size " " #a ", " #b "      \n\t"

Definition at line 1572 of file dsputil_mmx.c.

#define AVG_MMX2_OP	(	a,
		b,
		temp,
		size	)

Value:

"mov" #size " " #b ", " #temp "   \n\t"\
"pavgb " #temp ", " #a "          \n\t"\
"mov" #size " " #a ", " #b "      \n\t"

Definition at line 1576 of file dsputil_mmx.c.

#define CLEAR_BLOCKS	(	name,
		n		)

Value:

static void name(DCTELEM *blocks)\
{\
    __asm__ volatile(\
                "pxor %%mm7, %%mm7              \n\t"\
                "mov     %1, %%"REG_a"          \n\t"\
                "1:                             \n\t"\
                "movq %%mm7, (%0, %%"REG_a")    \n\t"\
                "movq %%mm7, 8(%0, %%"REG_a")   \n\t"\
                "movq %%mm7, 16(%0, %%"REG_a")  \n\t"\
                "movq %%mm7, 24(%0, %%"REG_a")  \n\t"\
                "add $32, %%"REG_a"             \n\t"\
                " js 1b                         \n\t"\
                : : "r" (((uint8_t *)blocks)+128*n),\
                    "i" (-128*n)\
                : "%"REG_a\
        );\
}

Definition at line 469 of file dsputil_mmx.c.

#define DEF ( x ) x ## _mmx2

Definition at line 195 of file dsputil_mmx.c.

#define DEF ( x ) x ## _3dnow

Definition at line 195 of file dsputil_mmx.c.

#define DEF	(	x,
		y		)	x ## _ ## y ##_mmx

Definition at line 195 of file dsputil_mmx.c.

#define DEF	(	x,
		y		)	x ## _no_rnd_ ## y ##_mmx

Definition at line 195 of file dsputil_mmx.c.

#define ff_float_to_int16_interleave6_3dn2	(	a,
		b,
		c	)	float_to_int16_interleave_misc_3dnow(a,b,c,6)

Definition at line 2385 of file dsputil_mmx.c.

#define ff_float_to_int16_interleave6_3dnow	(	a,
		b,
		c	)	float_to_int16_interleave_misc_3dnow(a,b,c,6)

Definition at line 2384 of file dsputil_mmx.c.

#define ff_float_to_int16_interleave6_sse	(	a,
		b,
		c	)	float_to_int16_interleave_misc_sse(a,b,c,6)

Definition at line 2383 of file dsputil_mmx.c.

#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse

Definition at line 2387 of file dsputil_mmx.c.

#define FLOAT_TO_INT16_INTERLEAVE	(	cpu,
		body		)

Definition at line 2389 of file dsputil_mmx.c.

#define H263_LOOP_FILTER

Definition at line 588 of file dsputil_mmx.c.

Referenced by h263_h_loop_filter_mmx(), and h263_v_loop_filter_mmx().

#define H264_QPEL_FUNCS	(	x,
		y,
		CPU	)

Value:

c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\
            c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\
            c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\
            c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;


Defines
#define	JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)
#define	MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)
#define	MOVQ_BFE(regd)
#define	MOVQ_BONE(regd) __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_bone))
#define	MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_wtwo))
#define	PAVGB_MMX_NO_RND(rega, regb, regr, regfe)
#define	PAVGB_MMX(rega, regb, regr, regfe)
#define	PAVGBP_MMX_NO_RND(rega, regb, regr,regc, regd, regp)
#define	PAVGBP_MMX(rega, regb, regr, regc, regd, regp)
#define	DEF(x, y) x ## _no_rnd_ ## y ##_mmx
#define	SET_RND MOVQ_WONE
#define	PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
#define	PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e)
#define	DEF(x, y) x ## _ ## y ##_mmx
#define	SET_RND MOVQ_WTWO
#define	PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f)
#define	PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e)
#define	DEF(x) x ## _3dnow
#define	PAVGB "pavgusb"
#define	DEF(x) x ## _mmx2
#define	PAVGB "pavgb"
#define	put_no_rnd_pixels16_mmx put_pixels16_mmx
#define	put_no_rnd_pixels8_mmx put_pixels8_mmx
#define	put_pixels16_mmx2 put_pixels16_mmx
#define	put_pixels8_mmx2 put_pixels8_mmx
#define	put_pixels4_mmx2 put_pixels4_mmx
#define	put_no_rnd_pixels16_mmx2 put_no_rnd_pixels16_mmx
#define	put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx
#define	put_pixels16_3dnow put_pixels16_mmx
#define	put_pixels8_3dnow put_pixels8_mmx
#define	put_pixels4_3dnow put_pixels4_mmx
#define	put_no_rnd_pixels16_3dnow put_no_rnd_pixels16_mmx
#define	put_no_rnd_pixels8_3dnow put_no_rnd_pixels8_mmx
#define	CLEAR_BLOCKS(name, n)
#define	H263_LOOP_FILTER
#define	PAETH(cpu, abs3)
#define	ABS3_MMX2
#define	ABS3_SSSE3
#define	QPEL_V_LOW(m3, m4, m5, m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)
#define	QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW)
#define	QPEL_OP(OPNAME, ROUNDER, RND, OP, MMX)
#define	PUT_OP(a, b, temp, size) "mov" #size " " #a ", " #b " \n\t"
#define	AVG_3DNOW_OP(a, b, temp, size)
#define	AVG_MMX2_OP(a, b, temp, size)
#define	QPEL_2TAP_XY(OPNAME, SIZE, MMX, XY, HPEL)
#define	QPEL_2TAP_L3(OPNAME, SIZE, MMX, XY, S0, S1, S2)
#define	QPEL_2TAP(OPNAME, SIZE, MMX)
#define	PREFETCH(name, op)
#define	IF1(x) x
#define	IF0(x)
#define	MIX5(mono, stereo)
#define	MIX_MISC(stereo)
#define	ff_float_to_int16_interleave6_sse(a, b, c) float_to_int16_interleave_misc_sse(a,b,c,6)
#define	ff_float_to_int16_interleave6_3dnow(a, b, c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
#define	ff_float_to_int16_interleave6_3dn2(a, b, c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
#define	ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
#define	FLOAT_TO_INT16_INTERLEAVE(cpu, body)
#define	SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU)
#define	SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU)
#define	H264_QPEL_FUNCS(x, y, CPU)
Functions
	DECLARE_ALIGNED_8 (const uint64_t, ff_bone)=0x0101010101010101ULL
	DECLARE_ALIGNED_8 (const uint64_t, ff_wtwo)=0x0002000200020002ULL
	DECLARE_ALIGNED_16 (const uint64_t, ff_pdw_80000000[2])
	DECLARE_ALIGNED_8 (const uint64_t, ff_pw_3)=0x0003000300030003ULL
	DECLARE_ALIGNED_8 (const uint64_t, ff_pw_4)=0x0004000400040004ULL
	DECLARE_ALIGNED_16 (const xmm_reg, ff_pw_5)
	DECLARE_ALIGNED_16 (const xmm_reg, ff_pw_8)
	DECLARE_ALIGNED_8 (const uint64_t, ff_pw_15)=0x000F000F000F000FULL
	DECLARE_ALIGNED_16 (const xmm_reg, ff_pw_16)
	DECLARE_ALIGNED_8 (const uint64_t, ff_pw_20)=0x0014001400140014ULL
	DECLARE_ALIGNED_16 (const xmm_reg, ff_pw_28)
	DECLARE_ALIGNED_16 (const xmm_reg, ff_pw_32)
	DECLARE_ALIGNED_8 (const uint64_t, ff_pw_42)=0x002A002A002A002AULL
	DECLARE_ALIGNED_16 (const xmm_reg, ff_pw_64)
	DECLARE_ALIGNED_8 (const uint64_t, ff_pw_96)=0x0060006000600060ULL
	DECLARE_ALIGNED_8 (const uint64_t, ff_pw_128)=0x0080008000800080ULL
	DECLARE_ALIGNED_8 (const uint64_t, ff_pw_255)=0x00ff00ff00ff00ffULL
	DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1)=0x0101010101010101ULL
	DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3)=0x0303030303030303ULL
	DECLARE_ALIGNED_8 (const uint64_t, ff_pb_7)=0x0707070707070707ULL
	DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1F)=0x1F1F1F1F1F1F1F1FULL
	DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3F)=0x3F3F3F3F3F3F3F3FULL
	DECLARE_ALIGNED_8 (const uint64_t, ff_pb_81)=0x8181818181818181ULL
	DECLARE_ALIGNED_8 (const uint64_t, ff_pb_A1)=0xA1A1A1A1A1A1A1A1ULL
	DECLARE_ALIGNED_8 (const uint64_t, ff_pb_FC)=0xFCFCFCFCFCFCFCFCULL
	DECLARE_ALIGNED_16 (const double, ff_pd_1[2])
	DECLARE_ALIGNED_16 (const double, ff_pd_2[2])
void	put_pixels_clamped_mmx (const DCTELEM *block, uint8_t *pixels, int line_size)
static	DECLARE_ALIGNED_8 (const unsigned char, vector128[8])
void	put_signed_pixels_clamped_mmx (const DCTELEM *block, uint8_t *pixels, int line_size)
void	add_pixels_clamped_mmx (const DCTELEM *block, uint8_t *pixels, int line_size)
static void	put_pixels4_mmx (uint8_t *block, const uint8_t *pixels, int line_size, int h)
static void	put_pixels8_mmx (uint8_t *block, const uint8_t *pixels, int line_size, int h)
static void	put_pixels16_mmx (uint8_t *block, const uint8_t *pixels, int line_size, int h)
static void	put_pixels16_sse2 (uint8_t *block, const uint8_t *pixels, int line_size, int h)
static void	avg_pixels16_sse2 (uint8_t *block, const uint8_t *pixels, int line_size, int h)
static void	clear_block_sse (DCTELEM *block)
static void	add_bytes_mmx (uint8_t *dst, uint8_t *src, int w)
static void	add_bytes_l2_mmx (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
static void	h263_v_loop_filter_mmx (uint8_t *src, int stride, int qscale)
static void	transpose4x4 (uint8_t *dst, uint8_t *src, int dst_stride, int src_stride)
static void	h263_h_loop_filter_mmx (uint8_t *src, int stride, int qscale)
static void	draw_edges_mmx (uint8_t *buf, int wrap, int width, int height, int w)
static void	gmc_mmx (uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
void	ff_cavsdsp_init_mmx2 (DSPContext *c, AVCodecContext *avctx)
void	ff_cavsdsp_init_3dnow (DSPContext *c, AVCodecContext *avctx)
void	ff_put_cavs_qpel8_mc00_mmx2 (uint8_t *dst, uint8_t *src, int stride)
void	ff_avg_cavs_qpel8_mc00_mmx2 (uint8_t *dst, uint8_t *src, int stride)
void	ff_put_cavs_qpel16_mc00_mmx2 (uint8_t *dst, uint8_t *src, int stride)
void	ff_avg_cavs_qpel16_mc00_mmx2 (uint8_t *dst, uint8_t *src, int stride)
void	ff_vc1dsp_init_mmx (DSPContext *dsp, AVCodecContext *avctx)
void	ff_put_vc1_mspel_mc00_mmx (uint8_t *dst, const uint8_t *src, int stride, int rnd)
void	ff_mmx_idct (DCTELEM *block)
void	ff_mmxext_idct (DCTELEM *block)
static void	ff_idct_xvid_mmx_put (uint8_t *dest, int line_size, DCTELEM *block)
static void	ff_idct_xvid_mmx_add (uint8_t *dest, int line_size, DCTELEM *block)
static void	ff_idct_xvid_mmx2_put (uint8_t *dest, int line_size, DCTELEM *block)
static void	ff_idct_xvid_mmx2_add (uint8_t *dest, int line_size, DCTELEM *block)
static void	vorbis_inverse_coupling_3dnow (float *mag, float *ang, int blocksize)
static void	vorbis_inverse_coupling_sse (float *mag, float *ang, int blocksize)
static void	ac3_downmix_sse (float(*samples)[256], float(*matrix)[2], int out_ch, int in_ch, int len)
static void	vector_fmul_3dnow (float *dst, const float *src, int len)
static void	vector_fmul_sse (float *dst, const float *src, int len)
static void	vector_fmul_reverse_3dnow2 (float *dst, const float *src0, const float *src1, int len)
static void	vector_fmul_reverse_sse (float *dst, const float *src0, const float *src1, int len)
static void	vector_fmul_add_add_3dnow (float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step)
static void	vector_fmul_add_add_sse (float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step)
static void	vector_fmul_window_3dnow2 (float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len)
static void	vector_fmul_window_sse (float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len)
static void	int32_to_float_fmul_scalar_sse (float *dst, const int *src, float mul, int len)
static void	int32_to_float_fmul_scalar_sse2 (float *dst, const int *src, float mul, int len)
static void	float_to_int16_3dnow (int16_t *dst, const float *src, long len)
static void	float_to_int16_sse (int16_t *dst, const float *src, long len)
static void	float_to_int16_sse2 (int16_t *dst, const float *src, long len)
	FLOAT_TO_INT16_INTERLEAVE (3dnow,"1: \n""pf2id (%2,%0), %%mm0 \n""pf2id 8(%2,%0), %%mm1 \n""pf2id (%3,%0), %%mm2 \n""pf2id 8(%3,%0), %%mm3 \n""packssdw %%mm1, %%mm0 \n""packssdw %%mm3, %%mm2 \n""movq %%mm0, %%mm1 \n""punpcklwd %%mm2, %%mm0 \n""punpckhwd %%mm2, %%mm1 \n""movq %%mm0, (%1,%0)\n""movq %%mm1, 8(%1,%0)\n""add $16, %0 \n""js 1b \n""femms \n") FLOAT_TO_INT16_INTERLEAVE(sse
mm0 n	cvtps2pi (%2,%0)
mm0 n mm1 n mm2 n mm3 n packssdw mm0 n packssdw mm2 n movq mm1 n punpcklwd mm0 n punpckhwd mm1 n movq n movq n n js n emms n	FLOAT_TO_INT16_INTERLEAVE (sse2,"1: \n""cvtps2dq (%2,%0), %%xmm0 \n""cvtps2dq (%3,%0), %%xmm1 \n""packssdw %%xmm1, %%xmm0 \n""movhlps %%xmm0, %%xmm1 \n""punpcklwd %%xmm1, %%xmm0 \n""movdqa %%xmm0, (%1,%0) \n""add $16, %0 \n""js 1b \n") static void float_to_int16_interleave_3dn2(int16_t *dst
void	ff_snow_horizontal_compose97i_mmx (IDWTELEM *b, int width)
void	ff_snow_vertical_compose97i_sse2 (IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width)
void	ff_snow_vertical_compose97i_mmx (IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width)
void	ff_snow_inner_add_yblock_sse2 (const uint8_t *obmc, const int obmc_stride, uint8_t **block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer *sb, int add, uint8_t *dst8)
void	ff_snow_inner_add_yblock_mmx (const uint8_t *obmc, const int obmc_stride, uint8_t **block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer *sb, int add, uint8_t *dst8)
static void	add_int16_sse2 (int16_t *v1, int16_t *v2, int order)
static void	sub_int16_sse2 (int16_t *v1, int16_t *v2, int order)
static int32_t	scalarproduct_int16_sse2 (int16_t *v1, int16_t *v2, int order, int shift)
void	dsputil_init_mmx (DSPContext *c, AVCodecContext *avctx)
Variables
int	mm_flags
	__pad0__
mm0 n mm1 n mm2 n mm3 n packssdw	mm1
mm0 n mm1 n mm2 n mm3 n packssdw mm0 n packssdw	mm3
mm0 n mm1 n mm2 n mm3 n packssdw mm0 n packssdw mm2 n movq	mm0
mm0 n mm1 n mm2 n mm3 n packssdw mm0 n packssdw mm2 n movq mm1 n punpcklwd	mm2
mm0 n mm1 n mm2 n mm3 n packssdw mm0 n packssdw mm2 n movq mm1 n punpcklwd mm0 n punpckhwd mm1 n movq n movq n	add
mm0 n mm1 n mm2 n mm3 n packssdw mm0 n packssdw mm2 n movq mm1 n punpcklwd mm0 n punpckhwd mm1 n movq n movq n n js n emms n const float **	src
mm0 n mm1 n mm2 n mm3 n packssdw mm0 n packssdw mm2 n movq mm1 n punpcklwd mm0 n punpckhwd mm1 n movq n movq n n js n emms n const float long	len

#define PAVGBP	(	a,
		b,
		c,
		d,
		e,
		f	)	PAVGBP_MMX_NO_RND(a, b, c, d, e, f)

#define PUT_OP	(	a,
		b,
		temp,
		size	)	"mov" #size " " #a ", " #b " \n\t"

#define QPEL_V_LOW	(	m3,
		m4,
		m5,
		m6,
		pw_20,
		pw_3,
		rnd,
		in0,
		in1,
		in2,
		in7,
		out,
		OP	)

static void ac3_downmix_sse	(	float(*)	samples[256],
		float(*)	matrix[2],
		int	out_ch,
		int	in_ch,
		int	len
	)			`[static]`

static void add_bytes_l2_mmx	(	uint8_t *	dst,
		uint8_t *	src1,
		uint8_t *	src2,
		int	w
	)			`[static]`

static void add_bytes_mmx	(	uint8_t *	dst,
		uint8_t *	src,
		int	w
	)			`[static]`

static void add_int16_sse2	(	int16_t *	v1,
		int16_t *	v2,
		int	order
	)			`[static]`

void add_pixels_clamped_mmx	(	const DCTELEM *	block,
		uint8_t *	pixels,
		int	line_size
	)

static void avg_pixels16_sse2	(	uint8_t *	block,
		const uint8_t *	pixels,
		int	line_size,
		int	h
	)			`[static]`

#define MIX5	(	mono,
		stereo		)

#define PAETH	(	cpu,
		abs3		)

#define PAVGB_MMX	(	rega,
		regb,
		regr,
		regfe	)

#define PAVGB_MMX_NO_RND	(	rega,
		regb,
		regr,
		regfe	)

static DECLARE_ALIGNED_8	(	const unsigned	char,
		vector128	[8]
	)			`[static]`

DECLARE_ALIGNED_8	(	const	uint64_t,
		ff_pw_255
	)			`[pure virtual]`

static void draw_edges_mmx	(	uint8_t *	buf,
		int	wrap,
		int	width,
		int	height,
		int	w
	)			`[static]`

void dsputil_init_mmx	(	DSPContext *	c,
		AVCodecContext *	avctx
	)

void ff_avg_cavs_qpel16_mc00_mmx2	(	uint8_t *	dst,
		uint8_t *	src,
		int	stride
	)

void ff_avg_cavs_qpel8_mc00_mmx2	(	uint8_t *	dst,
		uint8_t *	src,
		int	stride
	)

void ff_cavsdsp_init_3dnow	(	DSPContext *	c,
		AVCodecContext *	avctx
	)

void ff_cavsdsp_init_mmx2	(	DSPContext *	c,
		AVCodecContext *	avctx
	)

static void ff_idct_xvid_mmx2_add	(	uint8_t *	dest,
		int	line_size,
		DCTELEM *	block
	)			`[static]`

static void ff_idct_xvid_mmx2_put	(	uint8_t *	dest,
		int	line_size,
		DCTELEM *	block
	)			`[static]`

libavcodec/x86/dsputil_mmx.c File Reference

Defines

Functions

Variables

Define Documentation

Function Documentation

Variable Documentation

static void ff_idct_xvid_mmx_add	(	uint8_t *	dest,
		int	line_size,
		DCTELEM *	block
	)			`[static]`

static void ff_idct_xvid_mmx_put	(	uint8_t *	dest,
		int	line_size,
		DCTELEM *	block
	)			`[static]`

void ff_put_cavs_qpel16_mc00_mmx2	(	uint8_t *	dst,
		uint8_t *	src,
		int	stride
	)

void ff_put_cavs_qpel8_mc00_mmx2	(	uint8_t *	dst,
		uint8_t *	src,
		int	stride
	)

void ff_put_vc1_mspel_mc00_mmx	(	uint8_t *	dst,
		const uint8_t *	src,
		int	stride,
		int	rnd
	)

void ff_snow_horizontal_compose97i_mmx	(	IDWTELEM *	b,
		int	width
	)

void ff_snow_inner_add_yblock_mmx	(	const uint8_t *	obmc,
		const int	obmc_stride,
		uint8_t **	block,
		int	b_w,
		int	b_h,
		int	src_x,
		int	src_y,
		int	src_stride,
		slice_buffer *	sb,
		int	add,
		uint8_t *	dst8
	)

void ff_snow_inner_add_yblock_sse2	(	const uint8_t *	obmc,
		const int	obmc_stride,
		uint8_t **	block,
		int	b_w,
		int	b_h,
		int	src_x,
		int	src_y,
		int	src_stride,
		slice_buffer *	sb,
		int	add,
		uint8_t *	dst8
	)

void ff_snow_vertical_compose97i_mmx	(	IDWTELEM *	b0,
		IDWTELEM *	b1,
		IDWTELEM *	b2,
		IDWTELEM *	b3,
		IDWTELEM *	b4,
		IDWTELEM *	b5,
		int	width
	)

void ff_snow_vertical_compose97i_sse2	(	IDWTELEM *	b0,
		IDWTELEM *	b1,
		IDWTELEM *	b2,
		IDWTELEM *	b3,
		IDWTELEM *	b4,
		IDWTELEM *	b5,
		int	width
	)