Go to the documentation of this file.
   /* Declares a table as an array of `size` TXSample elements. The identifier
    * is built by TX_TAB() from ff_tx_tab_<name>; 32 is the alignment argument
    * handed to DECLARE_ALIGNED. */
   27 #define TABLE_DEF(name, size) \ 
   28     DECLARE_ALIGNED(32, TXSample, TX_TAB(ff_tx_tab_ ##name))[size] 
   30 #define SR_POW2_TABLES \ 
   /* SR_TABLE, storage variant: for a transform length `len`, define the
    * matching table via TABLE_DEF with len/4 + 1 entries. */
   47 #define SR_TABLE(len) \ 
   48     TABLE_DEF(len, len/4 + 1); 
   63 #define SR_TABLE(len)                                              \ 
   64 static av_cold void TX_TAB(ff_tx_init_tab_ ##len)(void)            \ 
   66     double freq = 2*M_PI/len;                                      \ 
   67     TXSample *tab = TX_TAB(ff_tx_tab_ ##len);                      \ 
   69     for (int i = 0; i < len/4; i++)                                \ 
   70         *tab++ = RESCALE(cos(i*freq));                             \ 
   /* SR_TABLE, list variant: expands to the per-length init function name
    * followed by a comma, i.e. one element of a comma-separated list. */
   78 #define SR_TABLE(len) TX_TAB(ff_tx_init_tab_ ##len), 
   /* SR_TABLE, list variant: expands to one AV_ONCE_INIT element (with a
    * trailing comma) per table length. */
   84 #define SR_TABLE(len) AV_ONCE_INIT, 
   /* Indices 0-7: cos and sin of 2*pi/5 and 2*pi/10. Every value is written
    * to two consecutive slots, so the even and odd index of each pair hold
    * the same constant. */
   92     TX_TAB(ff_tx_tab_53)[0] = RESCALE(cos(2 * 
M_PI /  5));
 
   93     TX_TAB(ff_tx_tab_53)[1] = RESCALE(cos(2 * 
M_PI /  5));
 
   94     TX_TAB(ff_tx_tab_53)[2] = RESCALE(cos(2 * 
M_PI / 10));
 
   95     TX_TAB(ff_tx_tab_53)[3] = RESCALE(cos(2 * 
M_PI / 10));
 
   96     TX_TAB(ff_tx_tab_53)[4] = RESCALE(sin(2 * 
M_PI /  5));
 
   97     TX_TAB(ff_tx_tab_53)[5] = RESCALE(sin(2 * 
M_PI /  5));
 
   98     TX_TAB(ff_tx_tab_53)[6] = RESCALE(sin(2 * 
M_PI / 10));
 
   99     TX_TAB(ff_tx_tab_53)[7] = RESCALE(sin(2 * 
M_PI / 10));
 
  /* Indices 8-11: cos(2*pi/12) stored twice, then cos(2*pi/6) and
   * cos(8*pi/6). Note the last entry uses 8*pi, not 2*pi. */
  102     TX_TAB(ff_tx_tab_53)[ 8] = RESCALE(cos(2 * 
M_PI / 12));
 
  103     TX_TAB(ff_tx_tab_53)[ 9] = RESCALE(cos(2 * 
M_PI / 12));
 
  104     TX_TAB(ff_tx_tab_53)[10] = RESCALE(cos(2 * 
M_PI /  6));
 
  105     TX_TAB(ff_tx_tab_53)[11] = RESCALE(cos(8 * 
M_PI /  6));
 
  /* Six constants: cos/sin of 2*pi/7, then sin/cos of 2*pi/28, then cos/sin
   * of 2*pi/14. Indices 2 and 3 are stored sin-first, unlike the other two
   * pairs, so readers of this table must not assume a uniform cos/sin
   * layout. */
  110     TX_TAB(ff_tx_tab_7)[0] = RESCALE(cos(2 * 
M_PI /  7));
 
  111     TX_TAB(ff_tx_tab_7)[1] = RESCALE(sin(2 * 
M_PI /  7));
 
  112     TX_TAB(ff_tx_tab_7)[2] = RESCALE(sin(2 * 
M_PI / 28));
 
  113     TX_TAB(ff_tx_tab_7)[3] = RESCALE(cos(2 * 
M_PI / 28));
 
  114     TX_TAB(ff_tx_tab_7)[4] = RESCALE(cos(2 * 
M_PI / 14));
 
  115     TX_TAB(ff_tx_tab_7)[5] = RESCALE(sin(2 * 
M_PI / 14));
 
  /* Indices 0-5: cos/sin pairs for 2*pi/3, 2*pi/9 and 2*pi/36. */
  120     TX_TAB(ff_tx_tab_9)[0] = RESCALE(cos(2 * 
M_PI /  3));
 
  121     TX_TAB(ff_tx_tab_9)[1] = RESCALE(sin(2 * 
M_PI /  3));
 
  122     TX_TAB(ff_tx_tab_9)[2] = RESCALE(cos(2 * 
M_PI /  9));
 
  123     TX_TAB(ff_tx_tab_9)[3] = RESCALE(sin(2 * 
M_PI /  9));
 
  124     TX_TAB(ff_tx_tab_9)[4] = RESCALE(cos(2 * 
M_PI / 36));
 
  125     TX_TAB(ff_tx_tab_9)[5] = RESCALE(sin(2 * 
M_PI / 36));
 
  /* Indices 6-7 are derived from the already-rescaled entries above rather
   * than from fresh cos()/sin() calls: [6] = [2] + [5], [7] = [3] - [4].
   * They read the table, so they must stay after the six stores above. */
  126     TX_TAB(ff_tx_tab_9)[6] = TX_TAB(ff_tx_tab_9)[2] + TX_TAB(ff_tx_tab_9)[5];
 
  127     TX_TAB(ff_tx_tab_9)[7] = TX_TAB(ff_tx_tab_9)[3] - TX_TAB(ff_tx_tab_9)[4];
 
  146         int idx = factor_2 - 3;
 
  147         for (
int i = 0; 
i <= idx; 
i++)
 
  175     const TXSample *
tab = TX_TAB(ff_tx_tab_53);
 
  /* Integer path: each table entry is cast to int64_t before the multiply,
   * so the four products are formed in 64 bits. tab[10] is used for both
   * the .re and .im product of tmp[2]. */
  188     mtmp[0] = (int64_t)
tab[ 8] * 
tmp[1].
re;
 
  189     mtmp[1] = (int64_t)
tab[ 9] * 
tmp[1].
im;
 
  190     mtmp[2] = (int64_t)
tab[10] * 
tmp[2].
re;
 
  191     mtmp[3] = (int64_t)
tab[10] * 
tmp[2].
im;
 
  /* `+` binds tighter than `>>`, so each parenthesised expression is
   * (sum + 0x40000000) >> 31: 0x40000000 is 2^30, half of the 2^31 divisor,
   * which rounds the sum to nearest before the shift drops 31 bits.
   * NOTE(review): the sums can be negative; right-shifting a negative signed
   * value is implementation-defined in C, so this presumably relies on the
   * compiler sign-extending the shift - confirm for new targets. */
  192     out[1*
stride].re = 
tmp[0].re - (mtmp[2] + mtmp[0] + 0x40000000 >> 31);
 
  193     out[1*
stride].im = 
tmp[0].im - (mtmp[3] - mtmp[1] + 0x40000000 >> 31);
 
  194     out[2*
stride].re = 
tmp[0].re - (mtmp[2] - mtmp[0] + 0x40000000 >> 31);
 
  195     out[2*
stride].im = 
tmp[0].im - (mtmp[3] + mtmp[1] + 0x40000000 >> 31);
 
  208 #define DECL_FFT5(NAME, D0, D1, D2, D3, D4)                         \ 
  209 static av_always_inline void NAME(TXComplex *out, TXComplex *in,    \ 
  212     TXComplex dc, z0[4], t[6];                                      \ 
  213     const TXSample *tab = TX_TAB(ff_tx_tab_53);                     \ 
  216     BF(t[1].im, t[0].re, in[1].re, in[4].re);                       \ 
  217     BF(t[1].re, t[0].im, in[1].im, in[4].im);                       \ 
  218     BF(t[3].im, t[2].re, in[2].re, in[3].re);                       \ 
  219     BF(t[3].re, t[2].im, in[2].im, in[3].im);                       \ 
  221     out[D0*stride].re = dc.re + t[0].re + t[2].re;                  \ 
  222     out[D0*stride].im = dc.im + t[0].im + t[2].im;                  \ 
  224     SMUL(t[4].re, t[0].re, tab[0], tab[2], t[2].re, t[0].re);       \ 
  225     SMUL(t[4].im, t[0].im, tab[0], tab[2], t[2].im, t[0].im);       \ 
  226     CMUL(t[5].re, t[1].re, tab[4], tab[6], t[3].re, t[1].re);       \ 
  227     CMUL(t[5].im, t[1].im, tab[4], tab[6], t[3].im, t[1].im);       \ 
  229     BF(z0[0].re, z0[3].re, t[0].re, t[1].re);                       \ 
  230     BF(z0[0].im, z0[3].im, t[0].im, t[1].im);                       \ 
  231     BF(z0[2].re, z0[1].re, t[4].re, t[5].re);                       \ 
  232     BF(z0[2].im, z0[1].im, t[4].im, t[5].im);                       \ 
  234     out[D1*stride].re = dc.re + z0[3].re;                           \ 
  235     out[D1*stride].im = dc.im + z0[0].im;                           \ 
  236     out[D2*stride].re = dc.re + z0[2].re;                           \ 
  237     out[D2*stride].im = dc.im + z0[1].im;                           \ 
  238     out[D3*stride].re = dc.re + z0[1].re;                           \ 
  239     out[D3*stride].im = dc.im + z0[2].im;                           \ 
  240     out[D4*stride].re = dc.re + z0[0].re;                           \ 
  241     out[D4*stride].im = dc.im + z0[3].im;                           \ 
  266     out[0*
stride].re = 
dc.re + t[0].re + t[2].re + t[4].re;
 
  267     out[0*
stride].im = 
dc.im + t[0].im + t[2].im + t[4].im;
 
  270     mtmp[ 0] = ((int64_t)
tab[0].
re)*t[0].re - ((int64_t)
tab[2].
re)*t[4].re;
 
  271     mtmp[ 1] = ((int64_t)
tab[0].
re)*t[4].re - ((int64_t)
tab[1].
re)*t[0].re;
 
  272     mtmp[ 2] = ((int64_t)
tab[0].
re)*t[2].re - ((int64_t)
tab[2].
re)*t[0].re;
 
  273     mtmp[ 3] = ((int64_t)
tab[0].
re)*t[0].im - ((int64_t)
tab[1].
re)*t[2].im;
 
  274     mtmp[ 4] = ((int64_t)
tab[0].
re)*t[4].im - ((int64_t)
tab[1].
re)*t[0].im;
 
  275     mtmp[ 5] = ((int64_t)
tab[0].
re)*t[2].im - ((int64_t)
tab[2].
re)*t[0].im;
 
  277     mtmp[ 6] = ((int64_t)
tab[2].
im)*t[1].im + ((int64_t)
tab[1].
im)*t[5].im;
 
  278     mtmp[ 7] = ((int64_t)
tab[0].
im)*t[5].im + ((int64_t)
tab[2].
im)*t[3].im;
 
  279     mtmp[ 8] = ((int64_t)
tab[2].
im)*t[5].im + ((int64_t)
tab[1].
im)*t[3].im;
 
  280     mtmp[ 9] = ((int64_t)
tab[0].
im)*t[1].re + ((int64_t)
tab[1].
im)*t[3].re;
 
  281     mtmp[10] = ((int64_t)
tab[2].
im)*t[3].re + ((int64_t)
tab[0].
im)*t[5].re;
 
  282     mtmp[11] = ((int64_t)
tab[2].
im)*t[1].re + ((int64_t)
tab[1].
im)*t[5].re;
 
  284     z[0].re = (
int32_t)(mtmp[ 0] - ((int64_t)
tab[1].re)*t[2].
re + 0x40000000 >> 31);
 
  285     z[1].re = (
int32_t)(mtmp[ 1] - ((int64_t)
tab[2].re)*t[2].
re + 0x40000000 >> 31);
 
  286     z[2].re = (
int32_t)(mtmp[ 2] - ((int64_t)
tab[1].re)*t[4].
re + 0x40000000 >> 31);
 
  287     z[0].im = (
int32_t)(mtmp[ 3] - ((int64_t)
tab[2].re)*t[4].
im + 0x40000000 >> 31);
 
  288     z[1].im = (
int32_t)(mtmp[ 4] - ((int64_t)
tab[2].re)*t[2].
im + 0x40000000 >> 31);
 
  289     z[2].im = (
int32_t)(mtmp[ 5] - ((int64_t)
tab[1].re)*t[4].
im + 0x40000000 >> 31);
 
  291     t[0].re = (
int32_t)(mtmp[ 6] - ((int64_t)
tab[0].im)*t[3].
im + 0x40000000 >> 31);
 
  292     t[2].re = (
int32_t)(mtmp[ 7] - ((int64_t)
tab[1].im)*t[1].
im + 0x40000000 >> 31);
 
  293     t[4].re = (
int32_t)(mtmp[ 8] + ((int64_t)
tab[0].im)*t[1].
im + 0x40000000 >> 31);
 
  294     t[0].im = (
int32_t)(mtmp[ 9] + ((int64_t)
tab[2].im)*t[5].
re + 0x40000000 >> 31);
 
  295     t[2].im = (
int32_t)(mtmp[10] - ((int64_t)
tab[1].im)*t[1].
re + 0x40000000 >> 31);
 
  296     t[4].im = (
int32_t)(mtmp[11] - ((int64_t)
tab[0].im)*t[3].
re + 0x40000000 >> 31);
 
  298     z[0].re = 
tab[0].re*t[0].re - 
tab[2].re*t[4].re - 
tab[1].re*t[2].re;
 
  299     z[1].re = 
tab[0].re*t[4].re - 
tab[1].re*t[0].re - 
tab[2].re*t[2].re;
 
  300     z[2].re = 
tab[0].re*t[2].re - 
tab[2].re*t[0].re - 
tab[1].re*t[4].re;
 
  301     z[0].im = 
tab[0].re*t[0].im - 
tab[1].re*t[2].im - 
tab[2].re*t[4].im;
 
  302     z[1].im = 
tab[0].re*t[4].im - 
tab[1].re*t[0].im - 
tab[2].re*t[2].im;
 
  303     z[2].im = 
tab[0].re*t[2].im - 
tab[2].re*t[0].im - 
tab[1].re*t[4].im;
 
  308     t[0].re = 
tab[2].im*t[1].im + 
tab[1].im*t[5].im - 
tab[0].im*t[3].im;
 
  309     t[2].re = 
tab[0].im*t[5].im + 
tab[2].im*t[3].im - 
tab[1].im*t[1].im;
 
  310     t[4].re = 
tab[2].im*t[5].im + 
tab[1].im*t[3].im + 
tab[0].im*t[1].im;
 
  311     t[0].im = 
tab[0].im*t[1].re + 
tab[1].im*t[3].re + 
tab[2].im*t[5].re;
 
  312     t[2].im = 
tab[2].im*t[3].re + 
tab[0].im*t[5].re - 
tab[1].im*t[1].re;
 
  313     t[4].im = 
tab[2].im*t[1].re + 
tab[1].im*t[5].re - 
tab[0].im*t[3].re;
 
  356     w[0].re = t[0].re - t[6].re;
 
  357     w[0].im = t[0].im - t[6].im;
 
  358     w[1].re = t[2].re - t[6].re;
 
  359     w[1].im = t[2].im - t[6].im;
 
  360     w[2].re = t[1].re - t[7].re;
 
  361     w[2].im = t[1].im - t[7].im;
 
  362     w[3].re = t[3].re + t[7].re;
 
  363     w[3].im = t[3].im + t[7].im;
 
  365     z[0].re = 
dc.re + t[4].re;
 
  366     z[0].im = 
dc.im + t[4].im;
 
  368     z[1].re = t[0].re + t[2].re + t[6].re;
 
  369     z[1].im = t[0].im + t[2].im + t[6].im;
 
  375     mtmp[0] = t[1].re - t[3].re + t[7].re;
 
  376     mtmp[1] = t[1].im - t[3].im + t[7].im;
 
  378     y[3].re = (
int32_t)(((int64_t)
tab[0].
im)*mtmp[0] + 0x40000000 >> 31);
 
  379     y[3].im = (
int32_t)(((int64_t)
tab[0].im)*mtmp[1] + 0x40000000 >> 31);
 
  381     mtmp[0] = (
int32_t)(((int64_t)
tab[0].re)*z[1].
re + 0x40000000 >> 31);
 
  382     mtmp[1] = (
int32_t)(((int64_t)
tab[0].re)*z[1].
im + 0x40000000 >> 31);
 
  383     mtmp[2] = (
int32_t)(((int64_t)
tab[0].re)*t[4].
re + 0x40000000 >> 31);
 
  384     mtmp[3] = (
int32_t)(((int64_t)
tab[0].re)*t[4].
im + 0x40000000 >> 31);
 
  386     x[3].re = z[0].re  + (
int32_t)mtmp[0];
 
  387     x[3].im = z[0].im  + (
int32_t)mtmp[1];
 
  388     z[0].re = in[0].re + (
int32_t)mtmp[2];
 
  389     z[0].im = in[0].im + (
int32_t)mtmp[3];
 
  391     mtmp[0] = ((int64_t)
tab[1].
re)*
w[0].re;
 
  392     mtmp[1] = ((int64_t)
tab[1].
re)*
w[0].im;
 
  393     mtmp[2] = ((int64_t)
tab[2].
im)*
w[0].re;
 
  394     mtmp[3] = ((int64_t)
tab[2].
im)*
w[0].im;
 
  395     mtmp[4] = ((int64_t)
tab[1].
im)*
w[2].re;
 
  396     mtmp[5] = ((int64_t)
tab[1].
im)*
w[2].im;
 
  397     mtmp[6] = ((int64_t)
tab[2].
re)*
w[2].re;
 
  398     mtmp[7] = ((int64_t)
tab[2].
re)*
w[2].im;
 
  400     x[1].re = (
int32_t)(mtmp[0] + ((int64_t)
tab[2].im)*
w[1].
re + 0x40000000 >> 31);
 
  401     x[1].im = (
int32_t)(mtmp[1] + ((int64_t)
tab[2].im)*
w[1].
im + 0x40000000 >> 31);
 
  402     x[2].re = (
int32_t)(mtmp[2] - ((int64_t)
tab[3].re)*
w[1].
re + 0x40000000 >> 31);
 
  403     x[2].im = (
int32_t)(mtmp[3] - ((int64_t)
tab[3].re)*
w[1].
im + 0x40000000 >> 31);
 
  404     y[1].re = (
int32_t)(mtmp[4] + ((int64_t)
tab[2].re)*
w[3].
re + 0x40000000 >> 31);
 
  405     y[1].im = (
int32_t)(mtmp[5] + ((int64_t)
tab[2].re)*
w[3].
im + 0x40000000 >> 31);
 
  406     y[2].re = (
int32_t)(mtmp[6] - ((int64_t)
tab[3].im)*
w[3].
re + 0x40000000 >> 31);
 
  407     y[2].im = (
int32_t)(mtmp[7] - ((int64_t)
tab[3].im)*
w[3].
im + 0x40000000 >> 31);
 
  409     y[0].re = (
int32_t)(((int64_t)
tab[0].im)*t[5].
re + 0x40000000 >> 31);
 
  410     y[0].im = (
int32_t)(((int64_t)
tab[0].im)*t[5].
im + 0x40000000 >> 31);
 
  413     y[3].re = 
tab[0].im*(t[1].re - t[3].re + t[7].re);
 
  414     y[3].im = 
tab[0].im*(t[1].im - t[3].im + t[7].im);
 
  416     x[3].re = z[0].re  + 
tab[0].re*z[1].re;
 
  417     x[3].im = z[0].im  + 
tab[0].re*z[1].im;
 
  418     z[0].re = 
dc.re + 
tab[0].re*t[4].re;
 
  419     z[0].im = 
dc.im + 
tab[0].re*t[4].im;
 
  421     x[1].re = 
tab[1].re*
w[0].re + 
tab[2].im*
w[1].re;
 
  422     x[1].im = 
tab[1].re*
w[0].im + 
tab[2].im*
w[1].im;
 
  423     x[2].re = 
tab[2].im*
w[0].re - 
tab[3].re*
w[1].re;
 
  424     x[2].im = 
tab[2].im*
w[0].im - 
tab[3].re*
w[1].im;
 
  425     y[1].re = 
tab[1].im*
w[2].re + 
tab[2].re*
w[3].re;
 
  426     y[1].im = 
tab[1].im*
w[2].im + 
tab[2].re*
w[3].im;
 
  427     y[2].re = 
tab[2].re*
w[2].re - 
tab[3].im*
w[3].re;
 
  428     y[2].im = 
tab[2].re*
w[2].im - 
tab[3].im*
w[3].im;
 
  430     y[0].re = 
tab[0].im*t[5].re;
 
  431     y[0].im = 
tab[0].im*t[5].im;
 
  434     x[4].re = x[1].re + x[2].re;
 
  435     x[4].im = x[1].im + x[2].im;
 
  437     y[4].re = y[1].re - y[2].re;
 
  438     y[4].im = y[1].im - y[2].im;
 
  439     x[1].re = z[0].re + x[1].re;
 
  440     x[1].im = z[0].im + x[1].im;
 
  441     y[1].re = y[0].re + y[1].re;
 
  442     y[1].im = y[0].im + y[1].im;
 
  443     x[2].re = z[0].re + x[2].re;
 
  444     x[2].im = z[0].im + x[2].im;
 
  445     y[2].re = y[2].re - y[0].re;
 
  446     y[2].im = y[2].im - y[0].im;
 
  447     x[4].re = z[0].re - x[4].re;
 
  448     x[4].im = z[0].im - x[4].im;
 
  449     y[4].re = y[0].re - y[4].re;
 
  450     y[4].im = y[0].im - y[4].im;
 
  467     for (
int i = 0; 
i < 5; 
i++)
 
  493 #define DECL_FACTOR_S(n)                                                       \ 
  494 static void TX_NAME(ff_tx_fft##n)(AVTXContext *s, void *dst,                   \ 
  495                                   void *src, ptrdiff_t stride)                 \ 
  497     fft##n((TXComplex *)dst, (TXComplex *)src, stride / sizeof(TXComplex));    \ 
  499 static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = {                    \ 
  500     .name       = TX_NAME_STR("fft" #n "_ns"),                                 \ 
  501     .function   = TX_NAME(ff_tx_fft##n),                                       \ 
  502     .type       = TX_TYPE(FFT),                                                \ 
  503     .flags      = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE |                         \ 
  504                   AV_TX_UNALIGNED | FF_TX_PRESHUFFLE,                          \ 
  509     .init       = TX_NAME(ff_tx_fft_factor_init),                              \ 
  510     .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                         \ 
  511     .prio       = FF_TX_PRIO_BASE,                                             \ 
  514 #define DECL_FACTOR_F(n)                                                       \ 
  516 static const FFTXCodelet TX_NAME(ff_tx_fft##n##_fwd_def) = {                   \ 
  517     .name       = TX_NAME_STR("fft" #n "_fwd"),                                \ 
  518     .function   = TX_NAME(ff_tx_fft##n),                                       \ 
  519     .type       = TX_TYPE(FFT),                                                \ 
  520     .flags      = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE |                         \ 
  521                   AV_TX_UNALIGNED | FF_TX_FORWARD_ONLY,                        \ 
  526     .init       = TX_NAME(ff_tx_fft_factor_init),                              \ 
  527     .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                         \ 
  528     .prio       = FF_TX_PRIO_BASE,                                             \ 
  537 #define BUTTERFLIES(a0, a1, a2, a3)            \ 
  543         BF(t3, t5, t5, t1);                    \ 
  544         BF(a2.re, a0.re, r0, t5);              \ 
  545         BF(a3.im, a1.im, i1, t3);              \ 
  546         BF(t4, t6, t2, t6);                    \ 
  547         BF(a3.re, a1.re, r1, t4);              \ 
  548         BF(a2.im, a0.im, i0, t6);              \ 
  551 #define TRANSFORM(a0, a1, a2, a3, wre, wim)    \ 
  553         CMUL(t1, t2, a2.re, a2.im, wre, -wim); \ 
  554         CMUL(t5, t6, a3.re, a3.im, wre,  wim); \ 
  555         BUTTERFLIES(a0, a1, a2, a3);           \ 
  560                                                  const TXSample *cos, 
int len)
 
  565     const TXSample *wim = cos + o1 - 7;
 
  568     for (
int i = 0; 
i < 
len; 
i += 4) {
 
  569         TRANSFORM(z[0], z[o1 + 0], z[o2 + 0], z[o3 + 0], cos[0], wim[7]);
 
  570         TRANSFORM(z[2], z[o1 + 2], z[o2 + 2], z[o3 + 2], cos[2], wim[5]);
 
  571         TRANSFORM(z[4], z[o1 + 4], z[o2 + 4], z[o3 + 4], cos[4], wim[3]);
 
  572         TRANSFORM(z[6], z[o1 + 6], z[o2 + 6], z[o3 + 6], cos[6], wim[1]);
 
  574         TRANSFORM(z[1], z[o1 + 1], z[o2 + 1], z[o3 + 1], cos[1], wim[6]);
 
  575         TRANSFORM(z[3], z[o1 + 3], z[o2 + 3], z[o3 + 3], cos[3], wim[4]);
 
  576         TRANSFORM(z[5], z[o1 + 5], z[o2 + 5], z[o3 + 5], cos[5], wim[2]);
 
  577         TRANSFORM(z[7], z[o1 + 7], z[o2 + 7], z[o3 + 7], cos[7], wim[0]);
 
  596 #define DECL_SR_CODELET_DEF(n)                              \ 
  597 static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = { \ 
  598     .name       = TX_NAME_STR("fft" #n "_ns"),              \ 
  599     .function   = TX_NAME(ff_tx_fft##n##_ns),               \ 
  600     .type       = TX_TYPE(FFT),                             \ 
  601     .flags      = FF_TX_OUT_OF_PLACE | AV_TX_INPLACE |      \ 
  602                   AV_TX_UNALIGNED | FF_TX_PRESHUFFLE,       \ 
  607     .init       = TX_NAME(ff_tx_fft_sr_codelet_init),       \ 
  608     .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                      \ 
  609     .prio       = FF_TX_PRIO_BASE,                          \ 
  /* Generates the function ff_tx_fft<n>_ns (name wrapped by TX_NAME) and then
   * invokes DECL_SR_CODELET_DEF(n) for it.
   *
   * The visible body makes one call to the <n2> codelet on dst/src, two calls
   * to the <n4> codelet at element offsets n4*2 and n4*3, and finally
   * ff_tx_fft_sr_combine(dst, cos, n4 >> 1) with the table ff_tx_tab_<n>.
   *
   * Inside the generated function the local pointer named `cos` hides the
   * libm cos() function, so cos() cannot be called from this body.
   *
   * NOTE(review): this listing skips some original lines of the macro (the
   * line numbers jump), so the text below is not the complete definition. */
  612 #define DECL_SR_CODELET(n, n2, n4)                                    \ 
  613 static void TX_NAME(ff_tx_fft##n##_ns)(AVTXContext *s, void *_dst,    \ 
  614                                         void *_src, ptrdiff_t stride) \ 
  616     TXComplex *src = _src;                                            \ 
  617     TXComplex *dst = _dst;                                            \ 
  618     const TXSample *cos = TX_TAB(ff_tx_tab_##n);                      \ 
  620     TX_NAME(ff_tx_fft##n2##_ns)(s, dst,        src,        stride);   \ 
  621     TX_NAME(ff_tx_fft##n4##_ns)(s, dst + n4*2, src + n4*2, stride);   \ 
  622     TX_NAME(ff_tx_fft##n4##_ns)(s, dst + n4*3, src + n4*3, stride);   \ 
  623     TX_NAME(ff_tx_fft_sr_combine)(dst, cos, n4 >> 1);                 \ 
  626 DECL_SR_CODELET_DEF(n) 
  629                                    void *_src, ptrdiff_t 
stride)
 
  641                                    void *_src, ptrdiff_t 
stride)
 
  658                                    void *_src, ptrdiff_t 
stride)
 
  663     const TXSample cos = TX_TAB(ff_tx_tab_8)[1];
 
  673     TRANSFORM(dst[1], dst[3], dst[5], dst[7], cos, cos);
 
  677                                     void *_src, ptrdiff_t 
stride)
 
  681     const TXSample *cos = TX_TAB(ff_tx_tab_16);
 
  684     TXSample cos_16_1 = cos[1];
 
  685     TXSample cos_16_2 = cos[2];
 
  686     TXSample cos_16_3 = cos[3];
 
  698     TRANSFORM(dst[ 2], dst[ 6], dst[10], dst[14], cos_16_2, cos_16_2);
 
  699     TRANSFORM(dst[ 1], dst[ 5], dst[ 9], dst[13], cos_16_1, cos_16_3);
 
  700     TRANSFORM(dst[ 3], dst[ 7], dst[11], dst[15], cos_16_3, cos_16_1);
 
  761                                void *_src, ptrdiff_t 
stride)
 
  766     int *
map = 
s->sub[0].map;
 
  771     for (
int i = 0; 
i < 
len; 
i++)
 
  774     s->fn[0](&
s->sub[0], dst2, dst1, 
stride);
 
  778                                        void *_src, ptrdiff_t 
stride)
 
  783     const int *
map = 
s->sub->map;
 
  784     const int *inplace_idx = 
s->map;
 
  785     int src_idx, dst_idx;
 
  787     src_idx = *inplace_idx++;
 
  790         dst_idx = 
map[src_idx];
 
  793             dst_idx = 
map[dst_idx];
 
  794         } 
while (dst_idx != src_idx); 
 
  796     } 
while ((src_idx = *inplace_idx++));
 
  802     .
name       = TX_NAME_STR(
"fft"),
 
  816     .
name       = TX_NAME_STR(
"fft_inplace_small"),
 
  830     .
name       = TX_NAME_STR(
"fft_inplace"),
 
  855     for (
int i = 0; 
i < 
len; 
i++) {
 
  856         for (
int j = 0; j < 
len; j++) {
 
  857             const double factor = phase*
i*j;
 
  873     const int n = 
s->len;
 
  874     double phase = 
s->inv ? 2.0*
M_PI/n : -2.0*
M_PI/n;
 
  878     for (
int i = 0; 
i < n; 
i++) {
 
  880         for (
int j = 0; j < n; j++) {
 
  881             const double factor = phase*
i*j;
 
  900     const int n = 
s->len;
 
  904     for (
int i = 0; 
i < n; 
i++) {
 
  906         for (
int j = 0; j < n; j++) {
 
  918     .
name       = TX_NAME_STR(
"fft_naive_small"),
 
  932     .
name       = TX_NAME_STR(
"fft_naive"),
 
  954     size_t extra_tmp_len = 0;
 
  961     for (
int i = 0; 
i < 
ret; 
i++) {
 
  962         int len1 = len_list[
i];
 
  963         int len2 = 
len / len1;
 
  966         if (len2 & (len2 - 1))
 
  981         } 
else if (
ret < 0) { 
 
 1002         } 
else if (
ret < 0) { 
 
 1009             } 
else if (
ret < 0) {
 
 1029                                           s->sub[0].len, 
s->sub[1].len)))
 
 1036     tmp = (
int *)
s->tmp;
 
 1037     for (
int k = 0; k < 
len; k += 
s->sub[0].len) {
 
 1038         memcpy(
tmp, &
s->map[k], 
s->sub[0].len*
sizeof(*
tmp));
 
 1039         for (
int i = 0; 
i < 
s->sub[0].len; 
i++)
 
 1040             s->map[k + 
i] = 
tmp[
s->sub[0].map[
i]];
 
 1045         extra_tmp_len = 
len;
 
 1047         extra_tmp_len = 
s->sub[0].len;
 
 1049     if (extra_tmp_len && !(
s->exp = 
av_malloc(extra_tmp_len*
sizeof(*
s->exp))))
 
 1056                                    void *_in, ptrdiff_t 
stride)
 
 1058     const int n = 
s->sub[0].len, m = 
s->sub[1].len, l = 
s->len;
 
 1059     const int *in_map = 
s->map, *out_map = in_map + l;
 
 1060     const int *sub_map = 
s->sub[1].map;
 
 1066     for (
int i = 0; 
i < m; 
i++) {
 
 1067         for (
int j = 0; j < n; j++)
 
 1068             s->exp[j] = in[in_map[
i*n + j]];
 
 1069         s->fn[0](&
s->sub[0], &
s->tmp[sub_map[
i]], 
s->exp, m*
sizeof(
TXComplex));
 
 1072     for (
int i = 0; 
i < n; 
i++)
 
 1073         s->fn[1](&
s->sub[1], &tmp1[m*
i], &
s->tmp[m*
i], 
sizeof(
TXComplex));
 
 1075     for (
int i = 0; 
i < l; 
i++)
 
 1080                                       void *_in, ptrdiff_t 
stride)
 
 1082     const int n = 
s->sub[0].len, m = 
s->sub[1].len, l = 
s->len;
 
 1083     const int *in_map = 
s->map, *out_map = in_map + l;
 
 1084     const int *sub_map = 
s->sub[1].map;
 
 1090     for (
int i = 0; 
i < m; 
i++)
 
 1091         s->fn[0](&
s->sub[0], &
s->tmp[sub_map[
i]], &in[
i*n], m*
sizeof(
TXComplex));
 
 1093     for (
int i = 0; 
i < n; 
i++)
 
 1094         s->fn[1](&
s->sub[1], &tmp1[m*
i], &
s->tmp[m*
i], 
sizeof(
TXComplex));
 
 1096     for (
int i = 0; 
i < l; 
i++)
 
 1101     .
name       = TX_NAME_STR(
"fft_pfa"),
 
 1115     .
name       = TX_NAME_STR(
"fft_pfa_ns"),
 
 1136     s->scale_d = *((SCALE_TYPE *)
scale);
 
 1137     s->scale_f = 
s->scale_d;
 
 1142                                           void *_src, ptrdiff_t 
stride)
 
 1144     TXSample *
src = _src;
 
 1145     TXSample *dst = _dst;
 
 1146     double scale = 
s->scale_d;
 
 1148     const double phase = 
M_PI/(4.0*
len);
 
 1152     for (
int i = 0; 
i < 
len; 
i++) {
 
 1154         for (
int j = 0; j < 
len*2; j++) {
 
 1155             int a = (2*j + 1 + 
len) * (2*
i + 1);
 
 1156             sum += UNSCALE(
src[j]) * cos(
a * phase);
 
 1163                                           void *_src, ptrdiff_t 
stride)
 
 1165     TXSample *
src = _src;
 
 1166     TXSample *dst = _dst;
 
 1167     double scale = 
s->scale_d;
 
 1168     int len = 
s->len >> 1;
 
 1170     const double phase = 
M_PI/(4.0*len2);
 
 1174     for (
int i = 0; 
i < 
len; 
i++) {
 
 1177         double i_d = phase * (4*
len  - 2*
i - 1);
 
 1178         double i_u = phase * (3*len2 + 2*
i + 1);
 
 1179         for (
int j = 0; j < len2; j++) {
 
 1180             double a = (2 * j + 1);
 
 1181             double a_d = cos(
a * i_d);
 
 1182             double a_u = cos(
a * i_u);
 
 1193     .
name       = TX_NAME_STR(
"mdct_naive_fwd"),
 
 1207     .
name       = TX_NAME_STR(
"mdct_naive_inv"),
 
 1232     s->scale_d = *((SCALE_TYPE *)
scale);
 
 1233     s->scale_f = 
s->scale_d;
 
 1253         memcpy(
s->map, 
s->sub->map, (
len >> 1)*
sizeof(*
s->map));
 
 1255         for (
int i = 0; i < len >> 1; 
i++)
 
 1264         for (
int i = 0; 
i < (
s->len >> 1); 
i++)
 
 1273     TXSample *
src = _src, *dst = _dst;
 
 1275     const int len2 = 
s->len >> 1;
 
 1276     const int len4 = 
s->len >> 2;
 
 1277     const int len3 = len2 * 3;
 
 1278     const int *sub_map = 
s->map;
 
 1282     for (
int i = 0; 
i < len2; 
i++) { 
 
 1284         const int idx = sub_map[
i];
 
 1286             tmp.re = FOLD(-
src[ len2 + k],  
src[1*len2 - 1 - k]);
 
 1287             tmp.im = FOLD(-
src[ len3 + k], -
src[1*len3 - 1 - k]);
 
 1289             tmp.re = FOLD(-
src[ len2 + k], -
src[5*len2 - 1 - k]);
 
 1290             tmp.im = FOLD( 
src[-len2 + k], -
src[1*len3 - 1 - k]);
 
 1297     for (
int i = 0; 
i < len4; 
i++) {
 
 1298         const int i0 = len4 + 
i, i1 = len4 - 
i - 1;
 
 1313     const TXSample *
src = _src, *in1, *in2;
 
 1314     const int len2 = 
s->len >> 1;
 
 1315     const int len4 = 
s->len >> 2;
 
 1316     const int *sub_map = 
s->map;
 
 1322     for (
int i = 0; 
i < len2; 
i++) {
 
 1331     for (
int i = 0; 
i < len4; 
i++) {
 
 1332         const int i0 = len4 + 
i, i1 = len4 - 
i - 1;
 
 1342     .
name       = TX_NAME_STR(
"mdct_fwd"),
 
 1356     .
name       = TX_NAME_STR(
"mdct_inv"),
 
 1378     s->scale_d = *((SCALE_TYPE *)
scale);
 
 1379     s->scale_f = 
s->scale_d;
 
 1390                                          void *_src, ptrdiff_t 
stride)
 
 1392     int len  = 
s->len << 1;
 
 1393     int len2 = 
len >> 1;
 
 1394     int len4 = 
len >> 2;
 
 1395     TXSample *dst = _dst;
 
 1397     s->fn[0](&
s->sub[0], dst + len4, _src, 
stride);
 
 1401     for (
int i = 0; 
i < len4; 
i++) {
 
 1408     .
name       = TX_NAME_STR(
"mdct_inv_full"),
 
 1433     sub_len = 
len / cd->factors[0];
 
 1435     s->scale_d = *((SCALE_TYPE *)
scale);
 
 1436     s->scale_f = 
s->scale_d;
 
 1443                                 sub_len, inv, 
scale)))
 
 1450     if (cd->factors[0] == 15)
 
 1457     for (
int i = 0; 
i < 
len; 
i++)
 
 1468 #define DECL_COMP_IMDCT(N)                                                     \ 
 1469 static void TX_NAME(ff_tx_mdct_pfa_##N##xM_inv)(AVTXContext *s, void *_dst,    \ 
 1470                                                 void *_src, ptrdiff_t stride)  \ 
 1472     TXComplex fft##N##in[N];                                                   \ 
 1473     TXComplex *z = _dst, *exp = s->exp;                                        \ 
 1474     const TXSample *src = _src, *in1, *in2;                                    \ 
 1475     const int len4 = s->len >> 2;                                              \ 
 1476     const int len2 = s->len >> 1;                                              \ 
 1477     const int m = s->sub->len;                                                 \ 
 1478     const int *in_map = s->map, *out_map = in_map + N*m;                       \ 
 1479     const int *sub_map = s->sub->map;                                          \ 
 1481     stride /= sizeof(*src);                      \ 
 1483     in2 = src + ((N*m*2) - 1) * stride;                                        \ 
 1485     for (int i = 0; i < len2; i += N) {                                        \ 
 1486         for (int j = 0; j < N; j++) {                                          \ 
 1487             const int k = in_map[j];                                           \ 
 1488             TXComplex tmp = { in2[-k*stride], in1[k*stride] };                 \ 
 1489             CMUL3(fft##N##in[j], tmp, exp[j]);                                 \ 
 1491         fft##N(s->tmp + *(sub_map++), fft##N##in, m);                          \ 
 1496     for (int i = 0; i < N; i++)                                                \ 
 1497         s->fn[0](&s->sub[0], s->tmp + m*i, s->tmp + m*i, sizeof(TXComplex));   \ 
 1499     for (int i = 0; i < len4; i++) {                                           \ 
 1500         const int i0 = len4 + i, i1 = len4 - i - 1;                            \ 
 1501         const int s0 = out_map[i0], s1 = out_map[i1];                          \ 
 1502         TXComplex src1 = { s->tmp[s1].im, s->tmp[s1].re };                     \ 
 1503         TXComplex src0 = { s->tmp[s0].im, s->tmp[s0].re };                     \ 
 1505         CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re);    \ 
 1506         CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re);    \ 
 1510 static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_inv_def) = {           \ 
 1511     .name       = TX_NAME_STR("mdct_pfa_" #N "xM_inv"),                        \ 
 1512     .function   = TX_NAME(ff_tx_mdct_pfa_##N##xM_inv),                         \ 
 1513     .type       = TX_TYPE(MDCT),                                               \ 
 1514     .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,   \ 
 1515     .factors    = { N, TX_FACTOR_ANY },                                        \ 
 1518     .max_len    = TX_LEN_UNLIMITED,                                            \ 
 1519     .init       = TX_NAME(ff_tx_mdct_pfa_init),                                \ 
 1520     .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                         \ 
 1521     .prio       = FF_TX_PRIO_BASE,                                             \ 
 1530 #define DECL_COMP_MDCT(N)                                                      \ 
 1531 static void TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd)(AVTXContext *s, void *_dst,    \ 
 1532                                                 void *_src, ptrdiff_t stride)  \ 
 1534     TXComplex fft##N##in[N];                                                   \ 
 1535     TXSample *src = _src, *dst = _dst;                                         \ 
 1536     TXComplex *exp = s->exp, tmp;                                              \ 
 1537     const int m = s->sub->len;                                                 \ 
 1538     const int len4 = N*m;                                                      \ 
 1539     const int len3 = len4 * 3;                                                 \ 
 1540     const int len8 = s->len >> 2;                                              \ 
 1541     const int *in_map = s->map, *out_map = in_map + N*m;                       \ 
 1542     const int *sub_map = s->sub->map;                                          \ 
 1544     stride /= sizeof(*dst);                                                    \ 
 1546     for (int i = 0; i < m; i++) {              \ 
 1547         for (int j = 0; j < N; j++) {                                          \ 
 1548             const int k = in_map[i*N + j];                                     \ 
 1550                 tmp.re = FOLD(-src[ len4 + k],  src[1*len4 - 1 - k]);          \ 
 1551                 tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);          \ 
 1553                 tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]);          \ 
 1554                 tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]);          \ 
 1556             CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im,           \ 
 1557                  exp[k >> 1].re, exp[k >> 1].im);                              \ 
 1559         fft##N(s->tmp + sub_map[i], fft##N##in, m);                            \ 
 1562     for (int i = 0; i < N; i++)                                                \ 
 1563         s->fn[0](&s->sub[0], s->tmp + m*i, s->tmp + m*i, sizeof(TXComplex));   \ 
 1565     for (int i = 0; i < len8; i++) {                                           \ 
 1566         const int i0 = len8 + i, i1 = len8 - i - 1;                            \ 
 1567         const int s0 = out_map[i0], s1 = out_map[i1];                          \ 
 1568         TXComplex src1 = { s->tmp[s1].re, s->tmp[s1].im };                     \ 
 1569         TXComplex src0 = { s->tmp[s0].re, s->tmp[s0].im };                     \ 
 1571         CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im,    \ 
 1572              exp[i0].im, exp[i0].re);                                          \ 
 1573         CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im,    \ 
 1574              exp[i1].im, exp[i1].re);                                          \ 
 1578 static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd_def) = {           \ 
 1579     .name       = TX_NAME_STR("mdct_pfa_" #N "xM_fwd"),                        \ 
 1580     .function   = TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd),                         \ 
 1581     .type       = TX_TYPE(MDCT),                                               \ 
 1582     .flags      = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,   \ 
 1583     .factors    = { N, TX_FACTOR_ANY },                                        \ 
 1586     .max_len    = TX_LEN_UNLIMITED,                                            \ 
 1587     .init       = TX_NAME(ff_tx_mdct_pfa_init),                                \ 
 1588     .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                         \ 
 1589     .prio       = FF_TX_PRIO_BASE,                                             \ 
 1609     s->scale_d = *((SCALE_TYPE *)
scale);
 
 1610     s->scale_f = 
s->scale_d;
 
 1618     tab = (TXSample *)
s->exp;
 
 /* m is the scale taken from s->scale_d, doubled when inv is set. */
 1622     m = (inv ? 2*
s->scale_d : 
s->scale_d);
 
 /* First eight table slots: scalar factors derived from m. The code that
  * consumes this table reads them as fact[0..] and starts the cosine part
  * at fact + 8, so the count of stores here must stay at eight. */
 1624     *
tab++ = RESCALE((inv ? 0.5 : 1.0) * m);
 
 1625     *
tab++ = RESCALE(inv ? 0.5*m : 1.0*m);
 
 1626     *
tab++ = RESCALE( m);
 
 1627     *
tab++ = RESCALE(-m);
 
 1629     *
tab++ = RESCALE( (0.5 - 0.0) * m);
 
 1630     *
tab++ = RESCALE( (0.0 - 0.5) * m);
 
 /* inv is used arithmetically here: (0.5 - inv) is 0.5 for inv == 0 and
  * -0.5 for inv == 1. NOTE(review): assumes inv is only ever 0 or 1. */
 1631     *
tab++ = RESCALE( (0.5 - inv) * m);
 
 1632     *
tab++ = RESCALE(-(0.5 - inv) * m);
 
 /* `i < len >> 2` parses as i < (len >> 2): this stores len/4 values of
  * cos(i*f) for i = 0 .. (len >> 2) - 1. */
 1634     for (
int i = 0; i < len >> 2; 
i++)
 
 1635         *
tab++ = RESCALE(cos(
i*
f));
 
 /* Second run walks i downwards from len >> 2 to 0 inclusive, i.e. one more
  * entry than the loop above, and negates the value when inv is zero. */
 1636     for (
int i = 
len >> 2; 
i >= 0; 
i--)
 
 1637         *
tab++ = RESCALE(cos(
i*
f) * (inv ? +1.0 : -1.0));
 
 1642 #define DECL_RDFT(name, inv)                                                   \ 
 1643 static void TX_NAME(ff_tx_rdft_ ##name)(AVTXContext *s, void *_dst,            \ 
 1644                                        void *_src, ptrdiff_t stride)           \ 
 1646     const int len2 = s->len >> 1;                                              \ 
 1647     const int len4 = s->len >> 2;                                              \ 
 1648     const TXSample *fact = (void *)s->exp;                                     \ 
 1649     const TXSample *tcos = fact + 8;                                           \ 
 1650     const TXSample *tsin = tcos + len4;                                        \ 
 1651     TXComplex *data = inv ? _src : _dst;                                       \ 
 1655         s->fn[0](&s->sub[0], data, _src, sizeof(TXComplex));                   \ 
 1657         data[0].im = data[len2].re;                                            \ 
 1662     t[0].re = data[0].re;                                                      \ 
 1663     data[0].re = t[0].re + data[0].im;                                         \ 
 1664     data[0].im = t[0].re - data[0].im;                                         \ 
 1665     data[   0].re = MULT(fact[0], data[   0].re);                              \ 
 1666     data[   0].im = MULT(fact[1], data[   0].im);                              \ 
 1667     data[len4].re = MULT(fact[2], data[len4].re);                              \ 
 1668     data[len4].im = MULT(fact[3], data[len4].im);                              \ 
 1670     for (int i = 1; i < len4; i++) {                                           \ 
 1672         t[0].re = MULT(fact[4], (data[i].re + data[len2 - i].re));             \ 
 1673         t[0].im = MULT(fact[5], (data[i].im - data[len2 - i].im));             \ 
 1674         t[1].re = MULT(fact[6], (data[i].im + data[len2 - i].im));             \ 
 1675         t[1].im = MULT(fact[7], (data[i].re - data[len2 - i].re));             \ 
 1678         CMUL(t[2].re, t[2].im, t[1].re, t[1].im, tcos[i], tsin[i]);            \ 
 1680         data[       i].re = t[0].re + t[2].re;                                 \ 
 1681         data[       i].im = t[2].im - t[0].im;                                 \ 
 1682         data[len2 - i].re = t[0].re - t[2].re;                                 \ 
 1683         data[len2 - i].im = t[2].im + t[0].im;                                 \ 
 1687         s->fn[0](&s->sub[0], _dst, data, sizeof(TXComplex));                   \ 
 1690         data[len2].re = data[0].im;                                            \ 
 1691         data[   0].im = data[len2].im = 0;                                     \ 
 1699     .
name       = TX_NAME_STR(
"rdft_r2c"),
 
 1700     .function   = 
TX_NAME(ff_tx_rdft_r2c),
 
 1714     .
name       = TX_NAME_STR(
"rdft_c2r"),
 
 1715     .function   = 
TX_NAME(ff_tx_rdft_c2r),
 
 1738     SCALE_TYPE rsc = *((SCALE_TYPE *)
scale);
 
 1753     tab = (TXSample *)
s->exp;
 
 1757     for (
int i = 0; 
i < 
len; 
i++)
 
 1758         tab[
i] = RESCALE(cos(
i*freq)*(!inv + 1));
 
 1761         for (
int i = 0; 
i < 
len/2; 
i++)
 
 1762             tab[
len + 
i] = RESCALE(0.5 / sin((2*
i + 1)*freq));
 
 1764         for (
int i = 0; 
i < 
len/2; 
i++)
 
 1765             tab[
len + 
i] = RESCALE(cos((
len - 2*
i - 1)*freq));
 
 1772                                  void *_src, ptrdiff_t 
stride)
 
 1774     TXSample *dst = _dst;
 
 1775     TXSample *
src = _src;
 
 1776     const int len = 
s->len;
 
 1777     const int len2 = 
len >> 1;
 
 1778     const TXSample *
exp = (
void *)
s->exp;
 
 1783     TXSample tmp1, tmp2;
 
 1786     for (
int i = 0; 
i < len2; 
i++) {
 
 1787         TXSample in1 = 
src[
i];
 
 1788         TXSample in2 = 
src[
len - 
i - 1];
 
 1798         tmp2 = (tmp2 + 0x40000000) >> 31;
 
 1800         tmp1 = (in1 + in2)*0.5;
 
 1801         tmp2 = (in1 - in2)*
s;
 
 1804         src[
i]           = tmp1 + tmp2;
 
 1805         src[
len - 
i - 1] = tmp1 - tmp2;
 
 1812     for (
int i = 
len - 2; 
i > 0; 
i -= 2) {
 
 1823     tmp1 = ((int64_t)
exp[0]) * ((int64_t)dst[0]);
 
 1824     dst[0] = (tmp1 + 0x40000000) >> 31;
 
 1826     dst[0] = 
exp[0] * dst[0];
 
 1832                                   void *_src, ptrdiff_t 
stride)
 
 1834     TXSample *dst = _dst;
 
 1835     TXSample *
src = _src;
 
 1836     const int len = 
s->len;
 
 1837     const int len2 = 
len >> 1;
 
 1838     const TXSample *
exp = (
void *)
s->exp;
 
 1840     int64_t  tmp1, tmp2 = 
src[
len - 1];
 
 1841     tmp2 = (2*tmp2 + 0x40000000) >> 31;
 
 1843     TXSample tmp1, tmp2 = 2*
src[
len - 1];
 
 1848     for (
int i = 
len - 2; 
i >= 2; 
i -= 2) {
 
 1849         TXSample val1 = 
src[
i - 0];
 
 1850         TXSample val2 = 
src[
i - 1] - 
src[
i + 1];
 
 1855     s->fn[0](&
s->sub[0], dst, 
src, 
sizeof(
float));
 
 1857     for (
int i = 0; 
i < len2; 
i++) {
 
 1858         TXSample in1 = dst[
i];
 
 1859         TXSample in2 = dst[
len - 
i - 1];
 
 1866         tmp2 = (tmp2 + 0x40000000) >> 31;
 
 1869         dst[
i]            = tmp1 + tmp2;
 
 1870         dst[
len - 
i - 1]  = tmp1 - tmp2;
 
 1875     .
name       = TX_NAME_STR(
"dctII"),
 
 1889     .
name       = TX_NAME_STR(
"dctIII"),
 
 1905     int len4 = 
s->len >> 1;
 
 1906     double scale = 
s->scale_d;
 
 1907     const double theta = (
scale < 0 ? len4 : 0) + 1.0/8.0;
 
 1908     size_t alloc = pre_tab ? 2*len4 : len4;
 
 1918     for (
int i = 0; 
i < len4; 
i++) {
 
 1925         for (
int i = 0; 
i < len4; 
i++)
 
 1926             s->exp[
i] = 
s->exp[len4 + pre_tab[
i]];
 
 1939     &
TX_NAME(ff_tx_fft128_ns_def),
 
 1940     &
TX_NAME(ff_tx_fft256_ns_def),
 
 1941     &
TX_NAME(ff_tx_fft512_ns_def),
 
 1942     &
TX_NAME(ff_tx_fft1024_ns_def),
 
 1943     &
TX_NAME(ff_tx_fft2048_ns_def),
 
 1944     &
TX_NAME(ff_tx_fft4096_ns_def),
 
 1945     &
TX_NAME(ff_tx_fft8192_ns_def),
 
 1946     &
TX_NAME(ff_tx_fft16384_ns_def),
 
 1947     &
TX_NAME(ff_tx_fft32768_ns_def),
 
 1948     &
TX_NAME(ff_tx_fft65536_ns_def),
 
 1949     &
TX_NAME(ff_tx_fft131072_ns_def),
 
 1966     &
TX_NAME(ff_tx_fft_inplace_def),
 
 1967     &
TX_NAME(ff_tx_fft_inplace_small_def),
 
 1969     &
TX_NAME(ff_tx_fft_pfa_ns_def),
 
 1970     &
TX_NAME(ff_tx_fft_naive_def),
 
 1971     &
TX_NAME(ff_tx_fft_naive_small_def),
 
 1974     &
TX_NAME(ff_tx_mdct_pfa_3xM_fwd_def),
 
 1975     &
TX_NAME(ff_tx_mdct_pfa_5xM_fwd_def),
 
 1976     &
TX_NAME(ff_tx_mdct_pfa_7xM_fwd_def),
 
 1977     &
TX_NAME(ff_tx_mdct_pfa_9xM_fwd_def),
 
 1978     &
TX_NAME(ff_tx_mdct_pfa_15xM_fwd_def),
 
 1979     &
TX_NAME(ff_tx_mdct_pfa_3xM_inv_def),
 
 1980     &
TX_NAME(ff_tx_mdct_pfa_5xM_inv_def),
 
 1981     &
TX_NAME(ff_tx_mdct_pfa_7xM_inv_def),
 
 1982     &
TX_NAME(ff_tx_mdct_pfa_9xM_inv_def),
 
 1983     &
TX_NAME(ff_tx_mdct_pfa_15xM_inv_def),
 
 1984     &
TX_NAME(ff_tx_mdct_naive_fwd_def),
 
 1985     &
TX_NAME(ff_tx_mdct_naive_inv_def),
 
 1986     &
TX_NAME(ff_tx_mdct_inv_full_def),
 
  
int(* func)(AVBPrint *dst, const char *in, const char *arg)
 
static void TX_NAME() ff_tx_fft_sr_combine(TXComplex *z, const TXSample *cos, int len)
 
static av_cold int TX_NAME() ff_tx_dct_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
 
Filter design: In this document, the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format negotiation: The query_formats method should set, for each input and each output link, the list of supported formats. For video links, that means pixel format. For audio links, that means channel layout, sample format and sample rate. The lists are not just lists, they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining list, again, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
 
#define TRANSFORM(a0, a1, a2, a3, wre, wim)
 
static void TX_NAME() ff_tx_fft(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
 
#define TX_MAX_DECOMPOSITIONS
 
static void TX_NAME() ff_tx_fft_pfa(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
 
static void TX_NAME() ff_tx_fft16_ns(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
 
#define DECL_RDFT(name, inv)
 
int ff_tx_gen_inplace_map(AVTXContext *s, int len)
 
static av_always_inline void fft15(TXComplex *out, TXComplex *in, ptrdiff_t stride)
 
#define FF_TX_CPU_FLAGS_ALL
 
int ff_tx_gen_compound_mapping(AVTXContext *s, FFTXCodeletOptions *opts, int inv, int n, int m)
 
static void TX_NAME() ff_tx_fft_naive(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
 
#define DECL_FFT5(NAME, D0, D1, D2, D3, D4)
 
static void TX_NAME() ff_tx_mdct_naive_fwd(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
 
static av_cold int TX_NAME() ff_tx_rdft_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
 
#define DECL_SR_CODELET_DEF(n)
 
static SR_POW2_TABLES void(*const sr_tabs_init_funcs[])(void)
 
static const FFTXCodelet TX_NAME(ff_tx_fft_def)
 
static void sum_d(const int *input, int *output, int len)
 
static AVOnce sr_tabs_init_once[]
 
static double val(void *priv, double ch)
 
static av_always_inline float scale(float x, float s)
 
#define TABLE_DEF(name, size)
 
static int16_t mult(Float11 *f1, Float11 *f2)
 
static int ff_thread_once(char *control, void(*routine)(void))
 
#define FF_ARRAY_ELEMS(a)
 
static void c2r(float *buffer, int size)
 
static av_cold int TX_NAME() ff_tx_fft_factor_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
 
static void TX_NAME() ff_tx_mdct_fwd(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
 
static av_cold int TX_NAME() ff_tx_mdct_naive_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
 
#define FF_TX_FORWARD_ONLY
 
@ AV_TX_FULL_IMDCT
Performs a full inverse MDCT rather than leaving out samples that can be derived through symmetry.
 
static __device__ float fabs(float a)
 
static av_cold int TX_NAME() ff_tx_mdct_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
 
@ AV_TX_INPLACE
Allows for in-place transformations, where input == output.
 
int ff_tx_gen_ptwo_revtab(AVTXContext *s, FFTXCodeletOptions *opts)
 
static void r2c(float *buffer, int size)
 
#define FF_TX_OUT_OF_PLACE
 
@ AV_TX_UNALIGNED
Relaxes alignment requirement for the in and out arrays of av_tx_fn().
 
static void TX_NAME() ff_tx_dctIII(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
 
#define DECL_COMP_MDCT(N)
 
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
 
static const uint8_t tab[16]
 
static av_cold int TX_NAME() ff_tx_fft_pfa_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
 
void ff_tx_clear_ctx(AVTXContext *s)
 
static void TX_NAME() ff_tx_fft2_ns(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
 
static av_cold int TX_NAME() ff_tx_fft_sr_codelet_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
 
int ff_tx_gen_default_map(AVTXContext *s, FFTXCodeletOptions *opts)
 
static av_cold void TX_TAB() ff_tx_init_tab_53(void)
 
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
 
static void TX_NAME() ff_tx_fft8_ns(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
 
static av_always_inline void fft9(TXComplex *out, TXComplex *in, ptrdiff_t stride)
 
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
 
#define TX_EMBED_INPUT_PFA_MAP(map, tot_len, d1, d2)
 
static void TX_NAME() ff_tx_fft_inplace(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
 
static av_cold int TX_NAME() ff_tx_fft_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
 
static void TX_NAME() ff_tx_mdct_inv(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
 
#define i(width, name, range_min, range_max)
 
#define av_malloc_array(a, b)
 
static AVOnce nptwo_tabs_init_once[]
 
static av_cold int TX_NAME() ff_tx_fft_init_naive_small(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
 
#define DECL_SR_CODELET(n, n2, n4)
 
#define DECL_COMP_IMDCT(N)
 
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if available on the CPU) and zero all the bytes of the block.
 
static av_always_inline void fft3(TXComplex *out, TXComplex *in, ptrdiff_t stride)
 
static const FFTabInitData nptwo_tabs_init_data[]
 
av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
 
#define FFSWAP(type, a, b)
 
static av_cold void TX_TAB() ff_tx_init_tab_7(void)
 
#define FF_TX_INVERSE_ONLY
 
static void TX_NAME() ff_tx_fft_naive_small(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
 
static av_cold void TX_TAB() ff_tx_init_tab_9(void)
 
av_cold void TX_TAB() ff_tx_init_tabs(int len)
 
static void TX_NAME() ff_tx_mdct_naive_inv(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
 
static void TX_NAME() ff_tx_dctII(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
 
#define BUTTERFLIES(a0, a1, a2, a3)
 
static void TX_NAME() ff_tx_fft_pfa_ns(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
 
static const int factor[16]
 
static av_cold int TX_NAME() ff_tx_fft_inplace_small_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
 
const VDPAUPixFmtMap * map
 
static const int16_t alpha[]
 
static av_always_inline void fft7(TXComplex *out, TXComplex *in, ptrdiff_t stride)
 
#define flags(name, subs,...)
 
int TX_TAB() ff_tx_mdct_gen_exp(AVTXContext *s, int *pre_tab)
 
int ff_tx_gen_pfa_input_map(AVTXContext *s, FFTXCodeletOptions *opts, int d1, int d2)
 
static av_cold int TX_NAME() ff_tx_mdct_pfa_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
 
static void TX_NAME() ff_tx_fft4_ns(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
 
static av_cold int TX_NAME() ff_tx_mdct_inv_full_init(AVTXContext *s, const FFTXCodelet *cd, uint64_t flags, FFTXCodeletOptions *opts, int len, int inv, const void *scale)
 
int ff_tx_decompose_length(int dst[TX_MAX_DECOMPOSITIONS], enum AVTXType type, int len, int inv)
 
#define CMUL(dre, dim, are, aim, bre, bim)
 
static void TX_NAME() ff_tx_mdct_inv_full(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)