Go to the documentation of this file.
   46 #define FRAME_SIZE_SHIFT 2 
   47 #define FRAME_SIZE (120<<FRAME_SIZE_SHIFT) 
   48 #define WINDOW_SIZE (2*FRAME_SIZE) 
   49 #define FREQ_SIZE (FRAME_SIZE + 1) 
   51 #define PITCH_MIN_PERIOD 60 
   52 #define PITCH_MAX_PERIOD 768 
   53 #define PITCH_FRAME_SIZE 960 
   54 #define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE) 
   56 #define SQUARE(x) ((x)*(x)) 
   61 #define NB_DELTA_CEPS 6 
   63 #define NB_FEATURES (NB_BANDS+3*NB_DELTA_CEPS+2) 
   65 #define WEIGHTS_SCALE (1.f/256) 
   67 #define MAX_NEURONS 128 
   69 #define ACTIVATION_TANH    0 
   70 #define ACTIVATION_SIGMOID 1 
   71 #define ACTIVATION_RELU    2 
  151 #define F_ACTIVATION_TANH       0 
  152 #define F_ACTIVATION_SIGMOID    1 
  153 #define F_ACTIVATION_RELU       2 
  157 #define FREE_MAYBE(ptr) do { if (ptr) free(ptr); } while (0) 
  158 #define FREE_DENSE(name) do { \ 
  160         av_free((void *) model->name->input_weights); \ 
  161         av_free((void *) model->name->bias); \ 
  162         av_free((void *) model->name); \ 
  165 #define FREE_GRU(name) do { \ 
  167         av_free((void *) model->name->input_weights); \ 
  168         av_free((void *) model->name->recurrent_weights); \ 
  169         av_free((void *) model->name->bias); \ 
  170         av_free((void *) model->name); \ 
  196     if (fscanf(
f, 
"rnnoise-nu model file version %d\n", &
in) != 1 || 
in != 1)
 
  203 #define ALLOC_LAYER(type, name) \ 
  204     name = av_calloc(1, sizeof(type)); \ 
  206         rnnoise_model_free(ret); \ 
  218 #define INPUT_VAL(name) do { \ 
  219     if (fscanf(f, "%d", &in) != 1 || in < 0 || in > 128) { \ 
  220         rnnoise_model_free(ret); \ 
  226 #define INPUT_ACTIVATION(name) do { \ 
  228     INPUT_VAL(activation); \ 
  229     switch (activation) { \ 
  230     case F_ACTIVATION_SIGMOID: \ 
  231         name = ACTIVATION_SIGMOID; \ 
  233     case F_ACTIVATION_RELU: \ 
  234         name = ACTIVATION_RELU; \ 
  237         name = ACTIVATION_TANH; \ 
  241 #define INPUT_ARRAY(name, len) do { \ 
  242     float *values = av_calloc((len), sizeof(float)); \ 
  244         rnnoise_model_free(ret); \ 
  248     for (int i = 0; i < (len); i++) { \ 
  249         if (fscanf(f, "%d", &in) != 1) { \ 
  250             rnnoise_model_free(ret); \ 
  257 #define INPUT_ARRAY3(name, len0, len1, len2) do { \ 
  258     float *values = av_calloc(FFALIGN((len0), 4) * FFALIGN((len1), 4) * (len2), sizeof(float)); \ 
  260         rnnoise_model_free(ret); \ 
  264     for (int k = 0; k < (len0); k++) { \ 
  265         for (int i = 0; i < (len2); i++) { \ 
  266             for (int j = 0; j < (len1); j++) { \ 
  267                 if (fscanf(f, "%d", &in) != 1) { \ 
  268                     rnnoise_model_free(ret); \ 
  271                 values[j * (len2) * FFALIGN((len0), 4) + i * FFALIGN((len0), 4) + k] = in; \ 
  277 #define INPUT_DENSE(name) do { \ 
  278     INPUT_VAL(name->nb_inputs); \ 
  279     INPUT_VAL(name->nb_neurons); \ 
  280     ret->name ## _size = name->nb_neurons; \ 
  281     INPUT_ACTIVATION(name->activation); \ 
  282     INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons); \ 
  283     INPUT_ARRAY(name->bias, name->nb_neurons); \ 
  286 #define INPUT_GRU(name) do { \ 
  287     INPUT_VAL(name->nb_inputs); \ 
  288     INPUT_VAL(name->nb_neurons); \ 
  289     ret->name ## _size = name->nb_neurons; \ 
  290     INPUT_ACTIVATION(name->activation); \ 
  291     INPUT_ARRAY3(name->input_weights, name->nb_inputs, name->nb_neurons, 3); \ 
  292     INPUT_ARRAY3(name->recurrent_weights, name->nb_neurons, name->nb_neurons, 3); \ 
  293     INPUT_ARRAY(name->bias, name->nb_neurons * 3); \ 
  348     s->channels = 
inlink->channels;
 
  354     for (
int i = 0; 
i < 
s->channels; 
i++) {
 
  378 static void biquad(
float *y, 
float mem[2], 
const float *x,
 
  379                    const float *
b, 
const float *
a, 
int N)
 
  381     for (
int i = 0; 
i < 
N; 
i++) {
 
  386         mem[0] = mem[1] + (
b[0]*
xi - 
a[0]*yi);
 
  387         mem[1] = (
b[1]*
xi - 
a[1]*yi);
 
  392 #define RNN_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) )) 
  393 #define RNN_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst)))) 
  394 #define RNN_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) )) 
  406     st->
tx_fn(st->
tx, y, x, 
sizeof(
float));
 
  424     st->
txi_fn(st->
txi, y, x, 
sizeof(
float));
 
  432   0,  1,  2,  3,  4,   5, 6,  7,  8,  10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
 
  443         for (
int j = 0; j < band_size; j++) {
 
  444             float tmp, frac = (float)j / band_size;
 
  448             sum[
i]     += (1.f - frac) * 
tmp;
 
  449             sum[
i + 1] +=        frac  * 
tmp;
 
  468         for (
int j = 0; j < band_size; j++) {
 
  469             float tmp, frac = (float)j / band_size;
 
  473             sum[
i]     += (1 - frac) * 
tmp;
 
  474             sum[
i + 1] +=      frac  * 
tmp;
 
  508 static inline void xcorr_kernel(
const float *x, 
const float *y, 
float sum[4], 
int len)
 
  510     float y_0, y_1, y_2, y_3 = 0;
 
  517     for (j = 0; j < 
len - 3; j += 4) {
 
  578                                     const float *y, 
int N)
 
  582     for (
int i = 0; 
i < 
N; 
i++)
 
  589                              float *xcorr, 
int len, 
int max_pitch)
 
  593     for (
i = 0; 
i < max_pitch - 3; 
i += 4) {
 
  594         float sum[4] = { 0, 0, 0, 0};
 
  599         xcorr[
i + 1] = sum[1];
 
  600         xcorr[
i + 2] = sum[2];
 
  601         xcorr[
i + 3] = sum[3];
 
  604     for (; 
i < max_pitch; 
i++) {
 
  624         for (
int i = 0; 
i < n; 
i++)
 
  626         for (
int i = 0; 
i < overlap; 
i++) {
 
  636     for (
int k = 0; k <= lag; k++) {
 
  639         for (
int i = k + fastN; 
i < n; 
i++)
 
  640             d += xptr[
i] * xptr[
i-k];
 
  655         for (
int i = 0; 
i < p; 
i++) {
 
  658             for (
int j = 0; j < 
i; j++)
 
  659                 rr += (lpc[j] * ac[
i - j]);
 
  664             for (
int j = 0; j < (
i + 1) >> 1; j++) {
 
  668                 lpc[j]     = tmp1 + (
r*tmp2);
 
  669                 lpc[
i-1-j] = tmp2 + (
r*tmp1);
 
  674             if (
error < .001
f * ac[0])
 
  686     float num0, num1, num2, num3, num4;
 
  687     float mem0, mem1, mem2, mem3, mem4;
 
  700     for (
int i = 0; 
i < 
N; 
i++) {
 
  728     float lpc[4], mem[5]={0,0,0,0,0};
 
  732     for (
int i = 1; i < len >> 1; 
i++)
 
  733         x_lp[
i] = .5
f * (.5
f * (x[0][(2*
i-1)]+x[0][(2*
i+1)])+x[0][2*
i]);
 
  734     x_lp[0] = .5f * (.5f * (x[0][1])+x[0][0]);
 
  736         for (
int i = 1; i < len >> 1; 
i++)
 
  737             x_lp[
i] += (.5
f * (.5
f * (x[1][(2*
i-1)]+x[1][(2*
i+1)])+x[1][2*
i]));
 
  738         x_lp[0] += .5f * (.5f * (x[1][1])+x[1][0]);
 
  746     for (
int i = 1; 
i <= 4; 
i++) {
 
  748         ac[
i] -= ac[
i]*(.008f*
i)*(.008
f*
i);
 
  752     for (
int i = 0; 
i < 4; 
i++) {
 
  754         lpc[
i] = (lpc[
i] * 
tmp);
 
  757     lpc2[0] = lpc[0] + .8f;
 
  758     lpc2[1] = lpc[1] + (
c1 * lpc[0]);
 
  759     lpc2[2] = lpc[2] + (
c1 * lpc[1]);
 
  760     lpc2[3] = lpc[3] + (
c1 * lpc[2]);
 
  761     lpc2[4] = (
c1 * lpc[3]);
 
  765 static inline void dual_inner_prod(
const float *x, 
const float *y01, 
const float *y02,
 
  766                                    int N, 
float *xy1, 
float *xy2)
 
  768     float xy01 = 0, xy02 = 0;
 
  770     for (
int i = 0; 
i < 
N; 
i++) {
 
  771         xy01 += (x[
i] * y01[
i]);
 
  772         xy02 += (x[
i] * y02[
i]);
 
  781     return xy / sqrtf(1.
f + xx * yy);
 
  784 static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
 
  786                              int *T0_, 
int prev_period, 
float prev_gain)
 
  793     float best_xy, best_yy;
 
  798     minperiod0 = minperiod;
 
  812     for (
i = 1; 
i <= maxperiod; 
i++) {
 
  813         yy = yy+(x[-
i] * x[-
i])-(x[
N-
i] * x[
N-
i]);
 
  814         yy_lookup[
i] = 
FFMAX(0, yy);
 
  821     for (k = 2; k <= 15; k++) {
 
  841         xy = .5f * (xy + xy2);
 
  842         yy = .5f * (yy_lookup[T1] + yy_lookup[T1b]);
 
  844         if (
FFABS(T1-prev_period)<=1)
 
  846         else if (
FFABS(T1-prev_period)<=2 && 5 * k * k < T0)
 
  847             cont = prev_gain * .5f;
 
  850         thresh = 
FFMAX(.3
f, (.7
f * g0) - cont);
 
  854             thresh = 
FFMAX(.4
f, (.85
f * g0) - cont);
 
  855         else if (T1<2*minperiod)
 
  856             thresh = 
FFMAX(.5
f, (.9
f * g0) - cont);
 
  865     best_xy = 
FFMAX(0, best_xy);
 
  866     if (best_yy <= best_xy)
 
  869         pg = best_xy/(best_yy + 1);
 
  871     for (k = 0; k < 3; k++)
 
  873     if ((xcorr[2]-xcorr[0]) > .7f * (xcorr[1]-xcorr[0]))
 
  875     else if ((xcorr[0]-xcorr[2]) > (.7f * (xcorr[1] - xcorr[2])))
 
  889                             int max_pitch, 
int *best_pitch)
 
  902     for (
int j = 0; j < 
len; j++)
 
  905     for (
int i = 0; 
i < max_pitch; 
i++) {
 
  914             num = xcorr16 * xcorr16;
 
  915             if ((num * best_den[1]) > (best_num[1] * Syy)) {
 
  916                 if ((num * best_den[0]) > (best_num[0] * Syy)) {
 
  917                     best_num[1] = best_num[0];
 
  918                     best_den[1] = best_den[0];
 
  919                     best_pitch[1] = best_pitch[0];
 
  936                          int len, 
int max_pitch, 
int *pitch)
 
  939     int best_pitch[2]={0,0};
 
  949     for (
int j = 0; j < len >> 2; j++)
 
  950         x_lp4[j] = x_lp[2*j];
 
  951     for (
int j = 0; j < lag >> 2; j++)
 
  961     for (
int i = 0; i < max_pitch >> 1; 
i++) {
 
  964         if (
FFABS(
i-2*best_pitch[0])>2 && 
FFABS(
i-2*best_pitch[1])>2)
 
  967         xcorr[
i] = 
FFMAX(-1, sum);
 
  973     if (best_pitch[0] > 0 && best_pitch[0] < (max_pitch >> 1) - 1) {
 
  976         a = xcorr[best_pitch[0] - 1];
 
  977         b = xcorr[best_pitch[0]];
 
  978         c = xcorr[best_pitch[0] + 1];
 
  979         if (
c - 
a > .7
f * (
b - 
a))
 
  981         else if (
a - 
c > .7
f * (
b-
c))
 
  989     *pitch = 2 * best_pitch[0] - 
offset;
 
  997         for (
int j = 0; j < 
NB_BANDS; j++) {
 
 1000         out[
i] = sum * sqrtf(2.
f / 22);
 
 1005                                   float *Ex, 
float *Ep, 
float *Exp, 
float *features, 
const float *
in)
 
 1008     float *ceps_0, *ceps_1, *ceps_2;
 
 1009     float spec_variability = 0;
 
 1017     float follow, logMax;
 
 1042         Exp[
i] = Exp[
i] / sqrtf(.001
f+Ex[
i]*Ep[
i]);
 
 1058         logMax = 
FFMAX(logMax, Ly[
i]);
 
 1059         follow = 
FFMAX(follow-1.5, Ly[
i]);
 
 1069     dct(
s, features, Ly);
 
 1077         ceps_0[
i] = features[
i];
 
 1081         features[
i] = ceps_0[
i] + ceps_1[
i] + ceps_2[
i];
 
 1090         float mindist = 1e15f;
 
 1091         for (
int j = 0; j < 
CEPS_MEM; j++) {
 
 1093             for (
int k = 0; k < 
NB_BANDS; k++) {
 
 1101                 mindist = 
FFMIN(mindist, dist);
 
 1104         spec_variability += mindist;
 
 1119         for (
int j = 0; j < band_size; j++) {
 
 1120             float frac = (float)j / band_size;
 
 1128                          const float *Exp, 
const float *
g)
 
 1137         if (Exp[
i]>
g[
i]) 
r[
i] = 1;
 
 1139         r[
i]  = sqrtf(av_clipf(
r[
i], 0, 1));
 
 1140         r[
i] *= sqrtf(Ex[
i]/(1e-8+Ep[
i]));
 
 1144         X[
i].re += rf[
i]*
P[
i].re;
 
 1145         X[
i].im += rf[
i]*
P[
i].im;
 
 1149         norm[
i] = sqrtf(Ex[
i] / (1e-8+newE[
i]));
 
 1153         X[
i].re *= normf[
i];
 
 1154         X[
i].im *= normf[
i];
 
 1159     0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f,
 
 1160     0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f,
 
 1161     0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f,
 
 1162     0.537050f, 0.564900f, 0.591519f, 0.616909f, 0.641077f,
 
 1163     0.664037f, 0.685809f, 0.706419f, 0.725897f, 0.744277f,
 
 1164     0.761594f, 0.777888f, 0.793199f, 0.807569f, 0.821040f,
 
 1165     0.833655f, 0.845456f, 0.856485f, 0.866784f, 0.876393f,
 
 1166     0.885352f, 0.893698f, 0.901468f, 0.908698f, 0.915420f,
 
 1167     0.921669f, 0.927473f, 0.932862f, 0.937863f, 0.942503f,
 
 1168     0.946806f, 0.950795f, 0.954492f, 0.957917f, 0.961090f,
 
 1169     0.964028f, 0.966747f, 0.969265f, 0.971594f, 0.973749f,
 
 1170     0.975743f, 0.977587f, 0.979293f, 0.980869f, 0.982327f,
 
 1171     0.983675f, 0.984921f, 0.986072f, 0.987136f, 0.988119f,
 
 1172     0.989027f, 0.989867f, 0.990642f, 0.991359f, 0.992020f,
 
 1173     0.992631f, 0.993196f, 0.993718f, 0.994199f, 0.994644f,
 
 1174     0.995055f, 0.995434f, 0.995784f, 0.996108f, 0.996407f,
 
 1175     0.996682f, 0.996937f, 0.997172f, 0.997389f, 0.997590f,
 
 1176     0.997775f, 0.997946f, 0.998104f, 0.998249f, 0.998384f,
 
 1177     0.998508f, 0.998623f, 0.998728f, 0.998826f, 0.998916f,
 
 1178     0.999000f, 0.999076f, 0.999147f, 0.999213f, 0.999273f,
 
 1179     0.999329f, 0.999381f, 0.999428f, 0.999472f, 0.999513f,
 
 1180     0.999550f, 0.999585f, 0.999617f, 0.999646f, 0.999673f,
 
 1181     0.999699f, 0.999722f, 0.999743f, 0.999763f, 0.999781f,
 
 1182     0.999798f, 0.999813f, 0.999828f, 0.999841f, 0.999853f,
 
 1183     0.999865f, 0.999875f, 0.999885f, 0.999893f, 0.999902f,
 
 1184     0.999909f, 0.999916f, 0.999923f, 0.999929f, 0.999934f,
 
 1185     0.999939f, 0.999944f, 0.999948f, 0.999952f, 0.999956f,
 
 1186     0.999959f, 0.999962f, 0.999965f, 0.999968f, 0.999970f,
 
 1187     0.999973f, 0.999975f, 0.999977f, 0.999978f, 0.999980f,
 
 1188     0.999982f, 0.999983f, 0.999984f, 0.999986f, 0.999987f,
 
 1189     0.999988f, 0.999989f, 0.999990f, 0.999990f, 0.999991f,
 
 1190     0.999992f, 0.999992f, 0.999993f, 0.999994f, 0.999994f,
 
 1191     0.999994f, 0.999995f, 0.999995f, 0.999996f, 0.999996f,
 
 1192     0.999996f, 0.999997f, 0.999997f, 0.999997f, 0.999997f,
 
 1193     0.999997f, 0.999998f, 0.999998f, 0.999998f, 0.999998f,
 
 1194     0.999998f, 0.999998f, 0.999999f, 0.999999f, 0.999999f,
 
 1195     0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
 
 1196     0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
 
 1197     1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
 
 1198     1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
 
 1222     i = (
int)floor(.5
f+25*x);
 
 1226     y = y + x*dy*(1 - y*x);
 
 1239     for (
int i = 0; 
i < 
N; 
i++) {
 
 1241         float sum = layer->
bias[
i];
 
 1243         for (
int j = 0; j < 
M; j++)
 
 1250         for (
int i = 0; 
i < 
N; 
i++)
 
 1253         for (
int i = 0; 
i < 
N; 
i++)
 
 1256         for (
int i = 0; 
i < 
N; 
i++)
 
 1272     const int stride = 3 * AN, istride = 3 * AM;
 
 1274     for (
int i = 0; 
i < 
N; 
i++) {
 
 1276         float sum = gru->
bias[
i];
 
 1283     for (
int i = 0; 
i < 
N; 
i++) {
 
 1285         float sum = gru->
bias[
N + 
i];
 
 1292     for (
int i = 0; 
i < 
N; 
i++) {
 
 1294         float sum = gru->
bias[2 * 
N + 
i];
 
 1297         for (
int j = 0; j < 
N; j++)
 
 1314 #define INPUT_SIZE 42 
 1327         noise_input[
i] = dense_out[
i];
 
 1357     static const float a_hp[2] = {-1.99599, 0.99600};
 
 1358     static const float b_hp[2] = {-2, 1};
 
 1397     const int start = (
out->channels * jobnr) / nb_jobs;
 
 1398     const int end = (
out->channels * (jobnr+1)) / nb_jobs;
 
 1400     for (
int ch = start; ch < 
end; ch++) {
 
 1402                         (
float *)
out->extended_data[ch],
 
 1403                         (
const float *)
in->extended_data[ch]);
 
 1479         for (
int j = 0; j < 
NB_BANDS; j++) {
 
 1498         for (
int ch = 0; ch < 
s->channels; ch++) {
 
 1500             av_freep(&
s->st[ch].rnn.noise_gru_state);
 
 1501             av_freep(&
s->st[ch].rnn.denoise_gru_state);
 
 1526 #define OFFSET(x) offsetof(AudioRNNContext, x) 
 1527 #define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM 
 1539     .description   = 
NULL_IF_CONFIG_SMALL(
"Reduce noise from speech using Recurrent Neural Networks."),
 
 1542     .priv_class    = &arnndn_class,
 
  
static void error(const char *err)
float dct_table[NB_BANDS *NB_BANDS]
static void compute_dense(const DenseLayer *layer, float *output, const float *input)
AVFrame * ff_get_audio_buffer(AVFilterLink *link, int nb_samples)
Request an audio samples buffer with a specific set of permissions.
@ AV_SAMPLE_FMT_FLTP
float, planar
A list of supported channel layouts.
static void pitch_downsample(float *x[], float *x_lp, int len, int C)
float synthesis_mem[FRAME_SIZE]
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
static int activate(AVFilterContext *ctx)
static void dual_inner_prod(const float *x, const float *y01, const float *y02, int N, float *xy1, float *xy2)
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
static enum AVSampleFormat sample_fmts[]
enum MovChannelLayoutTag * layouts
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
static av_cold int end(AVCodecContext *avctx)
static av_cold void uninit(AVFilterContext *ctx)
static void inverse_transform(DenoiseState *st, float *out, const AVComplexFloat *in)
This structure describes decoded (raw) audio or video data.
static const AVOption arnndn_options[]
static void frame_synthesis(AudioRNNContext *s, DenoiseState *st, float *out, const AVComplexFloat *y)
const char * name
Filter name.
static const float tansig_table[201]
AVFormatInternal * internal
An opaque field for libavformat internal usage.
A link between two filters.
static void find_best_pitch(float *xcorr, float *y, int len, int max_pitch, int *best_pitch)
#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)
Forward the status on an output link to an input link.
int channels
Number of channels.
av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Initialize a transform context with the given configuration Currently power of two lengths from 2 to ...
#define RNN_CLEAR(dst, n)
static void compute_band_energy(float *bandE, const AVComplexFloat *X)
static void compute_rnn(AudioRNNContext *s, RNNState *rnn, float *gains, float *vad, const float *input)
float * denoise_gru_state
static int rnnoise_channels(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
static SDL_Window * window
static void rnnoise_model_free(RNNModel *model)
float cepstral_mem[CEPS_MEM][NB_BANDS]
static RNNModel * rnnoise_model_from_file(FILE *f)
A filter pad used for either input or output.
static void compute_band_corr(float *bandE, const AVComplexFloat *X, const AVComplexFloat *P)
s EdgeDetect Foobar g libavfilter vf_edgedetect c libavfilter vf_foobar c edit libavfilter and add an entry for foobar following the pattern of the other filters edit libavfilter allfilters and add an entry for foobar following the pattern of the other filters configure make j< whatever > ffmpeg ffmpeg i you should get a foobar png with Lena edge detected That s your new playground is ready Some little details about what s going which in turn will define variables for the build system and the C
FILE * av_fopen_utf8(const char *path, const char *mode)
Open a file using a UTF-8 filename.
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
static void frame_analysis(AudioRNNContext *s, DenoiseState *st, AVComplexFloat *X, float *Ex, const float *in)
static const AVFilterPad inputs[]
static float celt_inner_prod(const float *x, const float *y, int N)
#define av_assert0(cond)
assert() equivalent, that is always enabled.
@ AV_TX_FLOAT_FFT
Standard complex to complex FFT with sample data type AVComplexFloat.
#define xi(width, name, var, range_min, range_max, subs,...)
static int config_input(AVFilterLink *inlink)
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
static float rnnoise_channel(AudioRNNContext *s, DenoiseState *st, float *out, const float *in)
Describe the class of an AVClass context structure.
int ff_inlink_consume_samples(AVFilterLink *link, unsigned min, unsigned max, AVFrame **rframe)
Take samples from the link's FIFO and update the link's stats.
static float sigmoid_approx(float x)
const DenseLayer * vad_output
const float * recurrent_weights
static const AVFilterPad outputs[]
const DenseLayer * input_dense
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
const float * input_weights
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N)
float pitch_buf[PITCH_BUF_SIZE]
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
static int celt_autocorr(const float *x, float *ac, const float *window, int overlap, int lag, int n)
static void celt_lpc(float *lpc, const float *ac, int p)
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
#define RNN_MOVE(dst, src, n)
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
FF_FILTER_FORWARD_WANTED(outlink, inlink)
const GRULayer * denoise_gru
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
av_cold void av_tx_uninit(AVTXContext **ctx)
Frees a context and sets ctx to NULL, does nothing when ctx == NULL.
#define ACTIVATION_SIGMOID
#define DECLARE_ALIGNED(n, t, v)
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! *ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
#define i(width, name, range_min, range_max)
#define RNN_COPY(dst, src, n)
int ff_filter_get_nb_threads(AVFilterContext *ctx)
Get number of threads for current filter instance.
AVSampleFormat
Audio sample formats.
Used for passing data between threads.
static void interp_band_gain(float *g, const float *bandE)
static void dct(AudioRNNContext *s, float *out, const float *in)
const char * name
Pad name.
static int compute_frame_features(AudioRNNContext *s, DenoiseState *st, AVComplexFloat *X, AVComplexFloat *P, float *Ex, float *Ep, float *Exp, float *features, const float *in)
const float * input_weights
float window[WINDOW_SIZE]
static float remove_doubling(float *x, int maxperiod, int minperiod, int N, int *T0_, int prev_period, float prev_gain)
void * av_calloc(size_t nmemb, size_t size)
Non-inlined equivalent of av_mallocz_array().
static float compute_pitch_gain(float xy, float xx, float yy)
AVFILTER_DEFINE_CLASS(arnndn)
static void xcorr_kernel(const float *x, const float *y, float sum[4], int len)
static void pitch_search(const float *x_lp, float *y, int len, int max_pitch, int *pitch)
static void pitch_filter(AVComplexFloat *X, const AVComplexFloat *P, const float *Ex, const float *Ep, const float *Exp, const float *g)
static void celt_pitch_xcorr(const float *x, const float *y, float *xcorr, int len, int max_pitch)
static void celt_fir5(const float *x, const float *num, float *y, int N, float *mem)
static const int second_check[16]
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
float pitch_enh_buf[PITCH_BUF_SIZE]
static int shift(int a, int b)
#define AVFILTER_FLAG_SLICE_THREADS
The filter supports multithreading by splitting frames into multiple parts and processing them concur...
static float tansig_approx(float x)
static int query_formats(AVFilterContext *ctx)
static void forward_transform(DenoiseState *st, AVComplexFloat *out, const float *in)
FF_FILTER_FORWARD_STATUS(inlink, outlink)
static const int16_t alpha[]
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
#define flags(name, subs,...)
const DenseLayer * denoise_output
#define LOCAL_ALIGNED_32(t, v,...)
#define ALLOC_LAYER(type, name)
static void compute_gru(AudioRNNContext *s, const GRULayer *gru, float *state, const float *input)
static const uint8_t eband5ms[]
#define INPUT_DENSE(name)
const GRULayer * noise_gru
static av_cold int init(AVFilterContext *ctx)
float analysis_mem[FRAME_SIZE]