af_atempo.c
/*
 * Copyright (c) 2012 Pavel Koshevoy <pkoshevoy at gmail dot com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * tempo scaling audio filter -- an implementation of the WSOLA algorithm
 *
 * Based on MIT licensed yaeAudioTempoFilter.h and yaeAudioFragment.h
 * from the Apprentice Video player by Pavel Koshevoy.
 * https://sourceforge.net/projects/apprenticevideo/
 *
 * An explanation of the SOLA algorithm is available at
 * http://www.surina.net/article/time-and-pitch-scaling.html
 *
 * WSOLA is very similar to SOLA; only one major difference exists between
 * these algorithms: SOLA shifts audio fragments along the output stream,
 * whereas WSOLA shifts audio fragments along the input stream.
 *
 * The advantage of the WSOLA algorithm is that the overlap region size is
 * always the same, therefore the blending function is constant and
 * can be precomputed.
 */
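/*
 * Illustrative usage: the filter is typically driven from the ffmpeg
 * command line, e.g.
 *
 *   ffmpeg -i input.wav -filter:a "atempo=1.5" output.wav
 *
 * which speeds playback up 1.5x without changing pitch.  This version
 * accepts tempo values in [0.5, 100.0]; see YAE_ATEMPO_MIN and
 * YAE_ATEMPO_MAX below.
 */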

#include <float.h>
#include "libavcodec/avfft.h"
#include "libavutil/avassert.h"
#include "libavutil/avstring.h"
#include "libavutil/channel_layout.h"
#include "libavutil/eval.h"
#include "libavutil/opt.h"
#include "libavutil/samplefmt.h"
#include "avfilter.h"
#include "audio.h"
#include "internal.h"

/**
 * A fragment of audio waveform
 */
typedef struct AudioFragment {
    // index of the first sample of this fragment in the overall waveform;
    // 0: input sample position
    // 1: output sample position
    int64_t position[2];

    // original packed multi-channel samples:
    uint8_t *data;

    // number of samples in this fragment:
    int nsamples;

    // rDFT transform of the down-mixed mono fragment, used for
    // fast waveform alignment via correlation in frequency domain:
    FFTSample *xdat;
} AudioFragment;

/**
 * Filter state machine states
 */
typedef enum {
    YAE_LOAD_FRAGMENT,
    YAE_ADJUST_POSITION,
    YAE_RELOAD_FRAGMENT,
    YAE_OUTPUT_OVERLAP_ADD,
    YAE_FLUSH_OUTPUT
} FilterState;

/**
 * Filter state machine
 */
typedef struct ATempoContext {
    const AVClass *class;

    // ring-buffer of input samples, necessary because the input
    // fragment position may sometimes be adjusted backwards:
    uint8_t *buffer;

    // ring-buffer maximum capacity, expressed in sample rate time base:
    int ring;

    // ring-buffer housekeeping:
    int size;
    int head;
    int tail;

    // 0: input sample position corresponding to the ring buffer tail
    // 1: output sample position
    int64_t position[2];

    // first input timestamp, all other timestamps are offset by this one
    int64_t start_pts;

    // sample format:
    enum AVSampleFormat format;

    // number of channels:
    int channels;

    // number of bytes to skip from one sample to the next, across
    // multiple channels;
    // stride = (number-of-channels * bits-per-sample-per-channel) / 8
    int stride;

    // fragment window size, power-of-two integer:
    int window;

    // Hann window coefficients, for feathering
    // (blending) the overlapping fragment region:
    float *hann;

    // tempo scaling factor:
    double tempo;

    // a snapshot of previous fragment input and output position values
    // captured when the tempo scale factor was set most recently:
    int64_t origin[2];

    // current/previous fragment ring-buffer:
    AudioFragment frag[2];

    // current fragment index:
    uint64_t nfrag;

    // current state:
    FilterState state;

    // for fast correlation calculation in frequency domain:
    RDFTContext *real_to_complex;
    RDFTContext *complex_to_real;
    FFTSample *correlation;

    // for managing AVFilterPad.request_frame and AVFilterPad.filter_frame
    AVFrame *dst_buffer;
    uint8_t *dst;
    uint8_t *dst_end;
    uint64_t nsamples_in;
    uint64_t nsamples_out;
} ATempoContext;

#define YAE_ATEMPO_MIN 0.5
#define YAE_ATEMPO_MAX 100.0

#define OFFSET(x) offsetof(ATempoContext, x)

static const AVOption atempo_options[] = {
    { "tempo", "set tempo scale factor",
      OFFSET(tempo), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 },
      YAE_ATEMPO_MIN,
      YAE_ATEMPO_MAX,
      AV_OPT_FLAG_AUDIO_PARAM |
      AV_OPT_FLAG_FILTERING_PARAM |
      AV_OPT_FLAG_RUNTIME_PARAM },
    { NULL }
};

AVFILTER_DEFINE_CLASS(atempo);

static AudioFragment *yae_curr_frag(ATempoContext *atempo)
{
    return &atempo->frag[atempo->nfrag % 2];
}

static AudioFragment *yae_prev_frag(ATempoContext *atempo)
{
    return &atempo->frag[(atempo->nfrag + 1) % 2];
}

/**
 * Reset filter to initial state, do not deallocate existing local buffers.
 */
static void yae_clear(ATempoContext *atempo)
{
    atempo->size = 0;
    atempo->head = 0;
    atempo->tail = 0;

    atempo->nfrag = 0;
    atempo->state = YAE_LOAD_FRAGMENT;
    atempo->start_pts = AV_NOPTS_VALUE;

    atempo->position[0] = 0;
    atempo->position[1] = 0;

    atempo->origin[0] = 0;
    atempo->origin[1] = 0;

    atempo->frag[0].position[0] = 0;
    atempo->frag[0].position[1] = 0;
    atempo->frag[0].nsamples    = 0;

    atempo->frag[1].position[0] = 0;
    atempo->frag[1].position[1] = 0;
    atempo->frag[1].nsamples    = 0;

    // shift left position of 1st fragment by half a window
    // so that no re-normalization would be required for
    // the left half of the 1st fragment:
    atempo->frag[0].position[0] = -(int64_t)(atempo->window / 2);
    atempo->frag[0].position[1] = -(int64_t)(atempo->window / 2);

    av_frame_free(&atempo->dst_buffer);
    atempo->dst     = NULL;
    atempo->dst_end = NULL;

    atempo->nsamples_in  = 0;
    atempo->nsamples_out = 0;
}

/**
 * Reset filter to initial state and deallocate all buffers.
 */
static void yae_release_buffers(ATempoContext *atempo)
{
    yae_clear(atempo);

    av_freep(&atempo->frag[0].data);
    av_freep(&atempo->frag[1].data);
    av_freep(&atempo->frag[0].xdat);
    av_freep(&atempo->frag[1].xdat);

    av_freep(&atempo->buffer);
    av_freep(&atempo->hann);
    av_freep(&atempo->correlation);

    av_rdft_end(atempo->real_to_complex);
    atempo->real_to_complex = NULL;

    av_rdft_end(atempo->complex_to_real);
    atempo->complex_to_real = NULL;
}

/* av_realloc is not aligned enough; fortunately, the data does not need to
 * be preserved */
#define RE_MALLOC_OR_FAIL(field, field_size)    \
    do {                                        \
        av_freep(&field);                       \
        field = av_malloc(field_size);          \
        if (!field) {                           \
            yae_release_buffers(atempo);        \
            return AVERROR(ENOMEM);             \
        }                                       \
    } while (0)

/**
 * Prepare filter for processing audio data of given format,
 * sample rate and number of channels.
 */
static int yae_reset(ATempoContext *atempo,
                     enum AVSampleFormat format,
                     int sample_rate,
                     int channels)
{
    const int sample_size = av_get_bytes_per_sample(format);
    uint32_t nlevels = 0;
    uint32_t pot;
    int i;

    atempo->format   = format;
    atempo->channels = channels;
    atempo->stride   = sample_size * channels;

    // pick a segment window size:
    atempo->window = sample_rate / 24;

    // adjust window size to be a power-of-two integer:
    nlevels = av_log2(atempo->window);
    pot = 1 << nlevels;
    av_assert0(pot <= atempo->window);

    if (pot < atempo->window) {
        atempo->window = pot * 2;
        nlevels++;
    }

    // initialize audio fragment buffers:
    RE_MALLOC_OR_FAIL(atempo->frag[0].data, atempo->window * atempo->stride);
    RE_MALLOC_OR_FAIL(atempo->frag[1].data, atempo->window * atempo->stride);
    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, atempo->window * sizeof(FFTComplex));
    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, atempo->window * sizeof(FFTComplex));

    // initialize rDFT contexts:
    av_rdft_end(atempo->real_to_complex);
    atempo->real_to_complex = NULL;

    av_rdft_end(atempo->complex_to_real);
    atempo->complex_to_real = NULL;

    atempo->real_to_complex = av_rdft_init(nlevels + 1, DFT_R2C);
    if (!atempo->real_to_complex) {
        yae_release_buffers(atempo);
        return AVERROR(ENOMEM);
    }

    atempo->complex_to_real = av_rdft_init(nlevels + 1, IDFT_C2R);
    if (!atempo->complex_to_real) {
        yae_release_buffers(atempo);
        return AVERROR(ENOMEM);
    }

    RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window * sizeof(FFTComplex));

    atempo->ring = atempo->window * 3;
    RE_MALLOC_OR_FAIL(atempo->buffer, atempo->ring * atempo->stride);

    // initialize the Hann window function:
    RE_MALLOC_OR_FAIL(atempo->hann, atempo->window * sizeof(float));

    for (i = 0; i < atempo->window; i++) {
        double t = (double)i / (double)(atempo->window - 1);
        double h = 0.5 * (1.0 - cos(2.0 * M_PI * t));
        atempo->hann[i] = (float)h;
    }

    yae_clear(atempo);
    return 0;
}

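/*
 * Worked example (illustrative): for a 44100 Hz stream the initial window
 * guess is 44100 / 24 = 1837 samples; av_log2(1837) = 10 gives pot = 1024,
 * so the window is rounded up to 2048 = 2^11 and nlevels becomes 11.  The
 * rDFT contexts are then created with nlevels + 1 = 12 bits, i.e.
 * 4096-point transforms -- twice the window size, matching the
 * window * sizeof(FFTComplex) bytes (2 * window FFTSamples) allocated
 * for each xdat buffer above.
 */
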
static int yae_update(AVFilterContext *ctx)
{
    const AudioFragment *prev;
    ATempoContext *atempo = ctx->priv;

    prev = yae_prev_frag(atempo);
    atempo->origin[0] = prev->position[0] + atempo->window / 2;
    atempo->origin[1] = prev->position[1] + atempo->window / 2;
    return 0;
}

/**
 * A helper macro for initializing complex data buffer with scalar data
 * of a given type.
 */
#define yae_init_xdat(scalar_type, scalar_max)                          \
    do {                                                                \
        const uint8_t *src_end = src +                                  \
            frag->nsamples * atempo->channels * sizeof(scalar_type);    \
                                                                        \
        FFTSample *xdat = frag->xdat;                                   \
        scalar_type tmp;                                                \
                                                                        \
        if (atempo->channels == 1) {                                    \
            for (; src < src_end; xdat++) {                             \
                tmp = *(const scalar_type *)src;                        \
                src += sizeof(scalar_type);                             \
                                                                        \
                *xdat = (FFTSample)tmp;                                 \
            }                                                           \
        } else {                                                        \
            FFTSample s, max, ti, si;                                   \
            int i;                                                      \
                                                                        \
            for (; src < src_end; xdat++) {                             \
                tmp = *(const scalar_type *)src;                        \
                src += sizeof(scalar_type);                             \
                                                                        \
                max = (FFTSample)tmp;                                   \
                s = FFMIN((FFTSample)scalar_max,                        \
                          (FFTSample)fabsf(max));                       \
                                                                        \
                for (i = 1; i < atempo->channels; i++) {                \
                    tmp = *(const scalar_type *)src;                    \
                    src += sizeof(scalar_type);                         \
                                                                        \
                    ti = (FFTSample)tmp;                                \
                    si = FFMIN((FFTSample)scalar_max,                   \
                               (FFTSample)fabsf(ti));                   \
                                                                        \
                    if (s < si) {                                       \
                        s   = si;                                       \
                        max = ti;                                       \
                    }                                                   \
                }                                                       \
                                                                        \
                *xdat = max;                                            \
            }                                                           \
        }                                                               \
    } while (0)

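/*
 * A minimal sketch (illustrative, hypothetical helper, float stereo,
 * ignoring the scalar_max clamp) of what the macro above expands to:
 * each mono output sample is the input channel sample with the largest
 * magnitude, rather than an average, which preserves transient peaks
 * for the correlation-based alignment:
 *
 *   static void downmix_flt_stereo(float *mono, const float *lr, int n)
 *   {
 *       for (int i = 0; i < n; i++, lr += 2) {
 *           // keep the signed value of the louder channel:
 *           mono[i] = fabsf(lr[0]) >= fabsf(lr[1]) ? lr[0] : lr[1];
 *       }
 *   }
 */
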
/**
 * Initialize complex data buffer of a given audio fragment
 * with down-mixed mono data of appropriate scalar type.
 */
static void yae_downmix(ATempoContext *atempo, AudioFragment *frag)
{
    // shortcuts:
    const uint8_t *src = frag->data;

    // init complex data buffer used for FFT and Correlation:
    memset(frag->xdat, 0, sizeof(FFTComplex) * atempo->window);

    if (atempo->format == AV_SAMPLE_FMT_U8) {
        yae_init_xdat(uint8_t, 127);
    } else if (atempo->format == AV_SAMPLE_FMT_S16) {
        yae_init_xdat(int16_t, 32767);
    } else if (atempo->format == AV_SAMPLE_FMT_S32) {
        yae_init_xdat(int, 2147483647);
    } else if (atempo->format == AV_SAMPLE_FMT_FLT) {
        yae_init_xdat(float, 1);
    } else if (atempo->format == AV_SAMPLE_FMT_DBL) {
        yae_init_xdat(double, 1);
    }
}

/**
 * Populate the internal data buffer on an as-needed basis.
 *
 * @return
 *   0 if requested data was already available or was successfully loaded,
 *   AVERROR(EAGAIN) if more input data is required.
 */
static int yae_load_data(ATempoContext *atempo,
                         const uint8_t **src_ref,
                         const uint8_t *src_end,
                         int64_t stop_here)
{
    // shortcut:
    const uint8_t *src = *src_ref;
    const int read_size = stop_here - atempo->position[0];

    if (stop_here <= atempo->position[0]) {
        return 0;
    }

    // samples are not expected to be skipped, unless tempo is greater than 2:
    av_assert0(read_size <= atempo->ring || atempo->tempo > 2.0);

    while (atempo->position[0] < stop_here && src < src_end) {
        int src_samples = (src_end - src) / atempo->stride;

        // load data piece-wise, in order to avoid complicating the logic:
        int nsamples = FFMIN(read_size, src_samples);
        int na;
        int nb;

        nsamples = FFMIN(nsamples, atempo->ring);
        na = FFMIN(nsamples, atempo->ring - atempo->tail);
        nb = FFMIN(nsamples - na, atempo->ring);

        if (na) {
            uint8_t *a = atempo->buffer + atempo->tail * atempo->stride;
            memcpy(a, src, na * atempo->stride);

            src += na * atempo->stride;
            atempo->position[0] += na;

            atempo->size = FFMIN(atempo->size + na, atempo->ring);
            atempo->tail = (atempo->tail + na) % atempo->ring;
            atempo->head =
                atempo->size < atempo->ring ?
                atempo->tail - atempo->size :
                atempo->tail;
        }

        if (nb) {
            uint8_t *b = atempo->buffer;
            memcpy(b, src, nb * atempo->stride);

            src += nb * atempo->stride;
            atempo->position[0] += nb;

            atempo->size = FFMIN(atempo->size + nb, atempo->ring);
            atempo->tail = (atempo->tail + nb) % atempo->ring;
            atempo->head =
                atempo->size < atempo->ring ?
                atempo->tail - atempo->size :
                atempo->tail;
        }
    }

    // pass back the updated source buffer pointer:
    *src_ref = src;

    // sanity check:
    av_assert0(atempo->position[0] <= stop_here);

    return atempo->position[0] == stop_here ? 0 : AVERROR(EAGAIN);
}

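/*
 * Illustrative note: the two memcpy branches above are the standard
 * wrap-around write into a ring buffer.  The same pattern in isolation
 * (hypothetical names, one write of nsamples samples):
 *
 *   int na = FFMIN(nsamples, ring - tail);  // up to the physical end
 *   int nb = nsamples - na;                 // remainder wraps to index 0
 *   memcpy(buf + tail * stride, src, na * stride);
 *   memcpy(buf, src + na * stride, nb * stride);
 *   tail = (tail + na + nb) % ring;
 */
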
/**
 * Populate current audio fragment data buffer.
 *
 * @return
 *   0 when the fragment is ready,
 *   AVERROR(EAGAIN) if more input data is required.
 */
static int yae_load_frag(ATempoContext *atempo,
                         const uint8_t **src_ref,
                         const uint8_t *src_end)
{
    // shortcuts:
    AudioFragment *frag = yae_curr_frag(atempo);
    uint8_t *dst;
    int64_t missing, start, zeros;
    uint32_t nsamples;
    const uint8_t *a, *b;
    int i0, i1, n0, n1, na, nb;

    int64_t stop_here = frag->position[0] + atempo->window;
    if (src_ref && yae_load_data(atempo, src_ref, src_end, stop_here) != 0) {
        return AVERROR(EAGAIN);
    }

    // calculate the number of samples we don't have:
    missing =
        stop_here > atempo->position[0] ?
        stop_here - atempo->position[0] : 0;

    nsamples =
        missing < (int64_t)atempo->window ?
        (uint32_t)(atempo->window - missing) : 0;

    // setup the output buffer:
    frag->nsamples = nsamples;
    dst = frag->data;

    start = atempo->position[0] - atempo->size;
    zeros = 0;

    if (frag->position[0] < start) {
        // what we don't have we substitute with zeros:
        zeros = FFMIN(start - frag->position[0], (int64_t)nsamples);
        av_assert0(zeros != nsamples);

        memset(dst, 0, zeros * atempo->stride);
        dst += zeros * atempo->stride;
    }

    if (zeros == nsamples) {
        return 0;
    }

    // get the remaining data from the ring buffer:
    na = (atempo->head < atempo->tail ?
          atempo->tail - atempo->head :
          atempo->ring - atempo->head);

    nb = atempo->head < atempo->tail ? 0 : atempo->tail;

    // sanity check:
    av_assert0(nsamples <= zeros + na + nb);

    a = atempo->buffer + atempo->head * atempo->stride;
    b = atempo->buffer;

    i0 = frag->position[0] + zeros - start;
    i1 = i0 < na ? 0 : i0 - na;

    n0 = i0 < na ? FFMIN(na - i0, (int)(nsamples - zeros)) : 0;
    n1 = nsamples - zeros - n0;

    if (n0) {
        memcpy(dst, a + i0 * atempo->stride, n0 * atempo->stride);
        dst += n0 * atempo->stride;
    }

    if (n1) {
        memcpy(dst, b + i1 * atempo->stride, n1 * atempo->stride);
    }

    return 0;
}

/**
 * Prepare for loading next audio fragment.
 */
static void yae_advance_to_next_frag(ATempoContext *atempo)
{
    const double fragment_step = atempo->tempo * (double)(atempo->window / 2);

    const AudioFragment *prev;
    AudioFragment *frag;

    atempo->nfrag++;
    prev = yae_prev_frag(atempo);
    frag = yae_curr_frag(atempo);

    frag->position[0] = prev->position[0] + (int64_t)fragment_step;
    frag->position[1] = prev->position[1] + atempo->window / 2;
    frag->nsamples    = 0;
}

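/*
 * Worked example (illustrative): with window = 2048 and tempo = 2.0 the
 * input position advances by tempo * window / 2 = 2048 samples per
 * fragment while the output position advances by window / 2 = 1024,
 * so only half of the input time span is represented in the output
 * and playback runs twice as fast.
 */
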
/**
 * Calculate cross-correlation via rDFT.
 *
 * Multiply two vectors of complex numbers (result of real_to_complex rDFT)
 * and transform back via complex_to_real rDFT.
 */
static void yae_xcorr_via_rdft(FFTSample *xcorr,
                               RDFTContext *complex_to_real,
                               const FFTComplex *xa,
                               const FFTComplex *xb,
                               const int window)
{
    FFTComplex *xc = (FFTComplex *)xcorr;
    int i;

    // NOTE: first element requires special care -- Given Y = rDFT(X),
    // Im(Y[0]) and Im(Y[N/2]) are always zero, therefore av_rdft_calc
    // stores Re(Y[N/2]) in place of Im(Y[0]).

    xc->re = xa->re * xb->re;
    xc->im = xa->im * xb->im;
    xa++;
    xb++;
    xc++;

    for (i = 1; i < window; i++, xa++, xb++, xc++) {
        xc->re = (xa->re * xb->re + xa->im * xb->im);
        xc->im = (xa->im * xb->re - xa->re * xb->im);
    }

    // apply inverse rDFT:
    av_rdft_calc(complex_to_real, xcorr);
}

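/*
 * For reference (illustrative, hypothetical helper): up to rDFT scaling,
 * circular indexing and the packed first bin noted above, the complex
 * multiply-and-invert is the O(N log N) equivalent of this naive O(N^2)
 * cross-correlation:
 *
 *   static void xcorr_naive(float *xcorr, const float *a,
 *                           const float *b, int n)
 *   {
 *       for (int lag = 0; lag < n; lag++) {
 *           float sum = 0.0f;
 *           for (int i = 0; i + lag < n; i++)
 *               sum += a[i + lag] * b[i];
 *           xcorr[lag] = sum;
 *       }
 *   }
 */
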
/**
 * Calculate alignment offset for given fragment
 * relative to the previous fragment.
 *
 * @return alignment offset of current fragment relative to previous.
 */
static int yae_align(AudioFragment *frag,
                     const AudioFragment *prev,
                     const int window,
                     const int delta_max,
                     const int drift,
                     FFTSample *correlation,
                     RDFTContext *complex_to_real)
{
    int best_offset = -drift;
    FFTSample best_metric = -FLT_MAX;
    FFTSample *xcorr;

    int i0;
    int i1;
    int i;

    yae_xcorr_via_rdft(correlation,
                       complex_to_real,
                       (const FFTComplex *)prev->xdat,
                       (const FFTComplex *)frag->xdat,
                       window);

    // identify search window boundaries:
    i0 = FFMAX(window / 2 - delta_max - drift, 0);
    i0 = FFMIN(i0, window);

    i1 = FFMIN(window / 2 + delta_max - drift, window - window / 16);
    i1 = FFMAX(i1, 0);

    // identify cross-correlation peaks within search window:
    xcorr = correlation + i0;

    for (i = i0; i < i1; i++, xcorr++) {
        FFTSample metric = *xcorr;

        // normalize:
        FFTSample drifti = (FFTSample)(drift + i);
        metric *= drifti * (FFTSample)(i - i0) * (FFTSample)(i1 - i);

        if (metric > best_metric) {
            best_metric = metric;
            best_offset = i - window / 2;
        }
    }

    return best_offset;
}

/**
 * Adjust current fragment position for better alignment
 * with previous fragment.
 *
 * @return alignment correction.
 */
static int yae_adjust_position(ATempoContext *atempo)
{
    const AudioFragment *prev = yae_prev_frag(atempo);
    AudioFragment *frag = yae_curr_frag(atempo);

    const double prev_output_position =
        (double)(prev->position[1] - atempo->origin[1] + atempo->window / 2) *
        atempo->tempo;

    const double ideal_output_position =
        (double)(prev->position[0] - atempo->origin[0] + atempo->window / 2);

    const int drift = (int)(prev_output_position - ideal_output_position);

    const int delta_max = atempo->window / 2;
    const int correction = yae_align(frag,
                                     prev,
                                     atempo->window,
                                     delta_max,
                                     drift,
                                     atempo->correlation,
                                     atempo->complex_to_real);

    if (correction) {
        // adjust fragment position:
        frag->position[0] -= correction;

        // clear so that the fragment can be reloaded:
        frag->nsamples = 0;
    }

    return correction;
}

/**
 * A helper macro for blending the overlap region of previous
 * and current audio fragment.
 */
#define yae_blend(scalar_type)                                          \
    do {                                                                \
        const scalar_type *aaa = (const scalar_type *)a;                \
        const scalar_type *bbb = (const scalar_type *)b;                \
                                                                        \
        scalar_type *out     = (scalar_type *)dst;                      \
        scalar_type *out_end = (scalar_type *)dst_end;                  \
        int64_t i;                                                      \
                                                                        \
        for (i = 0; i < overlap && out < out_end;                       \
             i++, atempo->position[1]++, wa++, wb++) {                  \
            float w0 = *wa;                                             \
            float w1 = *wb;                                             \
            int j;                                                      \
                                                                        \
            for (j = 0; j < atempo->channels;                           \
                 j++, aaa++, bbb++, out++) {                            \
                float t0 = (float)*aaa;                                 \
                float t1 = (float)*bbb;                                 \
                                                                        \
                *out =                                                  \
                    frag->position[0] + i < 0 ?                         \
                    *aaa :                                              \
                    (scalar_type)(t0 * w0 + t1 * w1);                   \
            }                                                           \
        }                                                               \
        dst = (uint8_t *)out;                                           \
    } while (0)

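/*
 * A minimal sketch (illustrative, mono float samples) of the cross-fade
 * the macro performs: the falling half of the Hann window weights the
 * previous fragment and the rising half weights the current one, so the
 * two weights sum to ~1 across the overlap and no re-normalization is
 * needed:
 *
 *   for (int i = 0; i < overlap; i++)
 *       out[i] = prev[i] * w_falling[i] + curr[i] * w_rising[i];
 */
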
/**
 * Blend the overlap region of previous and current audio fragment
 * and output the results to the given destination buffer.
 *
 * @return
 *   0 if the overlap region was completely stored in the dst buffer,
 *   AVERROR(EAGAIN) if more destination buffer space is required.
 */
static int yae_overlap_add(ATempoContext *atempo,
                           uint8_t **dst_ref,
                           uint8_t *dst_end)
{
    // shortcuts:
    const AudioFragment *prev = yae_prev_frag(atempo);
    const AudioFragment *frag = yae_curr_frag(atempo);

    const int64_t start_here = FFMAX(atempo->position[1],
                                     frag->position[1]);

    const int64_t stop_here = FFMIN(prev->position[1] + prev->nsamples,
                                    frag->position[1] + frag->nsamples);

    const int64_t overlap = stop_here - start_here;

    const int64_t ia = start_here - prev->position[1];
    const int64_t ib = start_here - frag->position[1];

    const float *wa = atempo->hann + ia;
    const float *wb = atempo->hann + ib;

    const uint8_t *a = prev->data + ia * atempo->stride;
    const uint8_t *b = frag->data + ib * atempo->stride;

    uint8_t *dst = *dst_ref;

    av_assert0(start_here <= stop_here &&
               frag->position[1] <= start_here &&
               overlap <= frag->nsamples);

    if (atempo->format == AV_SAMPLE_FMT_U8) {
        yae_blend(uint8_t);
    } else if (atempo->format == AV_SAMPLE_FMT_S16) {
        yae_blend(int16_t);
    } else if (atempo->format == AV_SAMPLE_FMT_S32) {
        yae_blend(int);
    } else if (atempo->format == AV_SAMPLE_FMT_FLT) {
        yae_blend(float);
    } else if (atempo->format == AV_SAMPLE_FMT_DBL) {
        yae_blend(double);
    }

    // pass-back the updated destination buffer pointer:
    *dst_ref = dst;

    return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
}

/**
 * Feed as much data to the filter as it is able to consume
 * and receive as much processed data in the destination buffer
 * as it is able to produce or store.
 */
static void
yae_apply(ATempoContext *atempo,
          const uint8_t **src_ref,
          const uint8_t *src_end,
          uint8_t **dst_ref,
          uint8_t *dst_end)
{
    while (1) {
        if (atempo->state == YAE_LOAD_FRAGMENT) {
            // load additional data for the current fragment:
            if (yae_load_frag(atempo, src_ref, src_end) != 0) {
                break;
            }

            // down-mix to mono:
            yae_downmix(atempo, yae_curr_frag(atempo));

            // apply rDFT:
            av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);

            // must load the second fragment before alignment can start:
            if (!atempo->nfrag) {
                yae_advance_to_next_frag(atempo);
                continue;
            }

            atempo->state = YAE_ADJUST_POSITION;
        }

        if (atempo->state == YAE_ADJUST_POSITION) {
            // adjust position for better alignment:
            if (yae_adjust_position(atempo)) {
                // reload the fragment at the corrected position, so that the
                // Hann window blending would not require normalization:
                atempo->state = YAE_RELOAD_FRAGMENT;
            } else {
                atempo->state = YAE_OUTPUT_OVERLAP_ADD;
            }
        }

        if (atempo->state == YAE_RELOAD_FRAGMENT) {
            // load additional data if necessary due to position adjustment:
            if (yae_load_frag(atempo, src_ref, src_end) != 0) {
                break;
            }

            // down-mix to mono:
            yae_downmix(atempo, yae_curr_frag(atempo));

            // apply rDFT:
            av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);

            atempo->state = YAE_OUTPUT_OVERLAP_ADD;
        }

        if (atempo->state == YAE_OUTPUT_OVERLAP_ADD) {
            // overlap-add and output the result:
            if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
                break;
            }

            // advance to the next fragment, repeat:
            yae_advance_to_next_frag(atempo);
            atempo->state = YAE_LOAD_FRAGMENT;
        }
    }
}

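/*
 * Summary of the state machine driven above:
 *
 *   YAE_LOAD_FRAGMENT -> YAE_ADJUST_POSITION -> [YAE_RELOAD_FRAGMENT]
 *                     -> YAE_OUTPUT_OVERLAP_ADD -> YAE_LOAD_FRAGMENT ...
 *
 * The loop breaks out whenever it runs out of input samples or output
 * space; the caller re-enters later with more data and processing
 * resumes exactly where it left off.
 */
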
/**
 * Flush any buffered data from the filter.
 *
 * @return
 *   0 if all data was completely stored in the dst buffer,
 *   AVERROR(EAGAIN) if more destination buffer space is required.
 */
static int yae_flush(ATempoContext *atempo,
                     uint8_t **dst_ref,
                     uint8_t *dst_end)
{
    AudioFragment *frag = yae_curr_frag(atempo);
    int64_t overlap_end;
    int64_t start_here;
    int64_t stop_here;
    int64_t offset;

    const uint8_t *src;
    uint8_t *dst;

    int src_size;
    int dst_size;
    int nbytes;

    atempo->state = YAE_FLUSH_OUTPUT;

    if (!atempo->nfrag) {
        // there is nothing to flush:
        return 0;
    }

    if (atempo->position[0] == frag->position[0] + frag->nsamples &&
        atempo->position[1] == frag->position[1] + frag->nsamples) {
        // the current fragment is already flushed:
        return 0;
    }

    if (frag->position[0] + frag->nsamples < atempo->position[0]) {
        // finish loading the current (possibly partial) fragment:
        yae_load_frag(atempo, NULL, NULL);

        if (atempo->nfrag) {
            // down-mix to mono:
            yae_downmix(atempo, frag);

            // apply rDFT:
            av_rdft_calc(atempo->real_to_complex, frag->xdat);

            // align current fragment to previous fragment:
            if (yae_adjust_position(atempo)) {
                // reload the current fragment due to adjusted position:
                yae_load_frag(atempo, NULL, NULL);
            }
        }
    }

    // flush the overlap region:
    overlap_end = frag->position[1] + FFMIN(atempo->window / 2,
                                            frag->nsamples);

    while (atempo->position[1] < overlap_end) {
        if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
            return AVERROR(EAGAIN);
        }
    }

    // check whether all of the input samples have been consumed:
    if (frag->position[0] + frag->nsamples < atempo->position[0]) {
        yae_advance_to_next_frag(atempo);
        return AVERROR(EAGAIN);
    }

    // flush the remainder of the current fragment:
    start_here = FFMAX(atempo->position[1], overlap_end);
    stop_here  = frag->position[1] + frag->nsamples;
    offset     = start_here - frag->position[1];
    av_assert0(start_here <= stop_here && frag->position[1] <= start_here);

    src = frag->data + offset * atempo->stride;
    dst = (uint8_t *)*dst_ref;

    src_size = (int)(stop_here - start_here) * atempo->stride;
    dst_size = dst_end - dst;
    nbytes = FFMIN(src_size, dst_size);

    memcpy(dst, src, nbytes);
    dst += nbytes;

    atempo->position[1] += (nbytes / atempo->stride);

    // pass-back the updated destination buffer pointer:
    *dst_ref = (uint8_t *)dst;

    return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
}

static av_cold int init(AVFilterContext *ctx)
{
    ATempoContext *atempo = ctx->priv;
    atempo->format = AV_SAMPLE_FMT_NONE;
    atempo->state  = YAE_LOAD_FRAGMENT;
    return 0;
}

static av_cold void uninit(AVFilterContext *ctx)
{
    ATempoContext *atempo = ctx->priv;
    yae_release_buffers(atempo);
}

static int query_formats(AVFilterContext *ctx)
{
    AVFilterChannelLayouts *layouts = NULL;
    AVFilterFormats *formats = NULL;

    // WSOLA necessitates an internal sliding window ring buffer
    // for incoming audio stream.
    //
    // Planar sample formats are too cumbersome to store in a ring buffer,
    // therefore planar sample formats are not supported.
    //
    static const enum AVSampleFormat sample_fmts[] = {
        AV_SAMPLE_FMT_U8,
        AV_SAMPLE_FMT_S16,
        AV_SAMPLE_FMT_S32,
        AV_SAMPLE_FMT_FLT,
        AV_SAMPLE_FMT_DBL,
        AV_SAMPLE_FMT_NONE
    };
    int ret;

    layouts = ff_all_channel_counts();
    if (!layouts) {
        return AVERROR(ENOMEM);
    }
    ret = ff_set_common_channel_layouts(ctx, layouts);
    if (ret < 0)
        return ret;

    formats = ff_make_format_list(sample_fmts);
    if (!formats) {
        return AVERROR(ENOMEM);
    }
    ret = ff_set_common_formats(ctx, formats);
    if (ret < 0)
        return ret;

    formats = ff_all_samplerates();
    if (!formats) {
        return AVERROR(ENOMEM);
    }
    return ff_set_common_samplerates(ctx, formats);
}

static int config_props(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    ATempoContext *atempo = ctx->priv;

    enum AVSampleFormat format = inlink->format;
    int sample_rate = (int)inlink->sample_rate;

    return yae_reset(atempo, format, sample_rate, inlink->channels);
}

static int push_samples(ATempoContext *atempo,
                        AVFilterLink *outlink,
                        int n_out)
{
    int ret;

    atempo->dst_buffer->sample_rate = outlink->sample_rate;
    atempo->dst_buffer->nb_samples  = n_out;

    // adjust the PTS:
    atempo->dst_buffer->pts = atempo->start_pts +
        av_rescale_q(atempo->nsamples_out,
                     (AVRational){ 1, outlink->sample_rate },
                     outlink->time_base);

    ret = ff_filter_frame(outlink, atempo->dst_buffer);
    atempo->dst_buffer = NULL;
    atempo->dst        = NULL;
    atempo->dst_end    = NULL;
    if (ret < 0)
        return ret;

    atempo->nsamples_out += n_out;
    return 0;
}

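/*
 * Worked example (illustrative): with outlink->sample_rate = 48000 and
 * outlink->time_base = (AVRational){ 1, 48000 }, av_rescale_q() above
 * maps nsamples_out 1:1 to a timestamp offset; with time_base
 * (AVRational){ 1, 90000 } the same count is scaled by 90000/48000.
 * Output PTS therefore tracks samples actually emitted rather than the
 * tempo-scaled input clock.
 */
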
static int filter_frame(AVFilterLink *inlink, AVFrame *src_buffer)
{
    AVFilterContext *ctx = inlink->dst;
    ATempoContext *atempo = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];

    int ret = 0;
    int n_in = src_buffer->nb_samples;
    int n_out = (int)(0.5 + ((double)n_in) / atempo->tempo);

    const uint8_t *src = src_buffer->data[0];
    const uint8_t *src_end = src + n_in * atempo->stride;

    if (atempo->start_pts == AV_NOPTS_VALUE)
        atempo->start_pts = av_rescale_q(src_buffer->pts,
                                         inlink->time_base,
                                         outlink->time_base);

    while (src < src_end) {
        if (!atempo->dst_buffer) {
            atempo->dst_buffer = ff_get_audio_buffer(outlink, n_out);
            if (!atempo->dst_buffer) {
                av_frame_free(&src_buffer);
                return AVERROR(ENOMEM);
            }
            av_frame_copy_props(atempo->dst_buffer, src_buffer);

            atempo->dst = atempo->dst_buffer->data[0];
            atempo->dst_end = atempo->dst + n_out * atempo->stride;
        }

        yae_apply(atempo, &src, src_end, &atempo->dst, atempo->dst_end);

        if (atempo->dst == atempo->dst_end) {
            int n_samples = ((atempo->dst - atempo->dst_buffer->data[0]) /
                             atempo->stride);
            ret = push_samples(atempo, outlink, n_samples);
            if (ret < 0)
                goto end;
        }
    }

    atempo->nsamples_in += n_in;
end:
    av_frame_free(&src_buffer);
    return ret;
}

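/*
 * Worked example (illustrative): n_out above rounds to nearest, so for
 * n_in = 1024 and tempo = 1.25, n_out = (int)(0.5 + 1024 / 1.25) = 819
 * samples are budgeted for each output frame.
 */
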
static int request_frame(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    ATempoContext *atempo = ctx->priv;
    int ret;

    ret = ff_request_frame(ctx->inputs[0]);

    if (ret == AVERROR_EOF) {
        // flush the filter:
        int n_max = atempo->ring;
        int n_out;
        int err = AVERROR(EAGAIN);

        while (err == AVERROR(EAGAIN)) {
            if (!atempo->dst_buffer) {
                atempo->dst_buffer = ff_get_audio_buffer(outlink, n_max);
                if (!atempo->dst_buffer)
                    return AVERROR(ENOMEM);

                atempo->dst = atempo->dst_buffer->data[0];
                atempo->dst_end = atempo->dst + n_max * atempo->stride;
            }

            err = yae_flush(atempo, &atempo->dst, atempo->dst_end);

            n_out = ((atempo->dst - atempo->dst_buffer->data[0]) /
                     atempo->stride);

            if (n_out) {
                ret = push_samples(atempo, outlink, n_out);
                if (ret < 0)
                    return ret;
            }
        }

        av_frame_free(&atempo->dst_buffer);
        atempo->dst     = NULL;
        atempo->dst_end = NULL;

        return AVERROR_EOF;
    }

    return ret;
}

static int process_command(AVFilterContext *ctx,
                           const char *cmd,
                           const char *arg,
                           char *res,
                           int res_len,
                           int flags)
{
    int ret = ff_filter_process_command(ctx, cmd, arg, res, res_len, flags);

    if (ret < 0)
        return ret;

    return yae_update(ctx);
}

static const AVFilterPad atempo_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_AUDIO,
        .filter_frame = filter_frame,
        .config_props = config_props,
    },
    { NULL }
};

static const AVFilterPad atempo_outputs[] = {
    {
        .name          = "default",
        .request_frame = request_frame,
        .type          = AVMEDIA_TYPE_AUDIO,
    },
    { NULL }
};

AVFilter ff_af_atempo = {
    .name            = "atempo",
    .description     = NULL_IF_CONFIG_SMALL("Adjust audio tempo."),
    .init            = init,
    .uninit          = uninit,
    .query_formats   = query_formats,
    .process_command = process_command,
    .priv_size       = sizeof(ATempoContext),
    .priv_class      = &atempo_class,
    .inputs          = atempo_inputs,
    .outputs         = atempo_outputs,
};
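
/*
 * Illustrative sketch of driving this filter through the public
 * libavfilter API (error handling omitted; the surrounding abuffer /
 * abuffersink setup is assumed):
 *
 *   AVFilterGraph *graph = avfilter_graph_alloc();
 *   AVFilterContext *src, *sink;
 *   // ... create "abuffer" filter src and "abuffersink" filter sink ...
 *   AVFilterInOut *outputs = avfilter_inout_alloc();
 *   AVFilterInOut *inputs  = avfilter_inout_alloc();
 *   outputs->name = av_strdup("in");  outputs->filter_ctx = src;
 *   inputs->name  = av_strdup("out"); inputs->filter_ctx  = sink;
 *   avfilter_graph_parse_ptr(graph, "atempo=1.5", &inputs, &outputs, NULL);
 *   avfilter_graph_config(graph, NULL);
 *
 *   // the tempo option is marked AV_OPT_FLAG_RUNTIME_PARAM, so it can
 *   // also be changed on the fly:
 *   avfilter_graph_send_command(graph, "atempo", "tempo", "2.0",
 *                               NULL, 0, 0);
 */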