00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "avcodec.h"
00023 #include "put_bits.h"
00024 #include "dsputil.h"
00025 #include "internal.h"
00026 #include "lpc.h"
00027 #include "mathops.h"
00028
/* Frame length (samples per channel) that alac_encode_init() configures;
 * also the capacity of the per-channel sample and residual buffers. */
00029 #define DEFAULT_FRAME_SIZE 4096
/* Bits per input sample; alac_encode_init() only accepts 16-bit input. */
00030 #define DEFAULT_SAMPLE_SIZE 16
/* Dimension of the per-channel arrays in AlacEncodeContext
 * (alac_encode_init() currently rejects more than 2 channels). */
00031 #define MAX_CHANNELS 8
/* Size in bytes of the extradata block built by alac_encode_init(). */
00032 #define ALAC_EXTRADATA_SIZE 36
/* Not referenced in the visible code. NOTE(review): 55 equals the 23 + 32
 * header bits used by get_max_frame_size() - presumably a size in bits. */
00033 #define ALAC_FRAME_HEADER_SIZE 55
/* Not referenced in the visible code. NOTE(review): presumably the 3-bit
 * end marker written by write_frame() - confirm. */
00034 #define ALAC_FRAME_FOOTER_SIZE 3
00035
/* 9-bit prefix written by encode_scalar() before a value stored verbatim. */
00036 #define ALAC_ESCAPE_CODE 0x1FF
/* Largest user-supplied prediction order accepted by alac_encode_init(). */
00037 #define ALAC_MAX_LPC_ORDER 30
/* Prediction order range used when the user does not override it. */
00038 #define DEFAULT_MAX_PRED_ORDER 6
00039 #define DEFAULT_MIN_PRED_ORDER 4
/* Precision and maximum shift passed to ff_lpc_calc_coefs(). */
00040 #define ALAC_MAX_LPC_PRECISION 9
00041 #define ALAC_MAX_LPC_SHIFT 9
00042
/* Stereo decorrelation modes: values returned by estimate_stereo_mode()
 * and dispatched on in alac_stereo_decorrelation(). */
00043 #define ALAC_CHMODE_LEFT_RIGHT 0
00044 #define ALAC_CHMODE_LEFT_SIDE 1
00045 #define ALAC_CHMODE_RIGHT_SIDE 2
00046 #define ALAC_CHMODE_MID_SIDE 3
00047
/* Parameters of the adaptive Rice entropy coder; set in alac_encode_init(). */
00048 typedef struct RiceContext {
00049 int history_mult; /* multiplier in the history update of alac_entropy_coder() */
00050 int initial_history; /* history value each alac_entropy_coder() call starts from */
00051 int k_modifier; /* upper bound applied to the Rice parameter k in encode_scalar() */
00052 int rice_modifier; /* written as a 3-bit per-channel field by write_frame() */
00053 } RiceContext;
00054
/* Linear predictor description for one channel, filled in by
 * calc_predictor_params() and consumed by alac_linear_predictor(). */
00055 typedef struct AlacLPCContext {
00056 int lpc_order; /* number of coefficients in use */
00057 int lpc_coeff[ALAC_MAX_LPC_ORDER+1]; /* quantised coefficients; written as 16-bit signed values */
00058 int lpc_quant; /* right shift applied to the prediction sum */
00059 } AlacLPCContext;
00060
/* Private state of the ALAC encoder (avctx->priv_data). */
00061 typedef struct AlacEncodeContext {
00062 int frame_size; /* samples per channel in the frame being encoded */
00063 int verbatim; /* non-zero: write_frame() stores raw 16-bit samples */
00064 int compression_level; /* clipped to 0..2 in alac_encode_init() */
00065 int min_prediction_order; /* lower order bound passed to ff_lpc_calc_coefs() */
00066 int max_prediction_order; /* upper order bound passed to ff_lpc_calc_coefs() */
00067 int max_coded_frame_size; /* get_max_frame_size() result for a full frame, in bytes */
00068 int write_sample_size; /* bit width used for escaped samples and residual sign extension */
00069 int32_t sample_buf[MAX_CHANNELS][DEFAULT_FRAME_SIZE]; /* de-interleaved input, one row per channel */
00070 int32_t predictor_buf[DEFAULT_FRAME_SIZE]; /* residuals of the channel currently being coded */
00071 int interlacing_shift; /* stereo decorrelation parameter, written as 8 bits per frame */
00072 int interlacing_leftweight; /* stereo decorrelation parameter, written as 8 bits per frame */
00073 PutBitContext pbctx; /* bit writer for the output packet */
00074 RiceContext rc; /* entropy coder parameters */
00075 AlacLPCContext lpc[MAX_CHANNELS]; /* per-channel predictor parameters */
00076 LPCContext lpc_ctx; /* state for ff_lpc_init() / ff_lpc_calc_coefs() / ff_lpc_end() */
00077 AVCodecContext *avctx; /* owning codec context, set in alac_encode_init() */
00078 } AlacEncodeContext;
00079
00080
00081 static void init_sample_buffers(AlacEncodeContext *s,
00082 const int16_t *input_samples)
00083 {
00084 int ch, i;
00085
00086 for (ch = 0; ch < s->avctx->channels; ch++) {
00087 const int16_t *sptr = input_samples + ch;
00088 for (i = 0; i < s->frame_size; i++) {
00089 s->sample_buf[ch][i] = *sptr;
00090 sptr += s->avctx->channels;
00091 }
00092 }
00093 }
00094
00095 static void encode_scalar(AlacEncodeContext *s, int x,
00096 int k, int write_sample_size)
00097 {
00098 int divisor, q, r;
00099
00100 k = FFMIN(k, s->rc.k_modifier);
00101 divisor = (1<<k) - 1;
00102 q = x / divisor;
00103 r = x % divisor;
00104
00105 if (q > 8) {
00106
00107 put_bits(&s->pbctx, 9, ALAC_ESCAPE_CODE);
00108 put_bits(&s->pbctx, write_sample_size, x);
00109 } else {
00110 if (q)
00111 put_bits(&s->pbctx, q, (1<<q) - 1);
00112 put_bits(&s->pbctx, 1, 0);
00113
00114 if (k != 1) {
00115 if (r > 0)
00116 put_bits(&s->pbctx, k, r+1);
00117 else
00118 put_bits(&s->pbctx, k-1, 0);
00119 }
00120 }
00121 }
00122
00123 static void write_frame_header(AlacEncodeContext *s)
00124 {
00125 int encode_fs = 0;
00126
00127 if (s->frame_size < DEFAULT_FRAME_SIZE)
00128 encode_fs = 1;
00129
00130 put_bits(&s->pbctx, 3, s->avctx->channels-1);
00131 put_bits(&s->pbctx, 16, 0);
00132 put_bits(&s->pbctx, 1, encode_fs);
00133 put_bits(&s->pbctx, 2, 0);
00134 put_bits(&s->pbctx, 1, s->verbatim);
00135 if (encode_fs)
00136 put_bits32(&s->pbctx, s->frame_size);
00137 }
00138
00139 static void calc_predictor_params(AlacEncodeContext *s, int ch)
00140 {
00141 int32_t coefs[MAX_LPC_ORDER][MAX_LPC_ORDER];
00142 int shift[MAX_LPC_ORDER];
00143 int opt_order;
00144
00145 if (s->compression_level == 1) {
00146 s->lpc[ch].lpc_order = 6;
00147 s->lpc[ch].lpc_quant = 6;
00148 s->lpc[ch].lpc_coeff[0] = 160;
00149 s->lpc[ch].lpc_coeff[1] = -190;
00150 s->lpc[ch].lpc_coeff[2] = 170;
00151 s->lpc[ch].lpc_coeff[3] = -130;
00152 s->lpc[ch].lpc_coeff[4] = 80;
00153 s->lpc[ch].lpc_coeff[5] = -25;
00154 } else {
00155 opt_order = ff_lpc_calc_coefs(&s->lpc_ctx, s->sample_buf[ch],
00156 s->frame_size,
00157 s->min_prediction_order,
00158 s->max_prediction_order,
00159 ALAC_MAX_LPC_PRECISION, coefs, shift,
00160 FF_LPC_TYPE_LEVINSON, 0,
00161 ORDER_METHOD_EST, ALAC_MAX_LPC_SHIFT, 1);
00162
00163 s->lpc[ch].lpc_order = opt_order;
00164 s->lpc[ch].lpc_quant = shift[opt_order-1];
00165 memcpy(s->lpc[ch].lpc_coeff, coefs[opt_order-1], opt_order*sizeof(int));
00166 }
00167 }
00168
00169 static int estimate_stereo_mode(int32_t *left_ch, int32_t *right_ch, int n)
00170 {
00171 int i, best;
00172 int32_t lt, rt;
00173 uint64_t sum[4];
00174 uint64_t score[4];
00175
00176
00177 sum[0] = sum[1] = sum[2] = sum[3] = 0;
00178 for (i = 2; i < n; i++) {
00179 lt = left_ch[i] - 2 * left_ch[i - 1] + left_ch[i - 2];
00180 rt = right_ch[i] - 2 * right_ch[i - 1] + right_ch[i - 2];
00181 sum[2] += FFABS((lt + rt) >> 1);
00182 sum[3] += FFABS(lt - rt);
00183 sum[0] += FFABS(lt);
00184 sum[1] += FFABS(rt);
00185 }
00186
00187
00188 score[0] = sum[0] + sum[1];
00189 score[1] = sum[0] + sum[3];
00190 score[2] = sum[1] + sum[3];
00191 score[3] = sum[2] + sum[3];
00192
00193
00194 best = 0;
00195 for (i = 1; i < 4; i++) {
00196 if (score[i] < score[best])
00197 best = i;
00198 }
00199 return best;
00200 }
00201
00202 static void alac_stereo_decorrelation(AlacEncodeContext *s)
00203 {
00204 int32_t *left = s->sample_buf[0], *right = s->sample_buf[1];
00205 int i, mode, n = s->frame_size;
00206 int32_t tmp;
00207
00208 mode = estimate_stereo_mode(left, right, n);
00209
00210 switch (mode) {
00211 case ALAC_CHMODE_LEFT_RIGHT:
00212 s->interlacing_leftweight = 0;
00213 s->interlacing_shift = 0;
00214 break;
00215 case ALAC_CHMODE_LEFT_SIDE:
00216 for (i = 0; i < n; i++)
00217 right[i] = left[i] - right[i];
00218 s->interlacing_leftweight = 1;
00219 s->interlacing_shift = 0;
00220 break;
00221 case ALAC_CHMODE_RIGHT_SIDE:
00222 for (i = 0; i < n; i++) {
00223 tmp = right[i];
00224 right[i] = left[i] - right[i];
00225 left[i] = tmp + (right[i] >> 31);
00226 }
00227 s->interlacing_leftweight = 1;
00228 s->interlacing_shift = 31;
00229 break;
00230 default:
00231 for (i = 0; i < n; i++) {
00232 tmp = left[i];
00233 left[i] = (tmp + right[i]) >> 1;
00234 right[i] = tmp - right[i];
00235 }
00236 s->interlacing_leftweight = 1;
00237 s->interlacing_shift = 1;
00238 break;
00239 }
00240 }
00241
00242 static void alac_linear_predictor(AlacEncodeContext *s, int ch)
00243 {
00244 int i;
00245 AlacLPCContext lpc = s->lpc[ch];
00246
00247 if (lpc.lpc_order == 31) {
00248 s->predictor_buf[0] = s->sample_buf[ch][0];
00249
00250 for (i = 1; i < s->frame_size; i++) {
00251 s->predictor_buf[i] = s->sample_buf[ch][i ] -
00252 s->sample_buf[ch][i - 1];
00253 }
00254
00255 return;
00256 }
00257
00258
00259
00260 if (lpc.lpc_order > 0) {
00261 int32_t *samples = s->sample_buf[ch];
00262 int32_t *residual = s->predictor_buf;
00263
00264
00265 residual[0] = samples[0];
00266 for (i = 1; i <= lpc.lpc_order; i++)
00267 residual[i] = samples[i] - samples[i-1];
00268
00269
00270 for (i = lpc.lpc_order + 1; i < s->frame_size; i++) {
00271 int sum = 1 << (lpc.lpc_quant - 1), res_val, j;
00272
00273 for (j = 0; j < lpc.lpc_order; j++) {
00274 sum += (samples[lpc.lpc_order-j] - samples[0]) *
00275 lpc.lpc_coeff[j];
00276 }
00277
00278 sum >>= lpc.lpc_quant;
00279 sum += samples[0];
00280 residual[i] = sign_extend(samples[lpc.lpc_order+1] - sum,
00281 s->write_sample_size);
00282 res_val = residual[i];
00283
00284 if (res_val) {
00285 int index = lpc.lpc_order - 1;
00286 int neg = (res_val < 0);
00287
00288 while (index >= 0 && (neg ? (res_val < 0) : (res_val > 0))) {
00289 int val = samples[0] - samples[lpc.lpc_order - index];
00290 int sign = (val ? FFSIGN(val) : 0);
00291
00292 if (neg)
00293 sign *= -1;
00294
00295 lpc.lpc_coeff[index] -= sign;
00296 val *= sign;
00297 res_val -= (val >> lpc.lpc_quant) * (lpc.lpc_order - index);
00298 index--;
00299 }
00300 }
00301 samples++;
00302 }
00303 }
00304 }
00305
00306 static void alac_entropy_coder(AlacEncodeContext *s)
00307 {
00308 unsigned int history = s->rc.initial_history;
00309 int sign_modifier = 0, i, k;
00310 int32_t *samples = s->predictor_buf;
00311
00312 for (i = 0; i < s->frame_size;) {
00313 int x;
00314
00315 k = av_log2((history >> 9) + 3);
00316
00317 x = -2 * (*samples) -1;
00318 x ^= x >> 31;
00319
00320 samples++;
00321 i++;
00322
00323 encode_scalar(s, x - sign_modifier, k, s->write_sample_size);
00324
00325 history += x * s->rc.history_mult -
00326 ((history * s->rc.history_mult) >> 9);
00327
00328 sign_modifier = 0;
00329 if (x > 0xFFFF)
00330 history = 0xFFFF;
00331
00332 if (history < 128 && i < s->frame_size) {
00333 unsigned int block_size = 0;
00334
00335 k = 7 - av_log2(history) + ((history + 16) >> 6);
00336
00337 while (*samples == 0 && i < s->frame_size) {
00338 samples++;
00339 i++;
00340 block_size++;
00341 }
00342 encode_scalar(s, block_size, k, 16);
00343 sign_modifier = (block_size <= 0xFFFF);
00344 history = 0;
00345 }
00346
00347 }
00348 }
00349
00350 static int write_frame(AlacEncodeContext *s, AVPacket *avpkt,
00351 const int16_t *samples)
00352 {
00353 int i, j;
00354 int prediction_type = 0;
00355 PutBitContext *pb = &s->pbctx;
00356
00357 init_put_bits(pb, avpkt->data, avpkt->size);
00358
00359 if (s->verbatim) {
00360 write_frame_header(s);
00361 for (i = 0; i < s->frame_size * s->avctx->channels; i++)
00362 put_sbits(pb, 16, *samples++);
00363 } else {
00364 init_sample_buffers(s, samples);
00365 write_frame_header(s);
00366
00367 if (s->avctx->channels == 2)
00368 alac_stereo_decorrelation(s);
00369 put_bits(pb, 8, s->interlacing_shift);
00370 put_bits(pb, 8, s->interlacing_leftweight);
00371
00372 for (i = 0; i < s->avctx->channels; i++) {
00373 calc_predictor_params(s, i);
00374
00375 put_bits(pb, 4, prediction_type);
00376 put_bits(pb, 4, s->lpc[i].lpc_quant);
00377
00378 put_bits(pb, 3, s->rc.rice_modifier);
00379 put_bits(pb, 5, s->lpc[i].lpc_order);
00380
00381 for (j = 0; j < s->lpc[i].lpc_order; j++)
00382 put_sbits(pb, 16, s->lpc[i].lpc_coeff[j]);
00383 }
00384
00385
00386
00387 for (i = 0; i < s->avctx->channels; i++) {
00388 alac_linear_predictor(s, i);
00389
00390
00391 if (prediction_type == 15) {
00392
00393 for (j = s->frame_size - 1; j > 0; j--)
00394 s->predictor_buf[j] -= s->predictor_buf[j - 1];
00395 }
00396
00397 alac_entropy_coder(s);
00398 }
00399 }
00400 put_bits(pb, 3, 7);
00401 flush_put_bits(pb);
00402 return put_bits_count(pb) >> 3;
00403 }
00404
00405 static av_always_inline int get_max_frame_size(int frame_size, int ch, int bps)
00406 {
00407 int header_bits = 23 + 32 * (frame_size < DEFAULT_FRAME_SIZE);
00408 return FFALIGN(header_bits + bps * ch * frame_size + 3, 8) / 8;
00409 }
00410
00411 static av_cold int alac_encode_close(AVCodecContext *avctx)
00412 {
00413 AlacEncodeContext *s = avctx->priv_data;
00414 ff_lpc_end(&s->lpc_ctx);
00415 av_freep(&avctx->extradata);
00416 avctx->extradata_size = 0;
00417 av_freep(&avctx->coded_frame);
00418 return 0;
00419 }
00420
00421 static av_cold int alac_encode_init(AVCodecContext *avctx)
00422 {
00423 AlacEncodeContext *s = avctx->priv_data;
00424 int ret;
00425 uint8_t *alac_extradata;
00426
00427 avctx->frame_size = s->frame_size = DEFAULT_FRAME_SIZE;
00428
00429 if (avctx->sample_fmt != AV_SAMPLE_FMT_S16) {
00430 av_log(avctx, AV_LOG_ERROR, "only pcm_s16 input samples are supported\n");
00431 return -1;
00432 }
00433
00434
00435
00436
00437 if (avctx->channels > 2) {
00438 av_log(avctx, AV_LOG_ERROR, "only mono or stereo input is currently supported\n");
00439 return AVERROR_PATCHWELCOME;
00440 }
00441
00442
00443 if (avctx->compression_level == FF_COMPRESSION_DEFAULT)
00444 s->compression_level = 2;
00445 else
00446 s->compression_level = av_clip(avctx->compression_level, 0, 2);
00447
00448
00449 s->rc.history_mult = 40;
00450 s->rc.initial_history = 10;
00451 s->rc.k_modifier = 14;
00452 s->rc.rice_modifier = 4;
00453
00454 s->max_coded_frame_size = get_max_frame_size(avctx->frame_size,
00455 avctx->channels,
00456 DEFAULT_SAMPLE_SIZE);
00457
00458
00459 s->write_sample_size = DEFAULT_SAMPLE_SIZE + avctx->channels - 1;
00460
00461 avctx->extradata = av_mallocz(ALAC_EXTRADATA_SIZE + FF_INPUT_BUFFER_PADDING_SIZE);
00462 if (!avctx->extradata) {
00463 ret = AVERROR(ENOMEM);
00464 goto error;
00465 }
00466 avctx->extradata_size = ALAC_EXTRADATA_SIZE;
00467
00468 alac_extradata = avctx->extradata;
00469 AV_WB32(alac_extradata, ALAC_EXTRADATA_SIZE);
00470 AV_WB32(alac_extradata+4, MKBETAG('a','l','a','c'));
00471 AV_WB32(alac_extradata+12, avctx->frame_size);
00472 AV_WB8 (alac_extradata+17, DEFAULT_SAMPLE_SIZE);
00473 AV_WB8 (alac_extradata+21, avctx->channels);
00474 AV_WB32(alac_extradata+24, s->max_coded_frame_size);
00475 AV_WB32(alac_extradata+28,
00476 avctx->sample_rate * avctx->channels * DEFAULT_SAMPLE_SIZE);
00477 AV_WB32(alac_extradata+32, avctx->sample_rate);
00478
00479
00480 if (s->compression_level > 0) {
00481 AV_WB8(alac_extradata+18, s->rc.history_mult);
00482 AV_WB8(alac_extradata+19, s->rc.initial_history);
00483 AV_WB8(alac_extradata+20, s->rc.k_modifier);
00484 }
00485
00486 s->min_prediction_order = DEFAULT_MIN_PRED_ORDER;
00487 if (avctx->min_prediction_order >= 0) {
00488 if (avctx->min_prediction_order < MIN_LPC_ORDER ||
00489 avctx->min_prediction_order > ALAC_MAX_LPC_ORDER) {
00490 av_log(avctx, AV_LOG_ERROR, "invalid min prediction order: %d\n",
00491 avctx->min_prediction_order);
00492 ret = AVERROR(EINVAL);
00493 goto error;
00494 }
00495
00496 s->min_prediction_order = avctx->min_prediction_order;
00497 }
00498
00499 s->max_prediction_order = DEFAULT_MAX_PRED_ORDER;
00500 if (avctx->max_prediction_order >= 0) {
00501 if (avctx->max_prediction_order < MIN_LPC_ORDER ||
00502 avctx->max_prediction_order > ALAC_MAX_LPC_ORDER) {
00503 av_log(avctx, AV_LOG_ERROR, "invalid max prediction order: %d\n",
00504 avctx->max_prediction_order);
00505 ret = AVERROR(EINVAL);
00506 goto error;
00507 }
00508
00509 s->max_prediction_order = avctx->max_prediction_order;
00510 }
00511
00512 if (s->max_prediction_order < s->min_prediction_order) {
00513 av_log(avctx, AV_LOG_ERROR,
00514 "invalid prediction orders: min=%d max=%d\n",
00515 s->min_prediction_order, s->max_prediction_order);
00516 ret = AVERROR(EINVAL);
00517 goto error;
00518 }
00519
00520 avctx->coded_frame = avcodec_alloc_frame();
00521 if (!avctx->coded_frame) {
00522 ret = AVERROR(ENOMEM);
00523 goto error;
00524 }
00525
00526 s->avctx = avctx;
00527
00528 if ((ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size,
00529 s->max_prediction_order,
00530 FF_LPC_TYPE_LEVINSON)) < 0) {
00531 goto error;
00532 }
00533
00534 return 0;
00535 error:
00536 alac_encode_close(avctx);
00537 return ret;
00538 }
00539
00540 static int alac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
00541 const AVFrame *frame, int *got_packet_ptr)
00542 {
00543 AlacEncodeContext *s = avctx->priv_data;
00544 int out_bytes, max_frame_size, ret;
00545 const int16_t *samples = (const int16_t *)frame->data[0];
00546
00547 s->frame_size = frame->nb_samples;
00548
00549 if (avctx->frame_size < DEFAULT_FRAME_SIZE)
00550 max_frame_size = get_max_frame_size(s->frame_size, avctx->channels,
00551 DEFAULT_SAMPLE_SIZE);
00552 else
00553 max_frame_size = s->max_coded_frame_size;
00554
00555 if ((ret = ff_alloc_packet2(avctx, avpkt, 2 * max_frame_size)))
00556 return ret;
00557
00558
00559 s->verbatim = !s->compression_level;
00560
00561 out_bytes = write_frame(s, avpkt, samples);
00562
00563 if (out_bytes > max_frame_size) {
00564
00565 s->verbatim = 1;
00566 out_bytes = write_frame(s, avpkt, samples);
00567 }
00568
00569 avpkt->size = out_bytes;
00570 *got_packet_ptr = 1;
00571 return 0;
00572 }
00573
00574 AVCodec ff_alac_encoder = {
00575 .name = "alac",
00576 .type = AVMEDIA_TYPE_AUDIO,
00577 .id = CODEC_ID_ALAC,
00578 .priv_data_size = sizeof(AlacEncodeContext),
00579 .init = alac_encode_init,
00580 .encode2 = alac_encode_frame,
00581 .close = alac_encode_close,
00582 .capabilities = CODEC_CAP_SMALL_LAST_FRAME,
00583 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
00584 AV_SAMPLE_FMT_NONE },
00585 .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
00586 };