00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #define RC_VARIANCE 1 // use variance or ssd for fast rc
00026
00027 #include "avcodec.h"
00028 #include "dsputil.h"
00029 #include "mpegvideo.h"
00030 #include "dnxhdenc.h"
00031
00032 int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
00033
00034 #define LAMBDA_FRAC_BITS 10
00035
00036 static av_always_inline void dnxhd_get_pixels_8x4(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
00037 {
00038 int i;
00039 for (i = 0; i < 4; i++) {
00040 block[0] = pixels[0]; block[1] = pixels[1];
00041 block[2] = pixels[2]; block[3] = pixels[3];
00042 block[4] = pixels[4]; block[5] = pixels[5];
00043 block[6] = pixels[6]; block[7] = pixels[7];
00044 pixels += line_size;
00045 block += 8;
00046 }
00047 memcpy(block , block- 8, sizeof(*block)*8);
00048 memcpy(block+ 8, block-16, sizeof(*block)*8);
00049 memcpy(block+16, block-24, sizeof(*block)*8);
00050 memcpy(block+24, block-32, sizeof(*block)*8);
00051 }
00052
00053 static int dnxhd_init_vlc(DNXHDEncContext *ctx)
00054 {
00055 int i, j, level, run;
00056 int max_level = 1<<(ctx->cid_table->bit_depth+2);
00057
00058 CHECKED_ALLOCZ(ctx->vlc_codes, max_level*4*sizeof(*ctx->vlc_codes));
00059 CHECKED_ALLOCZ(ctx->vlc_bits, max_level*4*sizeof(*ctx->vlc_bits));
00060 CHECKED_ALLOCZ(ctx->run_codes, 63*2);
00061 CHECKED_ALLOCZ(ctx->run_bits, 63);
00062
00063 ctx->vlc_codes += max_level*2;
00064 ctx->vlc_bits += max_level*2;
00065 for (level = -max_level; level < max_level; level++) {
00066 for (run = 0; run < 2; run++) {
00067 int index = (level<<1)|run;
00068 int sign, offset = 0, alevel = level;
00069
00070 MASK_ABS(sign, alevel);
00071 if (alevel > 64) {
00072 offset = (alevel-1)>>6;
00073 alevel -= offset<<6;
00074 }
00075 for (j = 0; j < 257; j++) {
00076 if (ctx->cid_table->ac_level[j] == alevel &&
00077 (!offset || (ctx->cid_table->ac_index_flag[j] && offset)) &&
00078 (!run || (ctx->cid_table->ac_run_flag [j] && run))) {
00079 assert(!ctx->vlc_codes[index]);
00080 if (alevel) {
00081 ctx->vlc_codes[index] = (ctx->cid_table->ac_codes[j]<<1)|(sign&1);
00082 ctx->vlc_bits [index] = ctx->cid_table->ac_bits[j]+1;
00083 } else {
00084 ctx->vlc_codes[index] = ctx->cid_table->ac_codes[j];
00085 ctx->vlc_bits [index] = ctx->cid_table->ac_bits [j];
00086 }
00087 break;
00088 }
00089 }
00090 assert(!alevel || j < 257);
00091 if (offset) {
00092 ctx->vlc_codes[index] = (ctx->vlc_codes[index]<<ctx->cid_table->index_bits)|offset;
00093 ctx->vlc_bits [index]+= ctx->cid_table->index_bits;
00094 }
00095 }
00096 }
00097 for (i = 0; i < 62; i++) {
00098 int run = ctx->cid_table->run[i];
00099 assert(run < 63);
00100 ctx->run_codes[run] = ctx->cid_table->run_codes[i];
00101 ctx->run_bits [run] = ctx->cid_table->run_bits[i];
00102 }
00103 return 0;
00104 fail:
00105 return -1;
00106 }
00107
00108 static int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias)
00109 {
00110
00111 uint16_t weight_matrix[64] = {1,};
00112 int qscale, i;
00113
00114 CHECKED_ALLOCZ(ctx->qmatrix_l, (ctx->m.avctx->qmax+1) * 64 * sizeof(int));
00115 CHECKED_ALLOCZ(ctx->qmatrix_c, (ctx->m.avctx->qmax+1) * 64 * sizeof(int));
00116 CHECKED_ALLOCZ(ctx->qmatrix_l16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t));
00117 CHECKED_ALLOCZ(ctx->qmatrix_c16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t));
00118
00119 for (i = 1; i < 64; i++) {
00120 int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
00121 weight_matrix[j] = ctx->cid_table->luma_weight[i];
00122 }
00123 ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_l, ctx->qmatrix_l16, weight_matrix,
00124 ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
00125 for (i = 1; i < 64; i++) {
00126 int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
00127 weight_matrix[j] = ctx->cid_table->chroma_weight[i];
00128 }
00129 ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_c, ctx->qmatrix_c16, weight_matrix,
00130 ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
00131 for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
00132 for (i = 0; i < 64; i++) {
00133 ctx->qmatrix_l [qscale] [i] <<= 2; ctx->qmatrix_c [qscale] [i] <<= 2;
00134 ctx->qmatrix_l16[qscale][0][i] <<= 2; ctx->qmatrix_l16[qscale][1][i] <<= 2;
00135 ctx->qmatrix_c16[qscale][0][i] <<= 2; ctx->qmatrix_c16[qscale][1][i] <<= 2;
00136 }
00137 }
00138 return 0;
00139 fail:
00140 return -1;
00141 }
00142
00143 static int dnxhd_init_rc(DNXHDEncContext *ctx)
00144 {
00145 CHECKED_ALLOCZ(ctx->mb_rc, 8160*ctx->m.avctx->qmax*sizeof(RCEntry));
00146 if (ctx->m.avctx->mb_decision != FF_MB_DECISION_RD)
00147 CHECKED_ALLOCZ(ctx->mb_cmp, ctx->m.mb_num*sizeof(RCCMPEntry));
00148
00149 ctx->frame_bits = (ctx->cid_table->coding_unit_size - 640 - 4) * 8;
00150 ctx->qscale = 1;
00151 ctx->lambda = 2<<LAMBDA_FRAC_BITS;
00152 return 0;
00153 fail:
00154 return -1;
00155 }
00156
00157 static int dnxhd_encode_init(AVCodecContext *avctx)
00158 {
00159 DNXHDEncContext *ctx = avctx->priv_data;
00160 int i, index;
00161
00162 ctx->cid = ff_dnxhd_find_cid(avctx);
00163 if (!ctx->cid || avctx->pix_fmt != PIX_FMT_YUV422P) {
00164 av_log(avctx, AV_LOG_ERROR, "video parameters incompatible with DNxHD\n");
00165 return -1;
00166 }
00167 av_log(avctx, AV_LOG_DEBUG, "cid %d\n", ctx->cid);
00168
00169 index = ff_dnxhd_get_cid_table(ctx->cid);
00170 ctx->cid_table = &ff_dnxhd_cid_table[index];
00171
00172 ctx->m.avctx = avctx;
00173 ctx->m.mb_intra = 1;
00174 ctx->m.h263_aic = 1;
00175
00176 ctx->get_pixels_8x4_sym = dnxhd_get_pixels_8x4;
00177
00178 dsputil_init(&ctx->m.dsp, avctx);
00179 ff_dct_common_init(&ctx->m);
00180 #if HAVE_MMX
00181 ff_dnxhd_init_mmx(ctx);
00182 #endif
00183 if (!ctx->m.dct_quantize)
00184 ctx->m.dct_quantize = dct_quantize_c;
00185
00186 ctx->m.mb_height = (avctx->height + 15) / 16;
00187 ctx->m.mb_width = (avctx->width + 15) / 16;
00188
00189 if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
00190 ctx->interlaced = 1;
00191 ctx->m.mb_height /= 2;
00192 }
00193
00194 ctx->m.mb_num = ctx->m.mb_height * ctx->m.mb_width;
00195
00196 if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
00197 ctx->m.intra_quant_bias = avctx->intra_quant_bias;
00198 if (dnxhd_init_qmat(ctx, ctx->m.intra_quant_bias, 0) < 0)
00199 return -1;
00200
00201 if (dnxhd_init_vlc(ctx) < 0)
00202 return -1;
00203 if (dnxhd_init_rc(ctx) < 0)
00204 return -1;
00205
00206 CHECKED_ALLOCZ(ctx->slice_size, ctx->m.mb_height*sizeof(uint32_t));
00207 CHECKED_ALLOCZ(ctx->mb_bits, ctx->m.mb_num *sizeof(uint16_t));
00208 CHECKED_ALLOCZ(ctx->mb_qscale, ctx->m.mb_num *sizeof(uint8_t));
00209
00210 ctx->frame.key_frame = 1;
00211 ctx->frame.pict_type = FF_I_TYPE;
00212 ctx->m.avctx->coded_frame = &ctx->frame;
00213
00214 if (avctx->thread_count > MAX_THREADS || (avctx->thread_count > ctx->m.mb_height)) {
00215 av_log(avctx, AV_LOG_ERROR, "too many threads\n");
00216 return -1;
00217 }
00218
00219 ctx->thread[0] = ctx;
00220 for (i = 1; i < avctx->thread_count; i++) {
00221 ctx->thread[i] = av_malloc(sizeof(DNXHDEncContext));
00222 memcpy(ctx->thread[i], ctx, sizeof(DNXHDEncContext));
00223 }
00224
00225 for (i = 0; i < avctx->thread_count; i++) {
00226 ctx->thread[i]->m.start_mb_y = (ctx->m.mb_height*(i ) + avctx->thread_count/2) / avctx->thread_count;
00227 ctx->thread[i]->m.end_mb_y = (ctx->m.mb_height*(i+1) + avctx->thread_count/2) / avctx->thread_count;
00228 }
00229
00230 return 0;
00231 fail:
00232 return -1;
00233 }
00234
00235 static int dnxhd_write_header(AVCodecContext *avctx, uint8_t *buf)
00236 {
00237 DNXHDEncContext *ctx = avctx->priv_data;
00238 const uint8_t header_prefix[5] = { 0x00,0x00,0x02,0x80,0x01 };
00239
00240 memcpy(buf, header_prefix, 5);
00241 buf[5] = ctx->interlaced ? ctx->cur_field+2 : 0x01;
00242 buf[6] = 0x80;
00243 buf[7] = 0xa0;
00244 AV_WB16(buf + 0x18, avctx->height);
00245 AV_WB16(buf + 0x1a, avctx->width);
00246 AV_WB16(buf + 0x1d, avctx->height);
00247
00248 buf[0x21] = 0x38;
00249 buf[0x22] = 0x88 + (ctx->frame.interlaced_frame<<2);
00250 AV_WB32(buf + 0x28, ctx->cid);
00251 buf[0x2c] = ctx->interlaced ? 0 : 0x80;
00252
00253 buf[0x5f] = 0x01;
00254
00255 buf[0x167] = 0x02;
00256 AV_WB16(buf + 0x16a, ctx->m.mb_height * 4 + 4);
00257 buf[0x16d] = ctx->m.mb_height;
00258 buf[0x16f] = 0x10;
00259
00260 ctx->msip = buf + 0x170;
00261 return 0;
00262 }
00263
00264 static av_always_inline void dnxhd_encode_dc(DNXHDEncContext *ctx, int diff)
00265 {
00266 int nbits;
00267 if (diff < 0) {
00268 nbits = av_log2_16bit(-2*diff);
00269 diff--;
00270 } else {
00271 nbits = av_log2_16bit(2*diff);
00272 }
00273 put_bits(&ctx->m.pb, ctx->cid_table->dc_bits[nbits] + nbits,
00274 (ctx->cid_table->dc_codes[nbits]<<nbits) + (diff & ((1 << nbits) - 1)));
00275 }
00276
00277 static av_always_inline void dnxhd_encode_block(DNXHDEncContext *ctx, DCTELEM *block, int last_index, int n)
00278 {
00279 int last_non_zero = 0;
00280 int slevel, i, j;
00281
00282 dnxhd_encode_dc(ctx, block[0] - ctx->m.last_dc[n]);
00283 ctx->m.last_dc[n] = block[0];
00284
00285 for (i = 1; i <= last_index; i++) {
00286 j = ctx->m.intra_scantable.permutated[i];
00287 slevel = block[j];
00288 if (slevel) {
00289 int run_level = i - last_non_zero - 1;
00290 int rlevel = (slevel<<1)|!!run_level;
00291 put_bits(&ctx->m.pb, ctx->vlc_bits[rlevel], ctx->vlc_codes[rlevel]);
00292 if (run_level)
00293 put_bits(&ctx->m.pb, ctx->run_bits[run_level], ctx->run_codes[run_level]);
00294 last_non_zero = i;
00295 }
00296 }
00297 put_bits(&ctx->m.pb, ctx->vlc_bits[0], ctx->vlc_codes[0]);
00298 }
00299
00300 static av_always_inline void dnxhd_unquantize_c(DNXHDEncContext *ctx, DCTELEM *block, int n, int qscale, int last_index)
00301 {
00302 const uint8_t *weight_matrix;
00303 int level;
00304 int i;
00305
00306 weight_matrix = (n&2) ? ctx->cid_table->chroma_weight : ctx->cid_table->luma_weight;
00307
00308 for (i = 1; i <= last_index; i++) {
00309 int j = ctx->m.intra_scantable.permutated[i];
00310 level = block[j];
00311 if (level) {
00312 if (level < 0) {
00313 level = (1-2*level) * qscale * weight_matrix[i];
00314 if (weight_matrix[i] != 32)
00315 level += 32;
00316 level >>= 6;
00317 level = -level;
00318 } else {
00319 level = (2*level+1) * qscale * weight_matrix[i];
00320 if (weight_matrix[i] != 32)
00321 level += 32;
00322 level >>= 6;
00323 }
00324 block[j] = level;
00325 }
00326 }
00327 }
00328
00329 static av_always_inline int dnxhd_ssd_block(DCTELEM *qblock, DCTELEM *block)
00330 {
00331 int score = 0;
00332 int i;
00333 for (i = 0; i < 64; i++)
00334 score += (block[i]-qblock[i])*(block[i]-qblock[i]);
00335 return score;
00336 }
00337
00338 static av_always_inline int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, DCTELEM *block, int last_index)
00339 {
00340 int last_non_zero = 0;
00341 int bits = 0;
00342 int i, j, level;
00343 for (i = 1; i <= last_index; i++) {
00344 j = ctx->m.intra_scantable.permutated[i];
00345 level = block[j];
00346 if (level) {
00347 int run_level = i - last_non_zero - 1;
00348 bits += ctx->vlc_bits[(level<<1)|!!run_level]+ctx->run_bits[run_level];
00349 last_non_zero = i;
00350 }
00351 }
00352 return bits;
00353 }
00354
00355 static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
00356 {
00357 const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize) + (mb_x << 4);
00358 const uint8_t *ptr_u = ctx->thread[0]->src[1] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
00359 const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
00360 DSPContext *dsp = &ctx->m.dsp;
00361
00362 dsp->get_pixels(ctx->blocks[0], ptr_y , ctx->m.linesize);
00363 dsp->get_pixels(ctx->blocks[1], ptr_y + 8, ctx->m.linesize);
00364 dsp->get_pixels(ctx->blocks[2], ptr_u , ctx->m.uvlinesize);
00365 dsp->get_pixels(ctx->blocks[3], ptr_v , ctx->m.uvlinesize);
00366
00367 if (mb_y+1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) {
00368 if (ctx->interlaced) {
00369 ctx->get_pixels_8x4_sym(ctx->blocks[4], ptr_y + ctx->dct_y_offset , ctx->m.linesize);
00370 ctx->get_pixels_8x4_sym(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
00371 ctx->get_pixels_8x4_sym(ctx->blocks[6], ptr_u + ctx->dct_uv_offset , ctx->m.uvlinesize);
00372 ctx->get_pixels_8x4_sym(ctx->blocks[7], ptr_v + ctx->dct_uv_offset , ctx->m.uvlinesize);
00373 } else {
00374 dsp->clear_block(ctx->blocks[4]); dsp->clear_block(ctx->blocks[5]);
00375 dsp->clear_block(ctx->blocks[6]); dsp->clear_block(ctx->blocks[7]);
00376 }
00377 } else {
00378 dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset , ctx->m.linesize);
00379 dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
00380 dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset , ctx->m.uvlinesize);
00381 dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset , ctx->m.uvlinesize);
00382 }
00383 }
00384
00385 static av_always_inline int dnxhd_switch_matrix(DNXHDEncContext *ctx, int i)
00386 {
00387 if (i&2) {
00388 ctx->m.q_intra_matrix16 = ctx->qmatrix_c16;
00389 ctx->m.q_intra_matrix = ctx->qmatrix_c;
00390 return 1 + (i&1);
00391 } else {
00392 ctx->m.q_intra_matrix16 = ctx->qmatrix_l16;
00393 ctx->m.q_intra_matrix = ctx->qmatrix_l;
00394 return 0;
00395 }
00396 }
00397
00398 static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg)
00399 {
00400 DNXHDEncContext *ctx = *(void**)arg;
00401 int mb_y, mb_x;
00402 int qscale = ctx->thread[0]->qscale;
00403
00404 for (mb_y = ctx->m.start_mb_y; mb_y < ctx->m.end_mb_y; mb_y++) {
00405 ctx->m.last_dc[0] =
00406 ctx->m.last_dc[1] =
00407 ctx->m.last_dc[2] = 1024;
00408
00409 for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
00410 unsigned mb = mb_y * ctx->m.mb_width + mb_x;
00411 int ssd = 0;
00412 int ac_bits = 0;
00413 int dc_bits = 0;
00414 int i;
00415
00416 dnxhd_get_blocks(ctx, mb_x, mb_y);
00417
00418 for (i = 0; i < 8; i++) {
00419 DECLARE_ALIGNED_16(DCTELEM, block[64]);
00420 DCTELEM *src_block = ctx->blocks[i];
00421 int overflow, nbits, diff, last_index;
00422 int n = dnxhd_switch_matrix(ctx, i);
00423
00424 memcpy(block, src_block, sizeof(block));
00425 last_index = ctx->m.dct_quantize((MpegEncContext*)ctx, block, i, qscale, &overflow);
00426 ac_bits += dnxhd_calc_ac_bits(ctx, block, last_index);
00427
00428 diff = block[0] - ctx->m.last_dc[n];
00429 if (diff < 0) nbits = av_log2_16bit(-2*diff);
00430 else nbits = av_log2_16bit( 2*diff);
00431 dc_bits += ctx->cid_table->dc_bits[nbits] + nbits;
00432
00433 ctx->m.last_dc[n] = block[0];
00434
00435 if (avctx->mb_decision == FF_MB_DECISION_RD || !RC_VARIANCE) {
00436 dnxhd_unquantize_c(ctx, block, i, qscale, last_index);
00437 ctx->m.dsp.idct(block);
00438 ssd += dnxhd_ssd_block(block, src_block);
00439 }
00440 }
00441 ctx->mb_rc[qscale][mb].ssd = ssd;
00442 ctx->mb_rc[qscale][mb].bits = ac_bits+dc_bits+12+8*ctx->vlc_bits[0];
00443 }
00444 }
00445 return 0;
00446 }
00447
00448 static int dnxhd_encode_thread(AVCodecContext *avctx, void *arg)
00449 {
00450 DNXHDEncContext *ctx = *(void**)arg;
00451 int mb_y, mb_x;
00452
00453 for (mb_y = ctx->m.start_mb_y; mb_y < ctx->m.end_mb_y; mb_y++) {
00454 ctx->m.last_dc[0] =
00455 ctx->m.last_dc[1] =
00456 ctx->m.last_dc[2] = 1024;
00457 for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
00458 unsigned mb = mb_y * ctx->m.mb_width + mb_x;
00459 int qscale = ctx->mb_qscale[mb];
00460 int i;
00461
00462 put_bits(&ctx->m.pb, 12, qscale<<1);
00463
00464 dnxhd_get_blocks(ctx, mb_x, mb_y);
00465
00466 for (i = 0; i < 8; i++) {
00467 DCTELEM *block = ctx->blocks[i];
00468 int last_index, overflow;
00469 int n = dnxhd_switch_matrix(ctx, i);
00470 last_index = ctx->m.dct_quantize((MpegEncContext*)ctx, block, i, qscale, &overflow);
00471
00472 dnxhd_encode_block(ctx, block, last_index, n);
00473
00474 }
00475 }
00476 if (put_bits_count(&ctx->m.pb)&31)
00477 put_bits(&ctx->m.pb, 32-(put_bits_count(&ctx->m.pb)&31), 0);
00478 }
00479 flush_put_bits(&ctx->m.pb);
00480 return 0;
00481 }
00482
00483 static void dnxhd_setup_threads_slices(DNXHDEncContext *ctx, uint8_t *buf)
00484 {
00485 int mb_y, mb_x;
00486 int i, offset = 0;
00487 for (i = 0; i < ctx->m.avctx->thread_count; i++) {
00488 int thread_size = 0;
00489 for (mb_y = ctx->thread[i]->m.start_mb_y; mb_y < ctx->thread[i]->m.end_mb_y; mb_y++) {
00490 ctx->slice_size[mb_y] = 0;
00491 for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
00492 unsigned mb = mb_y * ctx->m.mb_width + mb_x;
00493 ctx->slice_size[mb_y] += ctx->mb_bits[mb];
00494 }
00495 ctx->slice_size[mb_y] = (ctx->slice_size[mb_y]+31)&~31;
00496 ctx->slice_size[mb_y] >>= 3;
00497 thread_size += ctx->slice_size[mb_y];
00498 }
00499 init_put_bits(&ctx->thread[i]->m.pb, buf + 640 + offset, thread_size);
00500 offset += thread_size;
00501 }
00502 }
00503
00504 static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg)
00505 {
00506 DNXHDEncContext *ctx = *(void**)arg;
00507 int mb_y, mb_x;
00508 for (mb_y = ctx->m.start_mb_y; mb_y < ctx->m.end_mb_y; mb_y++) {
00509 for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
00510 unsigned mb = mb_y * ctx->m.mb_width + mb_x;
00511 uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize) + (mb_x<<4);
00512 int sum = ctx->m.dsp.pix_sum(pix, ctx->m.linesize);
00513 int varc = (ctx->m.dsp.pix_norm1(pix, ctx->m.linesize) - (((unsigned)(sum*sum))>>8)+128)>>8;
00514 ctx->mb_cmp[mb].value = varc;
00515 ctx->mb_cmp[mb].mb = mb;
00516 }
00517 }
00518 return 0;
00519 }
00520
00521 static int dnxhd_encode_rdo(AVCodecContext *avctx, DNXHDEncContext *ctx)
00522 {
00523 int lambda, up_step, down_step;
00524 int last_lower = INT_MAX, last_higher = 0;
00525 int x, y, q;
00526
00527 for (q = 1; q < avctx->qmax; q++) {
00528 ctx->qscale = q;
00529 avctx->execute(avctx, dnxhd_calc_bits_thread, (void**)&ctx->thread[0], NULL, avctx->thread_count, sizeof(void*));
00530 }
00531 up_step = down_step = 2<<LAMBDA_FRAC_BITS;
00532 lambda = ctx->lambda;
00533
00534 for (;;) {
00535 int bits = 0;
00536 int end = 0;
00537 if (lambda == last_higher) {
00538 lambda++;
00539 end = 1;
00540 }
00541 for (y = 0; y < ctx->m.mb_height; y++) {
00542 for (x = 0; x < ctx->m.mb_width; x++) {
00543 unsigned min = UINT_MAX;
00544 int qscale = 1;
00545 int mb = y*ctx->m.mb_width+x;
00546 for (q = 1; q < avctx->qmax; q++) {
00547 unsigned score = ctx->mb_rc[q][mb].bits*lambda+(ctx->mb_rc[q][mb].ssd<<LAMBDA_FRAC_BITS);
00548 if (score < min) {
00549 min = score;
00550 qscale = q;
00551 }
00552 }
00553 bits += ctx->mb_rc[qscale][mb].bits;
00554 ctx->mb_qscale[mb] = qscale;
00555 ctx->mb_bits[mb] = ctx->mb_rc[qscale][mb].bits;
00556 }
00557 bits = (bits+31)&~31;
00558 if (bits > ctx->frame_bits)
00559 break;
00560 }
00561
00562
00563 if (end) {
00564 if (bits > ctx->frame_bits)
00565 return -1;
00566 break;
00567 }
00568 if (bits < ctx->frame_bits) {
00569 last_lower = FFMIN(lambda, last_lower);
00570 if (last_higher != 0)
00571 lambda = (lambda+last_higher)>>1;
00572 else
00573 lambda -= down_step;
00574 down_step *= 5;
00575 up_step = 1<<LAMBDA_FRAC_BITS;
00576 lambda = FFMAX(1, lambda);
00577 if (lambda == last_lower)
00578 break;
00579 } else {
00580 last_higher = FFMAX(lambda, last_higher);
00581 if (last_lower != INT_MAX)
00582 lambda = (lambda+last_lower)>>1;
00583 else
00584 lambda += up_step;
00585 up_step *= 5;
00586 down_step = 1<<LAMBDA_FRAC_BITS;
00587 }
00588 }
00589
00590 ctx->lambda = lambda;
00591 return 0;
00592 }
00593
00594 static int dnxhd_find_qscale(DNXHDEncContext *ctx)
00595 {
00596 int bits = 0;
00597 int up_step = 1;
00598 int down_step = 1;
00599 int last_higher = 0;
00600 int last_lower = INT_MAX;
00601 int qscale;
00602 int x, y;
00603
00604 qscale = ctx->qscale;
00605 for (;;) {
00606 bits = 0;
00607 ctx->qscale = qscale;
00608
00609 ctx->m.avctx->execute(ctx->m.avctx, dnxhd_calc_bits_thread, (void**)&ctx->thread[0], NULL, ctx->m.avctx->thread_count, sizeof(void*));
00610 for (y = 0; y < ctx->m.mb_height; y++) {
00611 for (x = 0; x < ctx->m.mb_width; x++)
00612 bits += ctx->mb_rc[qscale][y*ctx->m.mb_width+x].bits;
00613 bits = (bits+31)&~31;
00614 if (bits > ctx->frame_bits)
00615 break;
00616 }
00617
00618
00619 if (bits < ctx->frame_bits) {
00620 if (qscale == 1)
00621 return 1;
00622 if (last_higher == qscale - 1) {
00623 qscale = last_higher;
00624 break;
00625 }
00626 last_lower = FFMIN(qscale, last_lower);
00627 if (last_higher != 0)
00628 qscale = (qscale+last_higher)>>1;
00629 else
00630 qscale -= down_step++;
00631 if (qscale < 1)
00632 qscale = 1;
00633 up_step = 1;
00634 } else {
00635 if (last_lower == qscale + 1)
00636 break;
00637 last_higher = FFMAX(qscale, last_higher);
00638 if (last_lower != INT_MAX)
00639 qscale = (qscale+last_lower)>>1;
00640 else
00641 qscale += up_step++;
00642 down_step = 1;
00643 if (qscale >= ctx->m.avctx->qmax)
00644 return -1;
00645 }
00646 }
00647
00648 ctx->qscale = qscale;
00649 return 0;
00650 }
00651
00652 static int dnxhd_rc_cmp(const void *a, const void *b)
00653 {
00654 return ((const RCCMPEntry *)b)->value - ((const RCCMPEntry *)a)->value;
00655 }
00656
00657 static int dnxhd_encode_fast(AVCodecContext *avctx, DNXHDEncContext *ctx)
00658 {
00659 int max_bits = 0;
00660 int ret, x, y;
00661 if ((ret = dnxhd_find_qscale(ctx)) < 0)
00662 return -1;
00663 for (y = 0; y < ctx->m.mb_height; y++) {
00664 for (x = 0; x < ctx->m.mb_width; x++) {
00665 int mb = y*ctx->m.mb_width+x;
00666 int delta_bits;
00667 ctx->mb_qscale[mb] = ctx->qscale;
00668 ctx->mb_bits[mb] = ctx->mb_rc[ctx->qscale][mb].bits;
00669 max_bits += ctx->mb_rc[ctx->qscale][mb].bits;
00670 if (!RC_VARIANCE) {
00671 delta_bits = ctx->mb_rc[ctx->qscale][mb].bits-ctx->mb_rc[ctx->qscale+1][mb].bits;
00672 ctx->mb_cmp[mb].mb = mb;
00673 ctx->mb_cmp[mb].value = delta_bits ?
00674 ((ctx->mb_rc[ctx->qscale][mb].ssd-ctx->mb_rc[ctx->qscale+1][mb].ssd)*100)/delta_bits
00675 : INT_MIN;
00676 }
00677 }
00678 max_bits += 31;
00679 }
00680 if (!ret) {
00681 if (RC_VARIANCE)
00682 avctx->execute(avctx, dnxhd_mb_var_thread, (void**)&ctx->thread[0], NULL, avctx->thread_count, sizeof(void*));
00683 qsort(ctx->mb_cmp, ctx->m.mb_num, sizeof(RCEntry), dnxhd_rc_cmp);
00684 for (x = 0; x < ctx->m.mb_num && max_bits > ctx->frame_bits; x++) {
00685 int mb = ctx->mb_cmp[x].mb;
00686 max_bits -= ctx->mb_rc[ctx->qscale][mb].bits - ctx->mb_rc[ctx->qscale+1][mb].bits;
00687 ctx->mb_qscale[mb] = ctx->qscale+1;
00688 ctx->mb_bits[mb] = ctx->mb_rc[ctx->qscale+1][mb].bits;
00689 }
00690 }
00691 return 0;
00692 }
00693
00694 static void dnxhd_load_picture(DNXHDEncContext *ctx, const AVFrame *frame)
00695 {
00696 int i;
00697
00698 for (i = 0; i < 3; i++) {
00699 ctx->frame.data[i] = frame->data[i];
00700 ctx->frame.linesize[i] = frame->linesize[i];
00701 }
00702
00703 for (i = 0; i < ctx->m.avctx->thread_count; i++) {
00704 ctx->thread[i]->m.linesize = ctx->frame.linesize[0]<<ctx->interlaced;
00705 ctx->thread[i]->m.uvlinesize = ctx->frame.linesize[1]<<ctx->interlaced;
00706 ctx->thread[i]->dct_y_offset = ctx->m.linesize *8;
00707 ctx->thread[i]->dct_uv_offset = ctx->m.uvlinesize*8;
00708 }
00709
00710 ctx->frame.interlaced_frame = frame->interlaced_frame;
00711 ctx->cur_field = frame->interlaced_frame && !frame->top_field_first;
00712 }
00713
00714 static int dnxhd_encode_picture(AVCodecContext *avctx, unsigned char *buf, int buf_size, const void *data)
00715 {
00716 DNXHDEncContext *ctx = avctx->priv_data;
00717 int first_field = 1;
00718 int offset, i, ret;
00719
00720 if (buf_size < ctx->cid_table->frame_size) {
00721 av_log(avctx, AV_LOG_ERROR, "output buffer is too small to compress picture\n");
00722 return -1;
00723 }
00724
00725 dnxhd_load_picture(ctx, data);
00726
00727 encode_coding_unit:
00728 for (i = 0; i < 3; i++) {
00729 ctx->src[i] = ctx->frame.data[i];
00730 if (ctx->interlaced && ctx->cur_field)
00731 ctx->src[i] += ctx->frame.linesize[i];
00732 }
00733
00734 dnxhd_write_header(avctx, buf);
00735
00736 if (avctx->mb_decision == FF_MB_DECISION_RD)
00737 ret = dnxhd_encode_rdo(avctx, ctx);
00738 else
00739 ret = dnxhd_encode_fast(avctx, ctx);
00740 if (ret < 0) {
00741 av_log(avctx, AV_LOG_ERROR, "picture could not fit ratecontrol constraints\n");
00742 return -1;
00743 }
00744
00745 dnxhd_setup_threads_slices(ctx, buf);
00746
00747 offset = 0;
00748 for (i = 0; i < ctx->m.mb_height; i++) {
00749 AV_WB32(ctx->msip + i * 4, offset);
00750 offset += ctx->slice_size[i];
00751 assert(!(ctx->slice_size[i] & 3));
00752 }
00753
00754 avctx->execute(avctx, dnxhd_encode_thread, (void**)&ctx->thread[0], NULL, avctx->thread_count, sizeof(void*));
00755
00756 AV_WB32(buf + ctx->cid_table->coding_unit_size - 4, 0x600DC0DE);
00757
00758 if (ctx->interlaced && first_field) {
00759 first_field = 0;
00760 ctx->cur_field ^= 1;
00761 buf += ctx->cid_table->coding_unit_size;
00762 buf_size -= ctx->cid_table->coding_unit_size;
00763 goto encode_coding_unit;
00764 }
00765
00766 ctx->frame.quality = ctx->qscale*FF_QP2LAMBDA;
00767
00768 return ctx->cid_table->frame_size;
00769 }
00770
00771 static int dnxhd_encode_end(AVCodecContext *avctx)
00772 {
00773 DNXHDEncContext *ctx = avctx->priv_data;
00774 int max_level = 1<<(ctx->cid_table->bit_depth+2);
00775 int i;
00776
00777 av_free(ctx->vlc_codes-max_level*2);
00778 av_free(ctx->vlc_bits -max_level*2);
00779 av_freep(&ctx->run_codes);
00780 av_freep(&ctx->run_bits);
00781
00782 av_freep(&ctx->mb_bits);
00783 av_freep(&ctx->mb_qscale);
00784 av_freep(&ctx->mb_rc);
00785 av_freep(&ctx->mb_cmp);
00786 av_freep(&ctx->slice_size);
00787
00788 av_freep(&ctx->qmatrix_c);
00789 av_freep(&ctx->qmatrix_l);
00790 av_freep(&ctx->qmatrix_c16);
00791 av_freep(&ctx->qmatrix_l16);
00792
00793 for (i = 1; i < avctx->thread_count; i++)
00794 av_freep(&ctx->thread[i]);
00795
00796 return 0;
00797 }
00798
00799 AVCodec dnxhd_encoder = {
00800 "dnxhd",
00801 CODEC_TYPE_VIDEO,
00802 CODEC_ID_DNXHD,
00803 sizeof(DNXHDEncContext),
00804 dnxhd_encode_init,
00805 dnxhd_encode_picture,
00806 dnxhd_encode_end,
00807 .pix_fmts = (enum PixelFormat[]){PIX_FMT_YUV422P, PIX_FMT_NONE},
00808 .long_name = NULL_IF_CONFIG_SMALL("VC3/DNxHD"),
00809 };