00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
/*
 * Per-inclusion naming and pixel-size helpers.
 *
 * FUNC(n) gives every function generated from this template a name that is
 * unique to the current SIMPLE/BITS configuration; PIXEL_SHIFT is the shift
 * applied to sample offsets (BITS >> 4, i.e. 0 when BITS is 8 and 1 when
 * BITS is 16, in the SIMPLE build).
 * Both are reset first; presumably because this file is included more than
 * once per translation unit -- confirm against the including file.
 */
#undef FUNC
#undef PIXEL_SHIFT

#if !SIMPLE
/* Generic variant: "_complex" suffix, shift read from the context `h`
 * that must be in scope wherever PIXEL_SHIFT is expanded. */
#   define FUNC(n) n ## _complex
#   define PIXEL_SHIFT h->pixel_shift
#else
/* Specialised variant: name suffixed with "_simple_" and BITS, shift is a
 * compile-time constant derived from BITS. */
#   define FUNC(n) AV_JOIN(n ## _simple_, BITS)
#   define PIXEL_SHIFT (BITS >> 4)
#endif
00032 
00033 #undef  CHROMA_IDC
00034 #define CHROMA_IDC 1
00035 #include "h264_mc_template.c"
00036 
00037 #undef  CHROMA_IDC
00038 #define CHROMA_IDC 2
00039 #include "h264_mc_template.c"
00040 
00041 static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
00042 {
00043     MpegEncContext *const s = &h->s;
00044     const int mb_x    = s->mb_x;
00045     const int mb_y    = s->mb_y;
00046     const int mb_xy   = h->mb_xy;
00047     const int mb_type = s->current_picture.f.mb_type[mb_xy];
00048     uint8_t *dest_y, *dest_cb, *dest_cr;
00049     int linesize, uvlinesize ;
00050     int i, j;
00051     int *block_offset = &h->block_offset[0];
00052     const int transform_bypass = !SIMPLE && (s->qscale == 0 && h->sps.transform_bypass);
00053     
00054     const int is_h264 = !CONFIG_SVQ3_DECODER || SIMPLE || s->codec_id == AV_CODEC_ID_H264;
00055     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
00056     const int block_h   = 16 >> s->chroma_y_shift;
00057     const int chroma422 = CHROMA422;
00058 
00059     dest_y  = s->current_picture.f.data[0] + ((mb_x << PIXEL_SHIFT)     + mb_y * s->linesize)  * 16;
00060     dest_cb = s->current_picture.f.data[1] +  (mb_x << PIXEL_SHIFT) * 8 + mb_y * s->uvlinesize * block_h;
00061     dest_cr = s->current_picture.f.data[2] +  (mb_x << PIXEL_SHIFT) * 8 + mb_y * s->uvlinesize * block_h;
00062 
00063     s->dsp.prefetch(dest_y  + (s->mb_x & 3) * 4 * s->linesize   + (64 << PIXEL_SHIFT), s->linesize,       4);
00064     s->dsp.prefetch(dest_cb + (s->mb_x & 7)     * s->uvlinesize + (64 << PIXEL_SHIFT), dest_cr - dest_cb, 2);
00065 
00066     h->list_counts[mb_xy] = h->list_count;
00067 
00068     if (!SIMPLE && MB_FIELD) {
00069         linesize     = h->mb_linesize = s->linesize * 2;
00070         uvlinesize   = h->mb_uvlinesize = s->uvlinesize * 2;
00071         block_offset = &h->block_offset[48];
00072         if (mb_y & 1) { 
00073             dest_y  -= s->linesize * 15;
00074             dest_cb -= s->uvlinesize * (block_h - 1);
00075             dest_cr -= s->uvlinesize * (block_h - 1);
00076         }
00077         if (FRAME_MBAFF) {
00078             int list;
00079             for (list = 0; list < h->list_count; list++) {
00080                 if (!USES_LIST(mb_type, list))
00081                     continue;
00082                 if (IS_16X16(mb_type)) {
00083                     int8_t *ref = &h->ref_cache[list][scan8[0]];
00084                     fill_rectangle(ref, 4, 4, 8, (16 + *ref) ^ (s->mb_y & 1), 1);
00085                 } else {
00086                     for (i = 0; i < 16; i += 4) {
00087                         int ref = h->ref_cache[list][scan8[i]];
00088                         if (ref >= 0)
00089                             fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2,
00090                                            8, (16 + ref) ^ (s->mb_y & 1), 1);
00091                     }
00092                 }
00093             }
00094         }
00095     } else {
00096         linesize   = h->mb_linesize   = s->linesize;
00097         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
00098         
00099     }
00100 
00101     if (!SIMPLE && IS_INTRA_PCM(mb_type)) {
00102         const int bit_depth = h->sps.bit_depth_luma;
00103         if (PIXEL_SHIFT) {
00104             int j;
00105             GetBitContext gb;
00106             init_get_bits(&gb, (uint8_t *)h->mb,
00107                           ff_h264_mb_sizes[h->sps.chroma_format_idc] * bit_depth);
00108 
00109             for (i = 0; i < 16; i++) {
00110                 uint16_t *tmp_y = (uint16_t *)(dest_y + i * linesize);
00111                 for (j = 0; j < 16; j++)
00112                     tmp_y[j] = get_bits(&gb, bit_depth);
00113             }
00114             if (SIMPLE || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
00115                 if (!h->sps.chroma_format_idc) {
00116                     for (i = 0; i < block_h; i++) {
00117                         uint16_t *tmp_cb = (uint16_t *)(dest_cb + i * uvlinesize);
00118                         uint16_t *tmp_cr = (uint16_t *)(dest_cr + i * uvlinesize);
00119                         for (j = 0; j < 8; j++) {
00120                             tmp_cb[j] = tmp_cr[j] = 1 << (bit_depth - 1);
00121                         }
00122                     }
00123                 } else {
00124                     for (i = 0; i < block_h; i++) {
00125                         uint16_t *tmp_cb = (uint16_t *)(dest_cb + i * uvlinesize);
00126                         for (j = 0; j < 8; j++)
00127                             tmp_cb[j] = get_bits(&gb, bit_depth);
00128                     }
00129                     for (i = 0; i < block_h; i++) {
00130                         uint16_t *tmp_cr = (uint16_t *)(dest_cr + i * uvlinesize);
00131                         for (j = 0; j < 8; j++)
00132                             tmp_cr[j] = get_bits(&gb, bit_depth);
00133                     }
00134                 }
00135             }
00136         } else {
00137             for (i = 0; i < 16; i++)
00138                 memcpy(dest_y + i * linesize, (uint8_t *)h->mb + i * 16, 16);
00139             if (SIMPLE || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
00140                 if (!h->sps.chroma_format_idc) {
00141                     for (i = 0; i < 8; i++) {
00142                         memset(dest_cb + i*uvlinesize, 1 << (bit_depth - 1), 8);
00143                         memset(dest_cr + i*uvlinesize, 1 << (bit_depth - 1), 8);
00144                     }
00145                 } else {
00146                     uint8_t *src_cb = (uint8_t *)h->mb + 256;
00147                     uint8_t *src_cr = (uint8_t *)h->mb + 256 + block_h * 8;
00148                     for (i = 0; i < block_h; i++) {
00149                         memcpy(dest_cb + i * uvlinesize, src_cb + i * 8, 8);
00150                         memcpy(dest_cr + i * uvlinesize, src_cr + i * 8, 8);
00151                     }
00152                 }
00153             }
00154         }
00155     } else {
00156         if (IS_INTRA(mb_type)) {
00157             if (h->deblocking_filter)
00158                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize,
00159                                uvlinesize, 1, 0, SIMPLE, PIXEL_SHIFT);
00160 
00161             if (SIMPLE || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
00162                 if (CHROMA) {
00163                 h->hpc.pred8x8[h->chroma_pred_mode](dest_cb, uvlinesize);
00164                 h->hpc.pred8x8[h->chroma_pred_mode](dest_cr, uvlinesize);
00165                 }
00166             }
00167 
00168             hl_decode_mb_predict_luma(h, mb_type, is_h264, SIMPLE,
00169                                       transform_bypass, PIXEL_SHIFT,
00170                                       block_offset, linesize, dest_y, 0);
00171 
00172             if (h->deblocking_filter)
00173                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize,
00174                                uvlinesize, 0, 0, SIMPLE, PIXEL_SHIFT);
00175         } else if (is_h264) {
00176             if (chroma422) {
00177                 FUNC(hl_motion_422)(h, dest_y, dest_cb, dest_cr,
00178                               s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
00179                               s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
00180                               h->h264dsp.weight_h264_pixels_tab,
00181                               h->h264dsp.biweight_h264_pixels_tab);
00182             } else {
00183                 FUNC(hl_motion_420)(h, dest_y, dest_cb, dest_cr,
00184                               s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
00185                               s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
00186                               h->h264dsp.weight_h264_pixels_tab,
00187                               h->h264dsp.biweight_h264_pixels_tab);
00188             }
00189         }
00190 
00191         hl_decode_mb_idct_luma(h, mb_type, is_h264, SIMPLE, transform_bypass,
00192                                PIXEL_SHIFT, block_offset, linesize, dest_y, 0);
00193 
00194         if ((SIMPLE || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) &&
00195             (h->cbp & 0x30)) {
00196             uint8_t *dest[2] = { dest_cb, dest_cr };
00197             if (transform_bypass) {
00198                 if (IS_INTRA(mb_type) && h->sps.profile_idc == 244 &&
00199                     (h->chroma_pred_mode == VERT_PRED8x8 ||
00200                      h->chroma_pred_mode == HOR_PRED8x8)) {
00201                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0],
00202                                                             block_offset + 16,
00203                                                             h->mb + (16 * 16 * 1 << PIXEL_SHIFT),
00204                                                             uvlinesize);
00205                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1],
00206                                                             block_offset + 32,
00207                                                             h->mb + (16 * 16 * 2 << PIXEL_SHIFT),
00208                                                             uvlinesize);
00209                 } else {
00210                     idct_add = s->dsp.add_pixels4;
00211                     for (j = 1; j < 3; j++) {
00212                         for (i = j * 16; i < j * 16 + 4; i++)
00213                             if (h->non_zero_count_cache[scan8[i]] ||
00214                                 dctcoef_get(h->mb, PIXEL_SHIFT, i * 16))
00215                                 idct_add(dest[j - 1] + block_offset[i],
00216                                          h->mb + (i * 16 << PIXEL_SHIFT),
00217                                          uvlinesize);
00218                         if (chroma422) {
00219                             for (i = j * 16 + 4; i < j * 16 + 8; i++)
00220                                 if (h->non_zero_count_cache[scan8[i + 4]] ||
00221                                     dctcoef_get(h->mb, PIXEL_SHIFT, i * 16))
00222                                     idct_add(dest[j - 1] + block_offset[i + 4],
00223                                              h->mb + (i * 16 << PIXEL_SHIFT),
00224                                              uvlinesize);
00225                         }
00226                     }
00227                 }
00228             } else {
00229                 if (is_h264) {
00230                     int qp[2];
00231                     if (chroma422) {
00232                         qp[0] = h->chroma_qp[0] + 3;
00233                         qp[1] = h->chroma_qp[1] + 3;
00234                     } else {
00235                         qp[0] = h->chroma_qp[0];
00236                         qp[1] = h->chroma_qp[1];
00237                     }
00238                     if (h->non_zero_count_cache[scan8[CHROMA_DC_BLOCK_INDEX + 0]])
00239                         h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16 * 16 * 1 << PIXEL_SHIFT),
00240                                                                h->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][qp[0]][0]);
00241                     if (h->non_zero_count_cache[scan8[CHROMA_DC_BLOCK_INDEX + 1]])
00242                         h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16 * 16 * 2 << PIXEL_SHIFT),
00243                                                                h->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][qp[1]][0]);
00244                     h->h264dsp.h264_idct_add8(dest, block_offset,
00245                                               h->mb, uvlinesize,
00246                                               h->non_zero_count_cache);
00247                 } else if (CONFIG_SVQ3_DECODER) {
00248                     h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16 * 16 * 1,
00249                                                            h->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][h->chroma_qp[0]][0]);
00250                     h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16 * 16 * 2,
00251                                                            h->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][h->chroma_qp[1]][0]);
00252                     for (j = 1; j < 3; j++) {
00253                         for (i = j * 16; i < j * 16 + 4; i++)
00254                             if (h->non_zero_count_cache[scan8[i]] || h->mb[i * 16]) {
00255                                 uint8_t *const ptr = dest[j - 1] + block_offset[i];
00256                                 ff_svq3_add_idct_c(ptr, h->mb + i * 16,
00257                                                    uvlinesize,
00258                                                    ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
00259                             }
00260                     }
00261                 }
00262             }
00263         }
00264     }
00265     if (h->cbp || IS_INTRA(mb_type)) {
00266         s->dsp.clear_blocks(h->mb);
00267         s->dsp.clear_blocks(h->mb + (24 * 16 << PIXEL_SHIFT));
00268     }
00269 }
00270 
00271 #if !SIMPLE || BITS == 8
00272 
00273 #undef  CHROMA_IDC
00274 #define CHROMA_IDC 3
00275 #include "h264_mc_template.c"
00276 
00277 static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h)
00278 {
00279     MpegEncContext *const s = &h->s;
00280     const int mb_x    = s->mb_x;
00281     const int mb_y    = s->mb_y;
00282     const int mb_xy   = h->mb_xy;
00283     const int mb_type = s->current_picture.f.mb_type[mb_xy];
00284     uint8_t *dest[3];
00285     int linesize;
00286     int i, j, p;
00287     int *block_offset = &h->block_offset[0];
00288     const int transform_bypass = !SIMPLE && (s->qscale == 0 && h->sps.transform_bypass);
00289     const int plane_count      = (SIMPLE || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) ? 3 : 1;
00290 
00291     for (p = 0; p < plane_count; p++) {
00292         dest[p] = s->current_picture.f.data[p] +
00293                   ((mb_x << PIXEL_SHIFT) + mb_y * s->linesize) * 16;
00294         s->dsp.prefetch(dest[p] + (s->mb_x & 3) * 4 * s->linesize + (64 << PIXEL_SHIFT),
00295                         s->linesize, 4);
00296     }
00297 
00298     h->list_counts[mb_xy] = h->list_count;
00299 
00300     if (!SIMPLE && MB_FIELD) {
00301         linesize     = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
00302         block_offset = &h->block_offset[48];
00303         if (mb_y & 1) 
00304             for (p = 0; p < 3; p++)
00305                 dest[p] -= s->linesize * 15;
00306         if (FRAME_MBAFF) {
00307             int list;
00308             for (list = 0; list < h->list_count; list++) {
00309                 if (!USES_LIST(mb_type, list))
00310                     continue;
00311                 if (IS_16X16(mb_type)) {
00312                     int8_t *ref = &h->ref_cache[list][scan8[0]];
00313                     fill_rectangle(ref, 4, 4, 8, (16 + *ref) ^ (s->mb_y & 1), 1);
00314                 } else {
00315                     for (i = 0; i < 16; i += 4) {
00316                         int ref = h->ref_cache[list][scan8[i]];
00317                         if (ref >= 0)
00318                             fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2,
00319                                            8, (16 + ref) ^ (s->mb_y & 1), 1);
00320                     }
00321                 }
00322             }
00323         }
00324     } else {
00325         linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize;
00326     }
00327 
00328     if (!SIMPLE && IS_INTRA_PCM(mb_type)) {
00329         if (PIXEL_SHIFT) {
00330             const int bit_depth = h->sps.bit_depth_luma;
00331             GetBitContext gb;
00332             init_get_bits(&gb, (uint8_t *)h->mb, 768 * bit_depth);
00333 
00334             for (p = 0; p < plane_count; p++)
00335                 for (i = 0; i < 16; i++) {
00336                     uint16_t *tmp = (uint16_t *)(dest[p] + i * linesize);
00337                     for (j = 0; j < 16; j++)
00338                         tmp[j] = get_bits(&gb, bit_depth);
00339                 }
00340         } else {
00341             for (p = 0; p < plane_count; p++)
00342                 for (i = 0; i < 16; i++)
00343                     memcpy(dest[p] + i * linesize,
00344                            (uint8_t *)h->mb + p * 256 + i * 16, 16);
00345         }
00346     } else {
00347         if (IS_INTRA(mb_type)) {
00348             if (h->deblocking_filter)
00349                 xchg_mb_border(h, dest[0], dest[1], dest[2], linesize,
00350                                linesize, 1, 1, SIMPLE, PIXEL_SHIFT);
00351 
00352             for (p = 0; p < plane_count; p++)
00353                 hl_decode_mb_predict_luma(h, mb_type, 1, SIMPLE,
00354                                           transform_bypass, PIXEL_SHIFT,
00355                                           block_offset, linesize, dest[p], p);
00356 
00357             if (h->deblocking_filter)
00358                 xchg_mb_border(h, dest[0], dest[1], dest[2], linesize,
00359                                linesize, 0, 1, SIMPLE, PIXEL_SHIFT);
00360         } else {
00361             FUNC(hl_motion_444)(h, dest[0], dest[1], dest[2],
00362                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
00363                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
00364                       h->h264dsp.weight_h264_pixels_tab,
00365                       h->h264dsp.biweight_h264_pixels_tab);
00366         }
00367 
00368         for (p = 0; p < plane_count; p++)
00369             hl_decode_mb_idct_luma(h, mb_type, 1, SIMPLE, transform_bypass,
00370                                    PIXEL_SHIFT, block_offset, linesize,
00371                                    dest[p], p);
00372     }
00373     if (h->cbp || IS_INTRA(mb_type)) {
00374         s->dsp.clear_blocks(h->mb);
00375         s->dsp.clear_blocks(h->mb + (24 * 16 << PIXEL_SHIFT));
00376     }
00377 }
00378 
00379 #endif