• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • Examples
  • File List
  • Globals

libavcodec/h264.c

Go to the documentation of this file.
00001 /*
00002  * H.26L/H.264/AVC/JVT/14496-10/... decoder
00003  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #include "libavutil/imgutils.h"
00029 #include "internal.h"
00030 #include "dsputil.h"
00031 #include "avcodec.h"
00032 #include "mpegvideo.h"
00033 #include "h264.h"
00034 #include "h264data.h"
00035 #include "h264_mvpred.h"
00036 #include "golomb.h"
00037 #include "mathops.h"
00038 #include "rectangle.h"
00039 #include "thread.h"
00040 #include "vdpau_internal.h"
00041 #include "libavutil/avassert.h"
00042 
00043 #include "cabac.h"
00044 
00045 //#undef NDEBUG
00046 #include <assert.h>
00047 
      /* rem6[qp] == qp % 6, precomputed for every qp in [0, QP_MAX_NUM];
       * avoids a runtime modulo when deriving dequantisation scale indices.
       * NOTE(review): 64 entries listed -- assumes QP_MAX_NUM+1 == 64. */
00048 static const uint8_t rem6[QP_MAX_NUM+1]={
00049 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
00050 };
00051 
      /* div6[qp] == qp / 6, companion table to rem6[] above; together they
       * decompose a QP value without runtime division.
       * NOTE(review): 64 entries listed -- assumes QP_MAX_NUM+1 == 64. */
00052 static const uint8_t div6[QP_MAX_NUM+1]={
00053 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9,10,10,10,10,
00054 };
00055 
      /* Candidate pixel formats for 4:2:0 full-range ("JPEG") H.264 streams:
       * hardware-accelerated formats are listed first, plain software
       * YUVJ420P is the fallback, and PIX_FMT_NONE terminates the list. */
00056 static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
00057     PIX_FMT_DXVA2_VLD,
00058     PIX_FMT_VAAPI_VLD,
00059     PIX_FMT_YUVJ420P,
00060     PIX_FMT_NONE
00061 };
00062 
      /* Write the intra4x4 prediction modes of the current macroblock back
       * from the 8-wide prediction-mode cache into per-MB storage, so later
       * macroblocks can read them as top/left neighbour context. */
00063 void ff_h264_write_back_intra_pred_mode(H264Context *h){
00064     int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
00065 
          /* bottom row of the cache: 4 modes copied with one 32-bit store */
00066     AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4);
          /* rightmost cache column (rows 3..1), stored bottom-up */
00067     mode[4]= h->intra4x4_pred_mode_cache[7+8*3];
00068     mode[5]= h->intra4x4_pred_mode_cache[7+8*2];
00069     mode[6]= h->intra4x4_pred_mode_cache[7+8*1];
00070 }
00071 
      /*
       * Validate the cached intra4x4 prediction modes against neighbour
       * sample availability, substituting usable modes where possible.
       * The top[]/left[] tables map mode -> replacement when the respective
       * neighbour is missing: negative = unrecoverable error, 0 in top[]
       * (resp. the table value) = replacement mode to write back.
       * @return 0 on success, -1 if a required neighbour is unavailable.
       */
00075 int ff_h264_check_intra4x4_pred_mode(H264Context *h){
00076     MpegEncContext * const s = &h->s;
00077     static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
00078     static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
00079     int i;
00080 
          /* top row of the MB: all four 4x4 blocks share top availability */
00081     if(!(h->top_samples_available&0x8000)){
00082         for(i=0; i<4; i++){
00083             int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
00084             if(status<0){
00085                 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
00086                 return -1;
00087             } else if(status){
00088                 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
00089             }
00090         }
00091     }
00092 
          /* left column: availability is tracked per 4x4 row via mask[] */
00093     if((h->left_samples_available&0x8888)!=0x8888){
00094         static const int mask[4]={0x8000,0x2000,0x80,0x20};
00095         for(i=0; i<4; i++){
00096             if(!(h->left_samples_available&mask[i])){
00097                 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
00098                 if(status<0){
00099                     av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
00100                     return -1;
00101                 } else if(status){
00102                     h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
00103                 }
00104             }
00105         }
00106     }
00107 
00108     return 0;
00109 } //FIXME cleanup like check_intra_pred_mode
00110 
      /*
       * Validate an intra 16x16 / chroma prediction mode against neighbour
       * availability, substituting DC variants where samples are missing.
       * @return the (possibly corrected) mode, or -1 on error.
       */
00114 int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma){
00115     MpegEncContext * const s = &h->s;
00116     static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
00117     static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
00118 
          /* unsigned comparison: also rejects negative modes in one test */
00119     if(mode > 6U) {
00120         av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
00121         return -1;
00122     }
00123 
00124     if(!(h->top_samples_available&0x8000)){
00125         mode= top[ mode ];
00126         if(mode<0){
00127             av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
00128             return -1;
00129         }
00130     }
00131 
00132     if((h->left_samples_available&0x8080) != 0x8080){
00133         mode= left[ mode ];
          /* only one of the two left halves is missing: MBAFF +
           * constrained_intra_pred needs the special half-DC modes */
00134         if(is_chroma && (h->left_samples_available&0x8080)){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
00135             mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
00136         }
00137         if(mode<0){
00138             av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
00139             return -1;
00140         }
00141     }
00142 
00143     return mode;
00144 }
00145 
00146 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
00147     int i, si, di;
00148     uint8_t *dst;
00149     int bufidx;
00150 
00151 //    src[0]&0x80;                //forbidden bit
00152     h->nal_ref_idc= src[0]>>5;
00153     h->nal_unit_type= src[0]&0x1F;
00154 
00155     src++; length--;
00156 
00157 #if HAVE_FAST_UNALIGNED
00158 # if HAVE_FAST_64BIT
00159 #   define RS 7
00160     for(i=0; i+1<length; i+=9){
00161         if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
00162 # else
00163 #   define RS 3
00164     for(i=0; i+1<length; i+=5){
00165         if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
00166 # endif
00167             continue;
00168         if(i>0 && !src[i]) i--;
00169         while(src[i]) i++;
00170 #else
00171 #   define RS 0
00172     for(i=0; i+1<length; i+=2){
00173         if(src[i]) continue;
00174         if(i>0 && src[i-1]==0) i--;
00175 #endif
00176         if(i+2<length && src[i+1]==0 && src[i+2]<=3){
00177             if(src[i+2]!=3){
00178                 /* startcode, so we must be past the end */
00179                 length=i;
00180             }
00181             break;
00182         }
00183         i-= RS;
00184     }
00185 
00186     bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
00187     si=h->rbsp_buffer_size[bufidx];
00188     av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE+MAX_MBPAIR_SIZE);
00189     dst= h->rbsp_buffer[bufidx];
00190     if(si != h->rbsp_buffer_size[bufidx])
00191         memset(dst + length, 0, FF_INPUT_BUFFER_PADDING_SIZE+MAX_MBPAIR_SIZE);
00192 
00193     if (dst == NULL){
00194         return NULL;
00195     }
00196 
00197     if(i>=length-1){ //no escaped 0
00198         *dst_length= length;
00199         *consumed= length+1; //+1 for the header
00200         if(h->s.avctx->flags2 & CODEC_FLAG2_FAST){
00201             return src;
00202         }else{
00203             memcpy(dst, src, length);
00204             return dst;
00205         }
00206     }
00207 
00208 //printf("decoding esc\n");
00209     memcpy(dst, src, i);
00210     si=di=i;
00211     while(si+2<length){
00212         //remove escapes (very rare 1:2^22)
00213         if(src[si+2]>3){
00214             dst[di++]= src[si++];
00215             dst[di++]= src[si++];
00216         }else if(src[si]==0 && src[si+1]==0){
00217             if(src[si+2]==3){ //escape
00218                 dst[di++]= 0;
00219                 dst[di++]= 0;
00220                 si+=3;
00221                 continue;
00222             }else //next start code
00223                 goto nsc;
00224         }
00225 
00226         dst[di++]= src[si++];
00227     }
00228     while(si<length)
00229         dst[di++]= src[si++];
00230 nsc:
00231 
00232     memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
00233 
00234     *dst_length= di;
00235     *consumed= si + 1;//+1 for the header
00236 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
00237     return dst;
00238 }
00239 
00244 static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
00245     int v= *src;
00246     int r;
00247 
00248     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
00249 
00250     for(r=1; r<9; r++){
00251         if(v&1) return r;
00252         v>>=1;
00253     }
00254     return 0;
00255 }
00256 
00257 static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height,
00258                                  int y_offset, int list){
00259     int raw_my= h->mv_cache[list][ scan8[n] ][1];
00260     int filter_height= (raw_my&3) ? 2 : 0;
00261     int full_my= (raw_my>>2) + y_offset;
00262     int top = full_my - filter_height, bottom = full_my + height + filter_height;
00263 
00264     return FFMAX(abs(top), bottom);
00265 }
00266 
      /* Record, per reference picture and per list, the lowest row that MC
       * of part n needs (refs[list][ref_n] keeps the running maximum);
       * nrefs[list] counts distinct references seen. Used by
       * await_references() for frame-threaded decoding. */
00267 static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height,
00268                                int y_offset, int list0, int list1, int *nrefs){
00269     MpegEncContext * const s = &h->s;
00270     int my;
00271 
00272     y_offset += 16*(s->mb_y >> MB_FIELD);
00273 
00274     if(list0){
00275         int ref_n = h->ref_cache[0][ scan8[n] ];
00276         Picture *ref= &h->ref_list[0][ref_n];
00277 
00278         // Error resilience puts the current picture in the ref list.
00279         // Don't try to wait on these as it will cause a deadlock.
00280         // Fields can wait on each other, though.
00281         if(ref->thread_opaque != s->current_picture.thread_opaque ||
00282            (ref->reference&3) != s->picture_structure) {
00283             my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
00284             if (refs[0][ref_n] < 0) nrefs[0] += 1;
00285             refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
00286         }
00287     }
00288 
00289     if(list1){
00290         int ref_n = h->ref_cache[1][ scan8[n] ];
00291         Picture *ref= &h->ref_list[1][ref_n];
00292 
00293         if(ref->thread_opaque != s->current_picture.thread_opaque ||
00294            (ref->reference&3) != s->picture_structure) {
00295             my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
00296             if (refs[1][ref_n] < 0) nrefs[1] += 1;
00297             refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
00298         }
00299     }
00300 }
00301 
      /* Frame-threading: block until every reference picture row that motion
       * compensation of the current MB reads has been decoded by the owning
       * thread. First collects the lowest needed row per reference via
       * get_lowest_part_y() according to the MB partitioning, then waits on
       * each, translating between frame and field progress coordinates. */
00307 static void await_references(H264Context *h){
00308     MpegEncContext * const s = &h->s;
00309     const int mb_xy= h->mb_xy;
00310     const int mb_type= s->current_picture.mb_type[mb_xy];
00311     int refs[2][48];
00312     int nrefs[2] = {0};
00313     int ref, list;
00314 
00315     memset(refs, -1, sizeof(refs));
00316 
00317     if(IS_16X16(mb_type)){
00318         get_lowest_part_y(h, refs, 0, 16, 0,
00319                   IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
00320     }else if(IS_16X8(mb_type)){
00321         get_lowest_part_y(h, refs, 0, 8, 0,
00322                   IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
00323         get_lowest_part_y(h, refs, 8, 8, 8,
00324                   IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
00325     }else if(IS_8X16(mb_type)){
00326         get_lowest_part_y(h, refs, 0, 16, 0,
00327                   IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
00328         get_lowest_part_y(h, refs, 4, 16, 0,
00329                   IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
00330     }else{
00331         int i;
00332 
00333         assert(IS_8X8(mb_type));
00334 
00335         for(i=0; i<4; i++){
00336             const int sub_mb_type= h->sub_mb_type[i];
00337             const int n= 4*i;
00338             int y_offset= (i&2)<<2;
00339 
00340             if(IS_SUB_8X8(sub_mb_type)){
00341                 get_lowest_part_y(h, refs, n  , 8, y_offset,
00342                           IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
00343             }else if(IS_SUB_8X4(sub_mb_type)){
00344                 get_lowest_part_y(h, refs, n  , 4, y_offset,
00345                           IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
00346                 get_lowest_part_y(h, refs, n+2, 4, y_offset+4,
00347                           IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
00348             }else if(IS_SUB_4X8(sub_mb_type)){
00349                 get_lowest_part_y(h, refs, n  , 8, y_offset,
00350                           IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
00351                 get_lowest_part_y(h, refs, n+1, 8, y_offset,
00352                           IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
00353             }else{
00354                 int j;
00355                 assert(IS_SUB_4X4(sub_mb_type));
00356                 for(j=0; j<4; j++){
00357                     int sub_y_offset= y_offset + 2*(j&2);
00358                     get_lowest_part_y(h, refs, n+j, 4, sub_y_offset,
00359                               IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
00360                 }
00361             }
00362         }
00363     }
00364 
          /* wait on each collected reference; nrefs[] lets us stop early */
00365     for(list=h->list_count-1; list>=0; list--){
00366         for(ref=0; ref<48 && nrefs[list]; ref++){
00367             int row = refs[list][ref];
00368             if(row >= 0){
00369                 Picture *ref_pic = &h->ref_list[list][ref];
00370                 int ref_field = ref_pic->reference - 1;
00371                 int ref_field_picture = ref_pic->field_picture;
00372                 int pic_height = 16*s->mb_height >> ref_field_picture;
00373 
00374                 row <<= MB_MBAFF;
00375                 nrefs[list]--;
00376 
00377                 if(!FIELD_PICTURE && ref_field_picture){ // frame referencing two fields
00378                     ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1);
00379                     ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1)           , pic_height-1), 0);
00380                 }else if(FIELD_PICTURE && !ref_field_picture){ // field referencing one field of a frame
00381                     ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field    , pic_height-1), 0);
00382                 }else if(FIELD_PICTURE){
00383                     ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field);
00384                 }else{
00385                     ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0);
00386                 }
00387             }
00388         }
00389     }
00390 }
00391 
      /* Disabled (dead) reference implementation of the 4x4 luma DC forward
       * transform, kept for documentation only. NOTE(review): it references
       * a 'stride' symbol that is #undef'd just below -- this block would
       * not compile if re-enabled as-is. */
00392 #if 0
00393 
00397 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
00398 //    const int qmul= dequant_coeff[qp][0];
00399     int i;
00400     int temp[16]; //FIXME check if this is a good idea
00401     static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
00402     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
00403 
          /* vertical butterflies */
00404     for(i=0; i<4; i++){
00405         const int offset= y_offset[i];
00406         const int z0= block[offset+stride*0] + block[offset+stride*4];
00407         const int z1= block[offset+stride*0] - block[offset+stride*4];
00408         const int z2= block[offset+stride*1] - block[offset+stride*5];
00409         const int z3= block[offset+stride*1] + block[offset+stride*5];
00410 
00411         temp[4*i+0]= z0+z3;
00412         temp[4*i+1]= z1+z2;
00413         temp[4*i+2]= z1-z2;
00414         temp[4*i+3]= z0-z3;
00415     }
00416 
          /* horizontal butterflies, writing back with a >>1 normalisation */
00417     for(i=0; i<4; i++){
00418         const int offset= x_offset[i];
00419         const int z0= temp[4*0+i] + temp[4*2+i];
00420         const int z1= temp[4*0+i] - temp[4*2+i];
00421         const int z2= temp[4*1+i] - temp[4*3+i];
00422         const int z3= temp[4*1+i] + temp[4*3+i];
00423 
00424         block[stride*0 +offset]= (z0 + z3)>>1;
00425         block[stride*2 +offset]= (z1 + z2)>>1;
00426         block[stride*8 +offset]= (z1 - z2)>>1;
00427         block[stride*10+offset]= (z0 - z3)>>1;
00428     }
00429 }
00430 #endif
00431 
00432 #undef xStride
00433 #undef stride
00434 
      /* Disabled (dead) reference implementation of the 2x2 chroma DC
       * forward transform (simple 2x2 Hadamard); kept for documentation
       * only and excluded from the build via #if 0. */
00435 #if 0
00436 static void chroma_dc_dct_c(DCTELEM *block){
00437     const int stride= 16*2;
00438     const int xStride= 16;
00439     int a,b,c,d,e;
00440 
00441     a= block[stride*0 + xStride*0];
00442     b= block[stride*0 + xStride*1];
00443     c= block[stride*1 + xStride*0];
00444     d= block[stride*1 + xStride*1];
00445 
          /* 2x2 butterfly: sums and differences of rows */
00446     e= a-b;
00447     a= a+b;
00448     b= c-d;
00449     c= c+d;
00450 
00451     block[stride*0 + xStride*0]= (a+c);
00452     block[stride*0 + xStride*1]= (e+b);
00453     block[stride*1 + xStride*0]= (a-c);
00454     block[stride*1 + xStride*1]= (e-b);
00455 }
00456 #endif
00457 
      /* Motion-compensate one partition from one reference (one list
       * direction): quarter-pel luma via qpix_op, eighth-pel chroma via
       * chroma_op (or qpix on all three planes for 4:4:4). Handles edge
       * emulation when the MV points (partly) outside the picture, and the
       * chroma phase offset for opposite-parity field references. */
00458 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
00459                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00460                            int src_x_offset, int src_y_offset,
00461                            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
00462                            int pixel_shift, int chroma444){
00463     MpegEncContext * const s = &h->s;
00464     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
00465     int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
00466     const int luma_xy= (mx&3) + ((my&3)<<2);
00467     int offset = ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
00468     uint8_t * src_y = pic->data[0] + offset;
00469     uint8_t * src_cb, * src_cr;
00470     int extra_width= h->emu_edge_width;
00471     int extra_height= h->emu_edge_height;
00472     int emu=0;
00473     const int full_mx= mx>>2;
00474     const int full_my= my>>2;
00475     const int pic_width  = 16*s->mb_width;
00476     const int pic_height = 16*s->mb_height >> MB_FIELD;
00477 
          /* subpel interpolation reads 3 extra pixels on each side */
00478     if(mx&7) extra_width -= 3;
00479     if(my&7) extra_height -= 3;
00480 
          /* MV reaches outside the padded picture: interpolate from a
           * temporary buffer with replicated edges instead */
00481     if(   full_mx < 0-extra_width
00482        || full_my < 0-extra_height
00483        || full_mx + 16/*FIXME*/ > pic_width + extra_width
00484        || full_my + 16/*FIXME*/ > pic_height + extra_height){
00485         s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
00486             src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
00487         emu=1;
00488     }
00489 
00490     qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
00491     if(!square){
00492         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
00493     }
00494 
00495     if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
00496 
          /* 4:4:4: chroma planes are full-size, reuse the luma qpel path */
00497     if(chroma444){
00498         src_cb = pic->data[1] + offset;
00499         if(emu){
00500             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
00501                                     16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
00502             src_cb= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
00503         }
00504         qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); //FIXME try variable height perhaps?
00505         if(!square){
00506             qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
00507         }
00508 
00509         src_cr = pic->data[2] + offset;
00510         if(emu){
00511             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
00512                                     16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
00513             src_cr= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
00514         }
00515         qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); //FIXME try variable height perhaps?
00516         if(!square){
00517             qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
00518         }
00519         return;
00520     }
00521 
00522     if(MB_FIELD){
00523         // chroma offset when predicting from a field of opposite parity
00524         my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
00525         emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
00526     }
00527     src_cb= pic->data[1] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
00528     src_cr= pic->data[2] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;
00529 
00530     if(emu){
00531         s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
00532             src_cb= s->edge_emu_buffer;
00533     }
00534     chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
00535 
00536     if(emu){
00537         s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
00538             src_cr= s->edge_emu_buffer;
00539     }
00540     chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
00541 }
00542 
      /* Standard (unweighted) motion compensation for one partition:
       * list0 prediction uses the 'put' functions, and a list1 prediction,
       * if present, is blended in with the 'avg' functions (plain average
       * for bi-prediction). */
00543 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
00544                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00545                            int x_offset, int y_offset,
00546                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
00547                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
00548                            int list0, int list1, int pixel_shift, int chroma444){
00549     MpegEncContext * const s = &h->s;
00550     qpel_mc_func *qpix_op=  qpix_put;
00551     h264_chroma_mc_func chroma_op= chroma_put;
00552 
          /* advance the destinations to this partition's position */
00553     dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
00554     if(chroma444){
00555         dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
00556         dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
00557     }else{
00558         dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
00559         dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
00560     }
00561     x_offset += 8*s->mb_x;
00562     y_offset += 8*(s->mb_y >> MB_FIELD);
00563 
00564     if(list0){
00565         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
00566         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
00567                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
00568                            qpix_op, chroma_op, pixel_shift, chroma444);
00569 
          /* second direction averages on top of the first */
00570         qpix_op=  qpix_avg;
00571         chroma_op= chroma_avg;
00572     }
00573 
00574     if(list1){
00575         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
00576         mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
00577                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
00578                            qpix_op, chroma_op, pixel_shift, chroma444);
00579     }
00580 }
00581 
      /* Weighted motion compensation for one partition. Bi-prediction
       * renders the list1 prediction into a scratch buffer and combines it
       * with the list0 prediction using implicit (use_weight == 2) or
       * explicit per-reference weights; uni-prediction applies the explicit
       * weight in place. */
00582 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
00583                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00584                            int x_offset, int y_offset,
00585                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
00586                            h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
00587                            h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
00588                            int list0, int list1, int pixel_shift, int chroma444){
00589     MpegEncContext * const s = &h->s;
00590 
00591     dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
00592     if(chroma444){
          /* 4:4:4 chroma is full resolution: reuse the luma weight ops */
00593         chroma_weight_avg = luma_weight_avg;
00594         chroma_weight_op = luma_weight_op;
00595         dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
00596         dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
00597     }else{
00598         dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
00599         dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
00600     }
00601     x_offset += 8*s->mb_x;
00602     y_offset += 8*(s->mb_y >> MB_FIELD);
00603 
00604     if(list0 && list1){
00605         /* don't optimize for luma-only case, since B-frames usually
00606          * use implicit weights => chroma too. */
00607         uint8_t *tmp_cb = s->obmc_scratchpad;
00608         uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift);
00609         uint8_t *tmp_y  = s->obmc_scratchpad + 16*h->mb_uvlinesize;
00610         int refn0 = h->ref_cache[0][ scan8[n] ];
00611         int refn1 = h->ref_cache[1][ scan8[n] ];
00612 
00613         mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
00614                     dest_y, dest_cb, dest_cr,
00615                     x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
00616         mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
00617                     tmp_y, tmp_cb, tmp_cr,
00618                     x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);
00619 
          /* implicit weighting: weights per ref pair sum to 64 */
00620         if(h->use_weight == 2){
00621             int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
00622             int weight1 = 64 - weight0;
00623             luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
00624             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
00625             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
00626         }else{
00627             luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
00628                             h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
00629                             h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
00630             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
00631                             h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
00632                             h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
00633             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
00634                             h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
00635                             h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
00636         }
00637     }else{
          /* uni-directional: predict, then weight in place */
00638         int list = list1 ? 1 : 0;
00639         int refn = h->ref_cache[list][ scan8[n] ];
00640         Picture *ref= &h->ref_list[list][refn];
00641         mc_dir_part(h, ref, n, square, chroma_height, delta, list,
00642                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
00643                     qpix_put, chroma_put, pixel_shift, chroma444);
00644 
00645         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
00646                        h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
00647         if(h->use_weight_chroma){
00648             chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
00649                              h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
00650             chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
00651                              h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
00652         }
00653     }
00654 }
00655 
      /* Dispatch MC for one partition: use the weighted path for explicit
       * weighting (use_weight == 1) or for implicit bi-prediction whose
       * weight differs from 32 (32/32 is a plain average, so the cheaper
       * standard path gives the same result). */
00656 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
00657                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00658                            int x_offset, int y_offset,
00659                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
00660                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
00661                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
00662                            int list0, int list1, int pixel_shift, int chroma444){
00663     if((h->use_weight==2 && list0 && list1
00664         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
00665        || h->use_weight==1)
00666         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
00667                          x_offset, y_offset, qpix_put, chroma_put,
00668                          weight_op[0], weight_op[3], weight_avg[0],
00669                          weight_avg[3], list0, list1, pixel_shift, chroma444);
00670     else
00671         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
00672                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
00673                     chroma_avg, list0, list1, pixel_shift, chroma444);
00674 }
00675 
00676 static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma444){
00677     /* fetch pixels for estimated mv 4 macroblocks ahead
00678      * optimized for 64byte cache lines */
00679     MpegEncContext * const s = &h->s;
00680     const int refn = h->ref_cache[list][scan8[0]];
00681     if(refn >= 0){
00682         const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
00683         const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
00684         uint8_t **src= h->ref_list[list][refn].data;
          /* NOTE(review): the luma offset uses h->pixel_shift while the
           * chroma offset below uses the pixel_shift parameter -- presumably
           * these are always equal; confirm against the callers. */
00685         int off= ((mx+64)<<h->pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize;
00686         s->dsp.prefetch(src[0]+off, s->linesize, 4);
00687         if(chroma444){
00688             s->dsp.prefetch(src[1]+off, s->linesize, 4);
00689             s->dsp.prefetch(src[2]+off, s->linesize, 4);
00690         }else{
00691             off= (((mx>>1)+64)<<pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize;
00692             s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
00693         }
00694     }
00695 }
00696 
/**
 * Perform inter-prediction (motion compensation) for one macroblock,
 * dispatching on its partitioning: 16x16, 16x8, 8x16 or 8x8 with
 * per-8x8 sub-partitions down to 4x4.
 *
 * qpix_put/chroma_put are used for unidirectional prediction,
 * qpix_avg/chroma_avg for averaging in bidirectional prediction, and
 * weight_op/weight_avg implement explicit/implicit weighted prediction
 * (indexed by partition size inside mc_part()).
 */
static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                      int pixel_shift, int chroma444){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    // frame threading: block until the referenced rows of the reference
    // frames have been decoded by the other threads
    if(HAVE_PTHREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
        await_references(h);
    prefetch_motion(h, 0, pixel_shift, chroma444);

    if(IS_16X16(mb_type)){
        // one 16x16 partition
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma444);
    }else if(IS_16X8(mb_type)){
        // two 16x8 partitions, one above the other (y_offset 0 and 4)
        mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma444);
        mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma444);
    }else if(IS_8X16(mb_type)){
        // two 8x16 partitions, side by side (x_offset 0 and 4)
        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma444);
        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma444);
    }else{
        int i;

        assert(IS_8X8(mb_type));

        // four 8x8 partitions, each possibly sub-divided further
        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int x_offset= (i&1)<<2;
            int y_offset= (i&2)<<1;

            if(IS_SUB_8X8(sub_mb_type)){
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                    &weight_op[3], &weight_avg[3],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
            }else if(IS_SUB_8X4(sub_mb_type)){
                // two 8x4 sub-blocks stacked vertically
                mc_part(h, n  , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
                mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
            }else if(IS_SUB_4X8(sub_mb_type)){
                // two 4x8 sub-blocks side by side
                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma444);
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                // four 4x4 sub-blocks in a 2x2 layout
                for(j=0; j<4; j++){
                    int sub_x_offset= x_offset + 2*(j&1);
                    int sub_y_offset= y_offset +   (j&2);
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[6], &weight_avg[6],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                        pixel_shift, chroma444);
                }
            }
        }
    }

    prefetch_motion(h, 1, pixel_shift, chroma444);
}
00797 
/*
 * Instantiate bit-depth-specific wrappers around hl_motion() so that the
 * pixel_shift argument (sh) becomes a compile-time constant:
 * hl_motion_8() for 8-bit content (shift 0) and hl_motion_16() for
 * high-bit-depth content (shift 1, 16-bit samples).
 */
#define hl_motion_fn(sh, bits) \
static av_always_inline void hl_motion_ ## bits(H264Context *h, \
                                       uint8_t *dest_y, \
                                       uint8_t *dest_cb, uint8_t *dest_cr, \
                                       qpel_mc_func (*qpix_put)[16], \
                                       h264_chroma_mc_func (*chroma_put), \
                                       qpel_mc_func (*qpix_avg)[16], \
                                       h264_chroma_mc_func (*chroma_avg), \
                                       h264_weight_func *weight_op, \
                                       h264_biweight_func *weight_avg, \
                                       int chroma444) \
{ \
    hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \
              qpix_avg, chroma_avg, weight_op, weight_avg, sh, chroma444); \
}
hl_motion_fn(0, 8);
hl_motion_fn(1, 16);
00815 
00816 static void free_tables(H264Context *h, int free_rbsp){
00817     int i;
00818     H264Context *hx;
00819 
00820     av_freep(&h->intra4x4_pred_mode);
00821     av_freep(&h->chroma_pred_mode_table);
00822     av_freep(&h->cbp_table);
00823     av_freep(&h->mvd_table[0]);
00824     av_freep(&h->mvd_table[1]);
00825     av_freep(&h->direct_table);
00826     av_freep(&h->non_zero_count);
00827     av_freep(&h->slice_table_base);
00828     h->slice_table= NULL;
00829     av_freep(&h->list_counts);
00830 
00831     av_freep(&h->mb2b_xy);
00832     av_freep(&h->mb2br_xy);
00833 
00834     for(i = 0; i < MAX_THREADS; i++) {
00835         hx = h->thread_context[i];
00836         if(!hx) continue;
00837         av_freep(&hx->top_borders[1]);
00838         av_freep(&hx->top_borders[0]);
00839         av_freep(&hx->s.obmc_scratchpad);
00840         if (free_rbsp){
00841             av_freep(&hx->rbsp_buffer[1]);
00842             av_freep(&hx->rbsp_buffer[0]);
00843             hx->rbsp_buffer_size[0] = 0;
00844             hx->rbsp_buffer_size[1] = 0;
00845         }
00846         if (i) av_freep(&h->thread_context[i]);
00847     }
00848 }
00849 
/**
 * Build the 8x8 dequantization tables, one per scaling-matrix slot and QP.
 * Slots whose PPS scaling matrices are identical share a single buffer.
 */
static void init_dequant8_coeff_table(H264Context *h){
    int i,j,q,x;
    // the valid QP range grows by 6 for every extra bit of luma depth
    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);

    for(i=0; i<6; i++ ){
        h->dequant8_coeff[i] = h->dequant8_buffer[i];
        // if an earlier slot has the same scaling matrix, alias its table
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], 64*sizeof(uint8_t))){
                h->dequant8_coeff[i] = h->dequant8_buffer[j];
                break;
            }
        }
        if(j<i)
            continue; // shared with slot j, nothing to compute

        for(q=0; q<max_qp+1; q++){
            int shift = div6[q]; // qp/6 scales by powers of two
            int idx = rem6[q];   // qp%6 selects the base coefficient row
            for(x=0; x<64; x++)
                // (x>>3)|((x&7)<<3) swaps row and column of the 8x8 index
                h->dequant8_coeff[i][q][(x>>3)|((x&7)<<3)] =
                    ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
                    h->pps.scaling_matrix8[i][x]) << shift;
        }
    }
}
00875 
/**
 * Build the 4x4 dequantization tables, one per scaling-matrix slot and QP.
 * Slots whose PPS scaling matrices are identical share a single buffer.
 */
static void init_dequant4_coeff_table(H264Context *h){
    int i,j,q,x;
    // the valid QP range grows by 6 for every extra bit of luma depth
    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
    for(i=0; i<6; i++ ){
        h->dequant4_coeff[i] = h->dequant4_buffer[i];
        // if an earlier slot has the same scaling matrix, alias its table
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
                h->dequant4_coeff[i] = h->dequant4_buffer[j];
                break;
            }
        }
        if(j<i)
            continue; // shared with slot j, nothing to compute

        for(q=0; q<max_qp+1; q++){
            int shift = div6[q] + 2; // qp/6 power-of-two scale, +2 for 4x4 normalization
            int idx = rem6[q];       // qp%6 selects the base coefficient row
            for(x=0; x<16; x++)
                // (x>>2)|((x<<2)&0xF) swaps row and column of the 4x4 index
                h->dequant4_coeff[i][q][(x>>2)|((x<<2)&0xF)] =
                    ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
                    h->pps.scaling_matrix4[i][x]) << shift;
        }
    }
}
00900 
00901 static void init_dequant_tables(H264Context *h){
00902     int i,x;
00903     init_dequant4_coeff_table(h);
00904     if(h->pps.transform_8x8_mode)
00905         init_dequant8_coeff_table(h);
00906     if(h->sps.transform_bypass){
00907         for(i=0; i<6; i++)
00908             for(x=0; x<16; x++)
00909                 h->dequant4_coeff[i][0][x] = 1<<6;
00910         if(h->pps.transform_8x8_mode)
00911             for(i=0; i<6; i++)
00912                 for(x=0; x<64; x++)
00913                     h->dequant8_coeff[i][0][x] = 1<<6;
00914     }
00915 }
00916 
00917 
/**
 * Allocate all per-stream tables whose size depends only on the frame
 * geometry (not on linesize, which is unknown here).
 * @return 0 on success, -1 on allocation failure (partial allocations freed)
 */
int ff_h264_alloc_tables(H264Context *h){
    MpegEncContext * const s = &h->s;
    // one extra macroblock row for edge handling
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
    // two rows per slice thread for row-local tables
    const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
    int x,y;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8  * sizeof(uint8_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count    , big_mb_num * 48 * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)

    // -1 marks "no slice"; slice_table points past the guard rows so that
    // neighbor lookups above row 0 hit valid (marked) memory
    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy  , big_mb_num * sizeof(uint32_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            const int mb_xy= x + y*s->mb_stride;
            const int b_xy = 4*x + 4*y*h->b_stride;

            h->mb2b_xy [mb_xy]= b_xy;
            // without FMO, the reduced-resolution table wraps every two MB rows
            h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
        }
    }

    // allocated lazily in ff_h264_frame_start() once linesize is known
    s->obmc_scratchpad = NULL;

    if(!h->dequant4_coeff[0])
        init_dequant_tables(h);

    return 0;
fail:
    free_tables(h, 1);
    return -1;
}
00961 
00965 static void clone_tables(H264Context *dst, H264Context *src, int i){
00966     MpegEncContext * const s = &src->s;
00967     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode + i*8*2*s->mb_stride;
00968     dst->non_zero_count           = src->non_zero_count;
00969     dst->slice_table              = src->slice_table;
00970     dst->cbp_table                = src->cbp_table;
00971     dst->mb2b_xy                  = src->mb2b_xy;
00972     dst->mb2br_xy                 = src->mb2br_xy;
00973     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
00974     dst->mvd_table[0]             = src->mvd_table[0] + i*8*2*s->mb_stride;
00975     dst->mvd_table[1]             = src->mvd_table[1] + i*8*2*s->mb_stride;
00976     dst->direct_table             = src->direct_table;
00977     dst->list_counts              = src->list_counts;
00978 
00979     dst->s.obmc_scratchpad = NULL;
00980     ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma);
00981 }
00982 
/**
 * Allocate the per-(thread-)context buffers that depend on the frame width
 * and pre-mark the permanently unavailable ref_cache slots.
 * @return 0 on success, -1 on allocation failure
 */
static int context_init(H264Context *h){
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)

    // these cache positions can never hold a valid neighbor; mark them once
    h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
    h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;

    return 0;
fail:
    return -1; // free_tables will clean up for us
}
00998 
00999 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);
01000 
/**
 * One-time initialization shared by all decoder entry points: dimensions,
 * DSP/prediction function tables, and flat default scaling matrices.
 * Assumes 8-bit content; re-initialized later if the SPS says otherwise.
 */
static av_cold void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;

    s->width = s->avctx->width;
    s->height = s->avctx->height;
    s->codec_id= s->avctx->codec->id;

    s->avctx->bits_per_raw_sample = 8;

    ff_h264dsp_init(&h->h264dsp,
                    s->avctx->bits_per_raw_sample);
    ff_h264_pred_init(&h->hpc, s->codec_id,
                      s->avctx->bits_per_raw_sample);

    h->dequant_coeff_pps= -1; // no PPS has provided dequant coefficients yet
    s->unrestricted_mv=1;
    s->decode=1; //FIXME

    dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early

    // value 16 == flat (default) scaling lists
    memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
    memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
}
01024 
01025 int ff_h264_decode_extradata(H264Context *h, const uint8_t *buf, int size)
01026 {
01027     AVCodecContext *avctx = h->s.avctx;
01028 
01029     if(!buf || size <= 0)
01030         return -1;
01031 
01032     if(buf[0] == 1){
01033         int i, cnt, nalsize;
01034         const unsigned char *p = buf;
01035 
01036         h->is_avc = 1;
01037 
01038         if(size < 7) {
01039             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
01040             return -1;
01041         }
01042         /* sps and pps in the avcC always have length coded with 2 bytes,
01043            so put a fake nal_length_size = 2 while parsing them */
01044         h->nal_length_size = 2;
01045         // Decode sps from avcC
01046         cnt = *(p+5) & 0x1f; // Number of sps
01047         p += 6;
01048         for (i = 0; i < cnt; i++) {
01049             nalsize = AV_RB16(p) + 2;
01050             if(nalsize > size - (p-buf))
01051                 return -1;
01052             if(decode_nal_units(h, p, nalsize) < 0) {
01053                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
01054                 return -1;
01055             }
01056             p += nalsize;
01057         }
01058         // Decode pps from avcC
01059         cnt = *(p++); // Number of pps
01060         for (i = 0; i < cnt; i++) {
01061             nalsize = AV_RB16(p) + 2;
01062             if(nalsize > size - (p-buf))
01063                 return -1;
01064             if (decode_nal_units(h, p, nalsize) < 0) {
01065                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
01066                 return -1;
01067             }
01068             p += nalsize;
01069         }
01070         // Now store right nal length size, that will be use to parse all other nals
01071         h->nal_length_size = (buf[4] & 0x03) + 1;
01072     } else {
01073         h->is_avc = 0;
01074         if(decode_nal_units(h, buf, size) < 0)
01075             return -1;
01076     }
01077     return 0;
01078 }
01079 
/**
 * AVCodec init callback: set up the decoder context, VLC tables and
 * defaults, then parse any out-of-band extradata (avcC or Annex-B).
 * @return 0 on success, -1 if the extradata could not be decoded
 */
av_cold int ff_h264_decode_init(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    MpegEncContext * const s = &h->s;

    MPV_decode_defaults(s);

    s->avctx = avctx;
    common_init(h);

    s->out_format = FMT_H264;
    s->workaround_bugs= avctx->workaround_bugs;

    // set defaults
//    s->decode_mb= ff_h263_decode_mb;
    s->quarter_sample = 1;
    if(!avctx->has_b_frames)
    s->low_delay= 1; // NOTE: body of the if() above — the indentation is misleading

    avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;

    ff_h264_decode_init_vlc();

    h->pixel_shift = 0;
    h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;

    h->thread_context[0] = h;
    h->outputed_poc = h->next_outputed_poc = INT_MIN;
    h->prev_poc_msb= 1<<16;
    h->x264_build = -1; // unknown encoder version until an SEI tells us
    ff_h264_reset_sei(h);
    if(avctx->codec_id == CODEC_ID_H264){
        // H.264 time base is field-based: two ticks per frame
        if(avctx->ticks_per_frame == 1){
            s->avctx->time_base.den *=2;
        }
        avctx->ticks_per_frame = 2;
    }

    if(avctx->extradata_size > 0 && avctx->extradata &&
        ff_h264_decode_extradata(h, avctx->extradata, avctx->extradata_size))
        return -1;

    // honor the SPS reorder-depth hint so output is not delayed more than needed
    if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay = 0;
    }

    return 0;
}
01128 
01129 #define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size))))
01130 static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base)
01131 {
01132     int i;
01133 
01134     for (i=0; i<count; i++){
01135         assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||
01136                 IN_RANGE(from[i], old_base->picture, sizeof(Picture) * old_base->picture_count) ||
01137                 !from[i]));
01138         to[i] = REBASE_PICTURE(from[i], new_base, old_base);
01139     }
01140 }
01141 
/**
 * Synchronize an array of heap-allocated parameter sets (SPS/PPS) from
 * one decoder context to another: entries present only in @p to are freed,
 * entries present only in @p from are allocated, and all present entries
 * are copied.
 */
static void copy_parameter_set(void **to, void **from, int count, int size)
{
    int i;

    for (i = 0; i < count; i++) {
        if (to[i] && !from[i]) {
            av_freep(&to[i]);
        } else if (from[i] && !to[i]) {
            to[i] = av_malloc(size);
            // on allocation failure skip the copy instead of dereferencing NULL;
            // the entry simply stays absent in the destination context
            if (!to[i])
                continue;
        }

        if (from[i]) memcpy(to[i], from[i], size);
    }
}
01153 
01154 static int decode_init_thread_copy(AVCodecContext *avctx){
01155     H264Context *h= avctx->priv_data;
01156 
01157     if (!avctx->is_copy) return 0;
01158     memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
01159     memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
01160 
01161     return 0;
01162 }
01163 
// copy the contiguous field range [start_field, end_field) between two structs
#define copy_fields(to, from, start_field, end_field) memcpy(&to->start_field, &from->start_field, (char*)&to->end_field - (char*)&to->start_field)
/**
 * Frame-threading callback: update the decoder context @p dst with the
 * state of @p src after the latter finished setting up a frame.
 * @return 0 on success, a negative AVERROR code on failure
 */
static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src){
    H264Context *h= dst->priv_data, *h1= src->priv_data;
    MpegEncContext * const s = &h->s, * const s1 = &h1->s;
    int inited = s->context_initialized, err;
    int i;

    if(dst == src || !s1->context_initialized) return 0;

    err = ff_mpeg_update_thread_context(dst, src);
    if(err) return err;

    //FIXME handle width/height changing
    if(!inited){
        // first update for this thread: clear any stale parameter sets,
        // then take over everything beyond the MpegEncContext wholesale
        for(i = 0; i < MAX_SPS_COUNT; i++)
            av_freep(h->sps_buffers + i);

        for(i = 0; i < MAX_PPS_COUNT; i++)
            av_freep(h->pps_buffers + i);

        memcpy(&h->s + 1, &h1->s + 1, sizeof(H264Context) - sizeof(MpegEncContext)); //copy all fields after MpegEnc
        // the blind memcpy above duplicated h1's pointers; reset the ones
        // this context must own itself
        memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
        memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
        if (ff_h264_alloc_tables(h) < 0) {
            av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n");
            return AVERROR(ENOMEM);
        }
        context_init(h);

        for(i=0; i<2; i++){
            h->rbsp_buffer[i] = NULL;
            h->rbsp_buffer_size[i] = 0;
        }

        h->thread_context[0] = h;

        // frame_start may not be called for the next thread (if it's decoding a bottom field)
        // so this has to be allocated here
        h->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<h->pixel_shift));
    }

    //extradata/NAL handling
    h->is_avc          = h1->is_avc;

    //SPS/PPS
    copy_parameter_set((void**)h->sps_buffers, (void**)h1->sps_buffers, MAX_SPS_COUNT, sizeof(SPS));
    h->sps             = h1->sps;
    copy_parameter_set((void**)h->pps_buffers, (void**)h1->pps_buffers, MAX_PPS_COUNT, sizeof(PPS));
    h->pps             = h1->pps;

    //Dequantization matrices
    //FIXME these are big - can they be only copied when PPS changes?
    copy_fields(h, h1, dequant4_buffer, dequant4_coeff);

    // rebase the per-slot pointers so they reference this context's buffers,
    // preserving any aliasing between slots
    for(i=0; i<6; i++)
        h->dequant4_coeff[i] = h->dequant4_buffer[0] + (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);

    for(i=0; i<6; i++)
        h->dequant8_coeff[i] = h->dequant8_buffer[0] + (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);

    h->dequant_coeff_pps = h1->dequant_coeff_pps;

    //POC timing
    copy_fields(h, h1, poc_lsb, redundant_pic_count);

    //reference lists
    copy_fields(h, h1, ref_count, list_count);
    copy_fields(h, h1, ref_list,  intra_gb);
    copy_fields(h, h1, short_ref, cabac_init_idc);

    // Picture pointers copied above still point into h1's storage; remap them
    copy_picture_range(h->short_ref,   h1->short_ref,   32, s, s1);
    copy_picture_range(h->long_ref,    h1->long_ref,    32, s, s1);
    copy_picture_range(h->delayed_pic, h1->delayed_pic, MAX_DELAYED_PIC_COUNT+2, s, s1);

    h->last_slice_type = h1->last_slice_type;

    if(!s->current_picture_ptr) return 0;

    if(!s->dropable) {
        // apply reference picture marking and advance the POC predictors
        ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
        h->prev_poc_msb     = h->poc_msb;
        h->prev_poc_lsb     = h->poc_lsb;
    }
    h->prev_frame_num_offset= h->frame_num_offset;
    h->prev_frame_num       = h->frame_num;
    h->outputed_poc         = h->next_outputed_poc;

    return 0;
}
01256 
/**
 * Start decoding a new frame: allocate the target picture, compute the
 * per-block destination offsets and per-thread scratch buffers, and reset
 * per-frame bookkeeping.
 * @return 0 on success, -1 if MPV_frame_start() failed
 */
int ff_h264_frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;
    const int pixel_shift = h->pixel_shift;
    int thread_count = (s->avctx->active_thread_type & FF_THREAD_SLICE) ? s->avctx->thread_count : 1;

    if(MPV_frame_start(s, s->avctx) < 0)
        return -1;
    ff_er_frame_start(s);
    /*
     * MPV_frame_start uses pict_type to derive key_frame.
     * This is incorrect for H.264; IDR markings must be used.
     * Zero here; IDR markings per slice in frame or fields are ORed in later.
     * See decode_nal_units().
     */
    s->current_picture_ptr->key_frame= 0;
    s->current_picture_ptr->mmco_reset= 0;

    assert(s->linesize && s->uvlinesize);

    // destination offsets for each 4x4 block, in scan8 order:
    // [0..15] luma frame, [48..63] luma field (doubled stride)
    for(i=0; i<16; i++){
        h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
    }
    // same for the two chroma planes, frame and field variants
    for(i=0; i<16; i++){
        h->block_offset[16+i]=
        h->block_offset[32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+16+i]=
        h->block_offset[48+32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
    }

    /* can't be in alloc_tables because linesize isn't known there.
     * FIXME: redo bipred weight to not require extra buffer? */
    for(i = 0; i < thread_count; i++)
        if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

    /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/
    memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));

//    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;

    // We mark the current picture as non-reference after allocating it, so
    // that if we break out due to an error it can be released automatically
    // in the next MPV_frame_start().
    // SVQ3 as well as most other codecs have only last/next/current and thus
    // get released even with set reference, besides SVQ3 and others do not
    // mark frames as reference later "naturally".
    if(s->codec_id != CODEC_ID_SVQ3)
        s->current_picture_ptr->reference= 0;

    // POCs are filled in per slice; INT_MAX marks "field not yet decoded"
    s->current_picture_ptr->field_poc[0]=
    s->current_picture_ptr->field_poc[1]= INT_MAX;

    h->next_output_pic = NULL;

    assert(s->current_picture_ptr->long_ref==0);

    return 0;
}
01317 
/**
 * Run once per completed frame: derive interlacing/repeat flags (from
 * picture-timing SEI when present), insert the frame into the delayed-pic
 * reorder buffer, and select the next picture to output in display order.
 */
static void decode_postinit(H264Context *h, int setup_finished){
    MpegEncContext * const s = &h->s;
    Picture *out = s->current_picture_ptr;
    Picture *cur = s->current_picture_ptr;
    int i, pics, out_of_order, out_idx;

    s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
    s->current_picture_ptr->pict_type= s->pict_type;

    // already decided the output picture for this frame
    if (h->next_output_pic) return;

    if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
        //FIXME: if we have two PAFF fields in one packet, we can't start the next thread here.
        //If we have one field per packet, we can. The check in decode_nal_units() is not good enough
        //to find this yet, so we assume the worst for now.
        //if (setup_finished)
        //    ff_thread_finish_setup(s->avctx);
        return;
    }

    cur->interlaced_frame = 0;
    cur->repeat_pict = 0;

    /* Signal interlacing information externally. */
    /* Prioritize picture timing SEI information over used decoding process if it exists. */

    if(h->sps.pic_struct_present_flag){
        switch (h->sei_pic_struct)
        {
        case SEI_PIC_STRUCT_FRAME:
            break;
        case SEI_PIC_STRUCT_TOP_FIELD:
        case SEI_PIC_STRUCT_BOTTOM_FIELD:
            cur->interlaced_frame = 1;
            break;
        case SEI_PIC_STRUCT_TOP_BOTTOM:
        case SEI_PIC_STRUCT_BOTTOM_TOP:
            if (FIELD_OR_MBAFF_PICTURE)
                cur->interlaced_frame = 1;
            else
                // try to flag soft telecine progressive
                cur->interlaced_frame = h->prev_interlaced_frame;
            break;
        case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
        case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
            // Signal the possibility of telecined film externally (pic_struct 5,6)
            // From these hints, let the applications decide if they apply deinterlacing.
            cur->repeat_pict = 1;
            break;
        case SEI_PIC_STRUCT_FRAME_DOUBLING:
            // Force progressive here, as doubling interlaced frame is a bad idea.
            cur->repeat_pict = 2;
            break;
        case SEI_PIC_STRUCT_FRAME_TRIPLING:
            cur->repeat_pict = 4;
            break;
        }

        // ct_type (clock timestamp type), when present, overrides the guess above
        if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
            cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
    }else{
        /* Derive interlacing flag from used decoding process. */
        cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
    }
    h->prev_interlaced_frame = cur->interlaced_frame;

    if (cur->field_poc[0] != cur->field_poc[1]){
        /* Derive top_field_first from field pocs. */
        cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
    }else{
        if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
            /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
            if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
              || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
                cur->top_field_first = 1;
            else
                cur->top_field_first = 0;
        }else{
            /* Most likely progressive */
            cur->top_field_first = 0;
        }
    }

    //FIXME do something with unavailable reference frames

    /* Sort B-frames into display order */

    if(h->sps.bitstream_restriction_flag
       && s->avctx->has_b_frames < h->sps.num_reorder_frames){
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay = 0;
    }

    if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
       && !h->sps.bitstream_restriction_flag){
        // no reorder hint in the stream: assume the worst-case delay
        s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
        s->low_delay= 0;
    }

    pics = 0;
    while(h->delayed_pic[pics]) pics++;

    av_assert0(pics <= MAX_DELAYED_PIC_COUNT);

    h->delayed_pic[pics++] = cur;
    // keep the picture alive while it sits in the reorder buffer
    if(cur->reference == 0)
        cur->reference = DELAYED_PIC_REF;

    // find the lowest-POC picture up to the first keyframe/MMCO reset
    out = h->delayed_pic[0];
    out_idx = 0;
    for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
        if(h->delayed_pic[i]->poc < out->poc){
            out = h->delayed_pic[i];
            out_idx = i;
        }
    if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
        h->next_outputed_poc= INT_MIN;
    out_of_order = out->poc < h->next_outputed_poc;

    if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
        { }
    else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
       || (s->low_delay &&
        ((h->next_outputed_poc != INT_MIN && out->poc > h->next_outputed_poc + 2)
         || cur->pict_type == AV_PICTURE_TYPE_B)))
    {
        // reordering detected at runtime: grow the delay buffer
        s->low_delay = 0;
        s->avctx->has_b_frames++;
    }

    if(out_of_order || pics > s->avctx->has_b_frames){
        out->reference &= ~DELAYED_PIC_REF;
        out->owner2 = s; // for frame threading, the owner must be the second field's thread
                         // or else the first thread can release the picture and reuse it unsafely
        for(i=out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i+1];
    }
    if(!out_of_order && pics > s->avctx->has_b_frames){
        h->next_output_pic = out;
        if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
            h->next_outputed_poc = INT_MIN;
        } else
            h->next_outputed_poc = out->poc;
    }else{
        av_log(s->avctx, AV_LOG_DEBUG, "no picture\n");
    }

    if (setup_finished)
        ff_thread_finish_setup(s->avctx);
}
01476 
/**
 * Save the line of pixels directly above the current macroblock into
 * h->top_borders[] so that intra prediction of the macroblock row below
 * can still see the unfiltered samples after the deblocking filter runs.
 * For MBAFF frames, two border lines are kept per macroblock pair (index 0
 * and 1 of h->top_borders), selected via top_idx below.
 */
static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
    MpegEncContext * const s = &h->s;
    uint8_t *top_border;
    int top_idx = 1;
    const int pixel_shift = h->pixel_shift;  // 1 for >8-bit samples (16-bit storage), else 0

    /* step back one line so src_* point at the last line above this MB */
    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            if(!MB_MBAFF){
                /* bottom MB of a frame-coded pair: save the last line of the
                 * top MB (offset 15 for luma, 7 for 4:2:0 chroma) into slot 0 */
                top_border = h->top_borders[0][s->mb_x];
                AV_COPY128(top_border, src_y + 15*linesize);
                if (pixel_shift)
                    AV_COPY128(top_border+16, src_y+15*linesize+16);
                if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(chroma444){
                        if (pixel_shift){
                            AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+48, src_cb + 15*uvlinesize+16);
                            AV_COPY128(top_border+64, src_cr + 15*uvlinesize);
                            AV_COPY128(top_border+80, src_cr + 15*uvlinesize+16);
                        } else {
                            AV_COPY128(top_border+16, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+32, src_cr + 15*uvlinesize);
                        }
                    } else {
                        if (pixel_shift) {
                            AV_COPY128(top_border+32, src_cb+7*uvlinesize);
                            AV_COPY128(top_border+48, src_cr+7*uvlinesize);
                        } else {
                            AV_COPY64(top_border+16, src_cb+7*uvlinesize);
                            AV_COPY64(top_border+24, src_cr+7*uvlinesize);
                        }
                    }
                }
            }
        }else if(MB_MBAFF){
            top_idx = 0;
        }else
            return;
    }

    top_border = h->top_borders[top_idx][s->mb_x];
    // There are two lines saved, the line above the top macroblock of a pair,
    // and the line above the bottom macroblock
    AV_COPY128(top_border, src_y + 16*linesize);
    if (pixel_shift)
        AV_COPY128(top_border+16, src_y+16*linesize+16);

    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            /* NOTE(review): chroma offsets use linesize here; presumably
             * linesize == uvlinesize in 4:4:4 mode — confirm against callers */
            if (pixel_shift){
                AV_COPY128(top_border+32, src_cb + 16*linesize);
                AV_COPY128(top_border+48, src_cb + 16*linesize+16);
                AV_COPY128(top_border+64, src_cr + 16*linesize);
                AV_COPY128(top_border+80, src_cr + 16*linesize+16);
            } else {
                AV_COPY128(top_border+16, src_cb + 16*linesize);
                AV_COPY128(top_border+32, src_cr + 16*linesize);
            }
        } else {
            if (pixel_shift) {
                AV_COPY128(top_border+32, src_cb+8*uvlinesize);
                AV_COPY128(top_border+48, src_cr+8*uvlinesize);
            } else {
                AV_COPY64(top_border+16, src_cb+8*uvlinesize);
                AV_COPY64(top_border+24, src_cr+8*uvlinesize);
            }
        }
    }
}
01551 
/**
 * Exchange (xchg != 0) or copy (xchg == 0) pixel lines between the current
 * macroblock's top neighbourhood and the saved unfiltered border in
 * h->top_borders[].  Used around intra prediction so that prediction reads
 * the unfiltered neighbour samples while the deblocked samples are
 * preserved; XCHG(..., 1) calls always swap regardless of the xchg flag.
 */
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                                  uint8_t *src_cb, uint8_t *src_cr,
                                  int linesize, int uvlinesize,
                                  int xchg, int chroma444,
                                  int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    int deblock_topleft;
    int deblock_top;
    int top_idx = 1;
    uint8_t *top_border_m1;  // saved border of the macroblock to the left
    uint8_t *top_border;     // saved border of this macroblock

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            /* bottom MB of a pair: only MBAFF-coded pairs need the exchange */
            if(!MB_MBAFF)
                return;
        }else{
            top_idx = MB_MBAFF ? 0 : 1;
        }
    }

    if(h->deblocking_filter == 2) {
        /* deblocking across slice boundaries disabled: only neighbours in
         * the same slice count as available */
        deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num;
        deblock_top     = h->top_type;
    } else {
        deblock_topleft = (s->mb_x > 0);
        deblock_top     = (s->mb_y > !!MB_FIELD);
    }

    /* point at the pixel above-left of the macroblock origin */
    src_y  -=   linesize + 1 + pixel_shift;
    src_cb -= uvlinesize + 1 + pixel_shift;
    src_cr -= uvlinesize + 1 + pixel_shift;

    top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
    top_border    = h->top_borders[top_idx][s->mb_x];

/* Swap or copy 8 pixels (16 bytes when pixel_shift) between a and b. */
#define XCHG(a,b,xchg)\
    if (pixel_shift) {\
        if (xchg) {\
            AV_SWAP64(b+0,a+0);\
            AV_SWAP64(b+8,a+8);\
        } else {\
            AV_COPY128(b,a); \
        }\
    } else \
if (xchg) AV_SWAP64(b,a);\
else      AV_COPY64(b,a);

    if(deblock_top){
        if(deblock_topleft){
            XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1);
        }
        XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
        XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
        if(s->mb_x+1 < s->mb_width){
            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1);
        }
    }
    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            if(deblock_topleft){
                XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
            }
            XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
            XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
            XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
            XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
            if(s->mb_x+1 < s->mb_width){
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
            }
        } else {
            if(deblock_top){
                if(deblock_topleft){
                    XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                    XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
                }
                XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
                XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
            }
        }
    }
}
01636 
01637 static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, int index) {
01638     if (high_bit_depth) {
01639         return AV_RN32A(((int32_t*)mb) + index);
01640     } else
01641         return AV_RN16A(mb + index);
01642 }
01643 
01644 static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int index, int value) {
01645     if (high_bit_depth) {
01646         AV_WN32A(((int32_t*)mb) + index, value);
01647     } else
01648         AV_WN16A(mb + index, value);
01649 }
01650 
/**
 * Run spatial (intra) prediction for one plane of an intra macroblock and,
 * for intra4x4/intra8x8, add the decoded residual per sub-block.
 *
 * @param p plane index: 0 = luma; 1/2 = additional planes in 4:4:4 mode
 * @param transform_bypass nonzero for lossless (qpprime_y_zero) bypass mode
 */
static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                                                       int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
    int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
    block_offset += 16*p;  // each plane has its own bank of 16 offsets
    if(IS_INTRA4x4(mb_type)){
        if(simple || !s->encoding){
            if(IS_8x8DCT(mb_type)){
                if(transform_bypass){
                    /* lossless: residual is added directly, no transform */
                    idct_dc_add =
                    idct_add    = s->dsp.add_pixels8;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct8_dc_add;
                    idct_add    = h->h264dsp.h264_idct8_add;
                }
                for(i=0; i<16; i+=4){  // four 8x8 blocks, indexed by 4x4 raster
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        /* High 4:4:4 lossless: fused prediction + residual add
                         * for vertical/horizontal modes */
                        h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        const int nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                                    (h->topright_samples_available<<i)&0x4000, linesize);
                        if(nnz){
                            /* DC-only blocks take the cheaper dc_add path */
                            if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            else
                                idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }
            }else{
                if(transform_bypass){
                    idct_dc_add =
                    idct_add    = s->dsp.add_pixels4;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct_dc_add;
                    idct_add    = h->h264dsp.h264_idct_add;
                }
                for(i=0; i<16; i++){  // sixteen 4x4 blocks
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];

                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        uint8_t *topright;
                        int nnz, tr;
                        uint64_t tr_high;
                        if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
                            const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                            assert(s->mb_y || linesize <= block_offset[i]);
                            if(!topright_avail){
                                /* unavailable top-right: replicate the last
                                 * available top sample across 4 pixels */
                                if (pixel_shift) {
                                    tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
                                    topright= (uint8_t*) &tr_high;
                                } else {
                                    tr= ptr[3 - linesize]*0x01010101;
                                    topright= (uint8_t*) &tr;
                                }
                            }else
                                topright= ptr + (4 << pixel_shift) - linesize;
                        }else
                            topright= NULL;

                        h->hpc.pred4x4[ dir ](ptr, topright, linesize);
                        nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        if(nnz){
                            if(is_h264){
                                if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                    idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                                else
                                    idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            }else
                                ff_svq3_add_idct_c(ptr, h->mb + i*16+p*256, linesize, qscale, 0);
                        }
                    }
                }
            }
        }
    }else{
        /* intra16x16: predict the whole plane, then dequant/transform the
         * separate luma DC coefficients (residual added later in
         * hl_decode_mb_idct_luma) */
        h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
        if(is_h264){
            if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX+p] ]){
                if(!transform_bypass)
                    h->h264dsp.h264_luma_dc_dequant_idct(h->mb+(p*256 << pixel_shift), h->mb_luma_dc[p], h->dequant4_coeff[p][qscale][0]);
                else{
                    /* bypass: scatter the DC values back to each 4x4 block's
                     * DC position (dc_mapping follows the 8x8-grouped scan) */
                    static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
                                                            8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
                    for(i = 0; i < 16; i++)
                        dctcoef_set(h->mb+p*256, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc[p], pixel_shift, i));
                }
            }
        }else
            ff_svq3_luma_dc_dequant_idct_c(h->mb+p*256, h->mb_luma_dc[p], qscale);
    }
}
01753 
/**
 * Add the inverse-transformed residual of one plane onto the already
 * predicted/motion-compensated destination, for macroblocks that are not
 * intra4x4 (those add their residual inside hl_decode_mb_predict_luma).
 *
 * @param p plane index: 0 = luma; 1/2 = additional planes in 4:4:4 mode
 */
static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                                                    int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
    block_offset += 16*p;  // each plane has its own bank of 16 offsets
    if(!IS_INTRA4x4(mb_type)){
        if(is_h264){
            if(IS_INTRA16x16(mb_type)){
                if(transform_bypass){
                    if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
                        /* High 4:4:4 lossless: fused prediction + residual add */
                        h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize);
                    }else{
                        for(i=0; i<16; i++){
                            /* add raw residual; DC may be nonzero even with
                             * zero nnz because it is coded separately */
                            if(h->non_zero_count_cache[ scan8[i+p*16] ] || dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }else{
                    h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                }
            }else if(h->cbp&15){  // inter/other MB with at least one coded luma 8x8
                if(transform_bypass){
                    const int di = IS_8x8DCT(mb_type) ? 4 : 1;
                    idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
                    for(i=0; i<16; i+=di){
                        if(h->non_zero_count_cache[ scan8[i+p*16] ]){
                            idct_add(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }else{
                    if(IS_8x8DCT(mb_type)){
                        h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                    }else{
                        h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
                    }
                }
            }
        }else{
            /* SVQ3 path */
            for(i=0; i<16; i++){
                if(h->non_zero_count_cache[ scan8[i+p*16] ] || h->mb[i*16+p*256]){ //FIXME benchmark weird rule, & below
                    uint8_t * const ptr= dest_y + block_offset[i];
                    ff_svq3_add_idct_c(ptr, h->mb + i*16 + p*256, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
                }
            }
        }
    }
}
01803 
/**
 * Reconstruct one macroblock for 4:2:0 (or monochrome) content: performs
 * intra prediction or motion compensation, adds residuals, and handles
 * IPCM macroblocks.
 *
 * @param simple nonzero selects the fast path (no MBAFF/PCM/gray handling)
 * @param pixel_shift 1 when samples are stored as 16-bit (>8-bit depth)
 */
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t  *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize /*dct_offset*/;
    int i, j;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* is_h264 should always be true if SVQ3 is disabled. */
    const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);

    dest_y  = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
    dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
    dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;

    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2);

    h->list_counts[mb_xy]= h->list_count;

    if (!simple && MB_FIELD) {
        /* field macroblock: double the stride and, for the bottom field,
         * rewind the destination pointers to the field's first line */
        linesize   = h->mb_linesize   = s->linesize * 2;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
        block_offset = &h->block_offset[48];
        if(mb_y&1){ //FIXME move out of this function?
            dest_y -= s->linesize*15;
            dest_cb-= s->uvlinesize*7;
            dest_cr-= s->uvlinesize*7;
        }
        if(FRAME_MBAFF) {
            int list;
            /* re-map reference indices to field references (parity-aware) */
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize   = s->linesize;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
//        dct_offset = s->linesize * 16;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        /* IPCM: raw samples were stored in h->mb by the parser; copy them out */
        if (pixel_shift) {
            const int bit_depth = h->sps.bit_depth_luma;
            int j;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t*)h->mb, 384*bit_depth);

            for (i = 0; i < 16; i++) {
                uint16_t *tmp_y  = (uint16_t*)(dest_y  + i*linesize);
                for (j = 0; j < 16; j++)
                    tmp_y[j] = get_bits(&gb, bit_depth);
            }
            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                if (!h->sps.chroma_format_idc) {
                    /* monochrome: fill chroma planes with the mid-level value */
                    for (i = 0; i < 8; i++) {
                        uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
                        for (j = 0; j < 8; j++) {
                            tmp_cb[j] = 1 << (bit_depth - 1);
                        }
                    }
                    for (i = 0; i < 8; i++) {
                        uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
                        for (j = 0; j < 8; j++) {
                            tmp_cr[j] = 1 << (bit_depth - 1);
                        }
                    }
                } else {
                    for (i = 0; i < 8; i++) {
                        uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
                        for (j = 0; j < 8; j++)
                            tmp_cb[j] = get_bits(&gb, bit_depth);
                    }
                    for (i = 0; i < 8; i++) {
                        uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
                        for (j = 0; j < 8; j++)
                            tmp_cr[j] = get_bits(&gb, bit_depth);
                    }
                }
            }
        } else {
            for (i=0; i<16; i++) {
                memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
            }
            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                if (!h->sps.chroma_format_idc) {
                    /* monochrome: 128 is the 8-bit chroma mid-level */
                    for (i = 0; i < 8; i++) {
                        memset(dest_cb + i*uvlinesize, 128, 8);
                        memset(dest_cr + i*uvlinesize, 128, 8);
                    }
                } else {
                    for (i = 0; i < 8; i++) {
                        memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4,  8);
                        memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4,  8);
                    }
                }
            }
        }
    } else {
        if(IS_INTRA(mb_type)){
            /* restore unfiltered neighbour samples for prediction, then
             * swap the deblocked samples back afterwards */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, 0, simple, pixel_shift);

            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
            }

            hl_decode_mb_predict_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);

            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
        }else if(is_h264){
            /* inter macroblock: motion compensation (bit-depth specific) */
            if (pixel_shift) {
                hl_motion_16(h, dest_y, dest_cb, dest_cr,
                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                             h->h264dsp.weight_h264_pixels_tab,
                             h->h264dsp.biweight_h264_pixels_tab, 0);
            } else
                hl_motion_8(h, dest_y, dest_cb, dest_cr,
                            s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                            s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                            h->h264dsp.weight_h264_pixels_tab,
                            h->h264dsp.biweight_h264_pixels_tab, 0);
        }

        hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);

        /* chroma residual, only when cbp signals coded chroma (bits 4-5) */
        if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
            uint8_t *dest[2] = {dest_cb, dest_cr};
            if(transform_bypass){
                if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16*1 << pixel_shift), uvlinesize);
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 32, h->mb + (16*16*2 << pixel_shift), uvlinesize);
                }else{
                    idct_add = s->dsp.add_pixels4;
                    for(j=1; j<3; j++){
                        for(i=j*16; i<j*16+4; i++){
                            if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
                                idct_add   (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
                        }
                    }
                }
            }else{
                if(is_h264){
                    /* chroma DC is coded separately: dequant it in place first */
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                    h->h264dsp.h264_idct_add8(dest, block_offset,
                                              h->mb, uvlinesize,
                                              h->non_zero_count_cache);
                }
#if CONFIG_SVQ3_DECODER
                else{
                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*1, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*2, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                    for(j=1; j<3; j++){
                        for(i=j*16; i<j*16+4; i++){
                            if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                                uint8_t * const ptr= dest[j-1] + block_offset[i];
                                ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
                            }
                        }
                    }
                }
#endif
            }
        }
    }
    /* clear the coefficient buffer for the next macroblock */
    if(h->cbp || IS_INTRA(mb_type))
    {
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
    }
}
01996 
/**
 * Reconstruct one macroblock for 4:4:4 content, where all three planes are
 * full resolution and share the luma code path (prediction, MC, IDCT).
 *
 * @param simple nonzero selects the fast path (no MBAFF/PCM/gray handling)
 * @param pixel_shift 1 when samples are stored as 16-bit (>8-bit depth)
 */
static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t  *dest[3];
    int linesize;
    int i, j, p;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* with CODEC_FLAG_GRAY only the first (luma) plane is decoded */
    const int plane_count = (simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) ? 3 : 1;

    for (p = 0; p < plane_count; p++)
    {
        dest[p] = s->current_picture.data[p] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
        s->dsp.prefetch(dest[p] + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
    }

    h->list_counts[mb_xy]= h->list_count;

    if (!simple && MB_FIELD) {
        /* field macroblock: double stride; bottom field rewinds to its first line */
        linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
        block_offset = &h->block_offset[48];
        if(mb_y&1) //FIXME move out of this function?
            for (p = 0; p < 3; p++)
                dest[p] -= s->linesize*15;
        if(FRAME_MBAFF) {
            int list;
            /* re-map reference indices to field references (parity-aware) */
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        /* IPCM: raw samples were stored in h->mb by the parser; copy them out */
        if (pixel_shift) {
            const int bit_depth = h->sps.bit_depth_luma;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t*)h->mb, 768*bit_depth);

            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    uint16_t *tmp = (uint16_t*)(dest[p] + i*linesize);
                    for (j = 0; j < 16; j++)
                        tmp[j] = get_bits(&gb, bit_depth);
                }
            }
        } else {
            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    memcpy(dest[p] + i*linesize, h->mb + p*128 + i*8, 16);
                }
            }
        }
    } else {
        if(IS_INTRA(mb_type)){
            /* restore unfiltered neighbour samples for prediction, then
             * swap the deblocked samples back afterwards */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 1, 1, simple, pixel_shift);

            for (p = 0; p < plane_count; p++)
                hl_decode_mb_predict_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);

            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift);
        }else{
            /* inter macroblock: motion compensation (bit-depth specific) */
            if (pixel_shift) {
                hl_motion_16(h, dest[0], dest[1], dest[2],
                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                             h->h264dsp.weight_h264_pixels_tab,
                             h->h264dsp.biweight_h264_pixels_tab, 1);
            } else
                hl_motion_8(h, dest[0], dest[1], dest[2],
                            s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                            s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                            h->h264dsp.weight_h264_pixels_tab,
                            h->h264dsp.biweight_h264_pixels_tab, 1);
        }

        for (p = 0; p < plane_count; p++)
            hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
    }
    /* clear the coefficient buffer for the next macroblock */
    if(h->cbp || IS_INTRA(mb_type))
    {
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
    }
}
02099 
/**
 * Generate hl_decode_mb_simple_8()/hl_decode_mb_simple_16(): decode one
 * macroblock via hl_decode_mb_internal() with simple=1 and the given
 * pixel shift (sh=0 for 8-bit samples, sh=1 for 16-bit samples).
 * The 'bits' argument only names the generated function.
 */
#define hl_decode_mb_simple(sh, bits) \
static void hl_decode_mb_simple_ ## bits(H264Context *h){ \
    hl_decode_mb_internal(h, 1, sh); \
}
hl_decode_mb_simple(0, 8);
hl_decode_mb_simple(1, 16);
02109 
/* Catch-all 4:2:0 macroblock decode path (simple=0); chosen by
 * ff_h264_hl_decode_mb() when the cheap "simple" variants do not apply. */
static void av_noinline hl_decode_mb_complex(H264Context *h){
    hl_decode_mb_internal(h, 0, h->pixel_shift);
}
02116 
/* Catch-all 4:4:4 macroblock decode path (simple=0, any pixel shift). */
static void av_noinline hl_decode_mb_444_complex(H264Context *h){
    hl_decode_mb_444_internal(h, 0, h->pixel_shift);
}
02120 
/* Fast 4:4:4 macroblock decode path (simple=1, 8-bit: pixel_shift==0). */
static void av_noinline hl_decode_mb_444_simple(H264Context *h){
    hl_decode_mb_444_internal(h, 1, 0);
}
02124 
/**
 * Decode the current macroblock (h->mb_xy) into the current picture,
 * dispatching to the cheapest specialization that can handle it.
 */
void ff_h264_hl_decode_mb(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    // The fast "simple" paths are disabled for size-optimized builds,
    // PCM macroblocks, qscale==0 and contexts flagged complex.
    int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;

    if (CHROMA444) {
        if(is_complex || h->pixel_shift)
            hl_decode_mb_444_complex(h);
        else
            hl_decode_mb_444_simple(h);
    } else if (is_complex) {
        hl_decode_mb_complex(h);
    } else if (h->pixel_shift) {
        hl_decode_mb_simple_16(h);  // high bit depth (16-bit samples)
    } else
        hl_decode_mb_simple_8(h);
}
02143 
02144 static int pred_weight_table(H264Context *h){
02145     MpegEncContext * const s = &h->s;
02146     int list, i;
02147     int luma_def, chroma_def;
02148 
02149     h->use_weight= 0;
02150     h->use_weight_chroma= 0;
02151     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
02152     if(h->sps.chroma_format_idc)
02153         h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
02154     luma_def = 1<<h->luma_log2_weight_denom;
02155     chroma_def = 1<<h->chroma_log2_weight_denom;
02156 
02157     for(list=0; list<2; list++){
02158         h->luma_weight_flag[list]   = 0;
02159         h->chroma_weight_flag[list] = 0;
02160         for(i=0; i<h->ref_count[list]; i++){
02161             int luma_weight_flag, chroma_weight_flag;
02162 
02163             luma_weight_flag= get_bits1(&s->gb);
02164             if(luma_weight_flag){
02165                 h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
02166                 h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
02167                 if(   h->luma_weight[i][list][0] != luma_def
02168                    || h->luma_weight[i][list][1] != 0) {
02169                     h->use_weight= 1;
02170                     h->luma_weight_flag[list]= 1;
02171                 }
02172             }else{
02173                 h->luma_weight[i][list][0]= luma_def;
02174                 h->luma_weight[i][list][1]= 0;
02175             }
02176 
02177             if(h->sps.chroma_format_idc){
02178                 chroma_weight_flag= get_bits1(&s->gb);
02179                 if(chroma_weight_flag){
02180                     int j;
02181                     for(j=0; j<2; j++){
02182                         h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
02183                         h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
02184                         if(   h->chroma_weight[i][list][j][0] != chroma_def
02185                            || h->chroma_weight[i][list][j][1] != 0) {
02186                             h->use_weight_chroma= 1;
02187                             h->chroma_weight_flag[list]= 1;
02188                         }
02189                     }
02190                 }else{
02191                     int j;
02192                     for(j=0; j<2; j++){
02193                         h->chroma_weight[i][list][j][0]= chroma_def;
02194                         h->chroma_weight[i][list][j][1]= 0;
02195                     }
02196                 }
02197             }
02198         }
02199         if(h->slice_type_nos != AV_PICTURE_TYPE_B) break;
02200     }
02201     h->use_weight= h->use_weight || h->use_weight_chroma;
02202     return 0;
02203 }
02204 
/**
 * Initialize the implicit weight table used for B-slice weighted
 * prediction (w = 64 - dist_scale_factor, POC-distance based).
 *
 * @param field -1 fills both weight slots from the frame/current-field
 *              POC; 0 or 1 fills only that field's slot using the
 *              extended reference lists starting at index 16
 *              (presumably the per-field MBAFF lists -- TODO confirm
 *              against the ref list setup code).
 */
static void implicit_weight_table(H264Context *h, int field){
    MpegEncContext * const s = &h->s;
    int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;

    for (i = 0; i < 2; i++) {
        h->luma_weight_flag[i]   = 0;
        h->chroma_weight_flag[i] = 0;
    }

    if(field < 0){
        if (s->picture_structure == PICT_FRAME) {
            cur_poc = s->current_picture_ptr->poc;
        } else {
            cur_poc = s->current_picture_ptr->field_poc[s->picture_structure - 1];
        }
    /* NOTE(review): indentation is misleading -- this block belongs to the
     * field<0 branch above. One ref in each list with POCs symmetric
     * around the current picture: disable weighting entirely (the
     * implicit weights would presumably all be the 32/32 default). */
    if(   h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF
       && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
        h->use_weight= 0;
        h->use_weight_chroma= 0;
        return;
    }
        ref_start= 0;
        ref_count0= h->ref_count[0];
        ref_count1= h->ref_count[1];
    }else{
        cur_poc = s->current_picture_ptr->field_poc[field];
        ref_start= 16;
        ref_count0= 16+2*h->ref_count[0];
        ref_count1= 16+2*h->ref_count[1];
    }

    h->use_weight= 2;               // 2 == implicit weighting mode
    h->use_weight_chroma= 2;
    h->luma_log2_weight_denom= 5;
    h->chroma_log2_weight_denom= 5;

    for(ref0=ref_start; ref0 < ref_count0; ref0++){
        int poc0 = h->ref_list[0][ref0].poc;
        for(ref1=ref_start; ref1 < ref_count1; ref1++){
            int w = 32;             // default: equal weighting
            // long-term refs always use the default weight
            if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) {
                int poc1 = h->ref_list[1][ref1].poc;
                int td = av_clip(poc1 - poc0, -128, 127);
                if(td){
                    int tb = av_clip(cur_poc - poc0, -128, 127);
                    int tx = (16384 + (FFABS(td) >> 1)) / td;
                    int dist_scale_factor = (tb*tx + 32) >> 8;
                    if(dist_scale_factor >= -64 && dist_scale_factor <= 128)
                        w = 64 - dist_scale_factor;
                }
            }
            if(field<0){
                h->implicit_weight[ref0][ref1][0]=
                h->implicit_weight[ref0][ref1][1]= w;
            }else{
                h->implicit_weight[ref0][ref1][field]=w;
            }
        }
    }
}
02270 
/**
 * Handle an IDR: drop every reference picture and clear the
 * frame_num/POC prediction state so decoding restarts cleanly.
 */
static void idr(H264Context *h){
    ff_h264_remove_all_refs(h);
    h->prev_frame_num= 0;
    h->prev_frame_num_offset= 0;
    h->prev_poc_msb=
    h->prev_poc_lsb= 0;
}
02281 
02282 /* forget old pics after a seek */
static void flush_dpb(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    int i;
    // NOTE(review): the <= bound touches MAX_DELAYED_PIC_COUNT+1 slots;
    // assumes delayed_pic[] has at least that many entries -- confirm
    // against the H264Context declaration.
    for(i=0; i<=MAX_DELAYED_PIC_COUNT; i++) {
        if(h->delayed_pic[i])
            h->delayed_pic[i]->reference= 0;
        h->delayed_pic[i]= NULL;
    }
    h->outputed_poc=h->next_outputed_poc= INT_MIN;
    h->prev_interlaced_frame = 1;
    idr(h);   // drop all refs and reset POC/frame_num prediction state
    if(h->s.current_picture_ptr)
        h->s.current_picture_ptr->reference= 0;
    h->s.first_field= 0;
    ff_h264_reset_sei(h);
    ff_mpeg_flush(avctx);
}
02300 
/**
 * Compute the picture order count(s) of the current picture from the
 * already-parsed slice-header syntax, covering the three poc_type
 * modes of spec 8.2.1, and store them in the current Picture.
 * @return 0 (always succeeds)
 */
static int init_poc(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int max_frame_num= 1<<h->sps.log2_max_frame_num;
    int field_poc[2];
    Picture *cur = s->current_picture_ptr;

    // account for frame_num wraparound
    h->frame_num_offset= h->prev_frame_num_offset;
    if(h->frame_num < h->prev_frame_num)
        h->frame_num_offset += max_frame_num;

    if(h->sps.poc_type==0){
        // poc_type 0: reconstruct the POC MSB from the transmitted LSB
        // by detecting LSB wraparound relative to the previous picture
        const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;

        if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb + max_poc_lsb;
        else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb - max_poc_lsb;
        else
            h->poc_msb = h->prev_poc_msb;
//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
        field_poc[0] =
        field_poc[1] = h->poc_msb + h->poc_lsb;
        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc_bottom;
    }else if(h->sps.poc_type==1){
        // poc_type 1: POC derived from frame_num and the per-cycle
        // offsets signalled in the SPS
        int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
        int i;

        if(h->sps.poc_cycle_length != 0)
            abs_frame_num = h->frame_num_offset + h->frame_num;
        else
            abs_frame_num = 0;

        if(h->nal_ref_idc==0 && abs_frame_num > 0)
            abs_frame_num--;

        expected_delta_per_poc_cycle = 0;
        for(i=0; i < h->sps.poc_cycle_length; i++)
            expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse

        if(abs_frame_num > 0){
            int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
            int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;

            expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
            for(i = 0; i <= frame_num_in_poc_cycle; i++)
                expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
        } else
            expectedpoc = 0;

        if(h->nal_ref_idc == 0)
            expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;

        field_poc[0] = expectedpoc + h->delta_poc[0];
        field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;

        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc[1];
    }else{
        // poc_type 2: POC follows decoding order (2*frame_num, with
        // non-reference pictures one less)
        int poc= 2*(h->frame_num_offset + h->frame_num);

        if(!h->nal_ref_idc)
            poc--;

        field_poc[0]= poc;
        field_poc[1]= poc;
    }

    // only store the POC of fields actually coded by this picture
    if(s->picture_structure != PICT_BOTTOM_FIELD)
        s->current_picture_ptr->field_poc[0]= field_poc[0];
    if(s->picture_structure != PICT_TOP_FIELD)
        s->current_picture_ptr->field_poc[1]= field_poc[1];
    cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);

    return 0;
}
02377 
02378 
02382 static void init_scan_tables(H264Context *h){
02383     int i;
02384     for(i=0; i<16; i++){
02385 #define T(x) (x>>2) | ((x<<2) & 0xF)
02386         h->zigzag_scan[i] = T(zigzag_scan[i]);
02387         h-> field_scan[i] = T( field_scan[i]);
02388 #undef T
02389     }
02390     for(i=0; i<64; i++){
02391 #define T(x) (x>>3) | ((x&7)<<3)
02392         h->zigzag_scan8x8[i]       = T(ff_zigzag_direct[i]);
02393         h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
02394         h->field_scan8x8[i]        = T(field_scan8x8[i]);
02395         h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
02396 #undef T
02397     }
02398     if(h->sps.transform_bypass){ //FIXME same ugly
02399         h->zigzag_scan_q0          = zigzag_scan;
02400         h->zigzag_scan8x8_q0       = ff_zigzag_direct;
02401         h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
02402         h->field_scan_q0           = field_scan;
02403         h->field_scan8x8_q0        = field_scan8x8;
02404         h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
02405     }else{
02406         h->zigzag_scan_q0          = h->zigzag_scan;
02407         h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
02408         h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
02409         h->field_scan_q0           = h->field_scan;
02410         h->field_scan8x8_q0        = h->field_scan8x8;
02411         h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
02412     }
02413 }
02414 
/**
 * Finish decoding the current field (or frame): report completion to
 * waiting frame threads, run reference picture marking, commit the
 * prev_* POC/frame_num prediction state, and close the frame in the
 * MPV/hwaccel layers.
 *
 * @param in_setup nonzero when called while setting up the next field;
 *                 progress is then not reported and, under frame
 *                 threading, the state commit is skipped.
 */
static void field_end(H264Context *h, int in_setup){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    s->mb_y= 0;

    // let frame threads waiting on this picture proceed
    if (!in_setup && !s->dropable)
        ff_thread_report_progress((AVFrame*)s->current_picture_ptr, (16*s->mb_height >> FIELD_PICTURE) - 1,
                                 s->picture_structure==PICT_BOTTOM_FIELD);

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_set_reference_frames(s);

    if(in_setup || !(avctx->active_thread_type&FF_THREAD_FRAME)){
        // dropable (non-reference) pictures do not update marking/POC state
        if(!s->dropable) {
            ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
            h->prev_poc_msb= h->poc_msb;
            h->prev_poc_lsb= h->poc_lsb;
        }
        h->prev_frame_num_offset= h->frame_num_offset;
        h->prev_frame_num= h->frame_num;
        h->outputed_poc = h->next_outputed_poc;
    }

    if (avctx->hwaccel) {
        if (avctx->hwaccel->end_frame(avctx) < 0)
            av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
    }

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_picture_complete(s);

    /*
     * FIXME: Error handling code does not seem to support interlaced
     * when slices span multiple rows
     * The ff_er_add_slice calls don't work right for bottom
     * fields; they cause massive erroneous error concealing
     * Error marking covers both fields (top and bottom).
     * This causes a mismatched s->error_count
     * and a bad error table. Further, the error count goes to
     * INT_MAX when called for bottom field, because mb_y is
     * past end by one (callers fault) and resync_mb_y != 0
     * causes problems for the first MB line, too.
     */
    if (!FIELD_PICTURE)
        ff_er_frame_end(s);

    MPV_frame_end(s);

    h->current_slice=0;
}
02465 
02469 static void clone_slice(H264Context *dst, H264Context *src)
02470 {
02471     memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
02472     dst->s.current_picture_ptr  = src->s.current_picture_ptr;
02473     dst->s.current_picture      = src->s.current_picture;
02474     dst->s.linesize             = src->s.linesize;
02475     dst->s.uvlinesize           = src->s.uvlinesize;
02476     dst->s.first_field          = src->s.first_field;
02477 
02478     dst->prev_poc_msb           = src->prev_poc_msb;
02479     dst->prev_poc_lsb           = src->prev_poc_lsb;
02480     dst->prev_frame_num_offset  = src->prev_frame_num_offset;
02481     dst->prev_frame_num         = src->prev_frame_num;
02482     dst->short_ref_count        = src->short_ref_count;
02483 
02484     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
02485     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
02486     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
02487     memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
02488 
02489     memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
02490     memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
02491 }
02492 
02500 int ff_h264_get_profile(SPS *sps)
02501 {
02502     int profile = sps->profile_idc;
02503 
02504     switch(sps->profile_idc) {
02505     case FF_PROFILE_H264_BASELINE:
02506         // constraint_set1_flag set to 1
02507         profile |= (sps->constraint_set_flags & 1<<1) ? FF_PROFILE_H264_CONSTRAINED : 0;
02508         break;
02509     case FF_PROFILE_H264_HIGH_10:
02510     case FF_PROFILE_H264_HIGH_422:
02511     case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
02512         // constraint_set3_flag set to 1
02513         profile |= (sps->constraint_set_flags & 1<<3) ? FF_PROFILE_H264_INTRA : 0;
02514         break;
02515     }
02516 
02517     return profile;
02518 }
02519 
02529 static int decode_slice_header(H264Context *h, H264Context *h0){
02530     MpegEncContext * const s = &h->s;
02531     MpegEncContext * const s0 = &h0->s;
02532     unsigned int first_mb_in_slice;
02533     unsigned int pps_id;
02534     int num_ref_idx_active_override_flag;
02535     unsigned int slice_type, tmp, i, j;
02536     int default_ref_list_done = 0;
02537     int last_pic_structure;
02538 
02539     s->dropable= h->nal_ref_idc == 0;
02540 
02541     /* FIXME: 2tap qpel isn't implemented for high bit depth. */
02542     if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc && !h->pixel_shift){
02543         s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
02544         s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
02545     }else{
02546         s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
02547         s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
02548     }
02549 
02550     first_mb_in_slice= get_ue_golomb(&s->gb);
02551 
02552     if(first_mb_in_slice == 0){ //FIXME better field boundary detection
02553         if(h0->current_slice && FIELD_PICTURE){
02554             field_end(h, 1);
02555         }
02556 
02557         h0->current_slice = 0;
02558         if (!s0->first_field)
02559             s->current_picture_ptr= NULL;
02560     }
02561 
02562     slice_type= get_ue_golomb_31(&s->gb);
02563     if(slice_type > 9){
02564         av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
02565         return -1;
02566     }
02567     if(slice_type > 4){
02568         slice_type -= 5;
02569         h->slice_type_fixed=1;
02570     }else
02571         h->slice_type_fixed=0;
02572 
02573     slice_type= golomb_to_pict_type[ slice_type ];
02574     if (slice_type == AV_PICTURE_TYPE_I
02575         || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
02576         default_ref_list_done = 1;
02577     }
02578     h->slice_type= slice_type;
02579     h->slice_type_nos= slice_type & 3;
02580 
02581     s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
02582 
02583     pps_id= get_ue_golomb(&s->gb);
02584     if(pps_id>=MAX_PPS_COUNT){
02585         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
02586         return -1;
02587     }
02588     if(!h0->pps_buffers[pps_id]) {
02589         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
02590         return -1;
02591     }
02592     h->pps= *h0->pps_buffers[pps_id];
02593 
02594     if(!h0->sps_buffers[h->pps.sps_id]) {
02595         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
02596         return -1;
02597     }
02598     h->sps = *h0->sps_buffers[h->pps.sps_id];
02599 
02600     s->avctx->profile = ff_h264_get_profile(&h->sps);
02601     s->avctx->level   = h->sps.level_idc;
02602     s->avctx->refs    = h->sps.ref_frame_count;
02603 
02604     if(h == h0 && h->dequant_coeff_pps != pps_id){
02605         h->dequant_coeff_pps = pps_id;
02606         init_dequant_tables(h);
02607     }
02608 
02609     s->mb_width= h->sps.mb_width;
02610     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
02611 
02612     h->b_stride=  s->mb_width*4;
02613 
02614     s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1);
02615     if(h->sps.frame_mbs_only_flag)
02616         s->height= 16*s->mb_height - (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
02617     else
02618         s->height= 16*s->mb_height - (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);
02619 
02620     if (FFALIGN(s->avctx->width,  16) == s->width &&
02621         FFALIGN(s->avctx->height, 16) == s->height) {
02622         s->width  = s->avctx->width;
02623         s->height = s->avctx->height;
02624     }
02625 
02626     if (s->context_initialized
02627         && (   s->width != s->avctx->width || s->height != s->avctx->height
02628             || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
02629         if(h != h0 || (HAVE_THREADS && h->s.avctx->active_thread_type & FF_THREAD_FRAME)) {
02630             av_log_missing_feature(s->avctx, "Width/height changing with threads is", 0);
02631             return AVERROR_PATCHWELCOME;   // width / height changed during parallelized decoding
02632         }
02633         free_tables(h, 0);
02634         flush_dpb(s->avctx);
02635         MPV_common_end(s);
02636         h->list_count = 0;
02637     }
02638     if (!s->context_initialized) {
02639         if (h != h0) {
02640             av_log(h->s.avctx, AV_LOG_ERROR, "Cannot (re-)initialize context during parallel decoding.\n");
02641             return -1;
02642         }
02643 
02644         avcodec_set_dimensions(s->avctx, s->width, s->height);
02645         s->avctx->sample_aspect_ratio= h->sps.sar;
02646         av_assert0(s->avctx->sample_aspect_ratio.den);
02647 
02648         h->s.avctx->coded_width = 16*s->mb_width;
02649         h->s.avctx->coded_height = 16*s->mb_height;
02650 
02651         if(h->sps.video_signal_type_present_flag){
02652             s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
02653             if(h->sps.colour_description_present_flag){
02654                 s->avctx->color_primaries = h->sps.color_primaries;
02655                 s->avctx->color_trc       = h->sps.color_trc;
02656                 s->avctx->colorspace      = h->sps.colorspace;
02657             }
02658         }
02659 
02660         if(h->sps.timing_info_present_flag){
02661             int64_t den= h->sps.time_scale;
02662             if(h->x264_build < 44U)
02663                 den *= 2;
02664             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
02665                       h->sps.num_units_in_tick, den, 1<<30);
02666         }
02667 
02668         switch (h->sps.bit_depth_luma) {
02669             case 9 :
02670                 s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : PIX_FMT_YUV420P9;
02671                 break;
02672             case 10 :
02673                 s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P10 : PIX_FMT_YUV420P10;
02674                 break;
02675             default:
02676                 if (CHROMA444){
02677                     s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P;
02678                 }else{
02679                     s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
02680                                                              s->avctx->codec->pix_fmts ?
02681                                                              s->avctx->codec->pix_fmts :
02682                                                              s->avctx->color_range == AVCOL_RANGE_JPEG ?
02683                                                              hwaccel_pixfmt_list_h264_jpeg_420 :
02684                                                              ff_hwaccel_pixfmt_list_420);
02685                 }
02686         }
02687 
02688         s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
02689 
02690         if (MPV_common_init(s) < 0) {
02691             av_log(h->s.avctx, AV_LOG_ERROR, "MPV_common_init() failed.\n");
02692             return -1;
02693         }
02694         s->first_field = 0;
02695         h->prev_interlaced_frame = 1;
02696 
02697         init_scan_tables(h);
02698         if (ff_h264_alloc_tables(h) < 0) {
02699             av_log(h->s.avctx, AV_LOG_ERROR, "Could not allocate memory for h264\n");
02700             return AVERROR(ENOMEM);
02701         }
02702 
02703         if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_SLICE)) {
02704             if (context_init(h) < 0) {
02705                 av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
02706                 return -1;
02707             }
02708         } else {
02709             for(i = 1; i < s->avctx->thread_count; i++) {
02710                 H264Context *c;
02711                 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
02712                 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
02713                 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
02714                 c->h264dsp = h->h264dsp;
02715                 c->sps = h->sps;
02716                 c->pps = h->pps;
02717                 c->pixel_shift = h->pixel_shift;
02718                 init_scan_tables(c);
02719                 clone_tables(c, h, i);
02720             }
02721 
02722             for(i = 0; i < s->avctx->thread_count; i++)
02723                 if (context_init(h->thread_context[i]) < 0) {
02724                     av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
02725                     return -1;
02726                 }
02727         }
02728     }
02729 
02730     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
02731 
02732     h->mb_mbaff = 0;
02733     h->mb_aff_frame = 0;
02734     last_pic_structure = s0->picture_structure;
02735     if(h->sps.frame_mbs_only_flag){
02736         s->picture_structure= PICT_FRAME;
02737     }else{
02738         if(get_bits1(&s->gb)) { //field_pic_flag
02739             s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
02740         } else {
02741             s->picture_structure= PICT_FRAME;
02742             h->mb_aff_frame = h->sps.mb_aff;
02743         }
02744     }
02745     h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
02746 
02747     if(h0->current_slice == 0){
02748         // Shorten frame num gaps so we don't have to allocate reference frames just to throw them away
02749         if(h->frame_num != h->prev_frame_num) {
02750             int unwrap_prev_frame_num = h->prev_frame_num, max_frame_num = 1<<h->sps.log2_max_frame_num;
02751 
02752             if (unwrap_prev_frame_num > h->frame_num) unwrap_prev_frame_num -= max_frame_num;
02753 
02754             if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) {
02755                 unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1;
02756                 if (unwrap_prev_frame_num < 0)
02757                     unwrap_prev_frame_num += max_frame_num;
02758 
02759                 h->prev_frame_num = unwrap_prev_frame_num;
02760             }
02761         }
02762 
02763         while(h->frame_num !=  h->prev_frame_num &&
02764               h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
02765             Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
02766             av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
02767             if (ff_h264_frame_start(h) < 0)
02768                 return -1;
02769             h->prev_frame_num++;
02770             h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
02771             s->current_picture_ptr->frame_num= h->prev_frame_num;
02772             ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 0);
02773             ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 1);
02774             ff_generate_sliding_window_mmcos(h);
02775             ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
02776             /* Error concealment: if a ref is missing, copy the previous ref in its place.
02777              * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions
02778              * about there being no actual duplicates.
02779              * FIXME: this doesn't copy padding for out-of-frame motion vectors.  Given we're
02780              * concealing a lost frame, this probably isn't noticable by comparison, but it should
02781              * be fixed. */
02782             if (h->short_ref_count) {
02783                 if (prev) {
02784                     av_image_copy(h->short_ref[0]->data, h->short_ref[0]->linesize,
02785                                   (const uint8_t**)prev->data, prev->linesize,
02786                                   s->avctx->pix_fmt, s->mb_width*16, s->mb_height*16);
02787                     h->short_ref[0]->poc = prev->poc+2;
02788                 }
02789                 h->short_ref[0]->frame_num = h->prev_frame_num;
02790             }
02791         }
02792 
02793         /* See if we have a decoded first field looking for a pair... */
02794         if (s0->first_field) {
02795             assert(s0->current_picture_ptr);
02796             assert(s0->current_picture_ptr->data[0]);
02797             assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
02798 
02799             /* figure out if we have a complementary field pair */
02800             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
02801                 /*
02802                  * Previous field is unmatched. Don't display it, but let it
02803                  * remain for reference if marked as such.
02804                  */
02805                 s0->current_picture_ptr = NULL;
02806                 s0->first_field = FIELD_PICTURE;
02807 
02808             } else {
02809                 if (s0->current_picture_ptr->frame_num != h->frame_num) {
02810                     /*
02811                      * This and previous field had
02812                      * different frame_nums. Consider this field first in
02813                      * pair. Throw away previous field except for reference
02814                      * purposes.
02815                      */
02816                     s0->first_field = 1;
02817                     s0->current_picture_ptr = NULL;
02818 
02819                 } else {
02820                     /* Second field in complementary pair */
02821                     s0->first_field = 0;
02822                 }
02823             }
02824 
02825         } else {
02826             /* Frame or first field in a potentially complementary pair */
02827             assert(!s0->current_picture_ptr);
02828             s0->first_field = FIELD_PICTURE;
02829         }
02830 
02831         if(!FIELD_PICTURE || s0->first_field) {
02832             if (ff_h264_frame_start(h) < 0) {
02833                 s0->first_field = 0;
02834                 return -1;
02835             }
02836         } else {
02837             ff_release_unused_pictures(s, 0);
02838         }
02839     }
02840     if(h != h0)
02841         clone_slice(h, h0);
02842 
02843     s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
02844 
02845     assert(s->mb_num == s->mb_width * s->mb_height);
02846     if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
02847        first_mb_in_slice                    >= s->mb_num){
02848         av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
02849         return -1;
02850     }
02851     s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
02852     s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
02853     if (s->picture_structure == PICT_BOTTOM_FIELD)
02854         s->resync_mb_y = s->mb_y = s->mb_y + 1;
02855     assert(s->mb_y < s->mb_height);
02856 
02857     if(s->picture_structure==PICT_FRAME){
02858         h->curr_pic_num=   h->frame_num;
02859         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
02860     }else{
02861         h->curr_pic_num= 2*h->frame_num + 1;
02862         h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
02863     }
02864 
02865     if(h->nal_unit_type == NAL_IDR_SLICE){
02866         get_ue_golomb(&s->gb); /* idr_pic_id */
02867     }
02868 
02869     if(h->sps.poc_type==0){
02870         h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
02871 
02872         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
02873             h->delta_poc_bottom= get_se_golomb(&s->gb);
02874         }
02875     }
02876 
02877     if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
02878         h->delta_poc[0]= get_se_golomb(&s->gb);
02879 
02880         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
02881             h->delta_poc[1]= get_se_golomb(&s->gb);
02882     }
02883 
02884     init_poc(h);
02885 
02886     if(h->pps.redundant_pic_cnt_present){
02887         h->redundant_pic_count= get_ue_golomb(&s->gb);
02888     }
02889 
02890     //set defaults, might be overridden a few lines later
02891     h->ref_count[0]= h->pps.ref_count[0];
02892     h->ref_count[1]= h->pps.ref_count[1];
02893 
02894     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
02895         unsigned max= s->picture_structure == PICT_FRAME ? 15 : 31;
02896 
02897         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
02898             h->direct_spatial_mv_pred= get_bits1(&s->gb);
02899         }
02900         num_ref_idx_active_override_flag= get_bits1(&s->gb);
02901 
02902         if(num_ref_idx_active_override_flag){
02903             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
02904             if (h->ref_count[0] < 1)
02905                 return AVERROR_INVALIDDATA;
02906             if (h->slice_type_nos == AV_PICTURE_TYPE_B) {
02907                 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
02908                 if (h->ref_count[1] < 1)
02909                     return AVERROR_INVALIDDATA;
02910             }
02911         }
02912 
02913         if (h->ref_count[0]-1 > max || h->ref_count[1]-1 > max){
02914             av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
02915             h->ref_count[0] = h->ref_count[1] = 1;
02916             return AVERROR_INVALIDDATA;
02917         }
02918 
02919         if(h->slice_type_nos == AV_PICTURE_TYPE_B)
02920             h->list_count= 2;
02921         else
02922             h->list_count= 1;
02923     }else
02924         h->ref_count[1]= h->ref_count[0]= h->list_count= 0;
02925 
02926     if(!default_ref_list_done){
02927         ff_h264_fill_default_ref_list(h);
02928     }
02929 
02930     if(h->slice_type_nos!=AV_PICTURE_TYPE_I && ff_h264_decode_ref_pic_list_reordering(h) < 0) {
02931         h->ref_count[1]= h->ref_count[0]= 0;
02932         return -1;
02933     }
02934 
02935     if(h->slice_type_nos!=AV_PICTURE_TYPE_I){
02936         s->last_picture_ptr= &h->ref_list[0][0];
02937         ff_copy_picture(&s->last_picture, s->last_picture_ptr);
02938     }
02939     if(h->slice_type_nos==AV_PICTURE_TYPE_B){
02940         s->next_picture_ptr= &h->ref_list[1][0];
02941         ff_copy_picture(&s->next_picture, s->next_picture_ptr);
02942     }
02943 
02944     if(   (h->pps.weighted_pred          && h->slice_type_nos == AV_PICTURE_TYPE_P )
02945        ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== AV_PICTURE_TYPE_B ) )
02946         pred_weight_table(h);
02947     else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
02948         implicit_weight_table(h, -1);
02949     }else {
02950         h->use_weight = 0;
02951         for (i = 0; i < 2; i++) {
02952             h->luma_weight_flag[i]   = 0;
02953             h->chroma_weight_flag[i] = 0;
02954         }
02955     }
02956 
02957     if(h->nal_ref_idc)
02958         ff_h264_decode_ref_pic_marking(h0, &s->gb);
02959 
02960     if(FRAME_MBAFF){
02961         ff_h264_fill_mbaff_ref_list(h);
02962 
02963         if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
02964             implicit_weight_table(h, 0);
02965             implicit_weight_table(h, 1);
02966         }
02967     }
02968 
02969     if(h->slice_type_nos==AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred)
02970         ff_h264_direct_dist_scale_factor(h);
02971     ff_h264_direct_ref_list_init(h);
02972 
02973     if( h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac ){
02974         tmp = get_ue_golomb_31(&s->gb);
02975         if(tmp > 2){
02976             av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
02977             return -1;
02978         }
02979         h->cabac_init_idc= tmp;
02980     }
02981 
02982     h->last_qscale_diff = 0;
02983     tmp = h->pps.init_qp + get_se_golomb(&s->gb);
02984     if(tmp>51+6*(h->sps.bit_depth_luma-8)){
02985         av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
02986         return -1;
02987     }
02988     s->qscale= tmp;
02989     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
02990     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
02991     //FIXME qscale / qp ... stuff
02992     if(h->slice_type == AV_PICTURE_TYPE_SP){
02993         get_bits1(&s->gb); /* sp_for_switch_flag */
02994     }
02995     if(h->slice_type==AV_PICTURE_TYPE_SP || h->slice_type == AV_PICTURE_TYPE_SI){
02996         get_se_golomb(&s->gb); /* slice_qs_delta */
02997     }
02998 
02999     h->deblocking_filter = 1;
03000     h->slice_alpha_c0_offset = 52;
03001     h->slice_beta_offset = 52;
03002     if( h->pps.deblocking_filter_parameters_present ) {
03003         tmp= get_ue_golomb_31(&s->gb);
03004         if(tmp > 2){
03005             av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
03006             return -1;
03007         }
03008         h->deblocking_filter= tmp;
03009         if(h->deblocking_filter < 2)
03010             h->deblocking_filter^= 1; // 1<->0
03011 
03012         if( h->deblocking_filter ) {
03013             h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
03014             h->slice_beta_offset     += get_se_golomb(&s->gb) << 1;
03015             if(   h->slice_alpha_c0_offset > 104U
03016                || h->slice_beta_offset     > 104U){
03017                 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);
03018                 return -1;
03019             }
03020         }
03021     }
03022 
03023     if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
03024        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != AV_PICTURE_TYPE_I)
03025        ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == AV_PICTURE_TYPE_B)
03026        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
03027         h->deblocking_filter= 0;
03028 
03029     if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
03030         if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
03031             /* Cheat slightly for speed:
03032                Do not bother to deblock across slices. */
03033             h->deblocking_filter = 2;
03034         } else {
03035             h0->max_contexts = 1;
03036             if(!h0->single_decode_warning) {
03037                 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
03038                 h0->single_decode_warning = 1;
03039             }
03040             if (h != h0) {
03041                 av_log(h->s.avctx, AV_LOG_ERROR, "Deblocking switched inside frame.\n");
03042                 return 1;
03043             }
03044         }
03045     }
03046     h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
03047 
03048 #if 0 //FMO
03049     if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
03050         slice_group_change_cycle= get_bits(&s->gb, ?);
03051 #endif
03052 
03053     h0->last_slice_type = slice_type;
03054     h->slice_num = ++h0->current_slice;
03055     if(h->slice_num >= MAX_SLICES){
03056         av_log(s->avctx, AV_LOG_ERROR, "Too many slices (%d >= %d), increase MAX_SLICES and recompile\n", h->slice_num, MAX_SLICES);
03057     }
03058 
03059     for(j=0; j<2; j++){
03060         int id_list[16];
03061         int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
03062         for(i=0; i<16; i++){
03063             id_list[i]= 60;
03064             if(h->ref_list[j][i].data[0]){
03065                 int k;
03066                 uint8_t *base= h->ref_list[j][i].base[0];
03067                 for(k=0; k<h->short_ref_count; k++)
03068                     if(h->short_ref[k]->base[0] == base){
03069                         id_list[i]= k;
03070                         break;
03071                     }
03072                 for(k=0; k<h->long_ref_count; k++)
03073                     if(h->long_ref[k] && h->long_ref[k]->base[0] == base){
03074                         id_list[i]= h->short_ref_count + k;
03075                         break;
03076                     }
03077             }
03078         }
03079 
03080         ref2frm[0]=
03081         ref2frm[1]= -1;
03082         for(i=0; i<16; i++)
03083             ref2frm[i+2]= 4*id_list[i]
03084                           +(h->ref_list[j][i].reference&3);
03085         ref2frm[18+0]=
03086         ref2frm[18+1]= -1;
03087         for(i=16; i<48; i++)
03088             ref2frm[i+4]= 4*id_list[(i-16)>>1]
03089                           +(h->ref_list[j][i].reference&3);
03090     }
03091 
03092     //FIXME: fix draw_edges+PAFF+frame threads
03093     h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type)) ? 0 : 16;
03094     h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
03095 
03096     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
03097         av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
03098                h->slice_num,
03099                (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
03100                first_mb_in_slice,
03101                av_get_picture_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
03102                pps_id, h->frame_num,
03103                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
03104                h->ref_count[0], h->ref_count[1],
03105                s->qscale,
03106                h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
03107                h->use_weight,
03108                h->use_weight==1 && h->use_weight_chroma ? "c" : "",
03109                h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
03110                );
03111     }
03112 
03113     return 0;
03114 }
03115 
03116 int ff_h264_get_slice_type(const H264Context *h)
03117 {
03118     switch (h->slice_type) {
03119     case AV_PICTURE_TYPE_P:  return 0;
03120     case AV_PICTURE_TYPE_B:  return 1;
03121     case AV_PICTURE_TYPE_I:  return 2;
03122     case AV_PICTURE_TYPE_SP: return 3;
03123     case AV_PICTURE_TYPE_SI: return 4;
03124     default:         return -1;
03125     }
03126 }
03127 
/**
 * Fill the per-macroblock caches (neighbor types, non-zero-count,
 * reference indices, motion vectors) that the loop filter reads.
 *
 * @param h       the H264 slice/thread context
 * @param mb_type type of the current macroblock
 * @return 1 if filtering can be skipped for this MB (qp below threshold
 *         for it and its neighbors), 0 otherwise
 */
static int fill_filter_caches(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int top_xy, left_xy[2];
    int top_type, left_type[2];

    /* default top neighbor; adjusted below for MBAFF field pairs */
    top_xy     = mb_xy  - (s->mb_stride << MB_FIELD);

    //FIXME deblocking could skip the intra and nnz parts.

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    /* left_xy[0]/left_xy[1] are the left neighbors of the top/bottom
     * halves of the MB; they differ only when the field flags mismatch
     * in MBAFF mode. */
    left_xy[1] = left_xy[0] = mb_xy-1;
    if(FRAME_MBAFF){
        const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
        const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
        if(s->mb_y&1){
            /* bottom MB of a pair */
            if (left_mb_field_flag != curr_mb_field_flag) {
                left_xy[0] -= s->mb_stride;
            }
        }else{
            /* top MB of a pair */
            if(curr_mb_field_flag){
                /* branchless: subtract a stride from top_xy only when the
                 * MB above is not interlaced (mb_type bit 7) */
                top_xy      += s->mb_stride & (((s->current_picture.mb_type[top_xy    ]>>7)&1)-1);
            }
            if (left_mb_field_flag != curr_mb_field_flag) {
                left_xy[1] += s->mb_stride;
            }
        }
    }

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];
    {
        //for sufficiently low qp, filtering wouldn't do anything
        //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
        int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
        int qp = s->current_picture.qscale_table[mb_xy];
        if(qp <= qp_thresh
           && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
           && (top_xy   < 0 || ((qp + s->current_picture.qscale_table[top_xy    ] + 1)>>1) <= qp_thresh)){
            if(!FRAME_MBAFF)
                return 1;
            /* NOTE(review): the guard tests left_xy[0] but indexes
             * left_xy[1] — presumably both are negative only at the left
             * edge so this is equivalent; verify for MBAFF. */
            if(   (left_xy[0]< 0            || ((qp + s->current_picture.qscale_table[left_xy[1]             ] + 1)>>1) <= qp_thresh)
               && (top_xy    < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy    -s->mb_stride] + 1)>>1) <= qp_thresh))
                return 1;
        }
    }

    top_type     = s->current_picture.mb_type[top_xy]    ;
    left_type[0] = s->current_picture.mb_type[left_xy[0]];
    left_type[1] = s->current_picture.mb_type[left_xy[1]];
    if(h->deblocking_filter == 2){
        /* filter type 2: do not filter across slice boundaries, so drop
         * neighbors from other slices */
        if(h->slice_table[top_xy     ] != h->slice_num) top_type= 0;
        if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;
    }else{
        /* 0xFFFF marks macroblocks outside any slice */
        if(h->slice_table[top_xy     ] == 0xFFFF) top_type= 0;
        if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
    }
    h->top_type    = top_type    ;
    h->left_type[0]= left_type[0];
    h->left_type[1]= left_type[1];

    /* intra MBs are always filtered at full strength; no need to fill
     * the nnz/mv/ref caches below */
    if(IS_INTRA(mb_type))
        return 0;

    /* copy this MB's non-zero-count values into the 8x? cache layout */
    AV_COPY32(&h->non_zero_count_cache[4+8* 1], &h->non_zero_count[mb_xy][ 0]);
    AV_COPY32(&h->non_zero_count_cache[4+8* 2], &h->non_zero_count[mb_xy][ 4]);
    AV_COPY32(&h->non_zero_count_cache[4+8* 3], &h->non_zero_count[mb_xy][ 8]);
    AV_COPY32(&h->non_zero_count_cache[4+8* 4], &h->non_zero_count[mb_xy][12]);

    h->cbp= h->cbp_table[mb_xy];

    {
        int list;
        for(list=0; list<h->list_count; list++){
            int8_t *ref;
            int y, b_stride;
            int16_t (*mv_dst)[2];
            int16_t (*mv_src)[2];

            if(!USES_LIST(mb_type, list)){
                /* list unused by this MB: zero MVs, mark refs unused */
                fill_rectangle(  h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
                AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
                continue;
            }

            ref = &s->current_picture.ref_index[list][4*mb_xy];
            {
                /* translate per-slice reference indices to frame-global
                 * ones via the slice's ref2frm table; each 8x8 ref index
                 * is replicated across its two 4x4 cache entries */
                int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                ref += 2;
                AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
                AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
            }

            /* copy the 4x4 grid of motion vectors for this MB */
            b_stride = h->b_stride;
            mv_dst   = &h->mv_cache[list][scan8[0]];
            mv_src   = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
            for(y=0; y<4; y++){
                AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
            }

        }
    }


/*
0 . T T. T T T T
1 L . .L . . . .
2 L . .L . . . .
3 . T TL . . . .
4 L . .L . . . .
5 L . .. . . . .
*/
//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
    /* fill the edge rows/columns of the nnz cache from the neighbors */
    if(top_type){
        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][3*4]);
    }

    if(left_type[0]){
        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][3+0*4];
        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][3+1*4];
        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][3+2*4];
        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][3+3*4];
    }

    // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
    if(!CABAC && h->pps.transform_8x8_mode){
        if(IS_8x8DCT(top_type)){
            h->non_zero_count_cache[4+8*0]=
            h->non_zero_count_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
            h->non_zero_count_cache[6+8*0]=
            h->non_zero_count_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
        }
        if(IS_8x8DCT(left_type[0])){
            h->non_zero_count_cache[3+8*1]=
            h->non_zero_count_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12; //FIXME check MBAFF
        }
        if(IS_8x8DCT(left_type[1])){
            h->non_zero_count_cache[3+8*3]=
            h->non_zero_count_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12; //FIXME check MBAFF
        }

        if(IS_8x8DCT(mb_type)){
            /* each cbp bit 12..15 drives one 8x8 quadrant of the cache */
            h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= (h->cbp & 0x1000) >> 12;

            h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;

            h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;

            h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
        }
    }

    /* fill the edge mv/ref cache entries from the top and left neighbors
     * so the filter can compute boundary strengths across MB edges */
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        int list;
        for(list=0; list<h->list_count; list++){
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= 4*top_xy + 2;
                /* the neighbor may be in a different slice, so use that
                 * slice's ref2frm table */
                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
            }else{
                AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
                AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
            }

            if(!IS_INTERLACED(mb_type^left_type[0])){
                if(USES_LIST(left_type[0], list)){
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    const int b8_xy= 4*left_xy[0] + 1;
                    int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);
                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);
                    h->ref_cache[list][scan8[0] - 1 + 0 ]=
                    h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
                    h->ref_cache[list][scan8[0] - 1 +16 ]=
                    h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
                }else{
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
                    h->ref_cache[list][scan8[0] - 1 + 0  ]=
                    h->ref_cache[list][scan8[0] - 1 + 8  ]=
                    h->ref_cache[list][scan8[0] - 1 + 16 ]=
                    h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
                }
            }
        }
    }

    return 0;
}
03342 
/**
 * Run the deblocking filter over macroblock columns [start_x, end_x)
 * of the current row (both rows of a pair when FRAME_MBAFF).
 *
 * Temporarily clobbers several per-MB fields of the context
 * (slice_num, list_count, mb_x/mb_y, chroma_qp, ...) and restores
 * slice_type, mb position and chroma_qp at the end.
 *
 * @param h       the H264 slice/thread context
 * @param start_x first macroblock column to filter (inclusive)
 * @param end_x   last macroblock column to filter (exclusive)
 */
static void loop_filter(H264Context *h, int start_x, int end_x){
    MpegEncContext * const s = &h->s;
    uint8_t  *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize, mb_x, mb_y;
    const int end_mb_y= s->mb_y + FRAME_MBAFF;
    const int old_slice_type= h->slice_type;
    const int pixel_shift = h->pixel_shift;

    if(h->deblocking_filter) {
        for(mb_x= start_x; mb_x<end_x; mb_x++){
            /* with MBAFF, process both MBs of the vertical pair */
            for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
                int mb_xy, mb_type;
                mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
                /* load the state of the slice this MB belongs to; it may
                 * differ from the slice currently being decoded */
                h->slice_num= h->slice_table[mb_xy];
                mb_type= s->current_picture.mb_type[mb_xy];
                h->list_count= h->list_counts[mb_xy];

                if(FRAME_MBAFF)
                    h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);

                s->mb_x= mb_x;
                s->mb_y= mb_y;
                /* pointers to the top-left pixel of this MB in each plane */
                dest_y  = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
                dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
                dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
                    //FIXME simplify above

                if (MB_FIELD) {
                    /* field MB: lines of one field are interleaved, so
                     * step two picture lines at a time */
                    linesize   = h->mb_linesize   = s->linesize * 2;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
                    if(mb_y&1){ //FIXME move out of this function?
                        dest_y -= s->linesize*15;
                        dest_cb-= s->uvlinesize*((8 << CHROMA444)-1);
                        dest_cr-= s->uvlinesize*((8 << CHROMA444)-1);
                    }
                } else {
                    linesize   = h->mb_linesize   = s->linesize;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize;
                }
                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, CHROMA444, 0);
                /* fill_filter_caches() returns nonzero when filtering
                 * this MB would be a no-op (low qp) */
                if(fill_filter_caches(h, mb_type))
                    continue;
                h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
                h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);

                if (FRAME_MBAFF) {
                    ff_h264_filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                } else {
                    ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                }
            }
        }
    }
    /* restore decoder state clobbered above */
    h->slice_type= old_slice_type;
    s->mb_x= end_x;
    s->mb_y= end_mb_y - FRAME_MBAFF;
    h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
    h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
}
03402 
03403 static void predict_field_decoding_flag(H264Context *h){
03404     MpegEncContext * const s = &h->s;
03405     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
03406     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
03407                 ? s->current_picture.mb_type[mb_xy-1]
03408                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
03409                 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
03410                 : 0;
03411     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
03412 }
03413 
03417 static void decode_finish_row(H264Context *h){
03418     MpegEncContext * const s = &h->s;
03419     int top = 16*(s->mb_y >> FIELD_PICTURE);
03420     int height = 16 << FRAME_MBAFF;
03421     int deblock_border = (16 + 4) << FRAME_MBAFF;
03422     int pic_height = 16*s->mb_height >> FIELD_PICTURE;
03423 
03424     if (h->deblocking_filter) {
03425         if((top + height) >= pic_height)
03426             height += deblock_border;
03427 
03428         top -= deblock_border;
03429     }
03430 
03431     if (top >= pic_height || (top + height) < h->emu_edge_height)
03432         return;
03433 
03434     height = FFMIN(height, pic_height - top);
03435     if (top < h->emu_edge_height) {
03436         height = top+height;
03437         top = 0;
03438     }
03439 
03440     ff_draw_horiz_band(s, top, height);
03441 
03442     if (s->dropable) return;
03443 
03444     ff_thread_report_progress((AVFrame*)s->current_picture_ptr, top + height - 1,
03445                              s->picture_structure==PICT_BOTTOM_FIELD);
03446 }
03447 
03448 static int decode_slice(struct AVCodecContext *avctx, void *arg){
03449     H264Context *h = *(void**)arg;
03450     MpegEncContext * const s = &h->s;
03451     const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
03452     int lf_x_start = s->mb_x;
03453 
03454     s->mb_skip_run= -1;
03455 
03456     h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
03457                     (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
03458 
03459     if( h->pps.cabac ) {
03460         /* realign */
03461         align_get_bits( &s->gb );
03462 
03463         /* init cabac */
03464         ff_init_cabac_states( &h->cabac);
03465         ff_init_cabac_decoder( &h->cabac,
03466                                s->gb.buffer + get_bits_count(&s->gb)/8,
03467                                (get_bits_left(&s->gb) + 7)/8);
03468 
03469         ff_h264_init_cabac_states(h);
03470 
03471         for(;;){
03472 //START_TIMER
03473             int ret = ff_h264_decode_mb_cabac(h);
03474             int eos;
03475 //STOP_TIMER("decode_mb_cabac")
03476 
03477             if(ret>=0) ff_h264_hl_decode_mb(h);
03478 
03479             if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
03480                 s->mb_y++;
03481 
03482                 ret = ff_h264_decode_mb_cabac(h);
03483 
03484                 if(ret>=0) ff_h264_hl_decode_mb(h);
03485                 s->mb_y--;
03486             }
03487             eos = get_cabac_terminate( &h->cabac );
03488 
03489             if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
03490                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03491                 if (s->mb_x >= lf_x_start) loop_filter(h, lf_x_start, s->mb_x + 1);
03492                 return 0;
03493             }
03494             if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
03495                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
03496                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
03497                 return -1;
03498             }
03499 
03500             if( ++s->mb_x >= s->mb_width ) {
03501                 loop_filter(h, lf_x_start, s->mb_x);
03502                 s->mb_x = lf_x_start = 0;
03503                 decode_finish_row(h);
03504                 ++s->mb_y;
03505                 if(FIELD_OR_MBAFF_PICTURE) {
03506                     ++s->mb_y;
03507                     if(FRAME_MBAFF && s->mb_y < s->mb_height)
03508                         predict_field_decoding_flag(h);
03509                 }
03510             }
03511 
03512             if( eos || s->mb_y >= s->mb_height ) {
03513                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
03514                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03515                 if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);
03516                 return 0;
03517             }
03518         }
03519 
03520     } else {
03521         for(;;){
03522             int ret = ff_h264_decode_mb_cavlc(h);
03523 
03524             if(ret>=0) ff_h264_hl_decode_mb(h);
03525 
03526             if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
03527                 s->mb_y++;
03528                 ret = ff_h264_decode_mb_cavlc(h);
03529 
03530                 if(ret>=0) ff_h264_hl_decode_mb(h);
03531                 s->mb_y--;
03532             }
03533 
03534             if(ret<0){
03535                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
03536                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
03537                 return -1;
03538             }
03539 
03540             if(++s->mb_x >= s->mb_width){
03541                 loop_filter(h, lf_x_start, s->mb_x);
03542                 s->mb_x = lf_x_start = 0;
03543                 decode_finish_row(h);
03544                 ++s->mb_y;
03545                 if(FIELD_OR_MBAFF_PICTURE) {
03546                     ++s->mb_y;
03547                     if(FRAME_MBAFF && s->mb_y < s->mb_height)
03548                         predict_field_decoding_flag(h);
03549                 }
03550                 if(s->mb_y >= s->mb_height){
03551                     tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
03552 
03553                     if(   get_bits_count(&s->gb) == s->gb.size_in_bits
03554                        || get_bits_count(&s->gb) <  s->gb.size_in_bits && s->avctx->error_recognition < FF_ER_AGGRESSIVE) {
03555                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03556 
03557                         return 0;
03558                     }else{
03559                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y,
03560                                         s->mb_x - 1, s->mb_y,
03561                                         (AC_END|DC_END|MV_END)&part_mask);
03562 
03563                         return -1;
03564                     }
03565                 }
03566             }
03567 
03568             if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
03569                 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
03570                 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
03571                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03572                     if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);
03573 
03574                     return 0;
03575                 }else{
03576                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
03577 
03578                     return -1;
03579                 }
03580             }
03581         }
03582     }
03583 
03584 #if 0
03585     for(;s->mb_y < s->mb_height; s->mb_y++){
03586         for(;s->mb_x < s->mb_width; s->mb_x++){
03587             int ret= decode_mb(h);
03588 
03589             ff_h264_hl_decode_mb(h);
03590 
03591             if(ret<0){
03592                 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
03593                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
03594 
03595                 return -1;
03596             }
03597 
03598             if(++s->mb_x >= s->mb_width){
03599                 s->mb_x=0;
03600                 if(++s->mb_y >= s->mb_height){
03601                     if(get_bits_count(s->gb) == s->gb.size_in_bits){
03602                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03603 
03604                         return 0;
03605                     }else{
03606                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03607 
03608                         return -1;
03609                     }
03610                 }
03611             }
03612 
03613             if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
03614                 if(get_bits_count(s->gb) == s->gb.size_in_bits){
03615                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
03616 
03617                     return 0;
03618                 }else{
03619                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
03620 
03621                     return -1;
03622                 }
03623             }
03624         }
03625         s->mb_x=0;
03626         ff_draw_horiz_band(s, 16*s->mb_y, 16);
03627     }
03628 #endif
03629     return -1; //not reached
03630 }
03631 
static void execute_decode_slices(H264Context *h, int context_count){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    H264Context *hx;
    int i;

    /* Hardware accelerators (hwaccel / VDPAU) consume the slice data
     * themselves, so the software slice decoder must not run. */
    if (s->avctx->hwaccel)
        return;
    if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        return;
    if(context_count == 1) {
        /* Single slice context: decode directly in the calling thread. */
        decode_slice(avctx, &h);
    } else {
        /* Propagate per-frame state from the master context to each
         * slice-thread context before running them in parallel.
         * Context 0 is the master itself, hence i starts at 1. */
        for(i = 1; i < context_count; i++) {
            hx = h->thread_context[i];
            hx->s.error_recognition = avctx->error_recognition;
            hx->s.error_count = 0;       // per-slice error tally, summed below
            hx->x264_build= h->x264_build;
        }

        avctx->execute(avctx, (void *)decode_slice,
                       h->thread_context, NULL, context_count, sizeof(void*));

        /* pull back stuff from slices to master context */
        hx = h->thread_context[context_count - 1];
        s->mb_x = hx->s.mb_x;
        s->mb_y = hx->s.mb_y;
        s->dropable = hx->s.dropable;
        s->picture_structure = hx->s.picture_structure;
        for(i = 1; i < context_count; i++)
            h->s.error_count += h->thread_context[i]->s.error_count;
    }
}
03671 
03672 
/**
 * Split the input buffer into NAL units and dispatch each one to the
 * appropriate parser/decoder.
 *
 * When frame threading is active, a preliminary scan (pass 0) runs first
 * to determine how many NALs must be decoded before the next frame thread
 * may safely be started; pass 1 then performs the actual decoding.
 *
 * @param h        decoder context
 * @param buf      input bitstream (Annex B start codes, or length-prefixed
 *                 NALs when h->is_avc is set)
 * @param buf_size size of buf in bytes
 * @return number of bytes consumed, or a negative value on error
 */
static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    H264Context *hx; // thread context used for the current NAL
    int buf_index;
    int context_count;
    int next_avc;
    int pass = !(avctx->active_thread_type & FF_THREAD_FRAME);
    int nals_needed=0; // NALs that must be decoded before the next frame thread starts
    int nal_index;

    h->max_contexts = (HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_SLICE)) ? avctx->thread_count : 1;
    if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
        h->current_slice = 0;
        if (!s->first_field)
            s->current_picture_ptr= NULL;
        ff_h264_reset_sei(h);
    }

    /* pass 0 (scan) only runs with frame threading; otherwise start at pass 1 */
    for(;pass <= 1;pass++){
        buf_index = 0;
        context_count = 0;
        next_avc = h->is_avc ? 0 : buf_size;
        nal_index = 0;
    for(;;){
        int consumed;
        int dst_length;
        int bit_length;
        const uint8_t *ptr;
        int i, nalsize = 0;
        int err;

        if(buf_index >= next_avc) {
            /* AVC/MP4 framing: read the big-endian NAL length prefix. */
            if(buf_index >= buf_size) break;
            nalsize = 0;
            for(i = 0; i < h->nal_length_size; i++)
                nalsize = (nalsize << 8) | buf[buf_index++];
            if(nalsize <= 0 || nalsize > buf_size - buf_index){
                av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
                break;
            }
            next_avc= buf_index + nalsize;
        } else {
            // start code prefix search
            for(; buf_index + 3 < next_avc; buf_index++){
                // This should always succeed in the first iteration.
                if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
                    break;
            }


            if (buf_index + 3 >= buf_size) {
                buf_index = buf_size;
                break;
            }

            buf_index+=3; // skip the 00 00 01 start code
            if(buf_index >= next_avc) continue;
        }

        hx = h->thread_context[context_count];

        /* Unescape the NAL (remove emulation-prevention bytes). */
        ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
        if (ptr==NULL || dst_length < 0){
            return -1;
        }
        i= buf_index + consumed;
        /* Detect streams from buggy encoders that truncate NALs at an
         * MPEG-PS 0x000001E0 pack start code. */
        if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
           buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
            s->workaround_bugs |= FF_BUG_TRUNCATED;

        if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
        while(dst_length > 0 && ptr[dst_length - 1] == 0)
            dst_length--;
        }
        bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));

        if(s->avctx->debug&FF_DEBUG_STARTCODE){
            av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d/%d at %d/%d length %d\n", hx->nal_unit_type, hx->nal_ref_idc, buf_index, buf_size, dst_length);
        }

        if (h->is_avc && (nalsize != consumed) && nalsize){
            av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
        }

        buf_index += consumed;
        nal_index++;

        if(pass == 0) {
            // packets can sometimes contain multiple PPS/SPS
            // e.g. two PAFF field pictures in one packet, or a demuxer which splits NALs strangely
            // if so, when frame threading we can't start the next thread until we've read all of them
            switch (hx->nal_unit_type) {
                case NAL_SPS:
                case NAL_PPS:
                    nals_needed = nal_index;
                    break;
                case NAL_IDR_SLICE:
                case NAL_SLICE:
                    /* first_mb_in_slice == 0 marks the start of a new
                     * field/frame, so it must be decoded before the next
                     * frame thread may begin. */
                    init_get_bits(&hx->s.gb, ptr, bit_length);
                    if (!get_ue_golomb(&hx->s.gb))
                        nals_needed = nal_index;
            }
            continue; // pass 0 only scans, no decoding
        }

        //FIXME do not discard SEI id
        if(
#if FF_API_HURRY_UP
           (s->hurry_up == 1 && h->nal_ref_idc  == 0) ||
#endif
           (avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
            continue;

      again:
        err = 0;
        switch(hx->nal_unit_type){
        case NAL_IDR_SLICE:
            if (h->nal_unit_type != NAL_IDR_SLICE) {
                av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
                return -1;
            }
            idr(h); //FIXME ensure we don't loose some frames if there is reordering
            /* fallthrough: an IDR slice is decoded like a regular slice */
        case NAL_SLICE:
            init_get_bits(&hx->s.gb, ptr, bit_length);
            hx->intra_gb_ptr=
            hx->inter_gb_ptr= &hx->s.gb;
            hx->s.data_partitioning = 0;

            if((err = decode_slice_header(hx, h)))
               break;

            s->current_picture_ptr->key_frame |=
                    (hx->nal_unit_type == NAL_IDR_SLICE) ||
                    (h->sei_recovery_frame_cnt >= 0);

            if (h->current_slice == 1) {
                /* First slice of the picture: finish setup and notify
                 * hardware accelerators that a new frame starts. */
                if(!(s->flags2 & CODEC_FLAG2_CHUNKS)) {
                    decode_postinit(h, nal_index >= nals_needed);
                }

                if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
                    return -1;
                if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
                    ff_vdpau_h264_picture_start(s);
            }

            /* Decode the slice unless frame skipping applies. */
            if(hx->redundant_pic_count==0
#if FF_API_HURRY_UP
               && hx->s.hurry_up < 5
#endif
               && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
               && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
               && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
               && avctx->skip_frame < AVDISCARD_ALL){
                if(avctx->hwaccel) {
                    if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
                        return -1;
                }else
                if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
                    static const uint8_t start_code[] = {0x00, 0x00, 0x01};
                    ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
                    ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
                }else
                    context_count++; // queue for execute_decode_slices()
            }
            break;
        case NAL_DPA:
            /* Data partition A: slice header + MVs; B and C follow. */
            init_get_bits(&hx->s.gb, ptr, bit_length);
            hx->intra_gb_ptr=
            hx->inter_gb_ptr= NULL;

            if ((err = decode_slice_header(hx, h)) < 0)
                break;

            hx->s.data_partitioning = 1;

            break;
        case NAL_DPB:
            /* Data partition B: intra residual data. */
            init_get_bits(&hx->intra_gb, ptr, bit_length);
            hx->intra_gb_ptr= &hx->intra_gb;
            break;
        case NAL_DPC:
            /* Data partition C: inter residual data; once present the
             * partitioned slice can actually be decoded. */
            init_get_bits(&hx->inter_gb, ptr, bit_length);
            hx->inter_gb_ptr= &hx->inter_gb;

            if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
               && s->current_picture_ptr
               && s->context_initialized
#if FF_API_HURRY_UP
               && s->hurry_up < 5
#endif
               && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
               && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
               && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
               && avctx->skip_frame < AVDISCARD_ALL)
                context_count++;
            break;
        case NAL_SEI:
            init_get_bits(&s->gb, ptr, bit_length);
            ff_h264_decode_sei(h);
            break;
        case NAL_SPS:
            init_get_bits(&s->gb, ptr, bit_length);
            ff_h264_decode_seq_parameter_set(h);

            /* Enable low delay when the stream guarantees no reordering,
             * unless frames are already buffered (would need a flush). */
            if (s->flags & CODEC_FLAG_LOW_DELAY ||
                (h->sps.bitstream_restriction_flag &&
                 !h->sps.num_reorder_frames)) {
                if (s->avctx->has_b_frames > 1 || h->delayed_pic[0])
                    av_log(avctx, AV_LOG_WARNING, "Delayed frames seen "
                           "reenabling low delay requires a codec "
                           "flush.\n");
                else
                    s->low_delay = 1;
            }

            if(avctx->has_b_frames < 2)
                avctx->has_b_frames= !s->low_delay;

            /* Reinitialize DSP/prediction functions if the bit depth changed. */
            if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) {
                if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
                    avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
                    h->pixel_shift = h->sps.bit_depth_luma > 8;

                    ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
                    ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
                    dsputil_init(&s->dsp, s->avctx);
                } else {
                    av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
                    return -1;
                }
            }
            break;
        case NAL_PPS:
            init_get_bits(&s->gb, ptr, bit_length);

            ff_h264_decode_picture_parameter_set(h, bit_length);

            break;
        case NAL_AUD:
        case NAL_END_SEQUENCE:
        case NAL_END_STREAM:
        case NAL_FILLER_DATA:
        case NAL_SPS_EXT:
        case NAL_AUXILIARY_SLICE:
            /* Recognized but intentionally ignored NAL types. */
            break;
        default:
            av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
        }

        /* All slice contexts filled: run them in parallel now. */
        if(context_count == h->max_contexts) {
            execute_decode_slices(h, context_count);
            context_count = 0;
        }

        if (err < 0)
            av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
        else if(err == 1) {
            /* Slice could not be decoded in parallel mode, copy down
             * NAL unit stuff to context 0 and restart. Note that
             * rbsp_buffer is not transferred, but since we no longer
             * run in parallel mode this should not be an issue. */
            h->nal_unit_type = hx->nal_unit_type;
            h->nal_ref_idc   = hx->nal_ref_idc;
            hx = h;
            goto again;
        }
    }
    }
    /* Decode any remaining queued slices. */
    if(context_count)
        execute_decode_slices(h, context_count);
    return buf_index;
}
03947 
03951 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
03952         if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
03953         if(pos+10>buf_size) pos=buf_size; // oops ;)
03954 
03955         return pos;
03956 }
03957 
/**
 * Decode one packet of H.264 data, or flush delayed pictures when called
 * with an empty packet at end of stream.
 *
 * @param avctx     codec context
 * @param data      output AVFrame
 * @param data_size set to sizeof(AVFrame) when a picture is returned, 0 otherwise
 * @param avpkt     input packet; size 0 requests a flush
 * @return number of bytes consumed, or a negative value on error
 */
static int decode_frame(AVCodecContext *avctx,
                             void *data, int *data_size,
                             AVPacket *avpkt)
{
    const uint8_t *buf = avpkt->data;
    int buf_size = avpkt->size;
    H264Context *h = avctx->priv_data;
    MpegEncContext *s = &h->s;
    AVFrame *pict = data;
    int buf_index;

    s->flags= avctx->flags;
    s->flags2= avctx->flags2;

   /* end of stream, output what is still in the buffers */
 out:
    if (buf_size == 0) {
        Picture *out;
        int i, out_idx;

        s->current_picture_ptr = NULL;

//FIXME factorize this with the output code below
        /* Select the delayed picture with the smallest POC, stopping the
         * search at the first keyframe / MMCO-reset boundary. */
        out = h->delayed_pic[0];
        out_idx = 0;
        for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
            if(h->delayed_pic[i]->poc < out->poc){
                out = h->delayed_pic[i];
                out_idx = i;
            }

        /* Remove the selected picture from the delayed-output list. */
        for(i=out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i+1];

        if(out){
            *data_size = sizeof(AVFrame);
            *pict= *(AVFrame*)out;
        }

        return 0;
    }

    buf_index=decode_nal_units(h, buf, buf_size);
    if(buf_index < 0)
        return -1;

    /* An end-of-sequence NAL with no picture started means EOF: reuse the
     * flush path above to drain the delayed-picture buffer. */
    if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
        buf_size = 0;
        goto out;
    }

    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
        /* No frame decoded; only an error if we were not told to skip it. */
        if (avctx->skip_frame >= AVDISCARD_NONREF
#if FF_API_HURRY_UP
                || s->hurry_up
#endif
           )
            return 0;
        av_log(avctx, AV_LOG_ERROR, "no frame!\n");
        return -1;
    }

    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
        /* Picture (or its final chunk) is complete: finish the field and
         * return the next picture in output order, if any. */
        if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h, 1);

        field_end(h, 0);

        if (!h->next_output_pic) {
            /* Wait for second field. */
            *data_size = 0;

        } else {
            *data_size = sizeof(AVFrame);
            *pict = *(AVFrame*)h->next_output_pic;
        }
    }

    assert(pict->data[0] || !*data_size);
    ff_print_debug_info(s, pict);
//printf("out %d\n", (int)pict->data[0]);

    return get_consumed_bytes(s, buf_index, buf_size);
}
#if 0
/* Dead code, kept for reference only (not compiled).
 * Computes availability flags for the macroblocks neighbouring the
 * current one (same-slice neighbours only): indices 0-2 are the
 * top-left/top/top-right row, 3 is the left neighbour. */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    if(s->mb_y){
        h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
        h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
    }else{
        /* Top row: no neighbours above. */
        h->mb_avail[0]=
        h->mb_avail[1]=
        h->mb_avail[2]= 0;
    }
    h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
}
#endif
04061 
#ifdef TEST
#undef printf
#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
/**
 * Standalone self-test (built only with -DTEST): benchmarks and verifies
 * the unsigned/signed exp-Golomb coders; older DCT/NAL tests are kept
 * under #if 0 for reference.
 */
int main(void){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
//    int int_temp[10000];
    DSPContext dsp;
    AVCodecContext avctx;

    dsputil_init(&dsp, &avctx);

    /* Write COUNT unsigned exp-Golomb codes ... */
    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    /* ... then read them back and check each round-trips to its index. */
    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_ue_golomb");
    }


    /* Same round-trip for signed exp-Golomb, centred around zero. */
    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_se_golomb");
    }

#if 0
    /* Dead code: 4x4 (I)DCT round-trip error measurement and NAL
     * escaping/unescaping test; does not compile as-is. */
    printf("testing 4x4 (I)DCT\n");

    DCTELEM block[16];
    uint8_t src[16], ref[16];
    uint64_t error= 0, max_error=0;

    for(i=0; i<COUNT; i++){
        int j;
//        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
        for(j=0; j<16; j++){
            ref[j]= random()%255;
            src[j]= random()%255;
        }

        h264_diff_dct_c(block, src, ref, 4);

        //normalize
        for(j=0; j<16; j++){
//            printf("%d ", block[j]);
            block[j]= block[j]*4;
            if(j&1) block[j]= (block[j]*4 + 2)/5;
            if(j&4) block[j]= (block[j]*4 + 2)/5;
        }
//        printf("\n");

        h->h264dsp.h264_idct_add(ref, block, 4);
/*        for(j=0; j<16; j++){
            printf("%d ", ref[j]);
        }
        printf("\n");*/

        for(j=0; j<16; j++){
            int diff= FFABS(src[j] - ref[j]);

            error+= diff*diff;
            max_error= FFMAX(max_error, diff);
        }
    }
    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
        for(i=0; i<16; i++)
            src1_block[i]= src2_block[i]= random()%255;

    }
    printf("Testing NAL layer\n");

    uint8_t bitstream[COUNT];
    uint8_t nal[COUNT*2];
    H264Context h;
    memset(&h, 0, sizeof(H264Context));

    for(i=0; i<COUNT; i++){
        int zeros= i;
        int nal_length;
        int consumed;
        int out_length;
        uint8_t *out;
        int j;

        for(j=0; j<COUNT; j++){
            bitstream[j]= (random() % 255) + 1;
        }

        /* Scatter exactly 'zeros' zero bytes through the bitstream to
         * exercise the emulation-prevention escaping. */
        for(j=0; j<zeros; j++){
            int pos= random() % COUNT;
            while(bitstream[pos] == 0){
                pos++;
                pos %= COUNT;
            }
            bitstream[pos]=0;
        }

        START_TIMER

        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
        if(nal_length<0){
            printf("encoding failed\n");
            return -1;
        }

        out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);

        STOP_TIMER("NAL")

        if(out_length != COUNT){
            printf("incorrect length %d %d\n", out_length, COUNT);
            return -1;
        }

        if(consumed != nal_length){
            printf("incorrect consumed length %d %d\n", nal_length, consumed);
            return -1;
        }

        if(memcmp(bitstream, out, COUNT)){
            printf("mismatch\n");
            return -1;
        }
    }
#endif

    printf("Testing RBSP\n");


    return 0;
}
#endif /* TEST */
04236 
04237 
04238 av_cold void ff_h264_free_context(H264Context *h)
04239 {
04240     int i;
04241 
04242     free_tables(h, 1); //FIXME cleanup init stuff perhaps
04243 
04244     for(i = 0; i < MAX_SPS_COUNT; i++)
04245         av_freep(h->sps_buffers + i);
04246 
04247     for(i = 0; i < MAX_PPS_COUNT; i++)
04248         av_freep(h->pps_buffers + i);
04249 }
04250 
04251 av_cold int ff_h264_decode_end(AVCodecContext *avctx)
04252 {
04253     H264Context *h = avctx->priv_data;
04254     MpegEncContext *s = &h->s;
04255 
04256     ff_h264_free_context(h);
04257 
04258     MPV_common_end(s);
04259 
04260 //    memset(h, 0, sizeof(H264Context));
04261 
04262     return 0;
04263 }
04264 
/* H.264 profile id/name pairs reported through avctx->profile;
 * terminated by FF_PROFILE_UNKNOWN. */
static const AVProfile profiles[] = {
    { FF_PROFILE_H264_BASELINE,             "Baseline"              },
    { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline"  },
    { FF_PROFILE_H264_MAIN,                 "Main"                  },
    { FF_PROFILE_H264_EXTENDED,             "Extended"              },
    { FF_PROFILE_H264_HIGH,                 "High"                  },
    { FF_PROFILE_H264_HIGH_10,              "High 10"               },
    { FF_PROFILE_H264_HIGH_10_INTRA,        "High 10 Intra"         },
    { FF_PROFILE_H264_HIGH_422,             "High 4:2:2"            },
    { FF_PROFILE_H264_HIGH_422_INTRA,       "High 4:2:2 Intra"      },
    { FF_PROFILE_H264_HIGH_444,             "High 4:4:4"            },
    { FF_PROFILE_H264_HIGH_444_PREDICTIVE,  "High 4:4:4 Predictive" },
    { FF_PROFILE_H264_HIGH_444_INTRA,       "High 4:4:4 Intra"      },
    { FF_PROFILE_H264_CAVLC_444,            "CAVLC 4:4:4"           },
    { FF_PROFILE_UNKNOWN }, // list terminator
};
04281 
/* Software H.264 decoder registration (positional AVCodec initializers;
 * field names below are assumptions from the era's struct layout — verify
 * against avcodec.h). */
AVCodec ff_h264_decoder = {
    "h264",                 /* name */
    AVMEDIA_TYPE_VIDEO,
    CODEC_ID_H264,
    sizeof(H264Context),    /* priv_data_size */
    ff_h264_decode_init,    /* init */
    NULL,                   /* encode: decoder only */
    ff_h264_decode_end,     /* close */
    decode_frame,           /* decode */
    /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY |
        CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
    .flush= flush_dpb,
    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),
    .profiles = NULL_IF_CONFIG_SMALL(profiles),
};
04299 
#if CONFIG_H264_VDPAU_DECODER
/* VDPAU-accelerated variant: same init/close/decode entry points, but the
 * slice data is handed to the VDPAU hardware path and the only supported
 * pixel format is PIX_FMT_VDPAU_H264. */
AVCodec ff_h264_vdpau_decoder = {
    "h264_vdpau",           /* name */
    AVMEDIA_TYPE_VIDEO,
    CODEC_ID_H264,
    sizeof(H264Context),    /* priv_data_size */
    ff_h264_decode_init,    /* init */
    NULL,                   /* encode: decoder only */
    ff_h264_decode_end,     /* close */
    decode_frame,           /* decode */
    CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
    .flush= flush_dpb,
    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
    .pix_fmts = (const enum PixelFormat[]){PIX_FMT_VDPAU_H264, PIX_FMT_NONE},
    .profiles = NULL_IF_CONFIG_SMALL(profiles),
};
#endif

Generated on Fri Feb 22 2013 07:24:26 for FFmpeg by  doxygen 1.7.1