• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • Examples
  • File List
  • Globals

libavcodec/vp8.c

Go to the documentation of this file.
00001 
00025 #include "libavutil/imgutils.h"
00026 #include "avcodec.h"
00027 #include "vp8.h"
00028 #include "vp8data.h"
00029 #include "rectangle.h"
00030 #include "thread.h"
00031 
00032 #if ARCH_ARM
00033 #   include "arm/vp8.h"
00034 #endif
00035 
00036 static void free_buffers(VP8Context *s)
00037 {
00038     av_freep(&s->macroblocks_base);
00039     av_freep(&s->filter_strength);
00040     av_freep(&s->intra4x4_pred_mode_top);
00041     av_freep(&s->top_nnz);
00042     av_freep(&s->edge_emu_buffer);
00043     av_freep(&s->top_border);
00044     av_freep(&s->segmentation_map);
00045 
00046     s->macroblocks = NULL;
00047 }
00048 
00049 static void vp8_decode_flush(AVCodecContext *avctx)
00050 {
00051     VP8Context *s = avctx->priv_data;
00052     int i;
00053 
00054     if (!avctx->is_copy) {
00055         for (i = 0; i < 5; i++)
00056             if (s->frames[i].data[0])
00057                 ff_thread_release_buffer(avctx, &s->frames[i]);
00058     }
00059     memset(s->framep, 0, sizeof(s->framep));
00060 
00061     free_buffers(s);
00062 }
00063 
00064 static int update_dimensions(VP8Context *s, int width, int height)
00065 {
00066     if (width  != s->avctx->width ||
00067         height != s->avctx->height) {
00068         if (av_image_check_size(width, height, 0, s->avctx))
00069             return AVERROR_INVALIDDATA;
00070 
00071         vp8_decode_flush(s->avctx);
00072 
00073         avcodec_set_dimensions(s->avctx, width, height);
00074     }
00075 
00076     s->mb_width  = (s->avctx->coded_width +15) / 16;
00077     s->mb_height = (s->avctx->coded_height+15) / 16;
00078 
00079     s->macroblocks_base        = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
00080     s->filter_strength         = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
00081     s->intra4x4_pred_mode_top  = av_mallocz(s->mb_width*4);
00082     s->top_nnz                 = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
00083     s->top_border              = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
00084     s->segmentation_map        = av_mallocz(s->mb_width*s->mb_height);
00085 
00086     if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
00087         !s->top_nnz || !s->top_border || !s->segmentation_map)
00088         return AVERROR(ENOMEM);
00089 
00090     s->macroblocks        = s->macroblocks_base + 1;
00091 
00092     return 0;
00093 }
00094 
00095 static void parse_segment_info(VP8Context *s)
00096 {
00097     VP56RangeCoder *c = &s->c;
00098     int i;
00099 
00100     s->segmentation.update_map = vp8_rac_get(c);
00101 
00102     if (vp8_rac_get(c)) { // update segment feature data
00103         s->segmentation.absolute_vals = vp8_rac_get(c);
00104 
00105         for (i = 0; i < 4; i++)
00106             s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);
00107 
00108         for (i = 0; i < 4; i++)
00109             s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
00110     }
00111     if (s->segmentation.update_map)
00112         for (i = 0; i < 3; i++)
00113             s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
00114 }
00115 
00116 static void update_lf_deltas(VP8Context *s)
00117 {
00118     VP56RangeCoder *c = &s->c;
00119     int i;
00120 
00121     for (i = 0; i < 4; i++)
00122         s->lf_delta.ref[i]  = vp8_rac_get_sint(c, 6);
00123 
00124     for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
00125         s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
00126 }
00127 
00128 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
00129 {
00130     const uint8_t *sizes = buf;
00131     int i;
00132 
00133     s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
00134 
00135     buf      += 3*(s->num_coeff_partitions-1);
00136     buf_size -= 3*(s->num_coeff_partitions-1);
00137     if (buf_size < 0)
00138         return -1;
00139 
00140     for (i = 0; i < s->num_coeff_partitions-1; i++) {
00141         int size = AV_RL24(sizes + 3*i);
00142         if (buf_size - size < 0)
00143             return -1;
00144 
00145         ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
00146         buf      += size;
00147         buf_size -= size;
00148     }
00149     ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
00150 
00151     return 0;
00152 }
00153 
00154 static void get_quants(VP8Context *s)
00155 {
00156     VP56RangeCoder *c = &s->c;
00157     int i, base_qi;
00158 
00159     int yac_qi     = vp8_rac_get_uint(c, 7);
00160     int ydc_delta  = vp8_rac_get_sint(c, 4);
00161     int y2dc_delta = vp8_rac_get_sint(c, 4);
00162     int y2ac_delta = vp8_rac_get_sint(c, 4);
00163     int uvdc_delta = vp8_rac_get_sint(c, 4);
00164     int uvac_delta = vp8_rac_get_sint(c, 4);
00165 
00166     for (i = 0; i < 4; i++) {
00167         if (s->segmentation.enabled) {
00168             base_qi = s->segmentation.base_quant[i];
00169             if (!s->segmentation.absolute_vals)
00170                 base_qi += yac_qi;
00171         } else
00172             base_qi = yac_qi;
00173 
00174         s->qmat[i].luma_qmul[0]    =       vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
00175         s->qmat[i].luma_qmul[1]    =       vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
00176         s->qmat[i].luma_dc_qmul[0] =   2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
00177         s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
00178         s->qmat[i].chroma_qmul[0]  =       vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
00179         s->qmat[i].chroma_qmul[1]  =       vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
00180 
00181         s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
00182         s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
00183     }
00184 }
00185 
00199 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
00200 {
00201     VP56RangeCoder *c = &s->c;
00202 
00203     if (update)
00204         return VP56_FRAME_CURRENT;
00205 
00206     switch (vp8_rac_get_uint(c, 2)) {
00207     case 1:
00208         return VP56_FRAME_PREVIOUS;
00209     case 2:
00210         return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
00211     }
00212     return VP56_FRAME_NONE;
00213 }
00214 
00215 static void update_refs(VP8Context *s)
00216 {
00217     VP56RangeCoder *c = &s->c;
00218 
00219     int update_golden = vp8_rac_get(c);
00220     int update_altref = vp8_rac_get(c);
00221 
00222     s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
00223     s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
00224 }
00225 
00226 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
00227 {
00228     VP56RangeCoder *c = &s->c;
00229     int header_size, hscale, vscale, i, j, k, l, m, ret;
00230     int width  = s->avctx->width;
00231     int height = s->avctx->height;
00232 
00233     s->keyframe  = !(buf[0] & 1);
00234     s->profile   =  (buf[0]>>1) & 7;
00235     s->invisible = !(buf[0] & 0x10);
00236     header_size  = AV_RL24(buf) >> 5;
00237     buf      += 3;
00238     buf_size -= 3;
00239 
00240     if (s->profile > 3)
00241         av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
00242 
00243     if (!s->profile)
00244         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
00245     else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
00246         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
00247 
00248     if (header_size > buf_size - 7*s->keyframe) {
00249         av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
00250         return AVERROR_INVALIDDATA;
00251     }
00252 
00253     if (s->keyframe) {
00254         if (AV_RL24(buf) != 0x2a019d) {
00255             av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
00256             return AVERROR_INVALIDDATA;
00257         }
00258         width  = AV_RL16(buf+3) & 0x3fff;
00259         height = AV_RL16(buf+5) & 0x3fff;
00260         hscale = buf[4] >> 6;
00261         vscale = buf[6] >> 6;
00262         buf      += 7;
00263         buf_size -= 7;
00264 
00265         if (hscale || vscale)
00266             av_log_missing_feature(s->avctx, "Upscaling", 1);
00267 
00268         s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
00269         for (i = 0; i < 4; i++)
00270             for (j = 0; j < 16; j++)
00271                 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
00272                        sizeof(s->prob->token[i][j]));
00273         memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
00274         memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
00275         memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
00276         memset(&s->segmentation, 0, sizeof(s->segmentation));
00277         memset(&s->lf_delta, 0, sizeof(s->lf_delta));
00278     }
00279 
00280     if (!s->macroblocks_base || /* first frame */
00281         width != s->avctx->width || height != s->avctx->height) {
00282         if ((ret = update_dimensions(s, width, height)) < 0)
00283             return ret;
00284     }
00285 
00286     ff_vp56_init_range_decoder(c, buf, header_size);
00287     buf      += header_size;
00288     buf_size -= header_size;
00289 
00290     if (s->keyframe) {
00291         if (vp8_rac_get(c))
00292             av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
00293         vp8_rac_get(c); // whether we can skip clamping in dsp functions
00294     }
00295 
00296     if ((s->segmentation.enabled = vp8_rac_get(c)))
00297         parse_segment_info(s);
00298     else
00299         s->segmentation.update_map = 0; // FIXME: move this to some init function?
00300 
00301     s->filter.simple    = vp8_rac_get(c);
00302     s->filter.level     = vp8_rac_get_uint(c, 6);
00303     s->filter.sharpness = vp8_rac_get_uint(c, 3);
00304 
00305     if ((s->lf_delta.enabled = vp8_rac_get(c)))
00306         if (vp8_rac_get(c))
00307             update_lf_deltas(s);
00308 
00309     if (setup_partitions(s, buf, buf_size)) {
00310         av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
00311         return AVERROR_INVALIDDATA;
00312     }
00313 
00314     get_quants(s);
00315 
00316     if (!s->keyframe) {
00317         update_refs(s);
00318         s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
00319         s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
00320     }
00321 
00322     // if we aren't saving this frame's probabilities for future frames,
00323     // make a copy of the current probabilities
00324     if (!(s->update_probabilities = vp8_rac_get(c)))
00325         s->prob[1] = s->prob[0];
00326 
00327     s->update_last = s->keyframe || vp8_rac_get(c);
00328 
00329     for (i = 0; i < 4; i++)
00330         for (j = 0; j < 8; j++)
00331             for (k = 0; k < 3; k++)
00332                 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
00333                     if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
00334                         int prob = vp8_rac_get_uint(c, 8);
00335                         for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
00336                             s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
00337                     }
00338 
00339     if ((s->mbskip_enabled = vp8_rac_get(c)))
00340         s->prob->mbskip = vp8_rac_get_uint(c, 8);
00341 
00342     if (!s->keyframe) {
00343         s->prob->intra  = vp8_rac_get_uint(c, 8);
00344         s->prob->last   = vp8_rac_get_uint(c, 8);
00345         s->prob->golden = vp8_rac_get_uint(c, 8);
00346 
00347         if (vp8_rac_get(c))
00348             for (i = 0; i < 4; i++)
00349                 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
00350         if (vp8_rac_get(c))
00351             for (i = 0; i < 3; i++)
00352                 s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);
00353 
00354         // 17.2 MV probability update
00355         for (i = 0; i < 2; i++)
00356             for (j = 0; j < 19; j++)
00357                 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
00358                     s->prob->mvc[i][j] = vp8_rac_get_nn(c);
00359     }
00360 
00361     return 0;
00362 }
00363 
00364 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
00365 {
00366     dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
00367     dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
00368 }
00369 
00373 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
00374 {
00375     int bit, x = 0;
00376 
00377     if (vp56_rac_get_prob_branchy(c, p[0])) {
00378         int i;
00379 
00380         for (i = 0; i < 3; i++)
00381             x += vp56_rac_get_prob(c, p[9 + i]) << i;
00382         for (i = 9; i > 3; i--)
00383             x += vp56_rac_get_prob(c, p[9 + i]) << i;
00384         if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
00385             x += 8;
00386     } else {
00387         // small_mvtree
00388         const uint8_t *ps = p+2;
00389         bit = vp56_rac_get_prob(c, *ps);
00390         ps += 1 + 3*bit;
00391         x  += 4*bit;
00392         bit = vp56_rac_get_prob(c, *ps);
00393         ps += 1 + bit;
00394         x  += 2*bit;
00395         x  += vp56_rac_get_prob(c, *ps);
00396     }
00397 
00398     return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
00399 }
00400 
00401 static av_always_inline
00402 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
00403 {
00404     if (left == top)
00405         return vp8_submv_prob[4-!!left];
00406     if (!top)
00407         return vp8_submv_prob[2];
00408     return vp8_submv_prob[1-!!left];
00409 }
00410 
00415 static av_always_inline
00416 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
00417 {
00418     int part_idx;
00419     int n, num;
00420     VP8Macroblock *top_mb  = &mb[2];
00421     VP8Macroblock *left_mb = &mb[-1];
00422     const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
00423                   *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
00424                   *mbsplits_cur, *firstidx;
00425     VP56mv *top_mv  = top_mb->bmv;
00426     VP56mv *left_mv = left_mb->bmv;
00427     VP56mv *cur_mv  = mb->bmv;
00428 
00429     if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
00430         if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
00431             part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
00432         } else {
00433             part_idx = VP8_SPLITMVMODE_8x8;
00434         }
00435     } else {
00436         part_idx = VP8_SPLITMVMODE_4x4;
00437     }
00438 
00439     num = vp8_mbsplit_count[part_idx];
00440     mbsplits_cur = vp8_mbsplits[part_idx],
00441     firstidx = vp8_mbfirstidx[part_idx];
00442     mb->partitioning = part_idx;
00443 
00444     for (n = 0; n < num; n++) {
00445         int k = firstidx[n];
00446         uint32_t left, above;
00447         const uint8_t *submv_prob;
00448 
00449         if (!(k & 3))
00450             left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
00451         else
00452             left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
00453         if (k <= 3)
00454             above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
00455         else
00456             above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
00457 
00458         submv_prob = get_submv_prob(left, above);
00459 
00460         if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
00461             if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
00462                 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
00463                     mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
00464                     mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
00465                 } else {
00466                     AV_ZERO32(&mb->bmv[n]);
00467                 }
00468             } else {
00469                 AV_WN32A(&mb->bmv[n], above);
00470             }
00471         } else {
00472             AV_WN32A(&mb->bmv[n], left);
00473         }
00474     }
00475 
00476     return num;
00477 }
00478 
00479 static av_always_inline
00480 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
00481 {
00482     VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
00483                                   mb - 1 /* left */,
00484                                   mb + 1 /* top-left */ };
00485     enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
00486     enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
00487     int idx = CNT_ZERO;
00488     int cur_sign_bias = s->sign_bias[mb->ref_frame];
00489     int8_t *sign_bias = s->sign_bias;
00490     VP56mv near_mv[4];
00491     uint8_t cnt[4] = { 0 };
00492     VP56RangeCoder *c = &s->c;
00493 
00494     AV_ZERO32(&near_mv[0]);
00495     AV_ZERO32(&near_mv[1]);
00496     AV_ZERO32(&near_mv[2]);
00497 
00498     /* Process MB on top, left and top-left */
00499     #define MV_EDGE_CHECK(n)\
00500     {\
00501         VP8Macroblock *edge = mb_edge[n];\
00502         int edge_ref = edge->ref_frame;\
00503         if (edge_ref != VP56_FRAME_CURRENT) {\
00504             uint32_t mv = AV_RN32A(&edge->mv);\
00505             if (mv) {\
00506                 if (cur_sign_bias != sign_bias[edge_ref]) {\
00507                     /* SWAR negate of the values in mv. */\
00508                     mv = ~mv;\
00509                     mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
00510                 }\
00511                 if (!n || mv != AV_RN32A(&near_mv[idx]))\
00512                     AV_WN32A(&near_mv[++idx], mv);\
00513                 cnt[idx]      += 1 + (n != 2);\
00514             } else\
00515                 cnt[CNT_ZERO] += 1 + (n != 2);\
00516         }\
00517     }
00518 
00519     MV_EDGE_CHECK(0)
00520     MV_EDGE_CHECK(1)
00521     MV_EDGE_CHECK(2)
00522 
00523     mb->partitioning = VP8_SPLITMVMODE_NONE;
00524     if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
00525         mb->mode = VP8_MVMODE_MV;
00526 
00527         /* If we have three distinct MVs, merge first and last if they're the same */
00528         if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
00529             cnt[CNT_NEAREST] += 1;
00530 
00531         /* Swap near and nearest if necessary */
00532         if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
00533             FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
00534             FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
00535         }
00536 
00537         if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
00538             if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
00539 
00540                 /* Choose the best mv out of 0,0 and the nearest mv */
00541                 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
00542                 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
00543                                     (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
00544                                     (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
00545 
00546                 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
00547                     mb->mode = VP8_MVMODE_SPLIT;
00548                     mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
00549                 } else {
00550                     mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
00551                     mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
00552                     mb->bmv[0] = mb->mv;
00553                 }
00554             } else {
00555                 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
00556                 mb->bmv[0] = mb->mv;
00557             }
00558         } else {
00559             clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
00560             mb->bmv[0] = mb->mv;
00561         }
00562     } else {
00563         mb->mode = VP8_MVMODE_ZERO;
00564         AV_ZERO32(&mb->mv);
00565         mb->bmv[0] = mb->mv;
00566     }
00567 }
00568 
00569 static av_always_inline
00570 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
00571                            int mb_x, int keyframe)
00572 {
00573     uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
00574     if (keyframe) {
00575         int x, y;
00576         uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
00577         uint8_t* const left = s->intra4x4_pred_mode_left;
00578         for (y = 0; y < 4; y++) {
00579             for (x = 0; x < 4; x++) {
00580                 const uint8_t *ctx;
00581                 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
00582                 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
00583                 left[y] = top[x] = *intra4x4;
00584                 intra4x4++;
00585             }
00586         }
00587     } else {
00588         int i;
00589         for (i = 0; i < 16; i++)
00590             intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
00591     }
00592 }
00593 
00594 static av_always_inline
00595 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
00596 {
00597     VP56RangeCoder *c = &s->c;
00598 
00599     if (s->segmentation.update_map)
00600         *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
00601     else
00602         *segment = ref ? *ref : *segment;
00603     s->segment = *segment;
00604 
00605     mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
00606 
00607     if (s->keyframe) {
00608         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
00609 
00610         if (mb->mode == MODE_I4x4) {
00611             decode_intra4x4_modes(s, c, mb_x, 1);
00612         } else {
00613             const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
00614             AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
00615             AV_WN32A(s->intra4x4_pred_mode_left, modes);
00616         }
00617 
00618         s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
00619         mb->ref_frame = VP56_FRAME_CURRENT;
00620     } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
00621         // inter MB, 16.2
00622         if (vp56_rac_get_prob_branchy(c, s->prob->last))
00623             mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
00624                 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
00625         else
00626             mb->ref_frame = VP56_FRAME_PREVIOUS;
00627         s->ref_count[mb->ref_frame-1]++;
00628 
00629         // motion vectors, 16.3
00630         decode_mvs(s, mb, mb_x, mb_y);
00631     } else {
00632         // intra MB, 16.1
00633         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
00634 
00635         if (mb->mode == MODE_I4x4)
00636             decode_intra4x4_modes(s, c, mb_x, 0);
00637 
00638         s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
00639         mb->ref_frame = VP56_FRAME_CURRENT;
00640         mb->partitioning = VP8_SPLITMVMODE_NONE;
00641         AV_ZERO32(&mb->bmv[0]);
00642     }
00643 }
00644 
00645 #ifndef decode_block_coeffs_internal
00646 
00655 static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
00656                                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
00657                                         int i, uint8_t *token_prob, int16_t qmul[2])
00658 {
00659     goto skip_eob;
00660     do {
00661         int coeff;
00662         if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
00663             return i;
00664 
00665 skip_eob:
00666         if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
00667             if (++i == 16)
00668                 return i; // invalid input; blocks should end with EOB
00669             token_prob = probs[i][0];
00670             goto skip_eob;
00671         }
00672 
00673         if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
00674             coeff = 1;
00675             token_prob = probs[i+1][1];
00676         } else {
00677             if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
00678                 coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
00679                 if (coeff)
00680                     coeff += vp56_rac_get_prob(c, token_prob[5]);
00681                 coeff += 2;
00682             } else {
00683                 // DCT_CAT*
00684                 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
00685                     if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
00686                         coeff  = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
00687                     } else {                                    // DCT_CAT2
00688                         coeff  = 7;
00689                         coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
00690                         coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
00691                     }
00692                 } else {    // DCT_CAT3 and up
00693                     int a = vp56_rac_get_prob(c, token_prob[8]);
00694                     int b = vp56_rac_get_prob(c, token_prob[9+a]);
00695                     int cat = (a<<1) + b;
00696                     coeff  = 3 + (8<<cat);
00697                     coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
00698                 }
00699             }
00700             token_prob = probs[i+1][2];
00701         }
00702         block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
00703     } while (++i < 16);
00704 
00705     return i;
00706 }
00707 #endif
00708 
00720 static av_always_inline
00721 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
00722                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
00723                         int i, int zero_nhood, int16_t qmul[2])
00724 {
00725     uint8_t *token_prob = probs[i][zero_nhood];
00726     if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
00727         return 0;
00728     return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
00729 }
00730 
00731 static av_always_inline
00732 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
00733                       uint8_t t_nnz[9], uint8_t l_nnz[9])
00734 {
00735     int i, x, y, luma_start = 0, luma_ctx = 3;
00736     int nnz_pred, nnz, nnz_total = 0;
00737     int segment = s->segment;
00738     int block_dc = 0;
00739 
00740     if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
00741         nnz_pred = t_nnz[8] + l_nnz[8];
00742 
00743         // decode DC values and do hadamard
00744         nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
00745                                   s->qmat[segment].luma_dc_qmul);
00746         l_nnz[8] = t_nnz[8] = !!nnz;
00747         if (nnz) {
00748             nnz_total += nnz;
00749             block_dc = 1;
00750             if (nnz == 1)
00751                 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
00752             else
00753                 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
00754         }
00755         luma_start = 1;
00756         luma_ctx = 0;
00757     }
00758 
00759     // luma blocks
00760     for (y = 0; y < 4; y++)
00761         for (x = 0; x < 4; x++) {
00762             nnz_pred = l_nnz[y] + t_nnz[x];
00763             nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
00764                                       nnz_pred, s->qmat[segment].luma_qmul);
00765             // nnz+block_dc may be one more than the actual last index, but we don't care
00766             s->non_zero_count_cache[y][x] = nnz + block_dc;
00767             t_nnz[x] = l_nnz[y] = !!nnz;
00768             nnz_total += nnz;
00769         }
00770 
00771     // chroma blocks
00772     // TODO: what to do about dimensions? 2nd dim for luma is x,
00773     // but for chroma it's (y<<1)|x
00774     for (i = 4; i < 6; i++)
00775         for (y = 0; y < 2; y++)
00776             for (x = 0; x < 2; x++) {
00777                 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
00778                 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
00779                                           nnz_pred, s->qmat[segment].chroma_qmul);
00780                 s->non_zero_count_cache[i][(y<<1)+x] = nnz;
00781                 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
00782                 nnz_total += nnz;
00783             }
00784 
00785     // if there were no coded coeffs despite the macroblock not being marked skip,
00786     // we MUST not do the inner loop filter and should not do IDCT
00787     // Since skip isn't used for bitstream prediction, just manually set it.
00788     if (!nnz_total)
00789         mb->skip = 1;
00790 }
00791 
00792 static av_always_inline
00793 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
00794                       int linesize, int uvlinesize, int simple)
00795 {
00796     AV_COPY128(top_border, src_y + 15*linesize);
00797     if (!simple) {
00798         AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
00799         AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
00800     }
00801 }
00802 
00803 static av_always_inline
00804 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
00805                     int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
00806                     int simple, int xchg)
00807 {
00808     uint8_t *top_border_m1 = top_border-32;     // for TL prediction
00809     src_y  -=   linesize;
00810     src_cb -= uvlinesize;
00811     src_cr -= uvlinesize;
00812 
00813 #define XCHG(a,b,xchg) do {                     \
00814         if (xchg) AV_SWAP64(b,a);               \
00815         else      AV_COPY64(b,a);               \
00816     } while (0)
00817 
00818     XCHG(top_border_m1+8, src_y-8, xchg);
00819     XCHG(top_border,      src_y,   xchg);
00820     XCHG(top_border+8,    src_y+8, 1);
00821     if (mb_x < mb_width-1)
00822         XCHG(top_border+32, src_y+16, 1);
00823 
00824     // only copy chroma for normal loop filter
00825     // or to initialize the top row to 127
00826     if (!simple || !mb_y) {
00827         XCHG(top_border_m1+16, src_cb-8, xchg);
00828         XCHG(top_border_m1+24, src_cr-8, xchg);
00829         XCHG(top_border+16,    src_cb, 1);
00830         XCHG(top_border+24,    src_cr, 1);
00831     }
00832 }
00833 
00834 static av_always_inline
00835 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
00836 {
00837     if (!mb_x) {
00838         return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
00839     } else {
00840         return mb_y ? mode : LEFT_DC_PRED8x8;
00841     }
00842 }
00843 
00844 static av_always_inline
00845 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
00846 {
00847     if (!mb_x) {
00848         return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
00849     } else {
00850         return mb_y ? mode : HOR_PRED8x8;
00851     }
00852 }
00853 
00854 static av_always_inline
00855 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
00856 {
00857     if (mode == DC_PRED8x8) {
00858         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
00859     } else {
00860         return mode;
00861     }
00862 }
00863 
00864 static av_always_inline
00865 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
00866 {
00867     switch (mode) {
00868     case DC_PRED8x8:
00869         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
00870     case VERT_PRED8x8:
00871         return !mb_y ? DC_127_PRED8x8 : mode;
00872     case HOR_PRED8x8:
00873         return !mb_x ? DC_129_PRED8x8 : mode;
00874     case PLANE_PRED8x8 /*TM*/:
00875         return check_tm_pred8x8_mode(mode, mb_x, mb_y);
00876     }
00877     return mode;
00878 }
00879 
00880 static av_always_inline
00881 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
00882 {
00883     if (!mb_x) {
00884         return mb_y ? VERT_VP8_PRED : DC_129_PRED;
00885     } else {
00886         return mb_y ? mode : HOR_VP8_PRED;
00887     }
00888 }
00889 
00890 static av_always_inline
00891 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
00892 {
00893     switch (mode) {
00894     case VERT_PRED:
00895         if (!mb_x && mb_y) {
00896             *copy_buf = 1;
00897             return mode;
00898         }
00899         /* fall-through */
00900     case DIAG_DOWN_LEFT_PRED:
00901     case VERT_LEFT_PRED:
00902         return !mb_y ? DC_127_PRED : mode;
00903     case HOR_PRED:
00904         if (!mb_y) {
00905             *copy_buf = 1;
00906             return mode;
00907         }
00908         /* fall-through */
00909     case HOR_UP_PRED:
00910         return !mb_x ? DC_129_PRED : mode;
00911     case TM_VP8_PRED:
00912         return check_tm_pred4x4_mode(mode, mb_x, mb_y);
00913     case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
00914     case DIAG_DOWN_RIGHT_PRED:
00915     case VERT_RIGHT_PRED:
00916     case HOR_DOWN_PRED:
00917         if (!mb_y || !mb_x)
00918             *copy_buf = 1;
00919         return mode;
00920     }
00921     return mode;
00922 }
00923 
/**
 * Run intra prediction for one macroblock: luma (one 16x16 prediction, or
 * sixteen 4x4 predictions each followed by its residual add) and then both
 * chroma planes.
 *
 * The top border is exchanged via xchg_mb_border() before prediction and
 * handed back afterwards, under the same condition both times.
 *
 * @param s     decoder context
 * @param dst   pointers to the macroblock's top-left pixel in Y, Cb, Cr
 * @param mb    macroblock being predicted (mode and skip are read)
 * @param mb_x  macroblock column
 * @param mb_y  macroblock row
 */
00924 static av_always_inline
00925 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
00926                    int mb_x, int mb_y)
00927 {
00928     AVCodecContext *avctx = s->avctx;
00929     int x, y, mode, nnz;
00930     uint32_t tr;
00931 
00932     // for the first row, we need to run xchg_mb_border to init the top edge to 127
00933     // otherwise, skip it if we aren't going to deblock
00934     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
00935         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
00936                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
00937                        s->filter.simple, 1);
00938 
          // modes below MODE_I4x4 are predicted as a single 16x16 block
00939     if (mb->mode < MODE_I4x4) {
00940         if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
00941             mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
00942         } else {
00943             mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
00944         }
00945         s->hpc.pred16x16[mode](dst[0], s->linesize);
00946     } else {
00947         uint8_t *ptr = dst[0];
00948         uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
              // constant top-right samples used on row 0 with edge emulation
00949         uint8_t tr_top[4] = { 127, 127, 127, 127 };
00950 
00951         // all blocks on the right edge of the macroblock use the bottom edge
00952         // of the top macroblock for their topright edge
00953         uint8_t *tr_right = ptr - s->linesize + 16;
00954 
00955         // if we're on the right edge of the frame, said edge is extended
00956         // from the top macroblock
00957         if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
00958             mb_x == s->mb_width-1) {
                  // replicate the last pixel above the macroblock into 4 bytes
00959             tr = tr_right[-1]*0x01010101u;
00960             tr_right = (uint8_t *)&tr;
00961         }
00962 
00963         if (mb->skip)
00964             AV_ZERO128(s->non_zero_count_cache);
00965 
00966         for (y = 0; y < 4; y++) {
00967             uint8_t *topright = ptr + 4 - s->linesize;
00968             for (x = 0; x < 4; x++) {
00969                 int copy = 0, linesize = s->linesize;
00970                 uint8_t *dst = ptr+4*x;
                      // scratch area of 5 rows x 8 bytes: row 0 holds the top-left
                      // ([3]) and top ([4..7]) samples, rows 1-4 hold the left
                      // sample ([11],[19],[27],[35]) followed by the 4x4 block
00971                 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
00972 
00973                 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
00974                     topright = tr_top;
00975                 } else if (x == 3)
00976                     topright = tr_right;
00977 
00978                 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
00979                     mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
00980                     if (copy) {
                              // predict into the scratch area instead of the frame
00981                         dst = copy_dst + 12;
00982                         linesize = 8;
                              // top row and top-left sample
00983                         if (!(mb_y + y)) {
00984                             copy_dst[3] = 127U;
00985                             AV_WN32A(copy_dst+4, 127U * 0x01010101U);
00986                         } else {
00987                             AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
00988                             if (!(mb_x + x)) {
00989                                 copy_dst[3] = 129U;
00990                             } else {
00991                                 copy_dst[3] = ptr[4*x-s->linesize-1];
00992                             }
00993                         }
                              // left column
00994                         if (!(mb_x + x)) {
00995                             copy_dst[11] =
00996                             copy_dst[19] =
00997                             copy_dst[27] =
00998                             copy_dst[35] = 129U;
00999                         } else {
01000                             copy_dst[11] = ptr[4*x              -1];
01001                             copy_dst[19] = ptr[4*x+s->linesize  -1];
01002                             copy_dst[27] = ptr[4*x+s->linesize*2-1];
01003                             copy_dst[35] = ptr[4*x+s->linesize*3-1];
01004                         }
01005                     }
01006                 } else {
01007                     mode = intra4x4[x];
01008                 }
01009                 s->hpc.pred4x4[mode](dst, topright, linesize);
                      // move the predicted 4x4 block from the scratch area to the frame
01010                 if (copy) {
01011                     AV_COPY32(ptr+4*x              , copy_dst+12);
01012                     AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
01013                     AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
01014                     AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
01015                 }
01016 
                      // add the residual right away: a count of exactly 1 takes the
                      // DC-only routine, a larger count the full inverse transform
01017                 nnz = s->non_zero_count_cache[y][x];
01018                 if (nnz) {
01019                     if (nnz == 1)
01020                         s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
01021                     else
01022                         s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
01023                 }
01024                 topright += 4;
01025             }
01026 
01027             ptr   += 4*s->linesize;
01028             intra4x4 += 4;
01029         }
01030     }
01031 
          // chroma: one mode shared by both planes
01032     if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
01033         mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
01034     } else {
01035         mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
01036     }
01037     s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
01038     s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
01039 
          // same condition as at the top, this time with xchg = 0
01040     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
01041         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
01042                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
01043                        s->filter.simple, 0);
01044 }
01045 
/**
 * Per-fraction lookup used by the motion compensation functions. The column
 * index is the fractional part of a motion vector component (0..7); the rows
 * give the number of extra reference pixels needed around the block.
 */
01046 static const uint8_t subpel_idx[3][8] = {
01047     { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of extra pixels on the left/top side,
01048                                 // also function pointer index
01049     { 0, 3, 5, 3, 5, 3, 5, 3 }, // total nr. of extra pixels required (row 0 + row 2)
01050     { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of extra pixels on the right/bottom side
01051 };
01052 
01069 static av_always_inline
01070 void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
01071                  int x_off, int y_off, int block_w, int block_h,
01072                  int width, int height, int linesize,
01073                  vp8_mc_func mc_func[3][3])
01074 {
01075     uint8_t *src = ref->data[0];
01076 
01077     if (AV_RN32A(mv)) {
01078 
01079         int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
01080         int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
01081 
01082         x_off += mv->x >> 2;
01083         y_off += mv->y >> 2;
01084 
01085         // edge emulation
01086         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
01087         src += y_off * linesize + x_off;
01088         if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
01089             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
01090             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
01091                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01092                                     x_off - mx_idx, y_off - my_idx, width, height);
01093             src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01094         }
01095         mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
01096     } else {
01097         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
01098         mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
01099     }
01100 }
01101 
/**
 * Chroma motion compensation for one block, applied to both chroma planes
 * with the same motion vector.
 *
 * Mirrors vp8_mc_luma(), except that the fractional part is the low three
 * bits of the vector and the whole-pixel part is mv >> 3. When edge emulation
 * is needed, the single s->edge_emu_buffer is filled and consumed for the
 * first plane before it is reused for the second.
 *
 * @param s         decoder context
 * @param dst1      destination in the first chroma plane
 * @param dst2      destination in the second chroma plane
 * @param ref       reference frame; data[1] and data[2] are read
 * @param mv        chroma motion vector
 * @param x_off     horizontal position of the block in the chroma plane
 * @param y_off     vertical position of the block in the chroma plane
 * @param block_w   block width
 * @param block_h   block height
 * @param width     chroma plane width used for the edge test
 * @param height    chroma plane height used for the edge test
 * @param linesize  stride used for destinations, references and edge buffer
 * @param mc_func   MC functions indexed [vertical][horizontal] by the filter
 *                  class taken from subpel_idx[0]
 */
01119 static av_always_inline
01120 void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
01121                    const VP56mv *mv, int x_off, int y_off,
01122                    int block_w, int block_h, int width, int height, int linesize,
01123                    vp8_mc_func mc_func[3][3])
01124 {
01125     uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
01126 
01127     if (AV_RN32A(mv)) {
01128         int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
01129         int my = mv->y&7, my_idx = subpel_idx[0][my];
01130 
01131         x_off += mv->x >> 3;
01132         y_off += mv->y >> 3;
01133 
01134         // edge emulation
01135         src1 += y_off * linesize + x_off;
01136         src2 += y_off * linesize + x_off;
              // wait for the last reference row the filter will touch (>> 3 here, >> 4 for luma)
01137         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
01138         if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
01139             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
                  // first plane: build the padded area, then filter from it
01140             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
01141                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01142                                     x_off - mx_idx, y_off - my_idx, width, height);
01143             src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01144             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
01145 
                  // second plane: the same buffer is overwritten, so this must come after
01146             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
01147                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01148                                     x_off - mx_idx, y_off - my_idx, width, height);
01149             src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01150             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
01151         } else {
01152             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
01153             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
01154         }
01155     } else {
              // zero vector: plain copy of both planes
01156         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
01157         mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
01158         mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
01159     }
01160 }
01161 
01162 static av_always_inline
01163 void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
01164                  AVFrame *ref_frame, int x_off, int y_off,
01165                  int bx_off, int by_off,
01166                  int block_w, int block_h,
01167                  int width, int height, VP56mv *mv)
01168 {
01169     VP56mv uvmv = *mv;
01170 
01171     /* Y */
01172     vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
01173                 ref_frame, mv, x_off + bx_off, y_off + by_off,
01174                 block_w, block_h, width, height, s->linesize,
01175                 s->put_pixels_tab[block_w == 8]);
01176 
01177     /* U/V */
01178     if (s->profile == 3) {
01179         uvmv.x &= ~7;
01180         uvmv.y &= ~7;
01181     }
01182     x_off   >>= 1; y_off   >>= 1;
01183     bx_off  >>= 1; by_off  >>= 1;
01184     width   >>= 1; height  >>= 1;
01185     block_w >>= 1; block_h >>= 1;
01186     vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
01187                   dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
01188                   &uvmv, x_off + bx_off, y_off + by_off,
01189                   block_w, block_h, width, height, s->uvlinesize,
01190                   s->put_pixels_tab[1 + (block_w == 4)]);
01191 }
01192 
01193 /* Fetch pixels for estimated mv 4 macroblocks ahead.
01194  * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
01195 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
01196 {
01197     /* Don't prefetch refs that haven't been used very often this frame. */
01198     if (s->ref_count[ref-1] > (mb_xy >> 5)) {
01199         int x_off = mb_x << 4, y_off = mb_y << 4;
01200         int mx = (mb->mv.x>>2) + x_off + 8;
01201         int my = (mb->mv.y>>2) + y_off;
01202         uint8_t **src= s->framep[ref]->data;
01203         int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
01204         /* For threading, a ff_thread_await_progress here might be useful, but
01205          * it actually slows down the decoder. Since a bad prefetch doesn't
01206          * generate bad decoder output, we don't run it here. */
01207         s->dsp.prefetch(src[0]+off, s->linesize, 4);
01208         off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
01209         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
01210     }
01211 }
01212 
/**
 * Apply motion vectors to produce the inter prediction of one macroblock,
 * dispatching on how the macroblock is partitioned.
 *
 * @param s     decoder context
 * @param dst   macroblock destination pointers for Y, Cb, Cr
 * @param mb    macroblock (partitioning, ref_frame, mv and bmv are read)
 * @param mb_x  macroblock column
 * @param mb_y  macroblock row
 */
01216 static av_always_inline
01217 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
01218                    int mb_x, int mb_y)
01219 {
01220     int x_off = mb_x << 4, y_off = mb_y << 4;
01221     int width = 16*s->mb_width, height = 16*s->mb_height;
01222     AVFrame *ref = s->framep[mb->ref_frame];
01223     VP56mv *bmv = mb->bmv;
01224 
01225     switch (mb->partitioning) {
01226     case VP8_SPLITMVMODE_NONE:
              // one vector for the whole 16x16 macroblock
01227         vp8_mc_part(s, dst, ref, x_off, y_off,
01228                     0, 0, 16, 16, width, height, &mb->mv);
01229         break;
01230     case VP8_SPLITMVMODE_4x4: {
01231         int x, y;
01232         VP56mv uvmv;
01233 
01234         /* Y */
              // sixteen 4x4 luma blocks, each with its own vector bmv[4*y + x]
01235         for (y = 0; y < 4; y++) {
01236             for (x = 0; x < 4; x++) {
01237                 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
01238                             ref, &bmv[4*y + x],
01239                             4*x + x_off, 4*y + y_off, 4, 4,
01240                             width, height, s->linesize,
01241                             s->put_pixels_tab[2]);
01242             }
01243         }
01244 
01245         /* U/V */
              // four 4x4 chroma blocks; each vector is derived from the 2x2 group
              // of luma vectors covering the same area
01246         x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
01247         for (y = 0; y < 2; y++) {
01248             for (x = 0; x < 2; x++) {
01249                 uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
01250                          mb->bmv[ 2*y    * 4 + 2*x+1].x +
01251                          mb->bmv[(2*y+1) * 4 + 2*x  ].x +
01252                          mb->bmv[(2*y+1) * 4 + 2*x+1].x;
01253                 uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
01254                          mb->bmv[ 2*y    * 4 + 2*x+1].y +
01255                          mb->bmv[(2*y+1) * 4 + 2*x  ].y +
01256                          mb->bmv[(2*y+1) * 4 + 2*x+1].y;
                      // divide the sum by 4; the shifted-in sign term lowers the
                      // rounding bias by one for negative sums.
                      // NOTE(review): presumably to round symmetrically around
                      // zero - confirm against the VP8 specification
01257                 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
01258                 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
                      // profile 3 drops the fractional bits of chroma vectors
01259                 if (s->profile == 3) {
01260                     uvmv.x &= ~7;
01261                     uvmv.y &= ~7;
01262                 }
01263                 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
01264                               dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
01265                               4*x + x_off, 4*y + y_off, 4, 4,
01266                               width, height, s->uvlinesize,
01267                               s->put_pixels_tab[2]);
01268             }
01269         }
01270         break;
01271     }
01272     case VP8_SPLITMVMODE_16x8:
              // top and bottom halves
01273         vp8_mc_part(s, dst, ref, x_off, y_off,
01274                     0, 0, 16, 8, width, height, &bmv[0]);
01275         vp8_mc_part(s, dst, ref, x_off, y_off,
01276                     0, 8, 16, 8, width, height, &bmv[1]);
01277         break;
01278     case VP8_SPLITMVMODE_8x16:
              // left and right halves
01279         vp8_mc_part(s, dst, ref, x_off, y_off,
01280                     0, 0, 8, 16, width, height, &bmv[0]);
01281         vp8_mc_part(s, dst, ref, x_off, y_off,
01282                     8, 0, 8, 16, width, height, &bmv[1]);
01283         break;
01284     case VP8_SPLITMVMODE_8x8:
              // four quadrants in raster order
01285         vp8_mc_part(s, dst, ref, x_off, y_off,
01286                     0, 0, 8, 8, width, height, &bmv[0]);
01287         vp8_mc_part(s, dst, ref, x_off, y_off,
01288                     8, 0, 8, 8, width, height, &bmv[1]);
01289         vp8_mc_part(s, dst, ref, x_off, y_off,
01290                     0, 8, 8, 8, width, height, &bmv[2]);
01291         vp8_mc_part(s, dst, ref, x_off, y_off,
01292                     8, 8, 8, 8, width, height, &bmv[3]);
01293         break;
01294     }
01295 }
01296 
/**
 * Add the residual of one macroblock to the prediction already in dst.
 *
 * Luma is skipped for MODE_I4x4 macroblocks (intra_predict() adds those
 * residuals block by block). s->non_zero_count_cache rows are read four
 * bytes at a time, one byte per 4x4 block: 0 means nothing to add, 1 selects
 * the DC-only routine and anything larger the full inverse transform.
 *
 * @param s    decoder context (block coefficients, nnz cache, dsp functions)
 * @param dst  macroblock destination pointers for Y, Cb, Cr
 * @param mb   macroblock; only mode is read
 */
01297 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
01298 {
01299     int x, y, ch;
01300 
01301     if (mb->mode != MODE_I4x4) {
01302         uint8_t *y_dst = dst[0];
01303         for (y = 0; y < 4; y++) {
                  // four per-block counts of this row packed into one word
01304             uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
01305             if (nnz4) {
                      // any byte other than 0 or 1 forces the per-block path
01306                 if (nnz4&~0x01010101) {
01307                     for (x = 0; x < 4; x++) {
01308                         if ((uint8_t)nnz4 == 1)
01309                             s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
01310                         else if((uint8_t)nnz4 > 1)
01311                             s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
                              // move to the next block's count; stop once the rest are zero
01312                         nnz4 >>= 8;
01313                         if (!nnz4)
01314                             break;
01315                     }
01316                 } else {
                          // every block of the row is DC-only or empty: one combined call
01317                     s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
01318                 }
01319             }
01320             y_dst += 4*s->linesize;
01321         }
01322     }
01323 
          // chroma: cache rows 4 and 5 hold the four 4x4 blocks of each plane
01324     for (ch = 0; ch < 2; ch++) {
01325         uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
01326         if (nnz4) {
01327             uint8_t *ch_dst = dst[1+ch];
01328             if (nnz4&~0x01010101) {
01329                 for (y = 0; y < 2; y++) {
01330                     for (x = 0; x < 2; x++) {
01331                         if ((uint8_t)nnz4 == 1)
01332                             s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
01333                         else if((uint8_t)nnz4 > 1)
01334                             s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
01335                         nnz4 >>= 8;
                              // leave both loops as soon as no counts remain
01336                         if (!nnz4)
01337                             goto chroma_idct_end;
01338                     }
01339                     ch_dst += 4*s->uvlinesize;
01340                 }
01341             } else {
01342                 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
01343             }
01344         }
01345 chroma_idct_end: ;
01346     }
01347 }
01348 
01349 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
01350 {
01351     int interior_limit, filter_level;
01352 
01353     if (s->segmentation.enabled) {
01354         filter_level = s->segmentation.filter_level[s->segment];
01355         if (!s->segmentation.absolute_vals)
01356             filter_level += s->filter.level;
01357     } else
01358         filter_level = s->filter.level;
01359 
01360     if (s->lf_delta.enabled) {
01361         filter_level += s->lf_delta.ref[mb->ref_frame];
01362         filter_level += s->lf_delta.mode[mb->mode];
01363     }
01364 
01365     filter_level = av_clip_uintp2(filter_level, 6);
01366 
01367     interior_limit = filter_level;
01368     if (s->filter.sharpness) {
01369         interior_limit >>= (s->filter.sharpness + 3) >> 2;
01370         interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
01371     }
01372     interior_limit = FFMAX(interior_limit, 1);
01373 
01374     f->filter_level = filter_level;
01375     f->inner_limit = interior_limit;
01376     f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
01377 }
01378 
/**
 * Apply the normal (non-simple) loop filter to one macroblock, luma and
 * chroma.
 *
 * Order of operations: left macroblock edge (skipped in column 0), inner
 * edges at column offsets 4/8/12 (luma) and 4 (chroma), top macroblock edge
 * (skipped in row 0), then inner edges at the same row offsets. Nothing is
 * done when the filter level is 0.
 *
 * @param s     decoder context
 * @param dst   macroblock pointers for Y, Cb, Cr
 * @param f     filter parameters of this macroblock
 * @param mb_x  macroblock column
 * @param mb_y  macroblock row
 */
01379 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
01380 {
01381     int mbedge_lim, bedge_lim, hev_thresh;
01382     int filter_level = f->filter_level;
01383     int inner_limit = f->inner_limit;
01384     int inner_filter = f->inner_filter;
01385     int linesize = s->linesize;
01386     int uvlinesize = s->uvlinesize;
          // hev threshold by [s->keyframe][filter_level]; 64 entries per row
01387     static const uint8_t hev_thresh_lut[2][64] = {
01388         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
01389           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
01390           3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
01391           3, 3, 3, 3 },
01392         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
01393           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01394           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
01395           2, 2, 2, 2 }
01396     };
01397 
01398     if (!filter_level)
01399         return;
01400 
          // edge limits: inner block edges, and macroblock edges 4 higher
01401      bedge_lim = 2*filter_level + inner_limit;
01402     mbedge_lim = bedge_lim + 4;
01403 
01404     hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
01405 
          // left macroblock edge; skipped in column 0
01406     if (mb_x) {
01407         s->vp8dsp.vp8_h_loop_filter16y(dst[0],     linesize,
01408                                        mbedge_lim, inner_limit, hev_thresh);
01409         s->vp8dsp.vp8_h_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
01410                                        mbedge_lim, inner_limit, hev_thresh);
01411     }
01412 
          // inner edges at column offsets 4, 8, 12 (luma) and 4 (chroma)
01413     if (inner_filter) {
01414         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
01415                                              inner_limit, hev_thresh);
01416         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
01417                                              inner_limit, hev_thresh);
01418         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
01419                                              inner_limit, hev_thresh);
01420         s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
01421                                              uvlinesize,  bedge_lim,
01422                                              inner_limit, hev_thresh);
01423     }
01424 
          // top macroblock edge; skipped in row 0
01425     if (mb_y) {
01426         s->vp8dsp.vp8_v_loop_filter16y(dst[0],     linesize,
01427                                        mbedge_lim, inner_limit, hev_thresh);
01428         s->vp8dsp.vp8_v_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
01429                                        mbedge_lim, inner_limit, hev_thresh);
01430     }
01431 
          // inner edges at row offsets 4, 8, 12 (luma) and 4 (chroma)
01432     if (inner_filter) {
01433         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
01434                                              linesize,    bedge_lim,
01435                                              inner_limit, hev_thresh);
01436         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
01437                                              linesize,    bedge_lim,
01438                                              inner_limit, hev_thresh);
01439         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
01440                                              linesize,    bedge_lim,
01441                                              inner_limit, hev_thresh);
01442         s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
01443                                              dst[2] + 4 * uvlinesize,
01444                                              uvlinesize,  bedge_lim,
01445                                              inner_limit, hev_thresh);
01446     }
01447 }
01448 
01449 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
01450 {
01451     int mbedge_lim, bedge_lim;
01452     int filter_level = f->filter_level;
01453     int inner_limit = f->inner_limit;
01454     int inner_filter = f->inner_filter;
01455     int linesize = s->linesize;
01456 
01457     if (!filter_level)
01458         return;
01459 
01460      bedge_lim = 2*filter_level + inner_limit;
01461     mbedge_lim = bedge_lim + 4;
01462 
01463     if (mb_x)
01464         s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
01465     if (inner_filter) {
01466         s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
01467         s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
01468         s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
01469     }
01470 
01471     if (mb_y)
01472         s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
01473     if (inner_filter) {
01474         s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
01475         s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
01476         s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
01477     }
01478 }
01479 
01480 static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
01481 {
01482     VP8FilterStrength *f = s->filter_strength;
01483     uint8_t *dst[3] = {
01484         curframe->data[0] + 16*mb_y*s->linesize,
01485         curframe->data[1] +  8*mb_y*s->uvlinesize,
01486         curframe->data[2] +  8*mb_y*s->uvlinesize
01487     };
01488     int mb_x;
01489 
01490     for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
01491         backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
01492         filter_mb(s, dst, f++, mb_x, mb_y);
01493         dst[0] += 16;
01494         dst[1] += 8;
01495         dst[2] += 8;
01496     }
01497 }
01498 
01499 static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
01500 {
01501     VP8FilterStrength *f = s->filter_strength;
01502     uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
01503     int mb_x;
01504 
01505     for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
01506         backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
01507         filter_mb_simple(s, dst, f++, mb_x, mb_y);
01508         dst += 16;
01509     }
01510 }
01511 
01512 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
01513                             AVPacket *avpkt)
01514 {
01515     VP8Context *s = avctx->priv_data;
01516     int ret, mb_x, mb_y, i, y, referenced;
01517     enum AVDiscard skip_thresh;
01518     AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT];
01519 
01520     if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
01521         return ret;
01522 
01523     referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
01524                                 || s->update_altref == VP56_FRAME_CURRENT;
01525 
01526     skip_thresh = !referenced ? AVDISCARD_NONREF :
01527                     !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
01528 
01529     if (avctx->skip_frame >= skip_thresh) {
01530         s->invisible = 1;
01531         goto skip_decode;
01532     }
01533     s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
01534 
01535     // release no longer referenced frames
01536     for (i = 0; i < 5; i++)
01537         if (s->frames[i].data[0] &&
01538             &s->frames[i] != prev_frame &&
01539             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
01540             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
01541             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
01542             ff_thread_release_buffer(avctx, &s->frames[i]);
01543 
01544     // find a free buffer
01545     for (i = 0; i < 5; i++)
01546         if (&s->frames[i] != prev_frame &&
01547             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
01548             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
01549             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
01550             curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
01551             break;
01552         }
01553     if (i == 5) {
01554         av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
01555         abort();
01556     }
01557     if (curframe->data[0])
01558         ff_thread_release_buffer(avctx, curframe);
01559 
01560     curframe->key_frame = s->keyframe;
01561     curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
01562     curframe->reference = referenced ? 3 : 0;
01563     curframe->ref_index[0] = s->segmentation_map;
01564     if ((ret = ff_thread_get_buffer(avctx, curframe))) {
01565         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
01566         return ret;
01567     }
01568 
01569     // check if golden and altref are swapped
01570     if (s->update_altref != VP56_FRAME_NONE) {
01571         s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[s->update_altref];
01572     } else {
01573         s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[VP56_FRAME_GOLDEN2];
01574     }
01575     if (s->update_golden != VP56_FRAME_NONE) {
01576         s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[s->update_golden];
01577     } else {
01578         s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[VP56_FRAME_GOLDEN];
01579     }
01580     if (s->update_last) {
01581         s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
01582     } else {
01583         s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
01584     }
01585     s->next_framep[VP56_FRAME_CURRENT]      = curframe;
01586 
01587     ff_thread_finish_setup(avctx);
01588 
01589     // Given that arithmetic probabilities are updated every frame, it's quite likely
01590     // that the values we have on a random interframe are complete junk if we didn't
01591     // start decode on a keyframe. So just don't display anything rather than junk.
01592     if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
01593                          !s->framep[VP56_FRAME_GOLDEN] ||
01594                          !s->framep[VP56_FRAME_GOLDEN2])) {
01595         av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
01596         return AVERROR_INVALIDDATA;
01597     }
01598 
01599     s->linesize   = curframe->linesize[0];
01600     s->uvlinesize = curframe->linesize[1];
01601 
01602     if (!s->edge_emu_buffer)
01603         s->edge_emu_buffer = av_malloc(21*s->linesize);
01604 
01605     memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
01606 
01607     /* Zero macroblock structures for top/top-left prediction from outside the frame. */
01608     memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
01609 
01610     // top edge of 127 for intra prediction
01611     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
01612         s->top_border[0][15] = s->top_border[0][23] = 127;
01613         memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
01614     }
01615     memset(s->ref_count, 0, sizeof(s->ref_count));
01616     if (s->keyframe)
01617         memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
01618 
01619 #define MARGIN (16 << 2)
01620     s->mv_min.y = -MARGIN;
01621     s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
01622 
01623     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
01624         VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
01625         VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
01626         int mb_xy = mb_y*s->mb_width;
01627         uint8_t *dst[3] = {
01628             curframe->data[0] + 16*mb_y*s->linesize,
01629             curframe->data[1] +  8*mb_y*s->uvlinesize,
01630             curframe->data[2] +  8*mb_y*s->uvlinesize
01631         };
01632 
01633         memset(mb - 1, 0, sizeof(*mb));   // zero left macroblock
01634         memset(s->left_nnz, 0, sizeof(s->left_nnz));
01635         AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
01636 
01637         // left edge of 129 for intra prediction
01638         if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
01639             for (i = 0; i < 3; i++)
01640                 for (y = 0; y < 16>>!!i; y++)
01641                     dst[i][y*curframe->linesize[i]-1] = 129;
01642             if (mb_y == 1) // top left edge is also 129
01643                 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
01644         }
01645 
01646         s->mv_min.x = -MARGIN;
01647         s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;
01648         if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
01649             ff_thread_await_progress(prev_frame, mb_y, 0);
01650 
01651         for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
01652             /* Prefetch the current frame, 4 MBs ahead */
01653             s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
01654             s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
01655 
01656             decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy,
01657                            prev_frame ? prev_frame->ref_index[0] + mb_xy : NULL);
01658 
01659             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
01660 
01661             if (!mb->skip)
01662                 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
01663 
01664             if (mb->mode <= MODE_I4x4)
01665                 intra_predict(s, dst, mb, mb_x, mb_y);
01666             else
01667                 inter_predict(s, dst, mb, mb_x, mb_y);
01668 
01669             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
01670 
01671             if (!mb->skip) {
01672                 idct_mb(s, dst, mb);
01673             } else {
01674                 AV_ZERO64(s->left_nnz);
01675                 AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned
01676 
01677                 // Reset DC block predictors if they would exist if the mb had coefficients
01678                 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
01679                     s->left_nnz[8]      = 0;
01680                     s->top_nnz[mb_x][8] = 0;
01681                 }
01682             }
01683 
01684             if (s->deblock_filter)
01685                 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
01686 
01687             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
01688 
01689             dst[0] += 16;
01690             dst[1] += 8;
01691             dst[2] += 8;
01692             s->mv_min.x -= 64;
01693             s->mv_max.x -= 64;
01694         }
01695         if (s->deblock_filter) {
01696             if (s->filter.simple)
01697                 filter_mb_row_simple(s, curframe, mb_y);
01698             else
01699                 filter_mb_row(s, curframe, mb_y);
01700         }
01701         s->mv_min.y -= 64;
01702         s->mv_max.y -= 64;
01703 
01704         ff_thread_report_progress(curframe, mb_y, 0);
01705     }
01706 
01707     ff_thread_report_progress(curframe, INT_MAX, 0);
01708 skip_decode:
01709     // if future frames don't use the updated probabilities,
01710     // reset them to the values we saved
01711     if (!s->update_probabilities)
01712         s->prob[0] = s->prob[1];
01713 
01714     memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
01715 
01716     if (!s->invisible) {
01717         *(AVFrame*)data = *curframe;
01718         *data_size = sizeof(AVFrame);
01719     }
01720 
01721     return avpkt->size;
01722 }
01723 
01724 static av_cold int vp8_decode_init(AVCodecContext *avctx)
01725 {
01726     VP8Context *s = avctx->priv_data;
01727 
01728     s->avctx = avctx;
01729     avctx->pix_fmt = PIX_FMT_YUV420P;
01730 
01731     dsputil_init(&s->dsp, avctx);
01732     ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8);
01733     ff_vp8dsp_init(&s->vp8dsp);
01734 
01735     return 0;
01736 }
01737 
01738 static av_cold int vp8_decode_free(AVCodecContext *avctx)
01739 {
01740     vp8_decode_flush(avctx);
01741     return 0;
01742 }
01743 
01744 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
01745 {
01746     VP8Context *s = avctx->priv_data;
01747 
01748     s->avctx = avctx;
01749 
01750     return 0;
01751 }
01752 
/*
 * Translate a pointer into s_src->frames[] to the pointer addressing the
 * same slot of s->frames[]; a NULL pointer stays NULL.
 *
 * Relies on variables named s and s_src being in scope where it is used.
 * The argument and the whole expansion are parenthesized so that the macro
 * is safe with arbitrary expression arguments (e.g. a ?: expression) and
 * inside larger expressions. Note that pic is evaluated twice.
 */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
01755 
01756 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
01757 {
01758     VP8Context *s = dst->priv_data, *s_src = src->priv_data;
01759 
01760     if (s->macroblocks_base &&
01761         (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
01762         free_buffers(s);
01763     }
01764 
01765     s->prob[0] = s_src->prob[!s_src->update_probabilities];
01766     s->segmentation = s_src->segmentation;
01767     s->lf_delta = s_src->lf_delta;
01768     memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
01769 
01770     memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
01771     s->framep[0] = REBASE(s_src->next_framep[0]);
01772     s->framep[1] = REBASE(s_src->next_framep[1]);
01773     s->framep[2] = REBASE(s_src->next_framep[2]);
01774     s->framep[3] = REBASE(s_src->next_framep[3]);
01775 
01776     return 0;
01777 }
01778 
01779 AVCodec ff_vp8_decoder = {
01780     "vp8",
01781     AVMEDIA_TYPE_VIDEO,
01782     CODEC_ID_VP8,
01783     sizeof(VP8Context),
01784     vp8_decode_init,
01785     NULL,
01786     vp8_decode_free,
01787     vp8_decode_frame,
01788     CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
01789     .flush = vp8_decode_flush,
01790     .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
01791     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
01792     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
01793 };

Generated on Fri Feb 22 2013 07:24:29 for FFmpeg by  doxygen 1.7.1