• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • Examples
  • File List
  • Globals

libavcodec/x86/h264dsp_mmx.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
00003  *
00004  * This file is part of FFmpeg.
00005  *
00006  * FFmpeg is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * FFmpeg is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with FFmpeg; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00019  */
00020 
00021 #include "libavutil/cpu.h"
00022 #include "libavutil/x86_cpu.h"
00023 #include "libavcodec/h264dsp.h"
00024 #include "dsputil_mmx.h"
00025 
/* Alternating packed bytes {1,3,1,3,...}; loaded into %%mm6 instead of
 * ff_pb_3 when filtering a field-coded macroblock (see the "field" path in
 * h264_loop_filter_strength_mmx2 below), giving per-component clip limits. */
DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1  ) = 0x0103010301030103ULL;
00027 
/***********************************/
/* IDCT */

/* Declares the prototype of one external (assembly) IDCT-add routine:
 * ff_h264_idct<NUM>_add_<DEPTH>_<OPT>(dst, block, stride).
 *   NUM   - "" for the 4x4 transform, "8" for 8x8, "_dc"/"8_dc" for the
 *           DC-only fast paths
 *   DEPTH - pixel bit depth (8 or 10)
 *   OPT   - CPU-extension suffix (mmx, mmx2, sse2, avx) */
#define IDCT_ADD_FUNC(NUM, DEPTH, OPT) \
void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT (uint8_t *dst, int16_t *block, int stride);

IDCT_ADD_FUNC(, 8, mmx)
IDCT_ADD_FUNC(, 10, sse2)
IDCT_ADD_FUNC(_dc, 8, mmx2)
IDCT_ADD_FUNC(_dc, 10, mmx2)
IDCT_ADD_FUNC(8_dc, 8, mmx2)
IDCT_ADD_FUNC(8_dc, 10, sse2)
IDCT_ADD_FUNC(8, 8, mmx)
IDCT_ADD_FUNC(8, 8, sse2)
IDCT_ADD_FUNC(8, 10, sse2)
#if HAVE_AVX
IDCT_ADD_FUNC(, 10, avx)
IDCT_ADD_FUNC(8_dc, 10, avx)
IDCT_ADD_FUNC(8, 10, avx)
#endif
00047 
00048 
/* Declares one external multi-block IDCT-add routine:
 * ff_h264_idct<NUM>_add<REP>_<DEPTH>_<OPT>().
 * REP is the number of 4x4 blocks covered per call (4 for an 8x8 area,
 * 16 for a full macroblock, "16intra" for the intra-only variant);
 * nnzc carries non-zero coefficient counts so empty blocks can be skipped. */
#define IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT) \
void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \
                              (uint8_t *dst, const int *block_offset, \
                              DCTELEM *block, int stride, const uint8_t nnzc[6*8]);

IDCT_ADD_REP_FUNC(8, 4, 8, mmx)
IDCT_ADD_REP_FUNC(8, 4, 8, mmx2)
IDCT_ADD_REP_FUNC(8, 4, 8, sse2)
IDCT_ADD_REP_FUNC(8, 4, 10, sse2)
IDCT_ADD_REP_FUNC(8, 4, 10, avx)
IDCT_ADD_REP_FUNC(, 16, 8, mmx)
IDCT_ADD_REP_FUNC(, 16, 8, mmx2)
IDCT_ADD_REP_FUNC(, 16, 8, sse2)
IDCT_ADD_REP_FUNC(, 16, 10, sse2)
IDCT_ADD_REP_FUNC(, 16intra, 8, mmx)
IDCT_ADD_REP_FUNC(, 16intra, 8, mmx2)
IDCT_ADD_REP_FUNC(, 16intra, 8, sse2)
IDCT_ADD_REP_FUNC(, 16intra, 10, sse2)
#if HAVE_AVX
IDCT_ADD_REP_FUNC(, 16, 10, avx)
IDCT_ADD_REP_FUNC(, 16intra, 10, avx)
#endif
00071 
00072 
/* Same as IDCT_ADD_REP_FUNC, but for the chroma "add8" routines, which take
 * an array of destination plane pointers (uint8_t **dst) instead of a single
 * destination pointer. */
#define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT) \
void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \
                              (uint8_t **dst, const int *block_offset, \
                              DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
IDCT_ADD_REP_FUNC2(, 8, 8, mmx)
IDCT_ADD_REP_FUNC2(, 8, 8, mmx2)
IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
IDCT_ADD_REP_FUNC2(, 8, 10, sse2)
#if HAVE_AVX
IDCT_ADD_REP_FUNC2(, 8, 10, avx)
#endif

/* Dequantize + inverse-transform the 4x4 array of luma DC coefficients
 * (external assembly implementations). */
void ff_h264_luma_dc_dequant_idct_mmx (DCTELEM *output, DCTELEM *input, int qmul);
void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul);
00087 
/***********************************/
/* deblocking */

/* One pass of the loop-filter boundary-strength (bS) computation for a single
 * filtering direction, written as MMX2 inline assembly.  This is a macro (not
 * a function) so that d_idx, dir and mask_dir expand to compile-time
 * constants usable in the "i" asm operand constraints below.
 *
 *   bS       - output: strengths, stored 8 bytes (4 int16) per edge at
 *              offset 32*dir
 *   nz       - per-block non-zero coefficient flags; nnz[b] || nnz[bn]
 *              doubles the strength (the psllw $1 below)
 *   ref      - reference indices; ref[b] != ref[bn] contributes to bS
 *   mv       - motion vectors; |mv[b] - mv[bn]| >= the limit held in %%mm5
 *              contributes to bS
 *   bidir    - nonzero for bi-predictive content: both reference lists and
 *              both mv sets are compared, in both pairings
 *   edges    - loop bound, pre-scaled by 8 by the caller
 *   step     - loop stride, pre-scaled by 8 by the caller
 *   mask_mv  - edge mask that skips the ref/mv test on masked edges;
 *              NOTE: the macro shifts the CALLER'S variable left by 3 in place
 *   dir      - direction index (0 or 1), selects the bS output offset
 *   d_idx    - byte offset from a block's entry to its neighbour across the
 *              edge (-8 or -1, scaled by 4 for the int16 mv arrays)
 *   mask_dir - when 0, %%mm0 is cleared at the top of each iteration
 *
 * The caller must preload %%mm5 (mv limit), %%mm6 (bias) and %%mm7 (ff_pb_1);
 * intermediate results in %%mm0/%%mm1 are carried BETWEEN the separate asm
 * statements, so this relies on the compiler not touching MMX registers in
 * the interleaved C code. */
#define h264_loop_filter_strength_iteration_mmx2(bS, nz, ref, mv, bidir, edges, step, mask_mv, dir, d_idx, mask_dir) \
    do { \
        x86_reg b_idx; \
        mask_mv <<= 3; \
        for( b_idx=0; b_idx<edges; b_idx+=step ) { \
            if (!mask_dir) \
            __asm__ volatile( \
                    "pxor %%mm0, %%mm0 \n\t" \
                    :: \
            ); \
            if(!(mask_mv & b_idx)) { \
                if(bidir) { \
                    __asm__ volatile( \
                        "movd         %a3(%0,%2), %%mm2 \n" \
                        "punpckldq    %a4(%0,%2), %%mm2 \n" /* { ref0[bn], ref1[bn] } */ \
                        "pshufw $0x44, 12(%0,%2), %%mm0 \n" /* { ref0[b], ref0[b] } */ \
                        "pshufw $0x44, 52(%0,%2), %%mm1 \n" /* { ref1[b], ref1[b] } */ \
                        "pshufw $0x4E, %%mm2, %%mm3 \n" \
                        "psubb         %%mm2, %%mm0 \n" /* { ref0[b]!=ref0[bn], ref0[b]!=ref1[bn] } */ \
                        "psubb         %%mm3, %%mm1 \n" /* { ref1[b]!=ref1[bn], ref1[b]!=ref0[bn] } */ \
 \
                        "por           %%mm1, %%mm0 \n" \
                        "movq   %a5(%1,%2,4), %%mm1 \n" \
                        "movq   %a6(%1,%2,4), %%mm2 \n" \
                        "movq          %%mm1, %%mm3 \n" \
                        "movq          %%mm2, %%mm4 \n" \
                        "psubw   48(%1,%2,4), %%mm1 \n" \
                        "psubw   56(%1,%2,4), %%mm2 \n" \
                        "psubw  208(%1,%2,4), %%mm3 \n" \
                        "psubw  216(%1,%2,4), %%mm4 \n" \
                        "packsswb      %%mm2, %%mm1 \n" \
                        "packsswb      %%mm4, %%mm3 \n" \
                        "paddb         %%mm6, %%mm1 \n" \
                        "paddb         %%mm6, %%mm3 \n" \
                        "psubusb       %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
                        "psubusb       %%mm5, %%mm3 \n" \
                        "packsswb      %%mm3, %%mm1 \n" \
 \
                        "por           %%mm1, %%mm0 \n" \
                        "movq   %a7(%1,%2,4), %%mm1 \n" \
                        "movq   %a8(%1,%2,4), %%mm2 \n" \
                        "movq          %%mm1, %%mm3 \n" \
                        "movq          %%mm2, %%mm4 \n" \
                        "psubw   48(%1,%2,4), %%mm1 \n" \
                        "psubw   56(%1,%2,4), %%mm2 \n" \
                        "psubw  208(%1,%2,4), %%mm3 \n" \
                        "psubw  216(%1,%2,4), %%mm4 \n" \
                        "packsswb      %%mm2, %%mm1 \n" \
                        "packsswb      %%mm4, %%mm3 \n" \
                        "paddb         %%mm6, %%mm1 \n" \
                        "paddb         %%mm6, %%mm3 \n" \
                        "psubusb       %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
                        "psubusb       %%mm5, %%mm3 \n" \
                        "packsswb      %%mm3, %%mm1 \n" \
 \
                        "pshufw $0x4E, %%mm1, %%mm1 \n" \
                        "por           %%mm1, %%mm0 \n" \
                        "pshufw $0x4E, %%mm0, %%mm1 \n" \
                        "pminub        %%mm1, %%mm0 \n" \
                        ::"r"(ref), \
                          "r"(mv), \
                          "r"(b_idx), \
                          "i"(d_idx+12), \
                          "i"(d_idx+52), \
                          "i"(d_idx*4+48), \
                          "i"(d_idx*4+56), \
                          "i"(d_idx*4+208), \
                          "i"(d_idx*4+216) \
                    ); \
                } else { \
                    __asm__ volatile( \
                        "movd   12(%0,%2), %%mm0 \n" \
                        "psubb %a3(%0,%2), %%mm0 \n" /* ref[b] != ref[bn] */ \
                        "movq   48(%1,%2,4), %%mm1 \n" \
                        "movq   56(%1,%2,4), %%mm2 \n" \
                        "psubw %a4(%1,%2,4), %%mm1 \n" \
                        "psubw %a5(%1,%2,4), %%mm2 \n" \
                        "packsswb   %%mm2, %%mm1 \n" \
                        "paddb      %%mm6, %%mm1 \n" \
                        "psubusb    %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
                        "packsswb   %%mm1, %%mm1 \n" \
                        "por        %%mm1, %%mm0 \n" \
                        ::"r"(ref), \
                          "r"(mv), \
                          "r"(b_idx), \
                          "i"(d_idx+12), \
                          "i"(d_idx*4+48), \
                          "i"(d_idx*4+56) \
                    ); \
                } \
            } \
            __asm__ volatile( \
                "movd 12(%0,%1), %%mm1 \n" \
                "por %a2(%0,%1), %%mm1 \n" /* nnz[b] || nnz[bn] */ \
                ::"r"(nnz), \
                  "r"(b_idx), \
                  "i"(d_idx+12) \
            ); \
            __asm__ volatile( \
                "pminub    %%mm7, %%mm1 \n" \
                "pminub    %%mm7, %%mm0 \n" \
                "psllw        $1, %%mm1 \n" \
                "pxor      %%mm2, %%mm2 \n" \
                "pmaxub    %%mm0, %%mm1 \n" \
                "punpcklbw %%mm2, %%mm1 \n" \
                "movq      %%mm1, %a1(%0,%2) \n" \
                ::"r"(bS), \
                  "i"(32*dir), \
                  "r"(b_idx) \
                :"memory" \
            ); \
        } \
    } while (0)
00204 
/* Compute the H.264 deblocking boundary strengths for one macroblock (MMX2).
 * Preloads the constant registers used by the iteration macro above:
 * %%mm7 = ff_pb_1, %%mm6 = ff_pb_3 (or ff_pb_3_1 for field macroblocks) and
 * %%mm5 = 2*%%mm6 (the mv-difference limit).  Runs one iteration per
 * filtering direction, then transposes the 4x4 int16 bS[0] matrix into its
 * final layout.  Like the macro, this depends on MMX register state
 * surviving across separate asm statements. */
static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
                                            int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field ) {
    __asm__ volatile(
        "movq %0, %%mm7 \n"
        "movq %1, %%mm6 \n"
        ::"m"(ff_pb_1), "m"(ff_pb_3)
    );
    if(field)
        __asm__ volatile(
            "movq %0, %%mm6 \n"
            ::"m"(ff_pb_3_1)
        );
    __asm__ volatile(
        "movq  %%mm6, %%mm5 \n"
        "paddb %%mm5, %%mm5 \n" /* mm5 = 2*mm6: the mv-difference limit */
    :);

    // could do a special case for dir==0 && edges==1, but it only reduces the
    // average filter time by 1.2%
    step  <<= 3; /* pre-scale to byte strides for the asm addressing */
    edges <<= 3;
    h264_loop_filter_strength_iteration_mmx2(bS, nnz, ref, mv, bidir, edges, step, mask_mv1, 1, -8,  0);
    h264_loop_filter_strength_iteration_mmx2(bS, nnz, ref, mv, bidir,    32,    8, mask_mv0, 0, -1, -1);

    __asm__ volatile(
        "movq   (%0), %%mm0 \n\t"
        "movq  8(%0), %%mm1 \n\t"
        "movq 16(%0), %%mm2 \n\t"
        "movq 24(%0), %%mm3 \n\t"
        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4)
        "movq %%mm0,   (%0) \n\t"
        "movq %%mm3,  8(%0) \n\t"
        "movq %%mm4, 16(%0) \n\t"
        "movq %%mm2, 24(%0) \n\t"
        ::"r"(bS[0])
        :"memory"
    );
}
00243 
/* Prototype generators for the external (assembly) deblocking filters:
 * LF_FUNC declares a tc0-driven (inter) filter, LF_IFUNC an intra filter
 * (no tc0 argument).  DIR is h/v, TYPE luma/chroma(_intra). */
#define LF_FUNC(DIR, TYPE, DEPTH, OPT) \
void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *pix, int stride, \
                                                                int alpha, int beta, int8_t *tc0);
#define LF_IFUNC(DIR, TYPE, DEPTH, OPT) \
void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *pix, int stride, \
                                                                int alpha, int beta);

/* Declare the full set of deblocking prototypes for one bit depth.
 * (The "type" parameter is not used by the expansion; all prototypes take
 * uint8_t *pix regardless of depth.) */
#define LF_FUNCS(type, depth)\
LF_FUNC (h,  chroma,       depth, mmxext)\
LF_IFUNC(h,  chroma_intra, depth, mmxext)\
LF_FUNC (v,  chroma,       depth, mmxext)\
LF_IFUNC(v,  chroma_intra, depth, mmxext)\
LF_FUNC (h,  luma,         depth, mmxext)\
LF_IFUNC(h,  luma_intra,   depth, mmxext)\
LF_FUNC (h,  luma,         depth, sse2)\
LF_IFUNC(h,  luma_intra,   depth, sse2)\
LF_FUNC (v,  luma,         depth, sse2)\
LF_IFUNC(v,  luma_intra,   depth, sse2)\
LF_FUNC (h,  chroma,       depth, sse2)\
LF_IFUNC(h,  chroma_intra, depth, sse2)\
LF_FUNC (v,  chroma,       depth, sse2)\
LF_IFUNC(v,  chroma_intra, depth, sse2)\
LF_FUNC (h,  luma,         depth,  avx)\
LF_IFUNC(h,  luma_intra,   depth,  avx)\
LF_FUNC (v,  luma,         depth,  avx)\
LF_IFUNC(v,  luma_intra,   depth,  avx)\
LF_FUNC (h,  chroma,       depth,  avx)\
LF_IFUNC(h,  chroma_intra, depth,  avx)\
LF_FUNC (v,  chroma,       depth,  avx)\
LF_IFUNC(v,  chroma_intra, depth,  avx)

LF_FUNCS( uint8_t,  8)
LF_FUNCS(uint16_t, 10)
00277 
#if ARCH_X86_32
LF_FUNC (v8, luma,             8, mmxext)
/* The MMX kernel only covers 8 pixels per call, so a full 16-pixel luma edge
 * is filtered in two halves.  The bitwise AND of two int8_t values has its
 * sign bit set only when both operands are negative, so each half is
 * processed unless both of its tc0 entries are negative (the skip marker). */
static void ff_deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    int half;

    for (half = 0; half < 2; half++)
        if ((tc0[2 * half] & tc0[2 * half + 1]) >= 0)
            ff_deblock_v8_luma_8_mmxext(pix + 8 * half, stride, alpha, beta, tc0 + 2 * half);
}
LF_IFUNC(v8, luma_intra,        8, mmxext)
/* Intra edges carry no tc0 skip flags: run the 8-wide kernel on both halves
 * unconditionally. */
static void ff_deblock_v_luma_intra_8_mmxext(uint8_t *pix, int stride, int alpha, int beta)
{
    int half;

    for (half = 0; half < 2; half++)
        ff_deblock_v8_luma_intra_8_mmxext(pix + 8 * half, stride, alpha, beta);
}
#endif /* ARCH_X86_32 */
00294 
/* 10-bit vertical luma deblock, mmxext flavour (x86-32 fallback). */
LF_FUNC (v,  luma,            10, mmxext)
LF_IFUNC(v,  luma_intra,      10, mmxext)

/***********************************/
/* weighted prediction */

/* Prototype generators for the WxH weighted-prediction assembly routines
 * ff_h264_weight_WxH_OPT (unidirectional) and ff_h264_biweight_WxH_OPT
 * (bidirectional, blending dst with src). */
#define H264_WEIGHT(W, H, OPT) \
void ff_h264_weight_ ## W ## x ## H ## _ ## OPT(uint8_t *dst, \
    int stride, int log2_denom, int weight, int offset);

#define H264_BIWEIGHT(W, H, OPT) \
void ff_h264_biweight_ ## W ## x ## H ## _ ## OPT(uint8_t *dst, \
    uint8_t *src, int stride, int log2_denom, int weightd, \
    int weights, int offset);

/* All block sizes get mmx2 versions; blocks 8 pixels wide or wider also get
 * sse2 weight/biweight and ssse3 biweight versions. */
#define H264_BIWEIGHT_MMX(W,H) \
H264_WEIGHT  (W, H, mmx2) \
H264_BIWEIGHT(W, H, mmx2)

#define H264_BIWEIGHT_MMX_SSE(W,H) \
H264_BIWEIGHT_MMX(W, H) \
H264_WEIGHT      (W, H, sse2) \
H264_BIWEIGHT    (W, H, sse2) \
H264_BIWEIGHT    (W, H, ssse3)

H264_BIWEIGHT_MMX_SSE(16, 16)
H264_BIWEIGHT_MMX_SSE(16,  8)
H264_BIWEIGHT_MMX_SSE( 8, 16)
H264_BIWEIGHT_MMX_SSE( 8,  8)
H264_BIWEIGHT_MMX_SSE( 8,  4)
H264_BIWEIGHT_MMX    ( 4,  8)
H264_BIWEIGHT_MMX    ( 4,  4)
H264_BIWEIGHT_MMX    ( 4,  2)
00328 
/* Install the x86 SIMD implementations into the H264DSPContext.
 * Function pointers are assigned in order of increasing capability
 * (MMX -> MMX2 -> SSE2 -> SSSE3 -> AVX), so a later assignment overrides an
 * earlier one whenever the CPU supports the faster version.  Only 8- and
 * 10-bit depths have x86 optimizations; other depths leave c untouched.
 * Note: the 8-bit branch body is historically not re-indented under its if. */
void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
{
    int mm_flags = av_get_cpu_flags();

    if (bit_depth == 8) {
    /* loop-filter strength is the only non-yasm optimization */
    if (mm_flags & AV_CPU_FLAG_MMX2) {
        c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
    }
#if HAVE_YASM /* everything below is external yasm assembly */
    if (mm_flags & AV_CPU_FLAG_MMX) {
        c->h264_idct_dc_add         =
        c->h264_idct_add            = ff_h264_idct_add_8_mmx;
        c->h264_idct8_dc_add        =
        c->h264_idct8_add           = ff_h264_idct8_add_8_mmx;

        c->h264_idct_add16          = ff_h264_idct_add16_8_mmx;
        c->h264_idct8_add4          = ff_h264_idct8_add4_8_mmx;
        c->h264_idct_add8           = ff_h264_idct_add8_8_mmx;
        c->h264_idct_add16intra     = ff_h264_idct_add16intra_8_mmx;
        c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_mmx;

        if (mm_flags & AV_CPU_FLAG_MMX2) {
            c->h264_idct_dc_add    = ff_h264_idct_dc_add_8_mmx2;
            c->h264_idct8_dc_add   = ff_h264_idct8_dc_add_8_mmx2;
            c->h264_idct_add16     = ff_h264_idct_add16_8_mmx2;
            c->h264_idct8_add4     = ff_h264_idct8_add4_8_mmx2;
            c->h264_idct_add8      = ff_h264_idct_add8_8_mmx2;
            c->h264_idct_add16intra= ff_h264_idct_add16intra_8_mmx2;

            c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext;
            c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext;
            c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmxext;
            c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext;
#if ARCH_X86_32 /* the mmxext luma wrappers are only built on x86-32 */
            c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmxext;
            c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmxext;
            c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext;
            c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext;
#endif
            /* tab index encodes the block size: 16x16, 16x8, 8x16, 8x8,
             * 8x4, 4x8, 4x4, 4x2 */
            c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
            c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
            c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;
            c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
            c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
            c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
            c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
            c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;

            c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;
            c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;
            c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;
            c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
            c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
            c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
            c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
            c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;

            if (mm_flags&AV_CPU_FLAG_SSE2) {
                c->h264_idct8_add           = ff_h264_idct8_add_8_sse2;

                c->h264_idct_add16          = ff_h264_idct_add16_8_sse2;
                c->h264_idct8_add4          = ff_h264_idct8_add4_8_sse2;
                c->h264_idct_add8           = ff_h264_idct_add8_8_sse2;
                c->h264_idct_add16intra     = ff_h264_idct_add16intra_8_sse2;
                c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2;

                /* only the sizes >= 8 wide have sse2 versions (see the
                 * H264_BIWEIGHT_MMX_SSE declarations above) */
                c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_sse2;
                c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_sse2;
                c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_sse2;
                c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_sse2;
                c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_sse2;

                c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_sse2;
                c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_sse2;
                c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_sse2;
                c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_sse2;
                c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_sse2;

#if HAVE_ALIGNED_STACK /* these routines presumably need a 16-byte-aligned stack -- guard name */
                c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_sse2;
                c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_sse2;
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
#endif
            }
            if (mm_flags&AV_CPU_FLAG_SSSE3) {
                c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3;
                c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_ssse3;
                c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_ssse3;
                c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3;
                c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_ssse3;
            }
            if (HAVE_AVX && mm_flags&AV_CPU_FLAG_AVX) {
#if HAVE_ALIGNED_STACK
                c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx;
                c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx;
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
#endif
            }
        }
    }
#endif
    } else if (bit_depth == 10) {
#if HAVE_YASM
    if (mm_flags & AV_CPU_FLAG_MMX) {
        if (mm_flags & AV_CPU_FLAG_MMX2) {
#if ARCH_X86_32
            c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_mmxext;
            c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_mmxext;
            c->h264_v_loop_filter_luma= ff_deblock_v_luma_10_mmxext;
            c->h264_h_loop_filter_luma= ff_deblock_h_luma_10_mmxext;
            c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmxext;
            c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmxext;
#endif
            c->h264_idct_dc_add= ff_h264_idct_dc_add_10_mmx2;
            if (mm_flags&AV_CPU_FLAG_SSE2) {
                c->h264_idct_add       = ff_h264_idct_add_10_sse2;
                c->h264_idct8_dc_add   = ff_h264_idct8_dc_add_10_sse2;

                c->h264_idct_add16     = ff_h264_idct_add16_10_sse2;
                c->h264_idct_add8      = ff_h264_idct_add8_10_sse2;
                c->h264_idct_add16intra= ff_h264_idct_add16intra_10_sse2;
#if HAVE_ALIGNED_STACK
                c->h264_idct8_add      = ff_h264_idct8_add_10_sse2;
                c->h264_idct8_add4     = ff_h264_idct8_add4_10_sse2;
#endif

                c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_sse2;
                c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_sse2;
#if HAVE_ALIGNED_STACK
                c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_sse2;
                c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_sse2;
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
#endif
            }
#if HAVE_AVX
            if (mm_flags&AV_CPU_FLAG_AVX) {
                c->h264_idct_dc_add    =
                c->h264_idct_add       = ff_h264_idct_add_10_avx;
                c->h264_idct8_dc_add   = ff_h264_idct8_dc_add_10_avx;

                c->h264_idct_add16     = ff_h264_idct_add16_10_avx;
                c->h264_idct_add8      = ff_h264_idct_add8_10_avx;
                c->h264_idct_add16intra= ff_h264_idct_add16intra_10_avx;
#if HAVE_ALIGNED_STACK
                c->h264_idct8_add      = ff_h264_idct8_add_10_avx;
                c->h264_idct8_add4     = ff_h264_idct8_add4_10_avx;
#endif

                c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_avx;
                c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_avx;
#if HAVE_ALIGNED_STACK
                c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_avx;
                c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_avx;
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_avx;
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx;
#endif
            }
#endif /* HAVE_AVX */
        }
    }
#endif
    }
}

Generated on Fri Feb 22 2013 07:24:30 for FFmpeg by  doxygen 1.7.1