• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • Examples
  • File List
  • Globals

libavcodec/x86/png_mmx.c

Go to the documentation of this file.
00001 /*
00002  * MMX optimized PNG utils
00003  * Copyright (c) 2008 Loren Merritt
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  *
00021  */
00022 
00023 #include "libavutil/cpu.h"
00024 #include "libavutil/x86_cpu.h"
00025 #include "libavcodec/dsputil.h"
00026 #include "libavcodec/png.h"
00027 #include "dsputil_mmx.h"
00028 
00029 //#undef NDEBUG
00030 //#include <assert.h>
00031 
00032 static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
00033 {
00034     x86_reg i=0;
00035     __asm__ volatile(
00036         "jmp 2f                         \n\t"
00037         "1:                             \n\t"
00038         "movq   (%2, %0), %%mm0         \n\t"
00039         "movq  8(%2, %0), %%mm1         \n\t"
00040         "paddb  (%3, %0), %%mm0         \n\t"
00041         "paddb 8(%3, %0), %%mm1         \n\t"
00042         "movq %%mm0,  (%1, %0)          \n\t"
00043         "movq %%mm1, 8(%1, %0)          \n\t"
00044         "add $16, %0                    \n\t"
00045         "2:                             \n\t"
00046         "cmp %4, %0                     \n\t"
00047         " js 1b                         \n\t"
00048         : "+r" (i)
00049         : "r"(dst), "r"(src1), "r"(src2), "r"((x86_reg)w-15)
00050     );
00051     for(; i<w; i++)
00052         dst[i] = src1[i] + src2[i];
00053 }
00054 
/**
 * PAETH(cpu, abs3): defines
 *     static void add_paeth_prediction_<cpu>(dst, src, top, w, bpp)
 *
 * abs3 is an asm string fragment that must replace the four signed 16-bit
 * words in each of mm3, mm4 and mm5 by their absolute values and leave mm7
 * equal to zero (the loop keeps using mm7 to widen bytes to words).
 *
 * Per loop step, on 4 consecutive bytes at once, with
 *     a = dst[i - bpp],  b = top[i],  c = top[i - bpp]:
 *     pa = |c - b|,  pb = |c - a|,  pc = |2c - a - b|
 *     pred = a  if pa <= min(pb, pc)
 *            b  else if pb <= pc
 *            c  otherwise
 *     dst[i] = src[i] + pred   (masked with ff_pw_255 before packing)
 *
 * i starts at 0 and advances by bpp; the bound i <= w - 3 is tested after
 * the store, so the loop body always runs at least once. Every step loads
 * and stores 4 bytes with movd regardless of bpp.
 *
 * Before the loop, a and c are primed from dst[-bpp ...] and top[-bpp ...],
 * so both rows are read starting bpp bytes before the pointers passed in.
 *
 * For bpp > 4 the function first calls itself once on all three pointers
 * advanced by bpp/2, passing -bpp: the negative value prevents a second
 * level of recursion and is turned back into the positive stride.
 *
 * NOTE(review): ff_pw_255 is presumably four 16-bit words of 255, which
 * would reduce each sum modulo 256 -- confirm against its definition.
 * NOTE(review): the MMX registers used are not listed as clobbers and no
 * emms is executed here -- confirm this matches the project convention.
 */
00055 #define PAETH(cpu, abs3)\
00056 static void add_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\
00057 {\
00058     x86_reg i, end;\
00059     if(bpp>4) add_paeth_prediction_##cpu(dst+bpp/2, src+bpp/2, top+bpp/2, w-bpp/2, -bpp);\
00060     if(bpp<0) bpp=-bpp; /* the nested call above passes -bpp */\
00061     i= -bpp; /* the priming loads read bpp bytes before dst/top */\
00062     end = w-3; /* loop repeats while i <= end */\
00063     __asm__ volatile(\
00064         "pxor      %%mm7, %%mm7 \n" /* mm7 = 0, used to widen bytes to words */\
00065         "movd    (%1,%0), %%mm0 \n" /* mm0 = 4 bytes at dst[-bpp] */\
00066         "movd    (%2,%0), %%mm1 \n" /* mm1 = 4 bytes at top[-bpp] */\
00067         "punpcklbw %%mm7, %%mm0 \n" /* mm0 = a as 4 words */\
00068         "punpcklbw %%mm7, %%mm1 \n" /* mm1 as 4 words; becomes c in the loop */\
00069         "add       %4, %0 \n" /* i += bpp, so i = 0 */\
00070         "1: \n"\
00071         "movq      %%mm1, %%mm2 \n" /* mm2 = c (top of the previous step) */\
00072         "movd    (%2,%0), %%mm1 \n" /* mm1 = 4 bytes at top[i] */\
00073         "movq      %%mm2, %%mm3 \n"\
00074         "punpcklbw %%mm7, %%mm1 \n" /* mm1 = b as 4 words */\
00075         "movq      %%mm2, %%mm4 \n"\
00076         "psubw     %%mm1, %%mm3 \n" /* mm3 = c - b */\
00077         "psubw     %%mm0, %%mm4 \n" /* mm4 = c - a */\
00078         "movq      %%mm3, %%mm5 \n"\
00079         "paddw     %%mm4, %%mm5 \n" /* mm5 = 2c - a - b */\
00080         abs3 /* mm3 = pa, mm4 = pb, mm5 = pc */\
00081         "movq      %%mm4, %%mm6 \n"\
00082         "pminsw    %%mm5, %%mm6 \n" /* mm6 = min(pb, pc) */\
00083         "pcmpgtw   %%mm6, %%mm3 \n" /* mm3 = mask: pa > min(pb, pc) */\
00084         "pcmpgtw   %%mm5, %%mm4 \n" /* mm4 = mask: pb > pc */\
00085         "movq      %%mm4, %%mm6 \n"\
00086         "pand      %%mm3, %%mm4 \n" /* mm4 = mask: choose c */\
00087         "pandn     %%mm3, %%mm6 \n" /* mm6 = mask: choose b */\
00088         "pandn     %%mm0, %%mm3 \n" /* mm3 = a where a is chosen, else 0 */\
00089         "movd    (%3,%0), %%mm0 \n" /* mm0 = 4 bytes at src[i] */\
00090         "pand      %%mm1, %%mm6 \n" /* mm6 = b where b is chosen, else 0 */\
00091         "pand      %%mm4, %%mm2 \n" /* mm2 = c where c is chosen, else 0 */\
00092         "punpcklbw %%mm7, %%mm0 \n" /* src bytes as 4 words */\
00093         "paddw     %%mm6, %%mm0 \n"\
00094         "paddw     %%mm2, %%mm3 \n"\
00095         "paddw     %%mm3, %%mm0 \n" /* mm0 = src + chosen predictor */\
00096         "pand      %6   , %%mm0 \n" /* mask each word with ff_pw_255 */\
00097         "movq      %%mm0, %%mm3 \n" /* mm0 stays live as a for the next step */\
00098         "packuswb  %%mm3, %%mm3 \n" /* words back to bytes */\
00099         "movd      %%mm3, (%1,%0) \n" /* store 4 bytes at dst[i] */\
00100         "add       %4, %0 \n" /* i += bpp */\
00101         "cmp       %5, %0 \n"\
00102         "jle 1b \n" /* repeat while i <= end */\
00103         :"+r"(i)\
00104         :"r"(dst), "r"(top), "r"(src), "r"((x86_reg)bpp), "g"(end),\
00105          "m"(ff_pw_255)\
00106         :"memory"\
00107     );\
00108 }
00109 
/*
 * Asm fragment: absolute value of the four signed words in each of mm3,
 * mm4 and mm5, computed as max(x, 0 - x) with psubw/pmaxsw.
 * Expects mm7 to be zero on entry, uses mm6 and mm7 as scratch, and
 * leaves mm7 zero again on exit.
 */
00110 #define ABS3_MMX2\
00111         "psubw     %%mm5, %%mm7 \n" /* mm7 = -mm5 (mm7 was 0) */\
00112         "pmaxsw    %%mm7, %%mm5 \n" /* mm5 = max(mm5, -mm5) */\
00113         "pxor      %%mm6, %%mm6 \n" /* mm6 = 0 */\
00114         "pxor      %%mm7, %%mm7 \n" /* mm7 = 0 */\
00115         "psubw     %%mm3, %%mm6 \n" /* mm6 = -mm3 */\
00116         "psubw     %%mm4, %%mm7 \n" /* mm7 = -mm4 */\
00117         "pmaxsw    %%mm6, %%mm3 \n" /* mm3 = max(mm3, -mm3) */\
00118         "pmaxsw    %%mm7, %%mm4 \n" /* mm4 = max(mm4, -mm4) */\
00119         "pxor      %%mm7, %%mm7 \n" /* restore mm7 = 0 */
00120 
/*
 * Asm fragment: absolute value of the four signed words in each of mm3,
 * mm4 and mm5 using pabsw. No other register is touched, so mm7 keeps
 * whatever value it had on entry.
 */
00121 #define ABS3_SSSE3\
00122         "pabsw     %%mm3, %%mm3 \n"\
00123         "pabsw     %%mm4, %%mm4 \n"\
00124         "pabsw     %%mm5, %%mm5 \n"
00125 
/*
 * Instantiate add_paeth_prediction_mmx2 unconditionally and
 * add_paeth_prediction_ssse3 only when HAVE_SSSE3 is set at build time.
 */
00126 PAETH(mmx2, ABS3_MMX2)
00127 #if HAVE_SSSE3
00128 PAETH(ssse3, ABS3_SSSE3)
00129 #endif /* HAVE_SSSE3 */
00130 
00131 void ff_png_init_mmx(PNGDecContext *s)
00132 {
00133     int mm_flags = av_get_cpu_flags();
00134 
00135     if (mm_flags & AV_CPU_FLAG_MMX2) {
00136         s->add_bytes_l2 = add_bytes_l2_mmx;
00137         s->add_paeth_prediction = add_paeth_prediction_mmx2;
00138 #if HAVE_SSSE3
00139         if (mm_flags & AV_CPU_FLAG_SSSE3)
00140             s->add_paeth_prediction = add_paeth_prediction_ssse3;
00141 #endif
00142     }
00143 }

Generated on Fri Feb 22 2013 07:24:30 for FFmpeg by  doxygen 1.7.1