[xiph-commits] r14722 - trunk/theora/lib/dec/x86_vc
giles at svn.xiph.org
giles at svn.xiph.org
Sat Apr 12 23:28:01 PDT 2008
Author: giles
Date: 2008-04-12 23:28:00 -0700 (Sat, 12 Apr 2008)
New Revision: 14722
Modified:
trunk/theora/lib/dec/x86_vc/mmxfrag.c
trunk/theora/lib/dec/x86_vc/mmxidct.c
trunk/theora/lib/dec/x86_vc/mmxloopfilter.c
trunk/theora/lib/dec/x86_vc/mmxstate.c
trunk/theora/lib/dec/x86_vc/x86int.h
Log:
Remove duplicate code in the new MSVC inline asm. Patch from Andrew Chew.
Modified: trunk/theora/lib/dec/x86_vc/mmxfrag.c
===================================================================
--- trunk/theora/lib/dec/x86_vc/mmxfrag.c 2008-04-13 06:23:04 UTC (rev 14721)
+++ trunk/theora/lib/dec/x86_vc/mmxfrag.c 2008-04-13 06:28:00 UTC (rev 14722)
@@ -211,220 +211,4 @@
_asm { emms }
}
-#endif
-
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id:
-
- ********************************************************************/
-#include "../../internal.h"
-
-/* ------------------------------------------------------------------------
- MMX reconstruction fragment routines for Visual Studio.
- Tested with VS2005. Should compile for VS2003 and VC6 as well.
-
- Initial implementation 2007 by Nils Pipenbrinck.
- ---------------------------------------------------------------------*/
-
-#if defined(USE_ASM)
-
-void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
- const ogg_int16_t *_residue){
- /* ---------------------------------------------------------------------
- This function does the intra reconstruction step with 8 iterations
- unrolled. The iteration for each instruction is noted by the #id in the
- comments (in case you want to reconstruct it)
- --------------------------------------------------------------------- */
- _asm{
- mov edi, [_residue] /* load residue ptr */
- mov eax, 0x00800080 /* generate constant */
- mov ebx, [_dst_ystride] /* load dst-stride */
- mov edx, [_dst] /* load dest pointer */
-
- /* unrolled loop begins here */
-
- movd mm0, eax /* load constant */
- movq mm1, [edi+ 8*0] /* #1 load low residue */
- movq mm2, [edi+ 8*1] /* #1 load high residue */
- punpckldq mm0, mm0 /* build constant */
- movq mm3, [edi+ 8*2] /* #2 load low residue */
- movq mm4, [edi+ 8*3] /* #2 load high residue */
- movq mm5, [edi+ 8*4] /* #3 load low residue */
- movq mm6, [edi+ 8*5] /* #3 load high residue */
- paddsw mm1, mm0 /* #1 bias low residue */
- paddsw mm2, mm0 /* #1 bias high residue */
- packuswb mm1, mm2 /* #1 pack to byte */
- paddsw mm3, mm0 /* #2 bias low residue */
- paddsw mm4, mm0 /* #2 bias high residue */
- packuswb mm3, mm4 /* #2 pack to byte */
- paddsw mm5, mm0 /* #3 bias low residue */
- paddsw mm6, mm0 /* #3 bias high residue */
- packuswb mm5, mm6 /* #3 pack to byte */
- movq [edx], mm1 /* #1 write row */
- movq [edx + ebx], mm3 /* #2 write row */
- movq [edx + ebx*2], mm5 /* #3 write row */
- movq mm1, [edi+ 8*6] /* #4 load low residue */
- lea ecx, [ebx + ebx*2] /* make dst_ystride * 3 */
- movq mm2, [edi+ 8*7] /* #4 load high residue */
- movq mm3, [edi+ 8*8] /* #5 load low residue */
- lea esi, [ebx*4 + ebx] /* make dst_ystride * 5 */
- movq mm4, [edi+ 8*9] /* #5 load high residue */
- movq mm5, [edi+ 8*10] /* #6 load low residue */
- lea eax, [ecx*2 + ebx] /* make dst_ystride * 7 */
- movq mm6, [edi+ 8*11] /* #6 load high residue */
- paddsw mm1, mm0 /* #4 bias low residue */
- paddsw mm2, mm0 /* #4 bias high residue */
- packuswb mm1, mm2 /* #4 pack to byte */
- paddsw mm3, mm0 /* #5 bias low residue */
- paddsw mm4, mm0 /* #5 bias high residue */
- packuswb mm3, mm4 /* #5 pack to byte */
- paddsw mm5, mm0 /* #6 bias low residue */
- paddsw mm6, mm0 /* #6 bias high residue */
- packuswb mm5, mm6 /* #6 pack to byte */
- movq [edx + ecx], mm1 /* #4 write row */
- movq [edx + ebx*4], mm3 /* #5 write row */
- movq [edx + esi], mm5 /* #6 write row */
- movq mm1, [edi+ 8*12] /* #7 load low residue */
- movq mm2, [edi+ 8*13] /* #7 load high residue */
- movq mm3, [edi+ 8*14] /* #8 load low residue */
- movq mm4, [edi+ 8*15] /* #8 load high residue */
- paddsw mm1, mm0 /* #7 bias low residue */
- paddsw mm2, mm0 /* #7 bias high residue */
- packuswb mm1, mm2 /* #7 pack to byte */
- paddsw mm3, mm0 /* #8 bias low residue */
- paddsw mm4, mm0 /* #8 bias high residue */
- packuswb mm3, mm4 /* #8 pack to byte */
- movq [edx + ecx*2], mm1 /* #7 write row */
- movq [edx + eax], mm3 /* #8 write row */
- }
-}
-
-
-
-void oc_frag_recon_inter_mmx (unsigned char *_dst, int _dst_ystride,
- const unsigned char *_src, int _src_ystride, const ogg_int16_t *_residue){
- /* ---------------------------------------------------------------------
- This function does the inter reconstruction step with two iterations
- running in parallel to hide some load-latencies and break the dependency
- chains. The iteration for each instruction is noted by the #id in the
- comments (in case you want to reconstruct it)
- --------------------------------------------------------------------- */
- _asm{
- pxor mm0, mm0 /* generate constant 0 */
- mov esi, [_src]
- mov edi, [_residue]
- mov eax, [_src_ystride]
- mov edx, [_dst]
- mov ebx, [_dst_ystride]
- mov ecx, 4
-
- align 16
-
-nextchunk:
- movq mm3, [esi] /* #1 load source */
- movq mm1, [edi+0] /* #1 load residium low */
- movq mm2, [edi+8] /* #1 load residium high */
- movq mm7, [esi+eax] /* #2 load source */
- movq mm4, mm3 /* #1 get copy of src */
- movq mm5, [edi+16] /* #2 load residium low */
- punpckhbw mm4, mm0 /* #1 expand high source */
- movq mm6, [edi+24] /* #2 load residium high */
- punpcklbw mm3, mm0 /* #1 expand low source */
- paddsw mm4, mm2 /* #1 add residium high */
- movq mm2, mm7 /* #2 get copy of src */
- paddsw mm3, mm1 /* #1 add residium low */
- punpckhbw mm2, mm0 /* #2 expand high source */
- packuswb mm3, mm4 /* #1 final row pixels */
- punpcklbw mm7, mm0 /* #2 expand low source */
- movq [edx], mm3 /* #1 write row */
- paddsw mm2, mm6 /* #2 add residium high */
- add edi, 32 /* residue += 4 */
- paddsw mm7, mm5 /* #2 add residium low */
- sub ecx, 1 /* update loop counter */
- packuswb mm7, mm2 /* #2 final row */
- lea esi, [esi+eax*2] /* src += stride * 2 */
- movq [edx + ebx], mm7 /* #2 write row */
- lea edx, [edx+ebx*2] /* dst += stride * 2 */
- jne nextchunk
- }
-}
-
-
-void oc_frag_recon_inter2_mmx(unsigned char *_dst, int _dst_ystride,
- const unsigned char *_src1, int _src1_ystride, const unsigned char *_src2,
- int _src2_ystride,const ogg_int16_t *_residue){
- /* ---------------------------------------------------------------------
- This function does the inter2 reconstruction step. The building of the
- average is done with a bit-twiddling trick to avoid excessive register
- copy work during byte to word conversion.
-
- average = (a & b) + (((a ^ b) & 0xfe) >> 1);
-
- (shown for a single byte; it's done with 8 of them at a time)
-
- Slightly faster than the obvious method using add and shift, but not
- an earth-shaking improvement either.
-
- If anyone comes up with a way that produces bit-identical outputs
- using the pavgb instruction let me know and I'll do the 3dnow codepath.
- --------------------------------------------------------------------- */
- _asm{
- mov eax, 0xfefefefe
- mov esi, [_src1]
- mov edi, [_src2]
- movd mm1, eax
- mov ebx, [_residue]
- mov edx, [_dst]
- mov eax, [_dst_ystride]
- punpckldq mm1, mm1 /* replicate lsb32 */
- mov ecx, 8 /* init loop counter */
- pxor mm0, mm0 /* constant zero */
- sub edx, eax /* dst -= dst_stride */
-
- align 16
-
-nextrow:
- movq mm2, [esi] /* load source1 */
- movq mm3, [edi] /* load source2 */
- movq mm5, [ebx + 0] /* load lower residue */
- movq mm6, [ebx + 8] /* load higher residue */
- add esi, _src1_ystride /* src1 += src1_stride */
- add edi, _src2_ystride /* src2 += src2_stride */
- movq mm4, mm2 /* get copy of source1 */
- pand mm2, mm3 /* s1 & s2 (avg part) */
- pxor mm3, mm4 /* s1 ^ s2 (avg part) */
- add ebx, 16 /* residue++ */
- pand mm3, mm1 /* mask out low bits */
- psrlq mm3, 1 /* shift xor avg-part */
- paddd mm3, mm2 /* build final average */
- add edx, eax /* dst += dst_stride */
- movq mm2, mm3 /* get copy of average */
- punpckhbw mm3, mm0 /* average high */
- punpcklbw mm2, mm0 /* average low */
- paddsw mm3, mm6 /* high + residue */
- paddsw mm2, mm5 /* low + residue */
- sub ecx, 1 /* update loop counter */
- packuswb mm2, mm3 /* pack and saturate */
- movq [edx], mm2 /* write row */
- jne nextrow
- }
-}
-
-void oc_restore_fpu_mmx(void){
- _asm { emms }
-}
-
-#endif
-
+#endif
\ No newline at end of file
Modified: trunk/theora/lib/dec/x86_vc/mmxidct.c
===================================================================
--- trunk/theora/lib/dec/x86_vc/mmxidct.c 2008-04-13 06:23:04 UTC (rev 14721)
+++ trunk/theora/lib/dec/x86_vc/mmxidct.c 2008-04-13 06:28:00 UTC (rev 14722)
@@ -1003,1012 +1003,4 @@
}
}
-#endif
-
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id:
-
- ********************************************************************/
-
-/* -------------------------------------------------------------------
- MMX based IDCT for the theora codec.
-
- Originally written by Rudolf Marek, based on code from On2's VP3.
- Converted to Visual Studio inline assembly by Nils Pipenbrinck.
-
- ---------------------------------------------------------------------*/
-#if defined(USE_ASM)
-
-#include <ogg/ogg.h>
-#include "../dct.h"
-#include "../idct.h"
-#include "x86int.h"
-
-/*A table of constants used by the MMX routines.*/
-static const __declspec(align(16)) ogg_uint16_t
- OC_IDCT_CONSTS[(7+1)*4]={
- (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
- (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
- (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6,
- (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6,
- (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5,
- (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5,
- (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4,
- (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4,
- (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3,
- (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3,
- (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
- (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
- (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1,
- (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1,
- 8, 8, 8, 8
-};
-
-
-void oc_idct8x8_10_mmx(ogg_int16_t _y[64]){
- _asm {
- mov edx, [_y]
- mov eax, offset OC_IDCT_CONSTS
- movq mm2, [edx + 30H]
- movq mm6, [eax + 10H]
- movq mm4, mm2
- movq mm7, [edx + 18H]
- pmulhw mm4, mm6
- movq mm1, [eax + 20H]
- pmulhw mm6, mm7
- movq mm5, mm1
- pmulhw mm1, mm2
- movq mm3, [edx + 10H]
- pmulhw mm5, mm7
- movq mm0, [eax]
- paddw mm4, mm2
- paddw mm6, mm7
- paddw mm2, mm1
- movq mm1, [edx + 38H]
- paddw mm7, mm5
- movq mm5, mm0
- pmulhw mm0, mm3
- paddw mm4, mm7
- pmulhw mm5, mm1
- movq mm7, [eax + 30H]
- psubw mm6, mm2
- paddw mm0, mm3
- pmulhw mm3, mm7
- movq mm2, [edx + 20H]
- pmulhw mm7, mm1
- paddw mm5, mm1
- movq mm1, mm2
- pmulhw mm2, [eax + 08H]
- psubw mm3, mm5
- movq mm5, [edx + 28H]
- paddw mm0, mm7
- movq mm7, mm5
- psubw mm0, mm4
- pmulhw mm5, [eax + 08H]
- paddw mm2, mm1
- pmulhw mm1, [eax + 28H]
- paddw mm4, mm4
- paddw mm4, mm0
- psubw mm3, mm6
- paddw mm5, mm7
- paddw mm6, mm6
- pmulhw mm7, [eax + 28H]
- paddw mm6, mm3
- movq [edx + 10H], mm4
- psubw mm1, mm5
- movq mm4, [eax + 18H]
- movq mm5, mm3
- pmulhw mm3, mm4
- paddw mm7, mm2
- movq [edx + 20H], mm6
- movq mm2, mm0
- movq mm6, [edx]
- pmulhw mm0, mm4
- paddw mm5, mm3
- movq mm3, [edx + 08H]
- psubw mm5, mm1
- paddw mm2, mm0
- psubw mm6, mm3
- movq mm0, mm6
- pmulhw mm6, mm4
- paddw mm3, mm3
- paddw mm1, mm1
- paddw mm3, mm0
- paddw mm1, mm5
- pmulhw mm4, mm3
- paddw mm6, mm0
- psubw mm6, mm2
- paddw mm2, mm2
- movq mm0, [edx + 10H]
- paddw mm2, mm6
- paddw mm4, mm3
- psubw mm2, mm1
- movq mm3, [edx + 20H]
- psubw mm4, mm7
- paddw mm1, mm1
- paddw mm7, mm7
- paddw mm1, mm2
- paddw mm7, mm4
- psubw mm4, mm3
- paddw mm3, mm3
- psubw mm6, mm5
- paddw mm5, mm5
- paddw mm3, mm4
- paddw mm5, mm6
- psubw mm7, mm0
- paddw mm0, mm0
- movq [edx + 10H], mm1
- paddw mm0, mm7
- movq mm1, mm4
- punpcklwd mm4, mm5
- movq [edx], mm0
- punpckhwd mm1, mm5
- movq mm0, mm6
- punpcklwd mm6, mm7
- movq mm5, mm4
- punpckldq mm4, mm6
- punpckhdq mm5, mm6
- movq mm6, mm1
- movq [edx + 08H], mm4
- punpckhwd mm0, mm7
- movq [edx + 18H], mm5
- punpckhdq mm6, mm0
- movq mm4, [edx]
- punpckldq mm1, mm0
- movq mm5, [edx + 10H]
- movq mm0, mm4
- movq [edx + 38H], mm6
- punpcklwd mm0, mm5
- movq [edx + 28H], mm1
- punpckhwd mm4, mm5
- movq mm5, mm2
- punpcklwd mm2, mm3
- movq mm1, mm0
- punpckldq mm0, mm2
- punpckhdq mm1, mm2
- movq mm2, mm4
- movq [edx], mm0
- punpckhwd mm5, mm3
- movq [edx + 10H], mm1
- punpckhdq mm4, mm5
- punpckldq mm2, mm5
- movq [edx + 30H], mm4
- movq [edx + 20H], mm2
- movq mm2, [edx + 70H]
- movq mm6, [eax + 10H]
- movq mm4, mm2
- movq mm7, [edx + 58H]
- pmulhw mm4, mm6
- movq mm1, [eax + 20H]
- pmulhw mm6, mm7
- movq mm5, mm1
- pmulhw mm1, mm2
- movq mm3, [edx + 50H]
- pmulhw mm5, mm7
- movq mm0, [eax]
- paddw mm4, mm2
- paddw mm6, mm7
- paddw mm2, mm1
- movq mm1, [edx + 78H]
- paddw mm7, mm5
- movq mm5, mm0
- pmulhw mm0, mm3
- paddw mm4, mm7
- pmulhw mm5, mm1
- movq mm7, [eax + 30H]
- psubw mm6, mm2
- paddw mm0, mm3
- pmulhw mm3, mm7
- movq mm2, [edx + 60H]
- pmulhw mm7, mm1
- paddw mm5, mm1
- movq mm1, mm2
- pmulhw mm2, [eax + 08H]
- psubw mm3, mm5
- movq mm5, [edx + 68H]
- paddw mm0, mm7
- movq mm7, mm5
- psubw mm0, mm4
- pmulhw mm5, [eax + 08H]
- paddw mm2, mm1
- pmulhw mm1, [eax + 28H]
- paddw mm4, mm4
- paddw mm4, mm0
- psubw mm3, mm6
- paddw mm5, mm7
- paddw mm6, mm6
- pmulhw mm7, [eax + 28H]
- paddw mm6, mm3
- movq [edx + 50H], mm4
- psubw mm1, mm5
- movq mm4, [eax + 18H]
- movq mm5, mm3
- pmulhw mm3, mm4
- paddw mm7, mm2
- movq [edx + 60H], mm6
- movq mm2, mm0
- movq mm6, [edx + 40H]
- pmulhw mm0, mm4
- paddw mm5, mm3
- movq mm3, [edx + 48H]
- psubw mm5, mm1
- paddw mm2, mm0
- psubw mm6, mm3
- movq mm0, mm6
- pmulhw mm6, mm4
- paddw mm3, mm3
- paddw mm1, mm1
- paddw mm3, mm0
- paddw mm1, mm5
- pmulhw mm4, mm3
- paddw mm6, mm0
- psubw mm6, mm2
- paddw mm2, mm2
- movq mm0, [edx + 50H]
- paddw mm2, mm6
- paddw mm4, mm3
- psubw mm2, mm1
- movq mm3, [edx + 60H]
- psubw mm4, mm7
- paddw mm1, mm1
- paddw mm7, mm7
- paddw mm1, mm2
- paddw mm7, mm4
- psubw mm4, mm3
- paddw mm3, mm3
- psubw mm6, mm5
- paddw mm5, mm5
- paddw mm3, mm4
- paddw mm5, mm6
- psubw mm7, mm0
- paddw mm0, mm0
- movq [edx + 50H], mm1
- paddw mm0, mm7
- movq mm1, mm4
- punpcklwd mm4, mm5
- movq [edx + 40H], mm0
- punpckhwd mm1, mm5
- movq mm0, mm6
- punpcklwd mm6, mm7
- movq mm5, mm4
- punpckldq mm4, mm6
- punpckhdq mm5, mm6
- movq mm6, mm1
- movq [edx + 48H], mm4
- punpckhwd mm0, mm7
- movq [edx + 58H], mm5
- punpckhdq mm6, mm0
- movq mm4, [edx + 40H]
- punpckldq mm1, mm0
- movq mm5, [edx + 50H]
- movq mm0, mm4
- movq [edx + 78H], mm6
- punpcklwd mm0, mm5
- movq [edx + 68H], mm1
- punpckhwd mm4, mm5
- movq mm5, mm2
- punpcklwd mm2, mm3
- movq mm1, mm0
- punpckldq mm0, mm2
- punpckhdq mm1, mm2
- movq mm2, mm4
- movq [edx + 40H], mm0
- punpckhwd mm5, mm3
- movq [edx + 50H], mm1
- punpckhdq mm4, mm5
- punpckldq mm2, mm5
- movq [edx + 70H], mm4
- movq [edx + 60H], mm2
- movq mm2, [edx + 30H]
- movq mm6, [eax + 10H]
- movq mm4, mm2
- movq mm7, [edx + 50H]
- pmulhw mm4, mm6
- movq mm1, [eax + 20H]
- pmulhw mm6, mm7
- movq mm5, mm1
- pmulhw mm1, mm2
- movq mm3, [edx + 10H]
- pmulhw mm5, mm7
- movq mm0, [eax]
- paddw mm4, mm2
- paddw mm6, mm7
- paddw mm2, mm1
- movq mm1, [edx + 70H]
- paddw mm7, mm5
- movq mm5, mm0
- pmulhw mm0, mm3
- paddw mm4, mm7
- pmulhw mm5, mm1
- movq mm7, [eax + 30H]
- psubw mm6, mm2
- paddw mm0, mm3
- pmulhw mm3, mm7
- movq mm2, [edx + 20H]
- pmulhw mm7, mm1
- paddw mm5, mm1
- movq mm1, mm2
- pmulhw mm2, [eax + 08H]
- psubw mm3, mm5
- movq mm5, [edx + 60H]
- paddw mm0, mm7
- movq mm7, mm5
- psubw mm0, mm4
- pmulhw mm5, [eax + 08H]
- paddw mm2, mm1
- pmulhw mm1, [eax + 28H]
- paddw mm4, mm4
- paddw mm4, mm0
- psubw mm3, mm6
- paddw mm5, mm7
- paddw mm6, mm6
- pmulhw mm7, [eax + 28H]
- paddw mm6, mm3
- movq [edx + 10H], mm4
- psubw mm1, mm5
- movq mm4, [eax + 18H]
- movq mm5, mm3
- pmulhw mm3, mm4
- paddw mm7, mm2
- movq [edx + 20H], mm6
- movq mm2, mm0
- movq mm6, [edx]
- pmulhw mm0, mm4
- paddw mm5, mm3
- movq mm3, [edx + 40H]
- psubw mm5, mm1
- paddw mm2, mm0
- psubw mm6, mm3
- movq mm0, mm6
- pmulhw mm6, mm4
- paddw mm3, mm3
- paddw mm1, mm1
- paddw mm3, mm0
- paddw mm1, mm5
- pmulhw mm4, mm3
- paddw mm6, mm0
- psubw mm6, mm2
- paddw mm2, mm2
- movq mm0, [edx + 10H]
- paddw mm2, mm6
- paddw mm4, mm3
- psubw mm2, mm1
- paddw mm2, [eax + 38H]
- paddw mm1, mm1
- paddw mm1, mm2
- psraw mm2, 4
- psubw mm4, mm7
- psraw mm1, 4
- movq mm3, [edx + 20H]
- paddw mm7, mm7
- movq [edx + 20H], mm2
- paddw mm7, mm4
- movq [edx + 10H], mm1
- psubw mm4, mm3
- paddw mm4, [eax + 38H]
- paddw mm3, mm3
- paddw mm3, mm4
- psraw mm4, 4
- psubw mm6, mm5
- psraw mm3, 4
- paddw mm6, [eax + 38H]
- paddw mm5, mm5
- paddw mm5, mm6
- psraw mm6, 4
- movq [edx + 40H], mm4
- psraw mm5, 4
- movq [edx + 30H], mm3
- psubw mm7, mm0
- paddw mm7, [eax + 38H]
- paddw mm0, mm0
- paddw mm0, mm7
- psraw mm7, 4
- movq [edx + 60H], mm6
- psraw mm0, 4
- movq [edx + 50H], mm5
- movq [edx + 70H], mm7
- movq [edx], mm0
- movq mm2, [edx + 38H]
- movq mm6, [eax + 10H]
- movq mm4, mm2
- movq mm7, [edx + 58H]
- pmulhw mm4, mm6
- movq mm1, [eax + 20H]
- pmulhw mm6, mm7
- movq mm5, mm1
- pmulhw mm1, mm2
- movq mm3, [edx + 18H]
- pmulhw mm5, mm7
- movq mm0, [eax]
- paddw mm4, mm2
- paddw mm6, mm7
- paddw mm2, mm1
- movq mm1, [edx + 78H]
- paddw mm7, mm5
- movq mm5, mm0
- pmulhw mm0, mm3
- paddw mm4, mm7
- pmulhw mm5, mm1
- movq mm7, [eax + 30H]
- psubw mm6, mm2
- paddw mm0, mm3
- pmulhw mm3, mm7
- movq mm2, [edx + 28H]
- pmulhw mm7, mm1
- paddw mm5, mm1
- movq mm1, mm2
- pmulhw mm2, [eax + 08H]
- psubw mm3, mm5
- movq mm5, [edx + 68H]
- paddw mm0, mm7
- movq mm7, mm5
- psubw mm0, mm4
- pmulhw mm5, [eax + 08H]
- paddw mm2, mm1
- pmulhw mm1, [eax + 28H]
- paddw mm4, mm4
- paddw mm4, mm0
- psubw mm3, mm6
- paddw mm5, mm7
- paddw mm6, mm6
- pmulhw mm7, [eax + 28H]
- paddw mm6, mm3
- movq [edx + 18H], mm4
- psubw mm1, mm5
- movq mm4, [eax + 18H]
- movq mm5, mm3
- pmulhw mm3, mm4
- paddw mm7, mm2
- movq [edx + 28H], mm6
- movq mm2, mm0
- movq mm6, [edx + 08H]
- pmulhw mm0, mm4
- paddw mm5, mm3
- movq mm3, [edx + 48H]
- psubw mm5, mm1
- paddw mm2, mm0
- psubw mm6, mm3
- movq mm0, mm6
- pmulhw mm6, mm4
- paddw mm3, mm3
- paddw mm1, mm1
- paddw mm3, mm0
- paddw mm1, mm5
- pmulhw mm4, mm3
- paddw mm6, mm0
- psubw mm6, mm2
- paddw mm2, mm2
- movq mm0, [edx + 18H]
- paddw mm2, mm6
- paddw mm4, mm3
- psubw mm2, mm1
- paddw mm2, [eax + 38H]
- paddw mm1, mm1
- paddw mm1, mm2
- psraw mm2, 4
- psubw mm4, mm7
- psraw mm1, 4
- movq mm3, [edx + 28H]
- paddw mm7, mm7
- movq [edx + 28H], mm2
- paddw mm7, mm4
- movq [edx + 18H], mm1
- psubw mm4, mm3
- paddw mm4, [eax + 38H]
- paddw mm3, mm3
- paddw mm3, mm4
- psraw mm4, 4
- psubw mm6, mm5
- psraw mm3, 4
- paddw mm6, [eax + 38H]
- paddw mm5, mm5
- paddw mm5, mm6
- psraw mm6, 4
- movq [edx + 48H], mm4
- psraw mm5, 4
- movq [edx + 38H], mm3
- psubw mm7, mm0
- paddw mm7, [eax + 38H]
- paddw mm0, mm0
- paddw mm0, mm7
- psraw mm7, 4
- movq [edx + 68H], mm6
- psraw mm0, 4
- movq [edx + 58H], mm5
- movq [edx + 78H], mm7
- movq [edx + 08H], mm0
- /* emms */
- }
-}
-
-
-void oc_idct8x8_mmx(ogg_int16_t _y[64]){
- _asm {
- mov edx, [_y]
- mov eax, offset OC_IDCT_CONSTS
- movq mm2, [edx + 30H]
- movq mm6, [eax + 10H]
- movq mm4, mm2
- movq mm7, [edx + 18H]
- pmulhw mm4, mm6
- movq mm1, [eax + 20H]
- pmulhw mm6, mm7
- movq mm5, mm1
- pmulhw mm1, mm2
- movq mm3, [edx + 10H]
- pmulhw mm5, mm7
- movq mm0, [eax]
- paddw mm4, mm2
- paddw mm6, mm7
- paddw mm2, mm1
- movq mm1, [edx + 38H]
- paddw mm7, mm5
- movq mm5, mm0
- pmulhw mm0, mm3
- paddw mm4, mm7
- pmulhw mm5, mm1
- movq mm7, [eax + 30H]
- psubw mm6, mm2
- paddw mm0, mm3
- pmulhw mm3, mm7
- movq mm2, [edx + 20H]
- pmulhw mm7, mm1
- paddw mm5, mm1
- movq mm1, mm2
- pmulhw mm2, [eax + 08H]
- psubw mm3, mm5
- movq mm5, [edx + 28H]
- paddw mm0, mm7
- movq mm7, mm5
- psubw mm0, mm4
- pmulhw mm5, [eax + 08H]
- paddw mm2, mm1
- pmulhw mm1, [eax + 28H]
- paddw mm4, mm4
- paddw mm4, mm0
- psubw mm3, mm6
- paddw mm5, mm7
- paddw mm6, mm6
- pmulhw mm7, [eax + 28H]
- paddw mm6, mm3
- movq [edx + 10H], mm4
- psubw mm1, mm5
- movq mm4, [eax + 18H]
- movq mm5, mm3
- pmulhw mm3, mm4
- paddw mm7, mm2
- movq [edx + 20H], mm6
- movq mm2, mm0
- movq mm6, [edx]
- pmulhw mm0, mm4
- paddw mm5, mm3
- movq mm3, [edx + 08H]
- psubw mm5, mm1
- paddw mm2, mm0
- psubw mm6, mm3
- movq mm0, mm6
- pmulhw mm6, mm4
- paddw mm3, mm3
- paddw mm1, mm1
- paddw mm3, mm0
- paddw mm1, mm5
- pmulhw mm4, mm3
- paddw mm6, mm0
- psubw mm6, mm2
- paddw mm2, mm2
- movq mm0, [edx + 10H]
- paddw mm2, mm6
- paddw mm4, mm3
- psubw mm2, mm1
- movq mm3, [edx + 20H]
- psubw mm4, mm7
- paddw mm1, mm1
- paddw mm7, mm7
- paddw mm1, mm2
- paddw mm7, mm4
- psubw mm4, mm3
- paddw mm3, mm3
- psubw mm6, mm5
- paddw mm5, mm5
- paddw mm3, mm4
- paddw mm5, mm6
- psubw mm7, mm0
- paddw mm0, mm0
- movq [edx + 10H], mm1
- paddw mm0, mm7
- movq mm1, mm4
- punpcklwd mm4, mm5
- movq [edx], mm0
- punpckhwd mm1, mm5
- movq mm0, mm6
- punpcklwd mm6, mm7
- movq mm5, mm4
- punpckldq mm4, mm6
- punpckhdq mm5, mm6
- movq mm6, mm1
- movq [edx + 08H], mm4
- punpckhwd mm0, mm7
- movq [edx + 18H], mm5
- punpckhdq mm6, mm0
- movq mm4, [edx]
- punpckldq mm1, mm0
- movq mm5, [edx + 10H]
- movq mm0, mm4
- movq [edx + 38H], mm6
- punpcklwd mm0, mm5
- movq [edx + 28H], mm1
- punpckhwd mm4, mm5
- movq mm5, mm2
- punpcklwd mm2, mm3
- movq mm1, mm0
- punpckldq mm0, mm2
- punpckhdq mm1, mm2
- movq mm2, mm4
- movq [edx], mm0
- punpckhwd mm5, mm3
- movq [edx + 10H], mm1
- punpckhdq mm4, mm5
- punpckldq mm2, mm5
- movq [edx + 30H], mm4
- movq [edx + 20H], mm2
- movq mm2, [edx + 70H]
- movq mm6, [eax + 10H]
- movq mm4, mm2
- movq mm7, [edx + 58H]
- pmulhw mm4, mm6
- movq mm1, [eax + 20H]
- pmulhw mm6, mm7
- movq mm5, mm1
- pmulhw mm1, mm2
- movq mm3, [edx + 50H]
- pmulhw mm5, mm7
- movq mm0, [eax]
- paddw mm4, mm2
- paddw mm6, mm7
- paddw mm2, mm1
- movq mm1, [edx + 78H]
- paddw mm7, mm5
- movq mm5, mm0
- pmulhw mm0, mm3
- paddw mm4, mm7
- pmulhw mm5, mm1
- movq mm7, [eax + 30H]
- psubw mm6, mm2
- paddw mm0, mm3
- pmulhw mm3, mm7
- movq mm2, [edx + 60H]
- pmulhw mm7, mm1
- paddw mm5, mm1
- movq mm1, mm2
- pmulhw mm2, [eax + 08H]
- psubw mm3, mm5
- movq mm5, [edx + 68H]
- paddw mm0, mm7
- movq mm7, mm5
- psubw mm0, mm4
- pmulhw mm5, [eax + 08H]
- paddw mm2, mm1
- pmulhw mm1, [eax + 28H]
- paddw mm4, mm4
- paddw mm4, mm0
- psubw mm3, mm6
- paddw mm5, mm7
- paddw mm6, mm6
- pmulhw mm7, [eax + 28H]
- paddw mm6, mm3
- movq [edx + 50H], mm4
- psubw mm1, mm5
- movq mm4, [eax + 18H]
- movq mm5, mm3
- pmulhw mm3, mm4
- paddw mm7, mm2
- movq [edx + 60H], mm6
- movq mm2, mm0
- movq mm6, [edx + 40H]
- pmulhw mm0, mm4
- paddw mm5, mm3
- movq mm3, [edx + 48H]
- psubw mm5, mm1
- paddw mm2, mm0
- psubw mm6, mm3
- movq mm0, mm6
- pmulhw mm6, mm4
- paddw mm3, mm3
- paddw mm1, mm1
- paddw mm3, mm0
- paddw mm1, mm5
- pmulhw mm4, mm3
- paddw mm6, mm0
- psubw mm6, mm2
- paddw mm2, mm2
- movq mm0, [edx + 50H]
- paddw mm2, mm6
- paddw mm4, mm3
- psubw mm2, mm1
- movq mm3, [edx + 60H]
- psubw mm4, mm7
- paddw mm1, mm1
- paddw mm7, mm7
- paddw mm1, mm2
- paddw mm7, mm4
- psubw mm4, mm3
- paddw mm3, mm3
- psubw mm6, mm5
- paddw mm5, mm5
- paddw mm3, mm4
- paddw mm5, mm6
- psubw mm7, mm0
- paddw mm0, mm0
- movq [edx + 50H], mm1
- paddw mm0, mm7
- movq mm1, mm4
- punpcklwd mm4, mm5
- movq [edx + 40H], mm0
- punpckhwd mm1, mm5
- movq mm0, mm6
- punpcklwd mm6, mm7
- movq mm5, mm4
- punpckldq mm4, mm6
- punpckhdq mm5, mm6
- movq mm6, mm1
- movq [edx + 48H], mm4
- punpckhwd mm0, mm7
- movq [edx + 58H], mm5
- punpckhdq mm6, mm0
- movq mm4, [edx + 40H]
- punpckldq mm1, mm0
- movq mm5, [edx + 50H]
- movq mm0, mm4
- movq [edx + 78H], mm6
- punpcklwd mm0, mm5
- movq [edx + 68H], mm1
- punpckhwd mm4, mm5
- movq mm5, mm2
- punpcklwd mm2, mm3
- movq mm1, mm0
- punpckldq mm0, mm2
- punpckhdq mm1, mm2
- movq mm2, mm4
- movq [edx + 40H], mm0
- punpckhwd mm5, mm3
- movq [edx + 50H], mm1
- punpckhdq mm4, mm5
- punpckldq mm2, mm5
- movq [edx + 70H], mm4
- movq [edx + 60H], mm2
- movq mm2, [edx + 30H]
- movq mm6, [eax + 10H]
- movq mm4, mm2
- movq mm7, [edx + 50H]
- pmulhw mm4, mm6
- movq mm1, [eax + 20H]
- pmulhw mm6, mm7
- movq mm5, mm1
- pmulhw mm1, mm2
- movq mm3, [edx + 10H]
- pmulhw mm5, mm7
- movq mm0, [eax]
- paddw mm4, mm2
- paddw mm6, mm7
- paddw mm2, mm1
- movq mm1, [edx + 70H]
- paddw mm7, mm5
- movq mm5, mm0
- pmulhw mm0, mm3
- paddw mm4, mm7
- pmulhw mm5, mm1
- movq mm7, [eax + 30H]
- psubw mm6, mm2
- paddw mm0, mm3
- pmulhw mm3, mm7
- movq mm2, [edx + 20H]
- pmulhw mm7, mm1
- paddw mm5, mm1
- movq mm1, mm2
- pmulhw mm2, [eax + 08H]
- psubw mm3, mm5
- movq mm5, [edx + 60H]
- paddw mm0, mm7
- movq mm7, mm5
- psubw mm0, mm4
- pmulhw mm5, [eax + 08H]
- paddw mm2, mm1
- pmulhw mm1, [eax + 28H]
- paddw mm4, mm4
- paddw mm4, mm0
- psubw mm3, mm6
- paddw mm5, mm7
- paddw mm6, mm6
- pmulhw mm7, [eax + 28H]
- paddw mm6, mm3
- movq [edx + 10H], mm4
- psubw mm1, mm5
- movq mm4, [eax + 18H]
- movq mm5, mm3
- pmulhw mm3, mm4
- paddw mm7, mm2
- movq [edx + 20H], mm6
- movq mm2, mm0
- movq mm6, [edx]
- pmulhw mm0, mm4
- paddw mm5, mm3
- movq mm3, [edx + 40H]
- psubw mm5, mm1
- paddw mm2, mm0
- psubw mm6, mm3
- movq mm0, mm6
- pmulhw mm6, mm4
- paddw mm3, mm3
- paddw mm1, mm1
- paddw mm3, mm0
- paddw mm1, mm5
- pmulhw mm4, mm3
- paddw mm6, mm0
- psubw mm6, mm2
- paddw mm2, mm2
- movq mm0, [edx + 10H]
- paddw mm2, mm6
- paddw mm4, mm3
- psubw mm2, mm1
- paddw mm2, [eax + 38H]
- paddw mm1, mm1
- paddw mm1, mm2
- psraw mm2, 4
- psubw mm4, mm7
- psraw mm1, 4
- movq mm3, [edx + 20H]
- paddw mm7, mm7
- movq [edx + 20H], mm2
- paddw mm7, mm4
- movq [edx + 10H], mm1
- psubw mm4, mm3
- paddw mm4, [eax + 38H]
- paddw mm3, mm3
- paddw mm3, mm4
- psraw mm4, 4
- psubw mm6, mm5
- psraw mm3, 4
- paddw mm6, [eax + 38H]
- paddw mm5, mm5
- paddw mm5, mm6
- psraw mm6, 4
- movq [edx + 40H], mm4
- psraw mm5, 4
- movq [edx + 30H], mm3
- psubw mm7, mm0
- paddw mm7, [eax + 38H]
- paddw mm0, mm0
- paddw mm0, mm7
- psraw mm7, 4
- movq [edx + 60H], mm6
- psraw mm0, 4
- movq [edx + 50H], mm5
- movq [edx + 70H], mm7
- movq [edx], mm0
- movq mm2, [edx + 38H]
- movq mm6, [eax + 10H]
- movq mm4, mm2
- movq mm7, [edx + 58H]
- pmulhw mm4, mm6
- movq mm1, [eax + 20H]
- pmulhw mm6, mm7
- movq mm5, mm1
- pmulhw mm1, mm2
- movq mm3, [edx + 18H]
- pmulhw mm5, mm7
- movq mm0, [eax]
- paddw mm4, mm2
- paddw mm6, mm7
- paddw mm2, mm1
- movq mm1, [edx + 78H]
- paddw mm7, mm5
- movq mm5, mm0
- pmulhw mm0, mm3
- paddw mm4, mm7
- pmulhw mm5, mm1
- movq mm7, [eax + 30H]
- psubw mm6, mm2
- paddw mm0, mm3
- pmulhw mm3, mm7
- movq mm2, [edx + 28H]
- pmulhw mm7, mm1
- paddw mm5, mm1
- movq mm1, mm2
- pmulhw mm2, [eax + 08H]
- psubw mm3, mm5
- movq mm5, [edx + 68H]
- paddw mm0, mm7
- movq mm7, mm5
- psubw mm0, mm4
- pmulhw mm5, [eax + 08H]
- paddw mm2, mm1
- pmulhw mm1, [eax + 28H]
- paddw mm4, mm4
- paddw mm4, mm0
- psubw mm3, mm6
- paddw mm5, mm7
- paddw mm6, mm6
- pmulhw mm7, [eax + 28H]
- paddw mm6, mm3
- movq [edx + 18H], mm4
- psubw mm1, mm5
- movq mm4, [eax + 18H]
- movq mm5, mm3
- pmulhw mm3, mm4
- paddw mm7, mm2
- movq [edx + 28H], mm6
- movq mm2, mm0
- movq mm6, [edx + 08H]
- pmulhw mm0, mm4
- paddw mm5, mm3
- movq mm3, [edx + 48H]
- psubw mm5, mm1
- paddw mm2, mm0
- psubw mm6, mm3
- movq mm0, mm6
- pmulhw mm6, mm4
- paddw mm3, mm3
- paddw mm1, mm1
- paddw mm3, mm0
- paddw mm1, mm5
- pmulhw mm4, mm3
- paddw mm6, mm0
- psubw mm6, mm2
- paddw mm2, mm2
- movq mm0, [edx + 18H]
- paddw mm2, mm6
- paddw mm4, mm3
- psubw mm2, mm1
- paddw mm2, [eax + 38H]
- paddw mm1, mm1
- paddw mm1, mm2
- psraw mm2, 4
- psubw mm4, mm7
- psraw mm1, 4
- movq mm3, [edx + 28H]
- paddw mm7, mm7
- movq [edx + 28H], mm2
- paddw mm7, mm4
- movq [edx + 18H], mm1
- psubw mm4, mm3
- paddw mm4, [eax + 38H]
- paddw mm3, mm3
- paddw mm3, mm4
- psraw mm4, 4
- psubw mm6, mm5
- psraw mm3, 4
- paddw mm6, [eax + 38H]
- paddw mm5, mm5
- paddw mm5, mm6
- psraw mm6, 4
- movq [edx + 48H], mm4
- psraw mm5, 4
- movq [edx + 38H], mm3
- psubw mm7, mm0
- paddw mm7, [eax + 38H]
- paddw mm0, mm0
- paddw mm0, mm7
- psraw mm7, 4
- movq [edx + 68H], mm6
- psraw mm0, 4
- movq [edx + 58H], mm5
- movq [edx + 78H], mm7
- movq [edx + 08H], mm0
- /* emms */
- }
-}
-
-#endif
-
+#endif
\ No newline at end of file
Modified: trunk/theora/lib/dec/x86_vc/mmxloopfilter.c
===================================================================
--- trunk/theora/lib/dec/x86_vc/mmxloopfilter.c 2008-04-13 06:23:04 UTC (rev 14721)
+++ trunk/theora/lib/dec/x86_vc/mmxloopfilter.c 2008-04-13 06:28:00 UTC (rev 14722)
@@ -352,17 +352,17 @@
while(frag<frag_end){
if(frag->coded){
if(frag>frag0){
- loop_filter_h(frag->buffer[_refi],iplane->ystride,ll);
+ loop_filter_h(frag->buffer[_refi],iplane->stride,ll);
}
if(frag0>frag_top){
- loop_filter_v(frag->buffer[_refi],iplane->ystride,ll);
+ loop_filter_v(frag->buffer[_refi],iplane->stride,ll);
}
if(frag+1<frag_end&&!(frag+1)->coded){
- loop_filter_h(frag->buffer[_refi]+8,iplane->ystride,ll);
+ loop_filter_h(frag->buffer[_refi]+8,iplane->stride,ll);
}
if(frag+fplane->nhfrags<frag_bot&&!(frag+fplane->nhfrags)->coded){
loop_filter_v((frag+fplane->nhfrags)->buffer[_refi],
- iplane->ystride,ll);
+ iplane->stride,ll);
}
}
frag++;
@@ -374,383 +374,4 @@
_mm_empty();
}
-#endif
-
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id:
-
- ********************************************************************/
-
-/* -------------------------------------------------------------------
- MMX based loop filter for the theora codec.
-
- Originally written by Rudolf Marek, based on code from On2's VP3.
- Converted to Visual Studio inline assembly by Nils Pipenbrinck.
-
- Note: I can't test these since my example files never get into the
- loop filters, but the code has been converted semi-automatic from
- the GCC sources, so it ought to work.
- ---------------------------------------------------------------------*/
-#include "../../internal.h"
-#include "x86int.h"
-#include <mmintrin.h>
-
-#if defined(USE_ASM)
-
-
-
-static void loop_filter_v(unsigned char *_pix,int _ystride,
- const ogg_int16_t *_ll){
- _asm {
- mov eax, [_pix]
- mov edx, [_ystride]
- mov ebx, [_ll]
-
- /* _pix -= ystride */
- sub eax, edx
- /* mm0=0 */
- pxor mm0, mm0
- /* _pix -= ystride */
- sub eax, edx
- /* esi=_ystride*3 */
- lea esi, [edx + edx*2]
-
- /* mm7=_pix[0...8]*/
- movq mm7, [eax]
- /* mm4=_pix[0...8+_ystride*3]*/
- movq mm4, [eax + esi]
- /* mm6=_pix[0...8]*/
- movq mm6, mm7
- /* Expand unsigned _pix[0...3] to 16 bits.*/
- punpcklbw mm6, mm0
- movq mm5, mm4
- /* Expand unsigned _pix[4...7] to 16 bits.*/
- punpckhbw mm7, mm0
- punpcklbw mm4, mm0
- /* Expand other arrays too.*/
- punpckhbw mm5, mm0
- /*mm7:mm6=_p[0...7]-_p[0...7+_ystride*3]:*/
- psubw mm6, mm4
- psubw mm7, mm5
- /*mm5=mm4=_pix[0...7+_ystride]*/
- movq mm4, [eax + edx]
- /*mm1=mm3=mm2=_pix[0..7]+_ystride*2]*/
- movq mm2, [eax + edx*2]
- movq mm5, mm4
- movq mm3, mm2
- movq mm1, mm2
- /*Expand these arrays.*/
- punpckhbw mm5, mm0
- punpcklbw mm4, mm0
- punpckhbw mm3, mm0
- punpcklbw mm2, mm0
- pcmpeqw mm0, mm0
- /*mm0=3 3 3 3
- mm3:mm2=_pix[0...8+_ystride*2]-_pix[0...8+_ystride]*/
- psubw mm3, mm5
- psrlw mm0, 14
- psubw mm2, mm4
- /*Scale by 3.*/
- pmullw mm3, mm0
- pmullw mm2, mm0
- /*mm0=4 4 4 4
- f=mm3:mm2==_pix[0...8]-_pix[0...8+_ystride*3]+
- 3*(_pix[0...8+_ystride*2]-_pix[0...8+_ystride])*/
- psrlw mm0, 1
- paddw mm3, mm7
- psllw mm0, 2
- paddw mm2, mm6
- /*Add 4.*/
- paddw mm3, mm0
- paddw mm2, mm0
- /*"Divide" by 8.*/
- psraw mm3, 3
- psraw mm2, 3
- /*Now compute lflim of mm3:mm2 cf. Section 7.10 of the sepc.*/
- /*Free up mm5.*/
- packuswb mm4, mm5
- /*mm0=L L L L*/
- movq mm0, [ebx]
- /*if(R_i<-2L||R_i>2L)R_i=0:*/
- movq mm5, mm2
- pxor mm6, mm6
- movq mm7, mm0
- psubw mm6, mm0
- psllw mm7, 1
- psllw mm6, 1
- /*mm2==R_3 R_2 R_1 R_0*/
- /*mm5==R_3 R_2 R_1 R_0*/
- /*mm6==-2L -2L -2L -2L*/
- /*mm7==2L 2L 2L 2L*/
- pcmpgtw mm7, mm2
- pcmpgtw mm5, mm6
- pand mm2, mm7
- movq mm7, mm0
- pand mm2, mm5
- psllw mm7, 1
- movq mm5, mm3
- /*mm3==R_7 R_6 R_5 R_4*/
- /*mm5==R_7 R_6 R_5 R_4*/
- /*mm6==-2L -2L -2L -2L*/
- /*mm7==2L 2L 2L 2L*/
- pcmpgtw mm7, mm3
- pcmpgtw mm5, mm6
- pand mm3, mm7
- movq mm7, mm0
- pand mm3, mm5
- /*if(R_i<-L)R_i'=R_i+2L;
- if(R_i>L)R_i'=R_i-2L;
- if(R_i<-L||R_i>L)R_i=-R_i':*/
- psraw mm6, 1
- movq mm5, mm2
- psllw mm7, 1
- /*mm2==R_3 R_2 R_1 R_0*/
- /*mm5==R_3 R_2 R_1 R_0*/
- /*mm6==-L -L -L -L*/
- /*mm0==L L L L*/
- /*mm5=R_i>L?FF:00*/
- pcmpgtw mm5, mm0
- /*mm6=-L>R_i?FF:00*/
- pcmpgtw mm6, mm2
- /*mm7=R_i>L?2L:0*/
- pand mm7, mm5
- /*mm2=R_i>L?R_i-2L:R_i*/
- psubw mm2, mm7
- movq mm7, mm0
- /*mm5=-L>R_i||R_i>L*/
- por mm5, mm6
- psllw mm7, 1
- /*mm7=-L>R_i?2L:0*/
- pand mm7, mm6
- pxor mm6, mm6
- /*mm2=-L>R_i?R_i+2L:R_i*/
- paddw mm2, mm7
- psubw mm6, mm0
- /*mm5=-L>R_i||R_i>L?-R_i':0*/
- pand mm5, mm2
- movq mm7, mm0
- /*mm2=-L>R_i||R_i>L?0:R_i*/
- psubw mm2, mm5
- psllw mm7, 1
- /*mm2=-L>R_i||R_i>L?-R_i':R_i*/
- psubw mm2, mm5
- movq mm5, mm3
- /*mm3==R_7 R_6 R_5 R_4*/
- /*mm5==R_7 R_6 R_5 R_4*/
- /*mm6==-L -L -L -L*/
- /*mm0==L L L L*/
- /*mm6=-L>R_i?FF:00*/
- pcmpgtw mm6, mm3
- /*mm5=R_i>L?FF:00*/
- pcmpgtw mm5, mm0
- /*mm7=R_i>L?2L:0*/
- pand mm7, mm5
- /*mm2=R_i>L?R_i-2L:R_i*/
- psubw mm3, mm7
- psllw mm0, 1
- /*mm5=-L>R_i||R_i>L*/
- por mm5, mm6
- /*mm0=-L>R_i?2L:0*/
- pand mm0, mm6
- /*mm3=-L>R_i?R_i+2L:R_i*/
- paddw mm3, mm0
- /*mm5=-L>R_i||R_i>L?-R_i':0*/
- pand mm5, mm3
- /*mm2=-L>R_i||R_i>L?0:R_i*/
- psubw mm3, mm5
- /*mm3=-L>R_i||R_i>L?-R_i':R_i*/
- psubw mm3, mm5
- /*Unfortunately, there's no unsigned byte+signed byte with unsigned
- saturation op code, so we have to promote things back 16 bits.*/
- pxor mm0, mm0
- movq mm5, mm4
- punpcklbw mm4, mm0
- punpckhbw mm5, mm0
- movq mm6, mm1
- punpcklbw mm1, mm0
- punpckhbw mm6, mm0
- /*_pix[0...8+_ystride]+=R_i*/
- paddw mm4, mm2
- paddw mm5, mm3
- /*_pix[0...8+_ystride*2]-=R_i*/
- psubw mm1, mm2
- psubw mm6, mm3
- packuswb mm4, mm5
- packuswb mm1, mm6
- /*Write it back out.*/
- movq [eax + edx], mm4
- movq [eax + edx*2], mm1
- }
-}
-
-/*This code implements the bulk of loop_filter_h().
- Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all
- four p0's to one register we must transpose the values in four mmx regs.
- When half is done we repeat this for the rest.*/
-static void loop_filter_h4(unsigned char *_pix,long _ystride,
- const ogg_int16_t *_ll){
- /* todo: merge the comments from the GCC sources */
- _asm {
- mov ecx, [_pix]
- mov edx, [_ystride]
- mov eax, [_ll]
- /*esi=_ystride*3*/
- lea esi, [edx + edx*2]
-
- movd mm0, dword ptr [ecx]
- movd mm1, dword ptr [ecx + edx]
- movd mm2, dword ptr [ecx + edx*2]
- movd mm3, dword ptr [ecx + esi]
- punpcklbw mm0, mm1
- punpcklbw mm2, mm3
- movq mm1, mm0
- punpckhwd mm0, mm2
- punpcklwd mm1, mm2
- pxor mm7, mm7
- movq mm5, mm1
- punpcklbw mm1, mm7
- punpckhbw mm5, mm7
- movq mm3, mm0
- punpcklbw mm0, mm7
- punpckhbw mm3, mm7
- psubw mm1, mm3
- movq mm4, mm0
- pcmpeqw mm2, mm2
- psubw mm0, mm5
- psrlw mm2, 14
- pmullw mm0, mm2
- psrlw mm2, 1
- paddw mm0, mm1
- psllw mm2, 2
- paddw mm0, mm2
- psraw mm0, 3
- movq mm6, qword ptr [eax]
- movq mm1, mm0
- pxor mm2, mm2
- movq mm3, mm6
- psubw mm2, mm6
- psllw mm3, 1
- psllw mm2, 1
- pcmpgtw mm3, mm0
- pcmpgtw mm1, mm2
- pand mm0, mm3
- pand mm0, mm1
- psraw mm2, 1
- movq mm1, mm0
- movq mm3, mm6
- pcmpgtw mm2, mm0
- pcmpgtw mm1, mm6
- psllw mm3, 1
- psllw mm6, 1
- pand mm3, mm1
- pand mm6, mm2
- psubw mm0, mm3
- por mm1, mm2
- paddw mm0, mm6
- pand mm1, mm0
- psubw mm0, mm1
- psubw mm0, mm1
- paddw mm5, mm0
- psubw mm4, mm0
- packuswb mm5, mm7
- packuswb mm4, mm7
- punpcklbw mm5, mm4
- movd edi, mm5
- mov word ptr [ecx + 01H], di
- psrlq mm5, 32
- shr edi, 16
- mov word ptr [ecx + edx + 01H], di
- movd edi, mm5
- mov word ptr [ecx + edx*2 + 01H], di
- shr edi, 16
- mov word ptr [ecx + esi + 01H], di
- }
-}
-
-static void loop_filter_h(unsigned char *_pix,int _ystride,
- const ogg_int16_t *_ll){
- _pix-=2;
- loop_filter_h4(_pix,_ystride,_ll);
- loop_filter_h4(_pix+(_ystride<<2),_ystride,_ll);
-}
-
-
-/*We copy the whole function because the MMX routines will be inlined 4 times,
- and we can do just a single emms call at the end this way.
- We also do not use the _bv lookup table, instead computing the values that
- would lie in it on the fly.*/
-
-/*Apply the loop filter to a given set of fragment rows in the given plane.
- The filter may be run on the bottom edge, affecting pixels in the next row of
- fragments, so this row also needs to be available.
- _bv: The bounding values array.
- _refi: The index of the frame buffer to filter.
- _pli: The color plane to filter.
- _fragy0: The Y coordinate of the first fragment row to filter.
- _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
-void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv,
- int _refi,int _pli,int _fragy0,int _fragy_end){
- ogg_int16_t __declspec(align(8)) ll[4];
- th_img_plane *iplane;
- oc_fragment_plane *fplane;
- oc_fragment *frag_top;
- oc_fragment *frag0;
- oc_fragment *frag;
- oc_fragment *frag_end;
- oc_fragment *frag0_end;
- oc_fragment *frag_bot;
- ll[0]=ll[1]=ll[2]=ll[3]=
- (ogg_int16_t)_state->loop_filter_limits[_state->qis[0]];
- iplane=_state->ref_frame_bufs[_refi]+_pli;
- fplane=_state->fplanes+_pli;
- /*The following loops are constructed somewhat non-intuitively on purpose.
- The main idea is: if a block boundary has at least one coded fragment on
- it, the filter is applied to it.
- However, the order that the filters are applied in matters, and VP3 chose
- the somewhat strange ordering used below.*/
- frag_top=_state->frags+fplane->froffset;
- frag0=frag_top+_fragy0*fplane->nhfrags;
- frag0_end=frag0+(_fragy_end-_fragy0)*fplane->nhfrags;
- frag_bot=_state->frags+fplane->froffset+fplane->nfrags;
- while(frag0<frag0_end){
- frag=frag0;
- frag_end=frag+fplane->nhfrags;
- while(frag<frag_end){
- if(frag->coded){
- if(frag>frag0){
- loop_filter_h(frag->buffer[_refi],iplane->ystride,ll);
- }
- if(frag0>frag_top){
- loop_filter_v(frag->buffer[_refi],iplane->ystride,ll);
- }
- if(frag+1<frag_end&&!(frag+1)->coded){
- loop_filter_h(frag->buffer[_refi]+8,iplane->ystride,ll);
- }
- if(frag+fplane->nhfrags<frag_bot&&!(frag+fplane->nhfrags)->coded){
- loop_filter_v((frag+fplane->nhfrags)->buffer[_refi],
- iplane->ystride,ll);
- }
- }
- frag++;
- }
- frag0+=fplane->nhfrags;
- }
-
- /*This needs to be removed when decode specific functions are implemented:*/
- _mm_empty();
-}
-
-#endif
-
+#endif
\ No newline at end of file
Modified: trunk/theora/lib/dec/x86_vc/mmxstate.c
===================================================================
--- trunk/theora/lib/dec/x86_vc/mmxstate.c 2008-04-13 06:23:04 UTC (rev 14721)
+++ trunk/theora/lib/dec/x86_vc/mmxstate.c 2008-04-13 06:28:00 UTC (rev 14722)
@@ -138,7 +138,7 @@
}
/*Fill in the target buffer.*/
dst_framei=_state->ref_frame_idx[OC_FRAME_SELF];
- dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].ystride;
+ dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
/*For now ystride values in all ref frames assumed to be equal.*/
if(_frag->mbmode==OC_MODE_INTRA){
oc_frag_recon_intra_mmx(_frag->buffer[dst_framei],dst_ystride,res_buf);
@@ -149,7 +149,7 @@
int mvoffset0;
int mvoffset1;
ref_framei=_state->ref_frame_idx[OC_FRAME_FOR_MODE[_frag->mbmode]];
- ref_ystride=_state->ref_frame_bufs[ref_framei][_pli].ystride;
+ ref_ystride=_state->ref_frame_bufs[ref_framei][_pli].stride;
if(oc_state_get_mv_offsets(_state,&mvoffset0,&mvoffset1,_frag->mv[0],
_frag->mv[1],ref_ystride,_pli)>1){
oc_frag_recon_inter2_mmx(_frag->buffer[dst_framei],dst_ystride,
@@ -176,8 +176,8 @@
int src_ystride;
dst_framei=_state->ref_frame_idx[_dst_frame];
src_framei=_state->ref_frame_idx[_src_frame];
- dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].ystride;
- src_ystride=_state->ref_frame_bufs[src_framei][_pli].ystride;
+ dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
+ src_ystride=_state->ref_frame_bufs[src_framei][_pli].stride;
fragi_end=_fragis+_nfragis;
for(fragi=_fragis;fragi<fragi_end;fragi++){
oc_fragment *frag = _state->frags+*fragi;
@@ -187,196 +187,4 @@
_m_empty();
}
-#endif
-
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id:
-
- ********************************************************************/
-
-/* ------------------------------------------------------------------------
- MMX acceleration of complete fragment reconstruction algorithm.
- Originally written by Rudolf Marek.
-
- Conversion to MSC intrinsics by Nils Pipenbrinck.
- ---------------------------------------------------------------------*/
-#if defined(USE_ASM)
-
-#include "../../internal.h"
-#include "../idct.h"
-#include "x86int.h"
-#include <mmintrin.h>
-
-static const unsigned char OC_FZIG_ZAGMMX[64]=
-{
- 0, 8, 1, 2, 9,16,24,17,
- 10, 3,32,11,18,25, 4,12,
- 5,26,19,40,33,34,41,48,
- 27, 6,13,20,28,21,14, 7,
- 56,49,42,35,43,50,57,36,
- 15,22,29,30,23,44,37,58,
- 51,59,38,45,52,31,60,53,
- 46,39,47,54,61,62,55,63
-};
-
-/* Fill a block with value */
-static __inline void loc_fill_mmx_value (__m64 * _dst, __m64 _value){
- __m64 t = _value;
- _dst[0] = t; _dst[1] = t; _dst[2] = t; _dst[3] = t;
- _dst[4] = t; _dst[5] = t; _dst[6] = t; _dst[7] = t;
- _dst[8] = t; _dst[9] = t; _dst[10] = t; _dst[11] = t;
- _dst[12] = t; _dst[13] = t; _dst[14] = t; _dst[15] = t;
-}
-
-/* copy a block of 8 byte elements using different strides */
-static __inline void loc_blockcopy_mmx (unsigned char * _dst, int _dst_ystride,
- unsigned char * _src, int _src_ystride){
- __m64 a,b,c,d,e,f,g,h;
- a = *(__m64*)(_src + 0 * _src_ystride);
- b = *(__m64*)(_src + 1 * _src_ystride);
- c = *(__m64*)(_src + 2 * _src_ystride);
- d = *(__m64*)(_src + 3 * _src_ystride);
- e = *(__m64*)(_src + 4 * _src_ystride);
- f = *(__m64*)(_src + 5 * _src_ystride);
- g = *(__m64*)(_src + 6 * _src_ystride);
- h = *(__m64*)(_src + 7 * _src_ystride);
- *(__m64*)(_dst + 0 * _dst_ystride) = a;
- *(__m64*)(_dst + 1 * _dst_ystride) = b;
- *(__m64*)(_dst + 2 * _dst_ystride) = c;
- *(__m64*)(_dst + 3 * _dst_ystride) = d;
- *(__m64*)(_dst + 4 * _dst_ystride) = e;
- *(__m64*)(_dst + 5 * _dst_ystride) = f;
- *(__m64*)(_dst + 6 * _dst_ystride) = g;
- *(__m64*)(_dst + 7 * _dst_ystride) = h;
-}
-
-void oc_state_frag_recon_mmx(oc_theora_state *_state,const oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
- ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]){
- ogg_int16_t __declspec(align(16)) res_buf[64];
- int dst_framei;
- int dst_ystride;
- int zzi;
- /*_last_zzi is subtly different from an actual count of the number of
- coefficients we decoded for this block.
- It contains the value of zzi BEFORE the final token in the block was
- decoded.
- In most cases this is an EOB token (the continuation of an EOB run from a
- previous block counts), and so this is the same as the coefficient count.
- However, in the case that the last token was NOT an EOB token, but filled
- the block up with exactly 64 coefficients, _last_zzi will be less than 64.
- Provided the last token was not a pure zero run, the minimum value it can
- be is 46, and so that doesn't affect any of the cases in this routine.
- However, if the last token WAS a pure zero run of length 63, then _last_zzi
- will be 1 while the number of coefficients decoded is 64.
- Thus, we will trigger the following special case, where the real
- coefficient count would not.
- Note also that a zero run of length 64 will give _last_zzi a value of 0,
- but we still process the DC coefficient, which might have a non-zero value
- due to DC prediction.
- Although convoluted, this is arguably the correct behavior: it allows us to
- dequantize fewer coefficients and use a smaller transform when the block
- ends with a long zero run instead of a normal EOB token.
- It could be smarter... multiple separate zero runs at the end of a block
- will fool it, but an encoder that generates these really deserves what it
- gets.
- Needless to say we inherited this approach from VP3.*/
- /*Special case only having a DC component.*/
- if(_last_zzi<2){
- __m64 p;
- /*Why is the iquant product rounded in this case and no others? Who knows.*/
- p = _m_from_int((ogg_int32_t)_frag->dc*_dc_iquant+15>>5);
- /* broadcast 16 bits into all 4 mmx subregisters */
- p = _m_punpcklwd (p,p);
- p = _m_punpckldq (p,p);
- loc_fill_mmx_value ((__m64 *)res_buf, p);
- }
- else{
- /*Then, fill in the remainder of the coefficients with 0's, and perform
- the iDCT.*/
- /*First zero the buffer.*/
- /*On K7, etc., this could be replaced with movntq and sfence.*/
- loc_fill_mmx_value ((__m64 *)res_buf, _mm_setzero_si64());
-
- res_buf[0]=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant);
- /*This is planned to be rewritten in MMX.*/
- for(zzi=1;zzi<_ncoefs;zzi++)
- {
- int ci;
- ci=OC_FZIG_ZAG[zzi];
- res_buf[OC_FZIG_ZAGMMX[zzi]]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[zzi]*
- _ac_iquant[ci]);
- }
-
- if(_last_zzi<10){
- oc_idct8x8_10_mmx(res_buf);
- }
- else {
- oc_idct8x8_mmx(res_buf);
- }
- }
- /*Fill in the target buffer.*/
- dst_framei=_state->ref_frame_idx[OC_FRAME_SELF];
- dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].ystride;
- /*For now ystride values in all ref frames assumed to be equal.*/
- if(_frag->mbmode==OC_MODE_INTRA){
- oc_frag_recon_intra_mmx(_frag->buffer[dst_framei],dst_ystride,res_buf);
- }
- else{
- int ref_framei;
- int ref_ystride;
- int mvoffset0;
- int mvoffset1;
- ref_framei=_state->ref_frame_idx[OC_FRAME_FOR_MODE[_frag->mbmode]];
- ref_ystride=_state->ref_frame_bufs[ref_framei][_pli].ystride;
- if(oc_state_get_mv_offsets(_state,&mvoffset0,&mvoffset1,_frag->mv[0],
- _frag->mv[1],ref_ystride,_pli)>1){
- oc_frag_recon_inter2_mmx(_frag->buffer[dst_framei],dst_ystride,
- _frag->buffer[ref_framei]+mvoffset0,ref_ystride,
- _frag->buffer[ref_framei]+mvoffset1,ref_ystride,res_buf);
- }
- else{
- oc_frag_recon_inter_mmx(_frag->buffer[dst_framei],dst_ystride,
- _frag->buffer[ref_framei]+mvoffset0,ref_ystride,res_buf);
- }
- }
-
- _mm_empty();
-}
-
-
-void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
- int _nfragis,int _dst_frame,int _src_frame,int _pli){
- const int *fragi;
- const int *fragi_end;
- int dst_framei;
- int dst_ystride;
- int src_framei;
- int src_ystride;
- dst_framei=_state->ref_frame_idx[_dst_frame];
- src_framei=_state->ref_frame_idx[_src_frame];
- dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].ystride;
- src_ystride=_state->ref_frame_bufs[src_framei][_pli].ystride;
- fragi_end=_fragis+_nfragis;
- for(fragi=_fragis;fragi<fragi_end;fragi++){
- oc_fragment *frag = _state->frags+*fragi;
- loc_blockcopy_mmx (frag->buffer[dst_framei], dst_ystride,
- frag->buffer[src_framei], src_ystride);
- }
- _m_empty();
-}
-
-#endif
-
+#endif
\ No newline at end of file
Modified: trunk/theora/lib/dec/x86_vc/x86int.h
===================================================================
--- trunk/theora/lib/dec/x86_vc/x86int.h 2008-04-13 06:23:04 UTC (rev 14721)
+++ trunk/theora/lib/dec/x86_vc/x86int.h 2008-04-13 06:28:00 UTC (rev 14722)
@@ -47,52 +47,3 @@
int _refi,int _pli,int _fragy0,int _fragy_end);
#endif
-/********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
-
- function:
- last mod: $Id$
-
- ********************************************************************/
-
-#if !defined(_x86_x86int_vc_H)
-# define _x86_x86int_vc_H (1)
-# include "../../internal.h"
-
-void oc_state_vtable_init_x86(oc_theora_state *_state);
-
-void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
- const ogg_int16_t *_residue);
-
-void oc_frag_recon_inter_mmx(unsigned char *_dst,int _dst_ystride,
- const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue);
-
-void oc_frag_recon_inter2_mmx(unsigned char *_dst,int _dst_ystride,
- const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2,
- int _src2_ystride,const ogg_int16_t *_residue);
-
-void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
- int _nfragis,int _dst_frame,int _src_frame,int _pli);
-
-void oc_restore_fpu_mmx(void);
-
-void oc_state_frag_recon_mmx(oc_theora_state *_state,const oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
- ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
-
-void oc_idct8x8_mmx(ogg_int16_t _y[64]);
-void oc_idct8x8_10_mmx(ogg_int16_t _y[64]);
-
-void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv,
- int _refi,int _pli,int _fragy0,int _fragy_end);
-
-#endif
More information about the commits
mailing list