[xiph-commits] r17446 - in trunk/theora/lib: . x86 x86_vc
tterribe at svn.xiph.org
tterribe at svn.xiph.org
Thu Sep 23 13:06:20 PDT 2010
Author: tterribe
Date: 2010-09-23 13:06:20 -0700 (Thu, 23 Sep 2010)
New Revision: 17446
Modified:
trunk/theora/lib/encint.h
trunk/theora/lib/x86/mmxidct.c
trunk/theora/lib/x86_vc/mmxfrag.c
trunk/theora/lib/x86_vc/mmxidct.c
Log:
Win32 asm fixes.
Thanks to Benjamin Jemlich and Cristian Adam for help diagnosing and testing.
Modified: trunk/theora/lib/encint.h
===================================================================
--- trunk/theora/lib/encint.h 2010-09-23 15:56:45 UTC (rev 17445)
+++ trunk/theora/lib/encint.h 2010-09-23 20:06:20 UTC (rev 17446)
@@ -45,7 +45,11 @@
/*Encoder-specific accelerated functions.*/
# if defined(OC_X86_ASM)
-# include "x86/x86enc.h"
+# if defined(_MSC_VER)
+# include "x86_vc/x86enc.h"
+# else
+# include "x86/x86enc.h"
+# endif
# endif
# if !defined(oc_enc_accel_init)
Modified: trunk/theora/lib/x86/mmxidct.c
===================================================================
--- trunk/theora/lib/x86/mmxidct.c 2010-09-23 15:56:45 UTC (rev 17445)
+++ trunk/theora/lib/x86/mmxidct.c 2010-09-23 20:06:20 UTC (rev 17446)
@@ -287,24 +287,24 @@
/*This routine accepts an 8x8 matrix, but in partially transposed form.
Every 4x4 block is transposed.*/
__asm__ __volatile__(
-#define OC_I(_k,_y) OC_MEM_OFFS((_k*16),_y)
-#define OC_J(_k,_y) OC_MEM_OFFS(((_k-4)*16)+8,_y)
+#define OC_I(_k,_y) OC_MEM_OFFS((_k)*16,_y)
+#define OC_J(_k,_y) OC_MEM_OFFS(((_k)-4)*16+8,_y)
OC_ROW_IDCT(y,x)
OC_TRANSPOSE(y)
#undef OC_I
#undef OC_J
-#define OC_I(_k,_y) OC_MEM_OFFS((_k*16)+64,_y)
-#define OC_J(_k,_y) OC_MEM_OFFS(((_k-4)*16)+72,_y)
+#define OC_I(_k,_y) OC_MEM_OFFS((_k)*16+64,_y)
+#define OC_J(_k,_y) OC_MEM_OFFS(((_k)-4)*16+72,_y)
OC_ROW_IDCT(y,x)
OC_TRANSPOSE(y)
#undef OC_I
#undef OC_J
-#define OC_I(_k,_y) OC_MEM_OFFS((_k*16),_y)
+#define OC_I(_k,_y) OC_MEM_OFFS((_k)*16,_y)
#define OC_J(_k,_y) OC_I(_k,_y)
OC_COLUMN_IDCT(y)
#undef OC_I
#undef OC_J
-#define OC_I(_k,_y) OC_MEM_OFFS((_k*16)+8,_y)
+#define OC_I(_k,_y) OC_MEM_OFFS((_k)*16+8,_y)
#define OC_J(_k,_y) OC_I(_k,_y)
OC_COLUMN_IDCT(y)
#undef OC_I
@@ -492,20 +492,20 @@
static void oc_idct8x8_10_mmx(ogg_int16_t _y[64],ogg_int16_t _x[64]){
__asm__ __volatile__(
-#define OC_I(_k,_y) OC_MEM_OFFS((_k*16),_y)
-#define OC_J(_k,_y) OC_MEM_OFFS(((_k-4)*16)+8,_y)
+#define OC_I(_k,_y) OC_MEM_OFFS((_k)*16,_y)
+#define OC_J(_k,_y) OC_MEM_OFFS(((_k)-4)*16+8,_y)
/*Done with dequant, descramble, and partial transpose.
Now do the iDCT itself.*/
OC_ROW_IDCT_10(y,x)
OC_TRANSPOSE(y)
#undef OC_I
#undef OC_J
-#define OC_I(_k,_y) OC_MEM_OFFS((_k*16),_y)
+#define OC_I(_k,_y) OC_MEM_OFFS((_k)*16,_y)
#define OC_J(_k,_y) OC_I(_k,_y)
OC_COLUMN_IDCT_10(y)
#undef OC_I
#undef OC_J
-#define OC_I(_k,_y) OC_MEM_OFFS((_k*16)+8,_y)
+#define OC_I(_k,_y) OC_MEM_OFFS((_k)*16+8,_y)
#define OC_J(_k,_y) OC_I(_k,_y)
OC_COLUMN_IDCT_10(y)
#undef OC_I
Modified: trunk/theora/lib/x86_vc/mmxfrag.c
===================================================================
--- trunk/theora/lib/x86_vc/mmxfrag.c 2010-09-23 15:56:45 UTC (rev 17445)
+++ trunk/theora/lib/x86_vc/mmxfrag.c 2010-09-23 20:06:20 UTC (rev 17446)
@@ -102,7 +102,7 @@
_frag_buf_offs: The offsets of fragments in the reference frames.*/
void oc_frag_copy_list_mmx(unsigned char *_dst_frame,
const unsigned char *_src_frame,int _ystride,
- const ptrdiff_t *_fragis,ptrdiff_t _nfragis,const ptrdiff_t _frag_buf_offs){
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,const ptrdiff_t *_frag_buf_offs){
ptrdiff_t fragii;
for(fragii=0;fragii<_nfragis;fragii++){
ptrdiff_t frag_buf_off;
Modified: trunk/theora/lib/x86_vc/mmxidct.c
===================================================================
--- trunk/theora/lib/x86_vc/mmxidct.c 2010-09-23 15:56:45 UTC (rev 17445)
+++ trunk/theora/lib/x86_vc/mmxidct.c 2010-09-23 20:06:20 UTC (rev 17446)
@@ -31,9 +31,8 @@
/*A table of constants used by the MMX routines.*/
-static const __declspec(align(16))ogg_uint16_t
- OC_IDCT_CONSTS[(1+7)*4]={
- 8, 8, 8, 8
+static const OC_ALIGN16(ogg_uint16_t) OC_IDCT_CONSTS[(1+7)*4]={
+ 8, 8, 8, 8,
(ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
(ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
(ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6,
@@ -47,7 +46,7 @@
(ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
(ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
(ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1,
- (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1,
+ (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1
};
/*38 cycles*/
@@ -314,26 +313,26 @@
mov CONSTS,offset OC_IDCT_CONSTS
mov Y,_y
mov X,_x
-#define OC_I(_k,_y) [(_y)+_k*16]
-#define OC_J(_k,_y) [(_y)+(_k-4)*16+8]
- OC_ROW_IDCT(_y,_x)
- OC_TRANSPOSE(_y)
+#define OC_I(_k,_y) [(_y)+(_k)*16]
+#define OC_J(_k,_y) [(_y)+((_k)-4)*16+8]
+ OC_ROW_IDCT(Y,X)
+ OC_TRANSPOSE(Y)
#undef OC_I
#undef OC_J
-#define OC_I(_k,_y) [(_y)+(_k*16)+64]
-#define OC_J(_k,_y) [(_y)+(_k-4)*16+72]
- OC_ROW_IDCT(_y,_x)
- OC_TRANSPOSE(_y)
+#define OC_I(_k,_y) [(_y)+(_k)*16+64]
+#define OC_J(_k,_y) [(_y)+((_k)-4)*16+72]
+ OC_ROW_IDCT(Y,X)
+ OC_TRANSPOSE(Y)
#undef OC_I
#undef OC_J
-#define OC_I(_k,_y) [(_y)+_k*16]
+#define OC_I(_k,_y) [(_y)+(_k)*16]
#define OC_J(_k,_y) OC_I(_k,_y)
- OC_COLUMN_IDCT(_y)
+ OC_COLUMN_IDCT(Y)
#undef OC_I
#undef OC_J
-#define OC_I(_k,_y) [(_y)+_k*16+8]
+#define OC_I(_k,_y) [(_y)+(_k)*16+8]
#define OC_J(_k,_y) OC_I(_k,_y)
- OC_COLUMN_IDCT(_y)
+ OC_COLUMN_IDCT(Y)
#undef OC_I
#undef OC_J
#undef CONSTS
@@ -344,9 +343,11 @@
int i;
__asm pxor mm0,mm0;
for(i=0;i<4;i++){
+ ogg_int16_t *x;
+ x=_x+16*i;
#define X ecx
__asm{
- mov X,(_x+16*i)
+ mov X,x
movq [X+0x00],mm0
movq [X+0x08],mm0
movq [X+0x10],mm0
@@ -524,22 +525,22 @@
mov CONSTS,offset OC_IDCT_CONSTS
mov Y,_y
mov X,_x
-#define OC_I(_k,_y) [(_y)+_k*16]
-#define OC_J(_k,_y) [(_y)+(_k-4)*16+8]
+#define OC_I(_k,_y) [(_y)+(_k)*16]
+#define OC_J(_k,_y) [(_y)+((_k)-4)*16+8]
/*Done with dequant, descramble, and partial transpose.
Now do the iDCT itself.*/
- OC_ROW_IDCT_10(_y,_x)
- OC_TRANSPOSE(_y)
+ OC_ROW_IDCT_10(Y,X)
+ OC_TRANSPOSE(Y)
#undef OC_I
#undef OC_J
-#define OC_I(_k,_y) [(_y)+_k*16]
+#define OC_I(_k,_y) [(_y)+(_k)*16]
#define OC_J(_k,_y) OC_I(_k,_y)
- OC_COLUMN_IDCT_10(_y)
+ OC_COLUMN_IDCT_10(Y)
#undef OC_I
#undef OC_J
-#define OC_I(_k,_y) [(_y)+_k*16+8]
+#define OC_I(_k,_y) [(_y)+(_k)*16+8]
#define OC_J(_k,_y) OC_I(_k,_y)
- OC_COLUMN_IDCT_10(_y)
+ OC_COLUMN_IDCT_10(Y)
#undef OC_I
#undef OC_J
#undef CONSTS
@@ -549,8 +550,8 @@
if(_x!=_y){
#define X ecx
__asm{
- mm0,mm0;
- mov X,(_x+16*i)
+ pxor mm0,mm0;
+ mov X,_x
movq [X+0x00],mm0
movq [X+0x10],mm0
movq [X+0x20],mm0
More information about the commits mailing list