[xiph-commits] r16221 - in branches/theora-thusnelda/lib/enc: x86 x86_vc
tterribe at svn.xiph.org
tterribe at svn.xiph.org
Tue Jul 7 19:17:57 PDT 2009
Author: tterribe
Date: 2009-07-07 19:17:57 -0700 (Tue, 07 Jul 2009)
New Revision: 16221
Modified:
branches/theora-thusnelda/lib/enc/x86/mmxfdct.c
branches/theora-thusnelda/lib/enc/x86/sse2fdct.c
branches/theora-thusnelda/lib/enc/x86_vc/mmxfdct.c
Log:
Better fix for r16212 (now also correctly handles overflow in the other
direction, which was much less likely, but still possible).
Modified: branches/theora-thusnelda/lib/enc/x86/mmxfdct.c
===================================================================
--- branches/theora-thusnelda/lib/enc/x86/mmxfdct.c 2009-07-08 02:06:38 UTC (rev 16220)
+++ branches/theora-thusnelda/lib/enc/x86/mmxfdct.c 2009-07-08 02:17:57 UTC (rev 16221)
@@ -178,13 +178,16 @@
"movq "_r5"(%[y]),%%mm1\n\t" \
"paddw %%mm2,%%mm4\n\t" \
/*mm2=t6'', mm0=_y[0]=u=r+s>>1 \
- The naive implementation could cause overflow, so we use u=s+(r-s>>1).*/ \
- "mov $0x7FFF54DC,%[a]\n\t" \
- "psubw %%mm4,%%mm0\n\t" \
+ The naive implementation could cause overflow, so we use \
+ u=(r&s)+((r^s)>>1).*/ \
"movq "_r3"(%[y]),%%mm2\n\t" \
+ "movq %%mm0,%%mm7\n\t" \
+ "pxor %%mm4,%%mm0\n\t" \
+ "pand %%mm4,%%mm7\n\t" \
"psraw $1,%%mm0\n\t" \
+ "mov $0x7FFF54DC,%[a]\n\t" \
+ "paddw %%mm7,%%mm0\n\t" \
"movd %[a],%%mm7\n\t" \
- "paddw %%mm4,%%mm0\n\t" \
/*mm7={54491-0x7FFF,0x7FFF}x2 \
mm4=_y[4]=v=r-u*/ \
"psubw %%mm0,%%mm4\n\t" \
Modified: branches/theora-thusnelda/lib/enc/x86/sse2fdct.c
===================================================================
--- branches/theora-thusnelda/lib/enc/x86/sse2fdct.c 2009-07-08 02:06:38 UTC (rev 16220)
+++ branches/theora-thusnelda/lib/enc/x86/sse2fdct.c 2009-07-08 02:17:57 UTC (rev 16221)
@@ -153,11 +153,14 @@
"mov $0x7FFF6C84,%[a]\n\t" \
"paddw %%xmm1,%%xmm4\n\t" \
/*xmm0=_y[0]=u=r+s>>1 \
- The naive implementation could cause overflow, so we use u=s+(r-s>>1).*/ \
- "psubw %%xmm4,%%xmm0\n\t" \
+ The naive implementation could cause overflow, so we use \
+ u=(r&s)+((r^s)>>1).*/ \
+ "movdqa %%xmm0,%%xmm6\n\t" \
+ "pxor %%xmm4,%%xmm0\n\t" \
+ "pand %%xmm4,%%xmm6\n\t" \
+ "psraw $1,%%xmm0\n\t" \
"movd %[a],%%xmm13\n\t" \
- "psraw $1,%%xmm0\n\t" \
- "paddw %%xmm4,%%xmm0\n\t" \
+ "paddw %%xmm6,%%xmm0\n\t" \
/*xmm4=_y[4]=v=r-u*/ \
"pshufd $00,%%xmm13,%%xmm13\n\t" \
"psubw %%xmm0,%%xmm4\n\t" \
Modified: branches/theora-thusnelda/lib/enc/x86_vc/mmxfdct.c
===================================================================
--- branches/theora-thusnelda/lib/enc/x86_vc/mmxfdct.c 2009-07-08 02:06:38 UTC (rev 16220)
+++ branches/theora-thusnelda/lib/enc/x86_vc/mmxfdct.c 2009-07-08 02:17:57 UTC (rev 16221)
@@ -177,13 +177,16 @@
__asm movq mm1,[Y+_r5] \
__asm paddw mm4,mm2 \
/*mm2=t6'', mm0=_y[0]=u=r+s>>1 \
- The naive implementation could cause overflow, so we use u=s+(r-s>>1).*/ \
- __asm mov A,0x7FFF54DC \
- __asm psubw mm0,mm4 \
+ The naive implementation could cause overflow, so we use \
+ u=(r&s)+((r^s)>>1).*/ \
__asm movq mm2,[Y+_r3] \
+ __asm movq mm7,mm0 \
+ __asm pxor mm0,mm4 \
+ __asm pand mm7,mm4 \
__asm psraw mm0,1 \
+ __asm mov A,0x7FFF54DC \
+ __asm paddw mm0,mm7 \
__asm movd mm7,A \
- __asm paddw mm0,mm4 \
/*mm7={54491-0x7FFF,0x7FFF}x2 \
mm4=_y[4]=v=r-u*/ \
__asm psubw mm4,mm0 \
More information about the commits
mailing list