[xiph-commits] r16212 - in branches/theora-thusnelda/lib/enc: x86 x86_vc
tterribe at svn.xiph.org
Tue Jul 7 04:55:26 PDT 2009
Author: tterribe
Date: 2009-07-07 04:55:25 -0700 (Tue, 07 Jul 2009)
New Revision: 16212
Modified:
branches/theora-thusnelda/lib/enc/x86/mmxfdct.c
branches/theora-thusnelda/lib/enc/x86/sse2fdct.c
branches/theora-thusnelda/lib/enc/x86_vc/mmxfdct.c
Log:
Fix a potential overflow in the fDCT.
Modified: branches/theora-thusnelda/lib/enc/x86/mmxfdct.c
===================================================================
--- branches/theora-thusnelda/lib/enc/x86/mmxfdct.c 2009-07-07 01:43:19 UTC (rev 16211)
+++ branches/theora-thusnelda/lib/enc/x86/mmxfdct.c 2009-07-07 11:55:25 UTC (rev 16212)
@@ -177,12 +177,14 @@
"packssdw %%mm5,%%mm4\n\t" \
"movq "_r5"(%[y]),%%mm1\n\t" \
"paddw %%mm2,%%mm4\n\t" \
- /*mm2=t6'', mm0=_y[0]=u=r+s>>1*/ \
+ /*mm2=t6'', mm0=_y[0]=u=r+s>>1 \
+ The naive implementation could cause overflow, so we use u=s+(r-s>>1).*/ \
"mov $0x7FFF54DC,%[a]\n\t" \
- "paddw %%mm4,%%mm0\n\t" \
+ "psubw %%mm4,%%mm0\n\t" \
"movq "_r3"(%[y]),%%mm2\n\t" \
+ "psraw $1,%%mm0\n\t" \
"movd %[a],%%mm7\n\t" \
- "psraw $1,%%mm0\n\t" \
+ "paddw %%mm4,%%mm0\n\t" \
/*mm7={54491-0x7FFF,0x7FFF}x2 \
mm4=_y[4]=v=r-u*/ \
"psubw %%mm0,%%mm4\n\t" \
Modified: branches/theora-thusnelda/lib/enc/x86/sse2fdct.c
===================================================================
--- branches/theora-thusnelda/lib/enc/x86/sse2fdct.c 2009-07-07 01:43:19 UTC (rev 16211)
+++ branches/theora-thusnelda/lib/enc/x86/sse2fdct.c 2009-07-07 11:55:25 UTC (rev 16212)
@@ -152,10 +152,12 @@
"psubw %%xmm14,%%xmm1\n\t" \
"mov $0x7FFF6C84,%[a]\n\t" \
"paddw %%xmm1,%%xmm4\n\t" \
- /*xmm0=_y[0]=u=r+s>>1*/ \
- "paddw %%xmm4,%%xmm0\n\t" \
+ /*xmm0=_y[0]=u=r+s>>1 \
+ The naive implementation could cause overflow, so we use u=s+(r-s>>1).*/ \
+ "psubw %%xmm4,%%xmm0\n\t" \
"movd %[a],%%xmm13\n\t" \
"psraw $1,%%xmm0\n\t" \
+ "paddw %%xmm4,%%xmm0\n\t" \
/*xmm4=_y[4]=v=r-u*/ \
"pshufd $00,%%xmm13,%%xmm13\n\t" \
"psubw %%xmm0,%%xmm4\n\t" \
Modified: branches/theora-thusnelda/lib/enc/x86_vc/mmxfdct.c
===================================================================
--- branches/theora-thusnelda/lib/enc/x86_vc/mmxfdct.c 2009-07-07 01:43:19 UTC (rev 16211)
+++ branches/theora-thusnelda/lib/enc/x86_vc/mmxfdct.c 2009-07-07 11:55:25 UTC (rev 16212)
@@ -178,12 +178,14 @@
__asm packssdw mm4,mm5 \
__asm movq mm1,[Y+_r5] \
__asm paddw mm4,mm2 \
- /*mm2=t6'', mm0=_y[0]=u=r+s>>1*/ \
+ /*mm2=t6'', mm0=_y[0]=u=r+s>>1 \
+ The naive implementation could cause overflow, so we use u=s+(r-s>>1).*/ \
__asm mov A,0x7FFF54DC \
-__asm paddw mm0,mm4 \
+__asm psubw mm0,mm4 \
__asm movq mm2,[Y+_r3] \
+__asm psraw mm0,1 \
__asm movd mm7,A \
-__asm psraw mm0,1 \
+__asm paddw mm0,mm4 \
/*mm7={54491-0x7FFF,0x7FFF}x2 \
mm4=_y[4]=v=r-u*/ \
__asm psubw mm4,mm0 \
More information about the commits mailing list