[xiph-commits] r11518 - in branches/theora-playtime: lib lib/x86_32_vs win32/VS2005/libtheora

illiminable at svn.xiph.org illiminable at svn.xiph.org
Sun Jun 4 11:30:47 PDT 2006


Author: illiminable
Date: 2006-06-04 11:30:35 -0700 (Sun, 04 Jun 2006)
New Revision: 11518

Modified:
   branches/theora-playtime/lib/dsp.h
   branches/theora-playtime/lib/x86_32_vs/idct_sse2.c
   branches/theora-playtime/lib/x86_32_vs/recon_sse2.c
   branches/theora-playtime/win32/VS2005/libtheora/libtheora.vcproj
Log:
* Kill dead code
* Fix function prototype for init in idct
* Add init externs to dsp.h
* Remove dead code, and use fewer prefetches in recon


Modified: branches/theora-playtime/lib/dsp.h
===================================================================
--- branches/theora-playtime/lib/dsp.h	2006-06-04 17:03:37 UTC (rev 11517)
+++ branches/theora-playtime/lib/dsp.h	2006-06-04 18:30:35 UTC (rev 11518)
@@ -109,6 +109,7 @@
 
 extern void dsp_dct_init(DspFunctions *funcs, ogg_uint32_t cpu_flags);
 extern void dsp_recon_init (DspFunctions *funcs, ogg_uint32_t cpu_flags);
+extern void dsp_idct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags);
 
 void dsp_init(DspFunctions *funcs);
 void dsp_static_init(DspFunctions *funcs);
@@ -117,7 +118,11 @@
 extern void dsp_mmxext_init(DspFunctions *funcs);
 extern void dsp_mmx_fdct_init(DspFunctions *funcs);
 extern void dsp_mmx_recon_init(DspFunctions *funcs);
+
+extern void dsp_sse2_init(DspFunctions *funcs);
 extern void dsp_sse2_recon_init(DspFunctions *funcs);
+extern void dsp_sse2_idct_init(DspFunctions *funcs);
+
 #endif
 
 #define dsp_save_fpu(funcs) (funcs.save_fpu ())

Modified: branches/theora-playtime/lib/x86_32_vs/idct_sse2.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/idct_sse2.c	2006-06-04 17:03:37 UTC (rev 11517)
+++ branches/theora-playtime/lib/x86_32_vs/idct_sse2.c	2006-06-04 18:30:35 UTC (rev 11518)
@@ -36,7 +36,7 @@
                    ogg_int16_t * quantized_list,
                    ogg_int32_t * DCT_block) 
 {
-#if 0
+#if 1
   int i;
   for(i=0;i<64;i++)
     DCT_block[dezigzag_index[i]] = quantized_list[i] * dequant_coeffs[i];
@@ -58,12 +58,6 @@
         mov     ebx, dequant_coeffs     /* int16 */
         mov     eax, temp_block_ptr
 
-
-        /*      
-                The repeated blocks of 16 iterations are identical except 
-                for the offsets in the writes at the end and the reads at start
-         */
-
         /* 16 Iterations at a time  */
         mov         ecx, 4      /* 4 lots of 16 */
 
@@ -110,118 +104,13 @@
         sub         ecx, 1
         jnz         calc_loop_start
 
-        /* Restore the pointer to the start of the temp buffer */
-        sub         eax, 256
-   
+    /* Restore the pointer to the start of the temp buffer */
+    sub         eax, 256
 
 
+    /* Now follow the pattern to write - can't use simd */
+    mov         ebx, 8
 
-        ///* 16 Iterations at a time  */
-        //    /* Read 16x16 bits of quatized_list and dequant_coeffs */
-        //    movdqu      xmm1, [esi + 32]
-        //    movdqu      xmm5, [esi + 48]
-
-        //    movdqa      xmm2, [ebx + 32]
-        //    movdqa      xmm6, [ebx + 48]
-
-        //    /* Make a copy of xmm1 and xmm5 */
-        //    movdqa      xmm7, xmm1
-        //    movdqa      xmm0, xmm5
-
-        //    /* Multiply */
-        //    pmullw      xmm1, xmm2
-        //    pmulhw      xmm2, xmm7
-
-        //    pmullw      xmm5, xmm6
-        //    pmulhw      xmm6, xmm0
-
-        //    /* Interleave the multiplicataion results */
-        //    movdqa      xmm0, xmm1
-        //    punpcklwd   xmm1, xmm2      /* Now the low 4 x 32 bits */
-        //    punpckhwd   xmm0, xmm2      /* The high 4x32 bits */
-
-        //    movdqa      xmm2, xmm5
-        //    punpcklwd   xmm5, xmm6
-        //    punpckhwd   xmm2, xmm6
-
-        //    /* Write the 16x32 bits of output to temp space */
-        //    movdqa      [eax + 64], xmm1
-        //    movdqa      [eax + 80], xmm0
-        //    movdqa      [eax + 96], xmm5
-        //    movdqa      [eax + 112], xmm2
-
-        ///* 16 Iterations at a time  */
-        //    /* Read 16x16 bits of quatized_list and dequant_coeffs */
-        //    movdqu      xmm1, [esi + 64]
-        //    movdqu      xmm5, [esi + 80]
-
-        //    movdqa      xmm2, [ebx + 64]
-        //    movdqa      xmm6, [ebx + 80]
-
-        //    /* Make a copy of xmm1 and xmm5 */
-        //    movdqa      xmm7, xmm1
-        //    movdqa      xmm0, xmm5
-
-        //    /* Multiply */
-        //    pmullw      xmm1, xmm2
-        //    pmulhw      xmm2, xmm7
-
-        //    pmullw      xmm5, xmm6
-        //    pmulhw      xmm6, xmm0
-
-        //    /* Interleave the multiplicataion results */
-        //    movdqa      xmm0, xmm1
-        //    punpcklwd   xmm1, xmm2      /* Now the low 4 x 32 bits */
-        //    punpckhwd   xmm0, xmm2      /* The high 4x32 bits */
-
-        //    movdqa      xmm2, xmm5
-        //    punpcklwd   xmm5, xmm6
-        //    punpckhwd   xmm2, xmm6
-
-        //    /* Write the 16x32 bits of output to temp space */
-        //    movdqa      [eax + 128], xmm1
-        //    movdqa      [eax + 144], xmm0
-        //    movdqa      [eax + 160], xmm5
-        //    movdqa      [eax + 176], xmm2
-
-
-
-        ///* 16 Iterations at a time  */
-        //    /* Read 16x16 bits of quatized_list and dequant_coeffs */
-        //    movdqu      xmm1, [esi + 96]
-        //    movdqu      xmm5, [esi + 112]
-
-        //    movdqa      xmm2, [ebx + 96]
-        //    movdqa      xmm6, [ebx + 112]
-
-        //    /* Make a copy of xmm1 and xmm5 */
-        //    movdqa      xmm7, xmm1
-        //    movdqa      xmm0, xmm5
-
-        //    /* Multiply */
-        //    pmullw      xmm1, xmm2
-        //    pmulhw      xmm2, xmm7
-
-        //    pmullw      xmm5, xmm6
-        //    pmulhw      xmm6, xmm0
-
-        //    /* Interleave the multiplicataion results */
-        //    movdqa      xmm0, xmm1
-        //    punpcklwd   xmm1, xmm2      /* Now the low 4 x 32 bits */
-        //    punpckhwd   xmm0, xmm2      /* The high 4x32 bits */
-
-        //    movdqa      xmm2, xmm5
-        //    punpcklwd   xmm5, xmm6
-        //    punpckhwd   xmm2, xmm6
-
-        //    /* Write the 16x32 bits of output to temp space */
-        //    movdqa      [eax + 192], xmm1
-        //    movdqa      [eax + 208], xmm0
-        //    movdqa      [eax + 224], xmm5
-        //    movdqa      [eax + 240], xmm2
-
-        /* Now follow the pattern to write - can't use simd */
-        mov         ebx, 8
     write_loop_start:
         mov         ecx         , [edx]
         mov         esi         , [eax]
@@ -257,39 +146,6 @@
     sub         ebx, 1
     jnz         write_loop_start
 
-        //mov         ecx         , [edx + 32]
-        //mov         esi         , [eax + 32]
-        //mov         [edi + ecx*4] , esi
-        //mov         ecx         , [edx + 36]
-        //mov         esi         , [eax + 36]
-        //mov         [edi + ecx*4] , esi
-        //mov         ecx         , [edx + 40]
-        //mov         esi         , [eax + 40]
-        //mov         [edi + ecx*4] , esi
-        //mov         ecx         , [edx + 44]
-        //mov         esi         , [eax + 44]
-        //mov         [edi + ecx*4] , esi
-
-        //mov         ecx         , [edx + 48]
-        //mov         esi         , [eax + 48]
-        //mov         [edi + ecx*4] , esi
-        //mov         ecx         , [edx + 52]
-        //mov         esi         , [eax + 52]
-        //mov         [edi + ecx*4] , esi
-        //mov         ecx         , [edx + 56]
-        //mov         esi         , [eax + 56]
-        //mov         [edi + ecx*4] , esi
-        //mov         ecx         , [edx + 60]
-        //mov         esi         , [eax + 60]
-        //mov         [edi + ecx*4] , esi
-
-        //add         eax, 64
-        //add         edx, 64
-
-        //sub         ebx, 1
-        //jnz         write_loop_start
-
-
     };
 #endif
 }
@@ -732,7 +588,7 @@
 }
 
 
-void dsp_sse2_idct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
+void dsp_sse2_idct_init (DspFunctions *funcs)
 {
     /* TODO::: Match function order */
   funcs->dequant_slow = dequant_slow__sse2;

Modified: branches/theora-playtime/lib/x86_32_vs/recon_sse2.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/recon_sse2.c	2006-06-04 17:03:37 UTC (rev 11517)
+++ branches/theora-playtime/lib/x86_32_vs/recon_sse2.c	2006-06-04 18:30:35 UTC (rev 11518)
@@ -154,11 +154,8 @@
         mov     edx, V128x16Ptr
 
         prefetchnta    [ebx]
-        prefetchnta    [ebx + 32]
         prefetchnta    [ebx + 64]
-        prefetchnta    [ebx + 96]
 
-
         movdqa      xmm7, [edx]
         /* 8 lots of int16 per register on the first mov */
         /* Then packs those 8 + another 8 down to 16x 8 bits */
@@ -170,25 +167,21 @@
         movdqa      xmm6, [ebx + 16]
         packsswb    xmm0, xmm6  /*[ebx + 16]*/
         pxor        xmm0, xmm7
-        //lea         ebx, [ebx + 32]
 
         /* Iteration 2 - xmm1*/
         movdqa      xmm1, [ebx + 32]
         packsswb    xmm1, [ebx + 48]
         pxor        xmm1, xmm7
-        //lea         ebx, [ebx + 32]
 
         /* Iteration 3 - xmm2 */
         movdqa      xmm2, [ebx + 64]
         packsswb    xmm2, [ebx + 80]
         pxor        xmm2, xmm7
-        //lea         ebx, [ebx + 32]
 
         /* Iteration 4 - xmm3 */
         movdqa      xmm3, [ebx + 96]
         packsswb    xmm3, [ebx + 112]
         pxor        xmm3, xmm7
-        /* lea         ebx, [ebx + 16] */
 
 
         /* Output the data - lower bits, then shift then low bits again */
@@ -215,7 +208,6 @@
         movq        QWORD PTR [eax], xmm3
         psrldq      xmm3, 8
         movq        QWORD PTR [eax + ecx], xmm3
-        /* lea         eax, [eax + ecx]*/
 
 
     };

Modified: branches/theora-playtime/win32/VS2005/libtheora/libtheora.vcproj
===================================================================
--- branches/theora-playtime/win32/VS2005/libtheora/libtheora.vcproj	2006-06-04 17:03:37 UTC (rev 11517)
+++ branches/theora-playtime/win32/VS2005/libtheora/libtheora.vcproj	2006-06-04 18:30:35 UTC (rev 11518)
@@ -51,6 +51,7 @@
 				WarningLevel="3"
 				Detect64BitPortabilityProblems="true"
 				DebugInformationFormat="3"
+				DisableSpecificWarnings="4996"
 			/>
 			<Tool
 				Name="VCManagedResourceCompilerTool"
@@ -127,6 +128,7 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
+				OmitFramePointers="true"
 				AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include;..\..\..\lib"
 				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS; USE_ASM"
 				StringPooling="true"
@@ -136,9 +138,9 @@
 				UsePrecompiledHeader="0"
 				WarningLevel="4"
 				Detect64BitPortabilityProblems="true"
-				DebugInformationFormat="0"
+				DebugInformationFormat="3"
 				CompileAs="1"
-				DisableSpecificWarnings="4244;4267;4057;4100;4245"
+				DisableSpecificWarnings="4244;4267;4057;4100;4245;4996"
 			/>
 			<Tool
 				Name="VCManagedResourceCompilerTool"



More information about the commits mailing list