[xiph-commits] r11518 - in branches/theora-playtime: lib lib/x86_32_vs win32/VS2005/libtheora
illiminable at svn.xiph.org
illiminable at svn.xiph.org
Sun Jun 4 11:30:47 PDT 2006
Author: illiminable
Date: 2006-06-04 11:30:35 -0700 (Sun, 04 Jun 2006)
New Revision: 11518
Modified:
branches/theora-playtime/lib/dsp.h
branches/theora-playtime/lib/x86_32_vs/idct_sse2.c
branches/theora-playtime/lib/x86_32_vs/recon_sse2.c
branches/theora-playtime/win32/VS2005/libtheora/libtheora.vcproj
Log:
* Kill dead code
* Fix function prototype for init in idct
* Add init externs to dsp.h
* Remove dead code, and use fewer prefetches in recon
Modified: branches/theora-playtime/lib/dsp.h
===================================================================
--- branches/theora-playtime/lib/dsp.h 2006-06-04 17:03:37 UTC (rev 11517)
+++ branches/theora-playtime/lib/dsp.h 2006-06-04 18:30:35 UTC (rev 11518)
@@ -109,6 +109,7 @@
extern void dsp_dct_init(DspFunctions *funcs, ogg_uint32_t cpu_flags);
extern void dsp_recon_init (DspFunctions *funcs, ogg_uint32_t cpu_flags);
+extern void dsp_idct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags);
void dsp_init(DspFunctions *funcs);
void dsp_static_init(DspFunctions *funcs);
@@ -117,7 +118,11 @@
extern void dsp_mmxext_init(DspFunctions *funcs);
extern void dsp_mmx_fdct_init(DspFunctions *funcs);
extern void dsp_mmx_recon_init(DspFunctions *funcs);
+
+extern void dsp_sse2_init(DspFunctions *funcs);
extern void dsp_sse2_recon_init(DspFunctions *funcs);
+extern void dsp_sse2_idct_init(DspFunctions *funcs);
+
#endif
#define dsp_save_fpu(funcs) (funcs.save_fpu ())
Modified: branches/theora-playtime/lib/x86_32_vs/idct_sse2.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/idct_sse2.c 2006-06-04 17:03:37 UTC (rev 11517)
+++ branches/theora-playtime/lib/x86_32_vs/idct_sse2.c 2006-06-04 18:30:35 UTC (rev 11518)
@@ -36,7 +36,7 @@
ogg_int16_t * quantized_list,
ogg_int32_t * DCT_block)
{
-#if 0
+#if 1
int i;
for(i=0;i<64;i++)
DCT_block[dezigzag_index[i]] = quantized_list[i] * dequant_coeffs[i];
@@ -58,12 +58,6 @@
mov ebx, dequant_coeffs /* int16 */
mov eax, temp_block_ptr
-
- /*
- The repeated blocks of 16 iterations are identical except
- for the offsets in the writes at the end and the reads at start
- */
-
/* 16 Iterations at a time */
mov ecx, 4 /* 4 lots of 16 */
@@ -110,118 +104,13 @@
sub ecx, 1
jnz calc_loop_start
- /* Restore the pointer to the start of the temp buffer */
- sub eax, 256
-
+ /* Restore the pointer to the start of the temp buffer */
+ sub eax, 256
+ /* Now follow the pattern to write - can't use simd */
+ mov ebx, 8
- ///* 16 Iterations at a time */
- // /* Read 16x16 bits of quatized_list and dequant_coeffs */
- // movdqu xmm1, [esi + 32]
- // movdqu xmm5, [esi + 48]
-
- // movdqa xmm2, [ebx + 32]
- // movdqa xmm6, [ebx + 48]
-
- // /* Make a copy of xmm1 and xmm5 */
- // movdqa xmm7, xmm1
- // movdqa xmm0, xmm5
-
- // /* Multiply */
- // pmullw xmm1, xmm2
- // pmulhw xmm2, xmm7
-
- // pmullw xmm5, xmm6
- // pmulhw xmm6, xmm0
-
- // /* Interleave the multiplicataion results */
- // movdqa xmm0, xmm1
- // punpcklwd xmm1, xmm2 /* Now the low 4 x 32 bits */
- // punpckhwd xmm0, xmm2 /* The high 4x32 bits */
-
- // movdqa xmm2, xmm5
- // punpcklwd xmm5, xmm6
- // punpckhwd xmm2, xmm6
-
- // /* Write the 16x32 bits of output to temp space */
- // movdqa [eax + 64], xmm1
- // movdqa [eax + 80], xmm0
- // movdqa [eax + 96], xmm5
- // movdqa [eax + 112], xmm2
-
- ///* 16 Iterations at a time */
- // /* Read 16x16 bits of quatized_list and dequant_coeffs */
- // movdqu xmm1, [esi + 64]
- // movdqu xmm5, [esi + 80]
-
- // movdqa xmm2, [ebx + 64]
- // movdqa xmm6, [ebx + 80]
-
- // /* Make a copy of xmm1 and xmm5 */
- // movdqa xmm7, xmm1
- // movdqa xmm0, xmm5
-
- // /* Multiply */
- // pmullw xmm1, xmm2
- // pmulhw xmm2, xmm7
-
- // pmullw xmm5, xmm6
- // pmulhw xmm6, xmm0
-
- // /* Interleave the multiplicataion results */
- // movdqa xmm0, xmm1
- // punpcklwd xmm1, xmm2 /* Now the low 4 x 32 bits */
- // punpckhwd xmm0, xmm2 /* The high 4x32 bits */
-
- // movdqa xmm2, xmm5
- // punpcklwd xmm5, xmm6
- // punpckhwd xmm2, xmm6
-
- // /* Write the 16x32 bits of output to temp space */
- // movdqa [eax + 128], xmm1
- // movdqa [eax + 144], xmm0
- // movdqa [eax + 160], xmm5
- // movdqa [eax + 176], xmm2
-
-
-
- ///* 16 Iterations at a time */
- // /* Read 16x16 bits of quatized_list and dequant_coeffs */
- // movdqu xmm1, [esi + 96]
- // movdqu xmm5, [esi + 112]
-
- // movdqa xmm2, [ebx + 96]
- // movdqa xmm6, [ebx + 112]
-
- // /* Make a copy of xmm1 and xmm5 */
- // movdqa xmm7, xmm1
- // movdqa xmm0, xmm5
-
- // /* Multiply */
- // pmullw xmm1, xmm2
- // pmulhw xmm2, xmm7
-
- // pmullw xmm5, xmm6
- // pmulhw xmm6, xmm0
-
- // /* Interleave the multiplicataion results */
- // movdqa xmm0, xmm1
- // punpcklwd xmm1, xmm2 /* Now the low 4 x 32 bits */
- // punpckhwd xmm0, xmm2 /* The high 4x32 bits */
-
- // movdqa xmm2, xmm5
- // punpcklwd xmm5, xmm6
- // punpckhwd xmm2, xmm6
-
- // /* Write the 16x32 bits of output to temp space */
- // movdqa [eax + 192], xmm1
- // movdqa [eax + 208], xmm0
- // movdqa [eax + 224], xmm5
- // movdqa [eax + 240], xmm2
-
- /* Now follow the pattern to write - can't use simd */
- mov ebx, 8
write_loop_start:
mov ecx , [edx]
mov esi , [eax]
@@ -257,39 +146,6 @@
sub ebx, 1
jnz write_loop_start
- //mov ecx , [edx + 32]
- //mov esi , [eax + 32]
- //mov [edi + ecx*4] , esi
- //mov ecx , [edx + 36]
- //mov esi , [eax + 36]
- //mov [edi + ecx*4] , esi
- //mov ecx , [edx + 40]
- //mov esi , [eax + 40]
- //mov [edi + ecx*4] , esi
- //mov ecx , [edx + 44]
- //mov esi , [eax + 44]
- //mov [edi + ecx*4] , esi
-
- //mov ecx , [edx + 48]
- //mov esi , [eax + 48]
- //mov [edi + ecx*4] , esi
- //mov ecx , [edx + 52]
- //mov esi , [eax + 52]
- //mov [edi + ecx*4] , esi
- //mov ecx , [edx + 56]
- //mov esi , [eax + 56]
- //mov [edi + ecx*4] , esi
- //mov ecx , [edx + 60]
- //mov esi , [eax + 60]
- //mov [edi + ecx*4] , esi
-
- //add eax, 64
- //add edx, 64
-
- //sub ebx, 1
- //jnz write_loop_start
-
-
};
#endif
}
@@ -732,7 +588,7 @@
}
-void dsp_sse2_idct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
+void dsp_sse2_idct_init (DspFunctions *funcs)
{
/* TODO::: Match function order */
funcs->dequant_slow = dequant_slow__sse2;
Modified: branches/theora-playtime/lib/x86_32_vs/recon_sse2.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/recon_sse2.c 2006-06-04 17:03:37 UTC (rev 11517)
+++ branches/theora-playtime/lib/x86_32_vs/recon_sse2.c 2006-06-04 18:30:35 UTC (rev 11518)
@@ -154,11 +154,8 @@
mov edx, V128x16Ptr
prefetchnta [ebx]
- prefetchnta [ebx + 32]
prefetchnta [ebx + 64]
- prefetchnta [ebx + 96]
-
movdqa xmm7, [edx]
/* 8 lots of int16 per register on the first mov */
/* Then packs those 8 + another 8 down to 16x 8 bits */
@@ -170,25 +167,21 @@
movdqa xmm6, [ebx + 16]
packsswb xmm0, xmm6 /*[ebx + 16]*/
pxor xmm0, xmm7
- //lea ebx, [ebx + 32]
/* Iteration 2 - xmm1*/
movdqa xmm1, [ebx + 32]
packsswb xmm1, [ebx + 48]
pxor xmm1, xmm7
- //lea ebx, [ebx + 32]
/* Iteration 3 - xmm2 */
movdqa xmm2, [ebx + 64]
packsswb xmm2, [ebx + 80]
pxor xmm2, xmm7
- //lea ebx, [ebx + 32]
/* Iteration 4 - xmm3 */
movdqa xmm3, [ebx + 96]
packsswb xmm3, [ebx + 112]
pxor xmm3, xmm7
- /* lea ebx, [ebx + 16] */
/* Output the data - lower bits, then shift then low bits again */
@@ -215,7 +208,6 @@
movq QWORD PTR [eax], xmm3
psrldq xmm3, 8
movq QWORD PTR [eax + ecx], xmm3
- /* lea eax, [eax + ecx]*/
};
Modified: branches/theora-playtime/win32/VS2005/libtheora/libtheora.vcproj
===================================================================
--- branches/theora-playtime/win32/VS2005/libtheora/libtheora.vcproj 2006-06-04 17:03:37 UTC (rev 11517)
+++ branches/theora-playtime/win32/VS2005/libtheora/libtheora.vcproj 2006-06-04 18:30:35 UTC (rev 11518)
@@ -51,6 +51,7 @@
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
+ DisableSpecificWarnings="4996"
/>
<Tool
Name="VCManagedResourceCompilerTool"
@@ -127,6 +128,7 @@
InlineFunctionExpansion="2"
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="1"
+ OmitFramePointers="true"
AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include;..\..\..\lib"
PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS; USE_ASM"
StringPooling="true"
@@ -136,9 +138,9 @@
UsePrecompiledHeader="0"
WarningLevel="4"
Detect64BitPortabilityProblems="true"
- DebugInformationFormat="0"
+ DebugInformationFormat="3"
CompileAs="1"
- DisableSpecificWarnings="4244;4267;4057;4100;4245"
+ DisableSpecificWarnings="4244;4267;4057;4100;4245;4996"
/>
<Tool
Name="VCManagedResourceCompilerTool"
More information about the commits mailing list