[xiph-commits] r11681 - branches/theora-playtime/lib/x86_32_vs
illiminable at svn.xiph.org
illiminable at svn.xiph.org
Thu Jun 29 16:36:29 PDT 2006
Author: illiminable
Date: 2006-06-29 16:36:24 -0700 (Thu, 29 Jun 2006)
New Revision: 11681
Modified:
branches/theora-playtime/lib/x86_32_vs/idct_sse2.c
Log:
* dequant_slow10: the SSE2 version is faster than the C version (best case 86 vs 102)
Modified: branches/theora-playtime/lib/x86_32_vs/idct_sse2.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/idct_sse2.c 2006-06-29 23:26:58 UTC (rev 11680)
+++ branches/theora-playtime/lib/x86_32_vs/idct_sse2.c 2006-06-29 23:36:24 UTC (rev 11681)
@@ -44,12 +44,12 @@
//static unsigned __int64 perf_dequant_slow10_count;
//static unsigned __int64 perf_dequant_slow10_min;
-static perf_info dequant_slow_c;
-static perf_info dequant_slow_sse2;
-static perf_info idct1_c;
-static perf_info idct1_sse2;
-static perf_info dequant_slow10_c;
-static perf_info dequant_slow10_sse2;
+static perf_info dequant_slow_c_perf;
+static perf_info dequant_slow_sse2_perf;
+static perf_info idct1_c_perf;
+static perf_info idct1_sse2_perf;
+static perf_info dequant_slow10_c_perf;
+static perf_info dequant_slow10_sse2_perf;
static void dequant_slow__sse2( ogg_int16_t * dequant_coeffs,
@@ -59,11 +59,11 @@
#if 0
int i;
- PERF_BLOCK_START();
+ //PERF_BLOCK_START();
for(i=0;i<64;i++)
DCT_block[dezigzag_index[i]] = quantized_list[i] * dequant_coeffs[i];
- PERF_BLOCK_END("dequant_slow C", dequant_slow_c, 1000);
+ //PERF_BLOCK_END("dequant_slow C", dequant_slow_c_perf, 1000);
#else
static __declspec(align(16)) ogg_int32_t temp_block[64];
@@ -72,7 +72,7 @@
/* quantized list is not aligned */
- PERF_BLOCK_START();
+ //PERF_BLOCK_START();
__asm {
align 16
@@ -174,7 +174,7 @@
pop ebx
};
- PERF_BLOCK_END("dequant_slow sse2", dequant_slow_sse2, 1000);
+ //PERF_BLOCK_END("dequant_slow sse2", dequant_slow_sse2_perf, 1000);
#endif
}
@@ -407,19 +407,19 @@
#if 0
int i;
- //PERF_BLOCK_START();
+ PERF_BLOCK_START();
memset(DCT_block,0, 128);
for(i=0;i<10;i++)
DCT_block[dezigzag_index[i]] = quantized_list[i] * dequant_coeffs[i];
- PERF_BLOCK_END("dequant_slow10 C", perf_dequant_slow10_time, perf_dequant_slow10_count,perf_dequant_slow10_min, 10000);
+ PERF_BLOCK_END("dequant_slow10 C", dequant_slow10_c_perf, 10000);
#else
static __declspec(align(16)) unsigned char temp_block[40];
static unsigned char* temp_block_ptr = temp_block;
static ogg_int32_t* zigzag_ptr = dezigzag_index;
- //PERF_BLOCK_START();
+ PERF_BLOCK_START();
__asm {
align 16
@@ -516,6 +516,7 @@
}
+ PERF_BLOCK_END("dequant_slow10 sse2", dequant_slow10_sse2_perf, 10000);
//PERF_BLOCK_END("dequant_slow10 sse2", perf_dequant_slow10_time, perf_dequant_slow10_count,perf_dequant_slow10_min, 5000);
#endif
@@ -786,25 +787,26 @@
void dsp_sse2_idct_init (DspFunctions *funcs)
{
- /* Required, no C, but calls the sse2 version */
+ /* Required, no sse2, but calls the sse2 version of a child function */
funcs->IDctSlow = IDctSlow__sse2;
+ funcs->IDct10 = IDct10__sse2;
funcs->dequant_slow = dequant_slow__sse2;
//funcs->IDct1 = IDct1__sse2;
- //funcs->dequant_slow10 = dequant_slow10__sse2;
+ funcs->dequant_slow10 = dequant_slow10__sse2;
//funcs->dequant_slow = dequant_slow__sse2;
- ClearPerfData(&dequant_slow_c);
- ClearPerfData(&dequant_slow_sse2);
- ClearPerfData(&idct1_c);
- ClearPerfData(&idct1_sse2);
- ClearPerfData(&dequant_slow10_c);
- ClearPerfData(&dequant_slow10_sse2);
+ ClearPerfData(&dequant_slow_c_perf);
+ ClearPerfData(&dequant_slow_sse2_perf);
+ ClearPerfData(&idct1_c_perf);
+ ClearPerfData(&idct1_sse2_perf);
+ ClearPerfData(&dequant_slow10_c_perf);
+ ClearPerfData(&dequant_slow10_sse2_perf);
/* ---------- Not written ------------ */
- //funcs->IDct10 = IDct10__sse2;
+
}
\ No newline at end of file
More information about the commits mailing list