[xiph-commits] r11680 - branches/theora-playtime/lib/x86_32_vs
illiminable at svn.xiph.org
illiminable at svn.xiph.org
Thu Jun 29 16:27:02 PDT 2006
Author: illiminable
Date: 2006-06-29 16:26:58 -0700 (Thu, 29 Jun 2006)
New Revision: 11680
Modified:
branches/theora-playtime/lib/x86_32_vs/idct_sse2.c
Log:
* dequant_slow sse2 is now faster than the C version: best case 298 vs 362. (This could be deceptive, because it is called from the middle of a C function; the outer function hasn't been written in sse2 yet.)
Modified: branches/theora-playtime/lib/x86_32_vs/idct_sse2.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/idct_sse2.c 2006-06-29 23:07:04 UTC (rev 11679)
+++ branches/theora-playtime/lib/x86_32_vs/idct_sse2.c 2006-06-29 23:26:58 UTC (rev 11680)
@@ -44,7 +44,14 @@
//static unsigned __int64 perf_dequant_slow10_count;
//static unsigned __int64 perf_dequant_slow10_min;
+static perf_info dequant_slow_c;
+static perf_info dequant_slow_sse2;
+static perf_info idct1_c;
+static perf_info idct1_sse2;
+static perf_info dequant_slow10_c;
+static perf_info dequant_slow10_sse2;
+
static void dequant_slow__sse2( ogg_int16_t * dequant_coeffs,
ogg_int16_t * quantized_list,
ogg_int32_t * DCT_block)
@@ -52,11 +59,11 @@
#if 0
int i;
- //PERF_BLOCK_START();
+ PERF_BLOCK_START();
for(i=0;i<64;i++)
DCT_block[dezigzag_index[i]] = quantized_list[i] * dequant_coeffs[i];
- PERF_BLOCK_END("dequant_slow C", perf_dequant_slow_time, perf_dequant_slow_count,perf_dequant_slow_min, 5000);
+ PERF_BLOCK_END("dequant_slow C", dequant_slow_c, 1000);
#else
static __declspec(align(16)) ogg_int32_t temp_block[64];
@@ -65,7 +72,7 @@
/* quantized list is not aligned */
- // PERF_BLOCK_START();
+ PERF_BLOCK_START();
__asm {
align 16
@@ -167,7 +174,7 @@
pop ebx
};
- // PERF_BLOCK_END("dequant_slow sse2", perf_dequant_slow_time, perf_dequant_slow_count,perf_dequant_slow_min, 5000);
+ PERF_BLOCK_END("dequant_slow sse2", dequant_slow_sse2, 1000);
#endif
}
@@ -779,17 +786,25 @@
void dsp_sse2_idct_init (DspFunctions *funcs)
{
+ /* Required, no C, but calls the sse2 version */
+ funcs->IDctSlow = IDctSlow__sse2;
- //funcs->dequant_slow = dequant_slow__sse2;
+ funcs->dequant_slow = dequant_slow__sse2;
//funcs->IDct1 = IDct1__sse2;
//funcs->dequant_slow10 = dequant_slow10__sse2;
//funcs->dequant_slow = dequant_slow__sse2;
+ ClearPerfData(&dequant_slow_c);
+ ClearPerfData(&dequant_slow_sse2);
+ ClearPerfData(&idct1_c);
+ ClearPerfData(&idct1_sse2);
+ ClearPerfData(&dequant_slow10_c);
+ ClearPerfData(&dequant_slow10_sse2);
/* ---------- Not written ------------ */
- //funcs->IDctSlow = IDctSlow__sse2;
+
//funcs->IDct10 = IDct10__sse2;
}
\ No newline at end of file
More information about the commits mailing list