[xiph-commits] r11537 - branches/theora-playtime/lib/x86_32_vs
illiminable at svn.xiph.org
illiminable at svn.xiph.org
Wed Jun 7 09:31:45 PDT 2006
Author: illiminable
Date: 2006-06-07 09:31:40 -0700 (Wed, 07 Jun 2006)
New Revision: 11537
Modified:
branches/theora-playtime/lib/x86_32_vs/idct_sse2.c
Log:
* SSE2 implementation of IDct1; cycle count drops from 70 to 34
Modified: branches/theora-playtime/lib/x86_32_vs/idct_sse2.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/idct_sse2.c 2006-06-07 15:55:33 UTC (rev 11536)
+++ branches/theora-playtime/lib/x86_32_vs/idct_sse2.c 2006-06-07 16:31:40 UTC (rev 11537)
@@ -36,7 +36,11 @@
static unsigned __int64 perf_dequant_slow_count; /* reset to 0 in the routine that installs funcs->dequant_slow */
static unsigned __int64 perf_dequant_slow_min; /* reset to -1 there, which wraps to the largest unsigned 64-bit value */
+static unsigned __int64 perf_idct1_time; /* passed to PERF_BLOCK_END in IDct1__sse2; reset to 0 in the routine that installs funcs->IDct1 */
+static unsigned __int64 perf_idct1_count; /* passed to PERF_BLOCK_END in IDct1__sse2; reset to 0 alongside perf_idct1_time */
+static unsigned __int64 perf_idct1_min; /* reset to -1 (largest unsigned 64-bit value); presumably a running minimum -- confirm against PERF_BLOCK_END */
+
static void dequant_slow__sse2( ogg_int16_t * dequant_coeffs,
ogg_int16_t * quantized_list,
ogg_int32_t * DCT_block)
@@ -586,15 +590,62 @@
void IDct1__sse2( Q_LIST_ENTRY * InputData,
ogg_int16_t *QuantMatrix,
ogg_int16_t * OutputData ){
+ /*
+  * Fills all 64 entries of OutputData with the single value
+  *     (InputData[0] * QuantMatrix[0] + 15) >> 5
+  * computed in 32 bits and then truncated to 16 bits.
+  *
+  * InputData   - only element 0 is read.
+  * QuantMatrix - only element 0 is read.
+  * OutputData  - receives 64 ogg_int16_t values (128 bytes). The SSE2 path
+  *               stores with movdqa, so it must be 16-byte aligned.
+  *
+  * The "#if 0" branch is the plain C reference; the "#else" branch is the
+  * SSE2 version and is written to produce exactly the same result.
+  */
+
+#if 0
int loop;
ogg_int16_t OutD;
+ PERF_BLOCK_START();
OutD=(ogg_int16_t) ((ogg_int32_t)(InputData[0]*QuantMatrix[0]+15)>>5);
for(loop=0;loop<64;loop++)
OutputData[loop]=OutD;
+ PERF_BLOCK_END("IDct1 C", perf_idct1_time, perf_idct1_count,perf_idct1_min, 10000);
+#else
+ PERF_BLOCK_START();
+ __asm {
+ align 16
+
+ mov esi, InputData
+ mov edx, QuantMatrix
+ mov edi, OutputData
+
+ /* Sign-extend both 16-bit operands and multiply in 32 bits, as the C
+    reference does. Assumes Q_LIST_ENTRY is a signed 16-bit type, as the
+    previous WORD PTR load already did -- TODO confirm. */
+ movsx ecx, WORD PTR [esi]
+ movsx eax, WORD PTR [edx]
+ imul ecx, eax
+ add ecx, 15
+ /* Arithmetic shift: the value is signed, so the sign must be kept */
+ sar ecx, 5
+
+ /* Move the result straight into an xmm register (no memory round trip),
+    then replicate its low word into all eight word lanes */
+ movd xmm0, ecx
+ pshuflw xmm0, xmm0, 0
+ pshufd xmm1, xmm0, 0
+
+ /* 8 stores x 16 bytes = 128 bytes = 64 ogg_int16_t coefficients */
+ movdqa [edi], xmm1
+ movdqa [edi+16], xmm1
+ movdqa [edi+32], xmm1
+ movdqa [edi+48], xmm1
+ movdqa [edi+64], xmm1
+ movdqa [edi+80], xmm1
+ movdqa [edi+96], xmm1
+ movdqa [edi+112], xmm1
+
+ }
+ PERF_BLOCK_END("IDct1 SSE2", perf_idct1_time, perf_idct1_count,perf_idct1_min, 10000);
+#endif
+
}
@@ -605,6 +656,11 @@
perf_dequant_slow_time = 0;
perf_dequant_slow_count = 0;
perf_dequant_slow_min = -1;
+
+ perf_idct1_time = 0;
+ perf_idct1_count = 0;
+ perf_idct1_min = -1;
+
/* TODO::: Match function order */
funcs->dequant_slow = dequant_slow__sse2;
funcs->IDct1 = IDct1__sse2;
More information about the commits mailing list