[xiph-commits] r11537 - branches/theora-playtime/lib/x86_32_vs

illiminable at svn.xiph.org illiminable at svn.xiph.org
Wed Jun 7 09:31:45 PDT 2006


Author: illiminable
Date: 2006-06-07 09:31:40 -0700 (Wed, 07 Jun 2006)
New Revision: 11537

Modified:
   branches/theora-playtime/lib/x86_32_vs/idct_sse2.c
Log:
* sse2 implementation of idct1 from 70 to 34 cycles

Modified: branches/theora-playtime/lib/x86_32_vs/idct_sse2.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/idct_sse2.c	2006-06-07 15:55:33 UTC (rev 11536)
+++ branches/theora-playtime/lib/x86_32_vs/idct_sse2.c	2006-06-07 16:31:40 UTC (rev 11537)
@@ -36,7 +36,11 @@
 static unsigned __int64 perf_dequant_slow_count;
 static unsigned __int64 perf_dequant_slow_min;
 
+static unsigned __int64 perf_idct1_time;   /* passed to PERF_BLOCK_END in IDct1__sse2; presumably accumulated cycles - confirm against the macro */
+static unsigned __int64 perf_idct1_count;  /* passed to PERF_BLOCK_END in IDct1__sse2; presumably number of timed calls - confirm against the macro */
+static unsigned __int64 perf_idct1_min;    /* reset to -1 (all bits set, the largest unsigned value); presumably the fastest run seen - confirm */
 
+
 static void dequant_slow__sse2( ogg_int16_t * dequant_coeffs,
                    ogg_int16_t * quantized_list,
                    ogg_int32_t * DCT_block) 
@@ -586,15 +590,62 @@
 void IDct1__sse2( Q_LIST_ENTRY * InputData,
             ogg_int16_t *QuantMatrix,
             ogg_int16_t * OutputData ){
+
+#if 0
   int loop;
 
   ogg_int16_t  OutD;
 
+  PERF_BLOCK_START();
   OutD=(ogg_int16_t) ((ogg_int32_t)(InputData[0]*QuantMatrix[0]+15)>>5);
 
   for(loop=0;loop<64;loop++)
     OutputData[loop]=OutD;
+  PERF_BLOCK_END("IDct1 C", perf_idct1_time, perf_idct1_count,perf_idct1_min, 10000);
+#else
+    /* DC-only inverse DCT: dequantise coefficient 0, round, and fill all
+     * 64 output words with the result, matching the C version above.
+     *   InputData   - quantised coefficients; only InputData[0] is read
+     *   QuantMatrix - dequantisation factors; only QuantMatrix[0] is read
+     *   OutputData  - receives 64 identical 16-bit values */
 
+    PERF_BLOCK_START();
+    __asm {
+        mov     esi, InputData
+        mov     edx, QuantMatrix
+        mov     edi, OutputData
+
+        /* Sign-extend both 16-bit operands so that the multiply, the +15 */
+        /* bias and the shift are all done in 32 bits, as in the C code.  */
+        movsx   ecx, WORD PTR [esi]
+        movsx   eax, WORD PTR [edx]
+        imul    ecx, eax
+        add     ecx, 15
+        /* Arithmetic shift so negative DC values keep their sign */
+        sar     ecx, 5
+
+        /* Move the result straight into an xmm reg, no scratch memory */
+        movd    xmm0, ecx
+
+        /* Put the low word in all eight word lanes */
+        pshuflw xmm0, xmm0, 0
+        pshufd  xmm1, xmm0, 0
+
+        /* 64 words = 128 bytes = 8 stores of 16 bytes each.             */
+        /* NOTE(review): movdqa needs OutputData 16-byte aligned; switch */
+        /* to movdqu if callers cannot guarantee that - confirm.         */
+        movdqa  [edi], xmm1
+        movdqa  [edi+16], xmm1
+        movdqa  [edi+32], xmm1
+        movdqa  [edi+48], xmm1
+        movdqa  [edi+64], xmm1
+        movdqa  [edi+80], xmm1
+        movdqa  [edi+96], xmm1
+        movdqa  [edi+112], xmm1
+    }
+    PERF_BLOCK_END("IDct1 sse2", perf_idct1_time, perf_idct1_count,perf_idct1_min, 10000);
+#endif
+
 }
 
 
@@ -605,6 +656,11 @@
     perf_dequant_slow_time = 0;
     perf_dequant_slow_count = 0;
     perf_dequant_slow_min = -1;
+
+    perf_idct1_time = 0;
+    perf_idct1_count = 0;
+    perf_idct1_min = -1;
+
     /* TODO::: Match function order */
   funcs->dequant_slow = dequant_slow__sse2;
   funcs->IDct1 = IDct1__sse2;



More information about the commits mailing list