[xiph-commits] r11678 - branches/theora-playtime/lib/x86_32_vs

illiminable at svn.xiph.org illiminable at svn.xiph.org
Thu Jun 29 15:45:02 PDT 2006


Author: illiminable
Date: 2006-06-29 15:44:57 -0700 (Thu, 29 Jun 2006)
New Revision: 11678

Modified:
   branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c
   branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c
Log:
* row_sad8: the mmx version is faster than the sse2 version (best case 36 vs 33 cycles), so the sse2 version stays disabled for now

Modified: branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c	2006-06-29 22:32:23 UTC (rev 11677)
+++ branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c	2006-06-29 22:44:57 UTC (rev 11678)
@@ -31,6 +31,8 @@
 
 static const ogg_int64_t V128 = 0x0080008000800080LL;
 
+
+static perf_info row_sad8_mmx_perf;
 static perf_info sub8x8avg2_mmx_perf;
 static perf_info sub8x8_mmx_perf;
 static perf_info sub8x8_128_mmx_perf;
@@ -453,7 +455,7 @@
     DctInputPtr += 8;
   }
 #else
-    PERF_BLOCK_START();
+    //PERF_BLOCK_START();
     __asm {
         align 16
 
@@ -705,7 +707,7 @@
 
     };
 
-	PERF_BLOCK_END("sub8x8avg2 mmx", sub8x8avg2_mmx_perf, 10000);
+	//PERF_BLOCK_END("sub8x8avg2 mmx", sub8x8avg2_mmx_perf, 10000);
 
 
 
@@ -736,8 +738,10 @@
   return SadValue;
 
 #else
+
   ogg_uint32_t MaxSad;
 
+  PERF_BLOCK_START();
   
   __asm {
     align       16
@@ -780,6 +784,8 @@
     and         eax, 0xffff
     mov         MaxSad, eax
   };
+
+  PERF_BLOCK_END("row_sad8 mmx", row_sad8_mmx_perf, 200000);
    return MaxSad;
   
   
@@ -1621,6 +1627,7 @@
   ClearPerfData(&sub8x8_mmx_perf);
   ClearPerfData(&sub8x8_128_mmx_perf);
   ClearPerfData(&sub8x8avg2_mmx_perf);
+  ClearPerfData(&row_sad8_mmx_perf);
 }
 
 

Modified: branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c	2006-06-29 22:32:23 UTC (rev 11677)
+++ branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c	2006-06-29 22:44:57 UTC (rev 11678)
@@ -25,6 +25,7 @@
 #endif
 
 
+static perf_info row_sad8_sse2_perf;
 static perf_info sub8x8avg2_sse2_perf;
 static perf_info sub8x8_sse2_perf;
 static perf_info sub8x8_128_sse2_perf;
@@ -333,7 +334,7 @@
     DctInputPtr += 8;
   }
 #else
-	 PERF_BLOCK_START();
+	 //PERF_BLOCK_START();
     __asm {
         align 16
 
@@ -530,7 +531,7 @@
         //add         edi, 32
 };        
 
-PERF_BLOCK_END("sub8x8avg2 sse2", sub8x8avg2_sse2_perf, 10000);
+//PERF_BLOCK_END("sub8x8avg2 sse2", sub8x8avg2_sse2_perf, 10000);
 
  
 
@@ -562,7 +563,9 @@
 #else
   ogg_uint32_t SadValue;
 
+  PERF_BLOCK_START();
 
+
   __asm {
 
     align       16
@@ -608,6 +611,7 @@
 
   }
 
+  PERF_BLOCK_END("row_sad8 sse2", row_sad8_sse2_perf, 200000);
   return SadValue;
 
 
@@ -1645,7 +1649,11 @@
   funcs->sub8x8 = sub8x8__sse2;
   funcs->sub8x8_128 = sub8x8_128__sse2;
   funcs->sub8x8avg2 = sub8x8avg2__sse2;
+
+  /* The mmx version is slightly faster */
   //funcs->row_sad8 = row_sad8__sse2;
+
+
   //funcs->col_sad8x8 = col_sad8x8__sse2;
   
   
@@ -1663,6 +1671,7 @@
   ClearPerfData(&sub8x8_sse2_perf);
   ClearPerfData(&sub8x8_128_sse2_perf);
   ClearPerfData(&sub8x8avg2_sse2_perf);
+  ClearPerfData(&row_sad8_sse2_perf);
 
 
 



More information about the commits mailing list