[xiph-commits] r11679 - branches/theora-playtime/lib/x86_32_vs

Thu Jun 29 16:07:10 PDT 2006

Author: illiminable
Date: 2006-06-29 16:07:04 -0700 (Thu, 29 Jun 2006)
New Revision: 11679

Modified:
   branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c
   branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c
Log:
* col_sad8x8 mmx is faster than sse2: best case 114 vs 77, definitely out

Modified: branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c
===================================================================

--- branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c	2006-06-29 22:44:57 UTC (rev 11678)
+++ branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c	2006-06-29 23:07:04 UTC (rev 11679)
@@ -31,7 +31,7 @@
 
 static const ogg_int64_t V128 = 0x0080008000800080LL;
 
-
+static perf_info col_sad8x8_mmx_perf;
 static perf_info row_sad8_mmx_perf;
 static perf_info sub8x8avg2_mmx_perf;
 static perf_info sub8x8_mmx_perf;
@@ -741,7 +741,7 @@
 
   ogg_uint32_t MaxSad;
 
-  PERF_BLOCK_START();
+  //PERF_BLOCK_START();
   
   __asm {
     align       16
@@ -785,7 +785,7 @@
     mov         MaxSad, eax
   };
 
-  PERF_BLOCK_END("row_sad8 mmx", row_sad8_mmx_perf, 200000);
+  //PERF_BLOCK_END("row_sad8 mmx", row_sad8_mmx_perf, 200000);
    return MaxSad;
   
   
@@ -847,7 +847,7 @@
 #else
   ogg_uint32_t MaxSad;
 
-
+	//PERF_BLOCK_START();
     __asm {
         align       16
         mov         ebx, Src1
@@ -920,6 +920,7 @@
         mov         MaxSad, eax
     };
 
+	//PERF_BLOCK_END("col_sad8x8 mmx", col_sad8x8_mmx_perf, 30000);
     return MaxSad;
 
 
@@ -1628,6 +1629,7 @@
   ClearPerfData(&sub8x8_128_mmx_perf);
   ClearPerfData(&sub8x8avg2_mmx_perf);
   ClearPerfData(&row_sad8_mmx_perf);
+  ClearPerfData(&col_sad8x8_mmx_perf);
 }
 
 

Modified: branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c	2006-06-29 22:44:57 UTC (rev 11678)
+++ branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c	2006-06-29 23:07:04 UTC (rev 11679)
@@ -24,7 +24,7 @@
 #define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b))))
 #endif
 
-
+static perf_info col_sad8x8_sse2_perf;
 static perf_info row_sad8_sse2_perf;
 static perf_info sub8x8avg2_sse2_perf;
 static perf_info sub8x8_sse2_perf;
@@ -563,7 +563,7 @@
 #else
   ogg_uint32_t SadValue;
 
-  PERF_BLOCK_START();
+  //PERF_BLOCK_START();
 
 
   __asm {
@@ -611,7 +611,7 @@
 
   }
 
-  PERF_BLOCK_END("row_sad8 sse2", row_sad8_sse2_perf, 200000);
+  //PERF_BLOCK_END("row_sad8 sse2", row_sad8_sse2_perf, 200000);
   return SadValue;
 
 
@@ -678,7 +678,7 @@
     /* TODO::: It may not be worth contracting to 8 bit in the middle 
                 The conversion back and forth possibly outweighs the saving */
 
-
+	//PERF_BLOCK_START();
     __asm {
         align       16
         
@@ -877,6 +877,7 @@
 
     };
 
+	//PERF_BLOCK_END("col_sad8x8 sse2", col_sad8x8_sse2_perf, 30000);
     return SadValue;
  
 
@@ -1653,7 +1654,7 @@
   /* The mmx version is slightly faster */
   //funcs->row_sad8 = row_sad8__sse2;
 
-
+  /* The mmx version is much faster */
   //funcs->col_sad8x8 = col_sad8x8__sse2;
   
   
@@ -1672,6 +1673,7 @@
   ClearPerfData(&sub8x8_128_sse2_perf);
   ClearPerfData(&sub8x8avg2_sse2_perf);
   ClearPerfData(&row_sad8_sse2_perf);
+  ClearPerfData(&col_sad8x8_sse2_perf);