[xiph-commits] r11677 - branches/theora-playtime/lib/x86_32_vs

illiminable at svn.xiph.org illiminable at svn.xiph.org
Thu Jun 29 15:32:27 PDT 2006


Author: illiminable
Date: 2006-06-29 15:32:23 -0700 (Thu, 29 Jun 2006)
New Revision: 11677

Modified:
   branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c
   branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c
Log:
* sub8x8avg2: the SSE2 version is faster than the MMX version (best case: 86 vs. 99 cycles)

Modified: branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c	2006-06-29 22:21:05 UTC (rev 11676)
+++ branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c	2006-06-29 22:32:23 UTC (rev 11677)
@@ -31,7 +31,7 @@
 
 static const ogg_int64_t V128 = 0x0080008000800080LL;
 
-
+static perf_info sub8x8avg2_mmx_perf;
 static perf_info sub8x8_mmx_perf;
 static perf_info sub8x8_128_mmx_perf;
 
@@ -278,7 +278,7 @@
   }
 
 #else
-	PERF_BLOCK_START();
+	//PERF_BLOCK_START();
     __asm {
         align 16
 
@@ -418,7 +418,7 @@
 
     };
 
-	PERF_BLOCK_END("sub8x8_128 mmx", sub8x8_128_mmx_perf, 10000);
+	//PERF_BLOCK_END("sub8x8_128 mmx", sub8x8_128_mmx_perf, 10000);
  
 #endif
 }
@@ -453,7 +453,7 @@
     DctInputPtr += 8;
   }
 #else
-
+    PERF_BLOCK_START();
     __asm {
         align 16
 
@@ -705,9 +705,11 @@
 
     };
 
+	PERF_BLOCK_END("sub8x8avg2 mmx", sub8x8avg2_mmx_perf, 10000);
 
 
 
+
  
 #endif
 }
@@ -1618,6 +1620,7 @@
 
   ClearPerfData(&sub8x8_mmx_perf);
   ClearPerfData(&sub8x8_128_mmx_perf);
+  ClearPerfData(&sub8x8avg2_mmx_perf);
 }
 
 

Modified: branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c	2006-06-29 22:21:05 UTC (rev 11676)
+++ branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c	2006-06-29 22:32:23 UTC (rev 11677)
@@ -25,7 +25,7 @@
 #endif
 
 
-
+static perf_info sub8x8avg2_sse2_perf;
 static perf_info sub8x8_sse2_perf;
 static perf_info sub8x8_128_sse2_perf;
 
@@ -225,7 +225,7 @@
   }
 
 #else
-	PERF_BLOCK_START();
+	//PERF_BLOCK_START();
     __asm {
         align 16
         
@@ -297,7 +297,7 @@
 
     };
 
-	PERF_BLOCK_END("sub8x8_128 sse2", sub8x8_128_sse2_perf, 10000);
+	//PERF_BLOCK_END("sub8x8_128 sse2", sub8x8_128_sse2_perf, 10000);
 
  
 #endif
@@ -333,6 +333,7 @@
     DctInputPtr += 8;
   }
 #else
+	 PERF_BLOCK_START();
     __asm {
         align 16
 
@@ -529,6 +530,8 @@
         //add         edi, 32
 };        
 
+PERF_BLOCK_END("sub8x8avg2 sse2", sub8x8avg2_sse2_perf, 10000);
+
  
 
  
@@ -1641,7 +1644,7 @@
   TH_DEBUG("enabling accelerated x86_32 sse2 dsp functions.\n");
   funcs->sub8x8 = sub8x8__sse2;
   funcs->sub8x8_128 = sub8x8_128__sse2;
-  //funcs->sub8x8avg2 = sub8x8avg2__sse2;
+  funcs->sub8x8avg2 = sub8x8avg2__sse2;
   //funcs->row_sad8 = row_sad8__sse2;
   //funcs->col_sad8x8 = col_sad8x8__sse2;
   
@@ -1659,6 +1662,7 @@
 
   ClearPerfData(&sub8x8_sse2_perf);
   ClearPerfData(&sub8x8_128_sse2_perf);
+  ClearPerfData(&sub8x8avg2_sse2_perf);
 
 
 



More information about the commits mailing list