[xiph-commits] r11678 - branches/theora-playtime/lib/x86_32_vs
illiminable at svn.xiph.org
illiminable at svn.xiph.org
Thu Jun 29 15:45:02 PDT 2006
Author: illiminable
Date: 2006-06-29 15:44:57 -0700 (Thu, 29 Jun 2006)
New Revision: 11678
Modified:
branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c
branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c
Log:
* row_sad8: mmx is faster than sse2 (best case 36 vs 33 cycles); the sse2 version stays disabled for now
Modified: branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c 2006-06-29 22:32:23 UTC (rev 11677)
+++ branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c 2006-06-29 22:44:57 UTC (rev 11678)
@@ -31,6 +31,8 @@
static const ogg_int64_t V128 = 0x0080008000800080LL;
+
+static perf_info row_sad8_mmx_perf;
static perf_info sub8x8avg2_mmx_perf;
static perf_info sub8x8_mmx_perf;
static perf_info sub8x8_128_mmx_perf;
@@ -453,7 +455,7 @@
DctInputPtr += 8;
}
#else
- PERF_BLOCK_START();
+ //PERF_BLOCK_START();
__asm {
align 16
@@ -705,7 +707,7 @@
};
- PERF_BLOCK_END("sub8x8avg2 mmx", sub8x8avg2_mmx_perf, 10000);
+ //PERF_BLOCK_END("sub8x8avg2 mmx", sub8x8avg2_mmx_perf, 10000);
@@ -736,8 +738,10 @@
return SadValue;
#else
+
ogg_uint32_t MaxSad;
+ PERF_BLOCK_START();
__asm {
align 16
@@ -780,6 +784,8 @@
and eax, 0xffff
mov MaxSad, eax
};
+
+ PERF_BLOCK_END("row_sad8 mmx", row_sad8_mmx_perf, 200000);
return MaxSad;
@@ -1621,6 +1627,7 @@
ClearPerfData(&sub8x8_mmx_perf);
ClearPerfData(&sub8x8_128_mmx_perf);
ClearPerfData(&sub8x8avg2_mmx_perf);
+ ClearPerfData(&row_sad8_mmx_perf);
}
Modified: branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c 2006-06-29 22:32:23 UTC (rev 11677)
+++ branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c 2006-06-29 22:44:57 UTC (rev 11678)
@@ -25,6 +25,7 @@
#endif
+static perf_info row_sad8_sse2_perf;
static perf_info sub8x8avg2_sse2_perf;
static perf_info sub8x8_sse2_perf;
static perf_info sub8x8_128_sse2_perf;
@@ -333,7 +334,7 @@
DctInputPtr += 8;
}
#else
- PERF_BLOCK_START();
+ //PERF_BLOCK_START();
__asm {
align 16
@@ -530,7 +531,7 @@
//add edi, 32
};
-PERF_BLOCK_END("sub8x8avg2 sse2", sub8x8avg2_sse2_perf, 10000);
+//PERF_BLOCK_END("sub8x8avg2 sse2", sub8x8avg2_sse2_perf, 10000);
@@ -562,7 +563,9 @@
#else
ogg_uint32_t SadValue;
+ PERF_BLOCK_START();
+
__asm {
align 16
@@ -608,6 +611,7 @@
}
+ PERF_BLOCK_END("row_sad8 sse2", row_sad8_sse2_perf, 200000);
return SadValue;
@@ -1645,7 +1649,11 @@
funcs->sub8x8 = sub8x8__sse2;
funcs->sub8x8_128 = sub8x8_128__sse2;
funcs->sub8x8avg2 = sub8x8avg2__sse2;
+
+ /* The mmx version is slightly faster */
//funcs->row_sad8 = row_sad8__sse2;
+
+
//funcs->col_sad8x8 = col_sad8x8__sse2;
@@ -1663,6 +1671,7 @@
ClearPerfData(&sub8x8_sse2_perf);
ClearPerfData(&sub8x8_128_sse2_perf);
ClearPerfData(&sub8x8avg2_sse2_perf);
+ ClearPerfData(&row_sad8_sse2_perf);
More information about the commits mailing list