[xiph-commits] r11679 - branches/theora-playtime/lib/x86_32_vs
illiminable at svn.xiph.org
illiminable at svn.xiph.org
Thu Jun 29 16:07:10 PDT 2006
Author: illiminable
Date: 2006-06-29 16:07:04 -0700 (Thu, 29 Jun 2006)
New Revision: 11679
Modified:
branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c
branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c
Log:
* col_sad8x8: mmx is faster than sse2 (best case 114 vs 77), so the sse2 version is definitely out
Modified: branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c 2006-06-29 22:44:57 UTC (rev 11678)
+++ branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c 2006-06-29 23:07:04 UTC (rev 11679)
@@ -31,7 +31,7 @@
static const ogg_int64_t V128 = 0x0080008000800080LL;
-
+static perf_info col_sad8x8_mmx_perf;
static perf_info row_sad8_mmx_perf;
static perf_info sub8x8avg2_mmx_perf;
static perf_info sub8x8_mmx_perf;
@@ -741,7 +741,7 @@
ogg_uint32_t MaxSad;
- PERF_BLOCK_START();
+ //PERF_BLOCK_START();
__asm {
align 16
@@ -785,7 +785,7 @@
mov MaxSad, eax
};
- PERF_BLOCK_END("row_sad8 mmx", row_sad8_mmx_perf, 200000);
+ //PERF_BLOCK_END("row_sad8 mmx", row_sad8_mmx_perf, 200000);
return MaxSad;
@@ -847,7 +847,7 @@
#else
ogg_uint32_t MaxSad;
-
+ //PERF_BLOCK_START();
__asm {
align 16
mov ebx, Src1
@@ -920,6 +920,7 @@
mov MaxSad, eax
};
+ //PERF_BLOCK_END("col_sad8x8 mmx", col_sad8x8_mmx_perf, 30000);
return MaxSad;
@@ -1628,6 +1629,7 @@
ClearPerfData(&sub8x8_128_mmx_perf);
ClearPerfData(&sub8x8avg2_mmx_perf);
ClearPerfData(&row_sad8_mmx_perf);
+ ClearPerfData(&col_sad8x8_mmx_perf);
}
Modified: branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c 2006-06-29 22:44:57 UTC (rev 11678)
+++ branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c 2006-06-29 23:07:04 UTC (rev 11679)
@@ -24,7 +24,7 @@
#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b))))
#endif
-
+static perf_info col_sad8x8_sse2_perf;
static perf_info row_sad8_sse2_perf;
static perf_info sub8x8avg2_sse2_perf;
static perf_info sub8x8_sse2_perf;
@@ -563,7 +563,7 @@
#else
ogg_uint32_t SadValue;
- PERF_BLOCK_START();
+ //PERF_BLOCK_START();
__asm {
@@ -611,7 +611,7 @@
}
- PERF_BLOCK_END("row_sad8 sse2", row_sad8_sse2_perf, 200000);
+ //PERF_BLOCK_END("row_sad8 sse2", row_sad8_sse2_perf, 200000);
return SadValue;
@@ -678,7 +678,7 @@
/* TODO::: It may not be worth contracting to 8 bit in the middle
The conversion back and forth possibly outweighs the saving */
-
+ //PERF_BLOCK_START();
__asm {
align 16
@@ -877,6 +877,7 @@
};
+ //PERF_BLOCK_END("col_sad8x8 sse2", col_sad8x8_sse2_perf, 30000);
return SadValue;
@@ -1653,7 +1654,7 @@
/* The mmx version is slightly faster */
//funcs->row_sad8 = row_sad8__sse2;
-
+ /* The mmx version is much faster */
//funcs->col_sad8x8 = col_sad8x8__sse2;
@@ -1672,6 +1673,7 @@
ClearPerfData(&sub8x8_128_sse2_perf);
ClearPerfData(&sub8x8avg2_sse2_perf);
ClearPerfData(&row_sad8_sse2_perf);
+ ClearPerfData(&col_sad8x8_sse2_perf);
More information about the commits mailing list