[xiph-commits] r11677 - branches/theora-playtime/lib/x86_32_vs
illiminable at svn.xiph.org
illiminable at svn.xiph.org
Thu Jun 29 15:32:27 PDT 2006
Author: illiminable
Date: 2006-06-29 15:32:23 -0700 (Thu, 29 Jun 2006)
New Revision: 11677
Modified:
branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c
branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c
Log:
* sub8x8avg2: the SSE2 version is faster than the MMX version (best case: 86 vs. 99 cycles)
Modified: branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c 2006-06-29 22:21:05 UTC (rev 11676)
+++ branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c 2006-06-29 22:32:23 UTC (rev 11677)
@@ -31,7 +31,7 @@
static const ogg_int64_t V128 = 0x0080008000800080LL;
-
+static perf_info sub8x8avg2_mmx_perf;
static perf_info sub8x8_mmx_perf;
static perf_info sub8x8_128_mmx_perf;
@@ -278,7 +278,7 @@
}
#else
- PERF_BLOCK_START();
+ //PERF_BLOCK_START();
__asm {
align 16
@@ -418,7 +418,7 @@
};
- PERF_BLOCK_END("sub8x8_128 mmx", sub8x8_128_mmx_perf, 10000);
+ //PERF_BLOCK_END("sub8x8_128 mmx", sub8x8_128_mmx_perf, 10000);
#endif
}
@@ -453,7 +453,7 @@
DctInputPtr += 8;
}
#else
-
+ PERF_BLOCK_START();
__asm {
align 16
@@ -705,9 +705,11 @@
};
+ PERF_BLOCK_END("sub8x8avg2 mmx", sub8x8avg2_mmx_perf, 10000);
+
#endif
}
@@ -1618,6 +1620,7 @@
ClearPerfData(&sub8x8_mmx_perf);
ClearPerfData(&sub8x8_128_mmx_perf);
+ ClearPerfData(&sub8x8avg2_mmx_perf);
}
Modified: branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c 2006-06-29 22:21:05 UTC (rev 11676)
+++ branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c 2006-06-29 22:32:23 UTC (rev 11677)
@@ -25,7 +25,7 @@
#endif
-
+static perf_info sub8x8avg2_sse2_perf;
static perf_info sub8x8_sse2_perf;
static perf_info sub8x8_128_sse2_perf;
@@ -225,7 +225,7 @@
}
#else
- PERF_BLOCK_START();
+ //PERF_BLOCK_START();
__asm {
align 16
@@ -297,7 +297,7 @@
};
- PERF_BLOCK_END("sub8x8_128 sse2", sub8x8_128_sse2_perf, 10000);
+ //PERF_BLOCK_END("sub8x8_128 sse2", sub8x8_128_sse2_perf, 10000);
#endif
@@ -333,6 +333,7 @@
DctInputPtr += 8;
}
#else
+ PERF_BLOCK_START();
__asm {
align 16
@@ -529,6 +530,8 @@
//add edi, 32
};
+PERF_BLOCK_END("sub8x8avg2 sse2", sub8x8avg2_sse2_perf, 10000);
+
@@ -1641,7 +1644,7 @@
TH_DEBUG("enabling accelerated x86_32 sse2 dsp functions.\n");
funcs->sub8x8 = sub8x8__sse2;
funcs->sub8x8_128 = sub8x8_128__sse2;
- //funcs->sub8x8avg2 = sub8x8avg2__sse2;
+ funcs->sub8x8avg2 = sub8x8avg2__sse2;
//funcs->row_sad8 = row_sad8__sse2;
//funcs->col_sad8x8 = col_sad8x8__sse2;
@@ -1659,6 +1662,7 @@
ClearPerfData(&sub8x8_sse2_perf);
ClearPerfData(&sub8x8_128_sse2_perf);
+ ClearPerfData(&sub8x8avg2_sse2_perf);
More information about the commits mailing list