[xiph-commits] r11675 - branches/theora-playtime/lib/x86_32_vs
illiminable at svn.xiph.org
illiminable at svn.xiph.org
Thu Jun 29 15:08:36 PDT 2006
Author: illiminable
Date: 2006-06-29 15:08:27 -0700 (Thu, 29 Jun 2006)
New Revision: 11675
Modified:
branches/theora-playtime/lib/x86_32_vs/dct_decode_sse2.c
branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c
branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c
branches/theora-playtime/lib/x86_32_vs/perf_helper.c
branches/theora-playtime/lib/x86_32_vs/perf_helper.h
Log:
* Instrument the mmx versions of dsp
* Decrackulate the perf function, max is a stupid metric
* sub8x8_sse2 faster than mmx : best case 62 vs 70 cycles
Modified: branches/theora-playtime/lib/x86_32_vs/dct_decode_sse2.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/dct_decode_sse2.c 2006-06-29 21:50:53 UTC (rev 11674)
+++ branches/theora-playtime/lib/x86_32_vs/dct_decode_sse2.c 2006-06-29 22:08:27 UTC (rev 11675)
@@ -48,7 +48,7 @@
static __declspec(align(16)) unsigned char temp[128];
static unsigned char* temp_ptr = temp;
- PERF_BLOCK_START();
+ //PERF_BLOCK_START();
__asm {
align 16
mov esi, PixelPtr
@@ -153,7 +153,7 @@
pop ebp
}
- PERF_BLOCK_END("filter horiz sse2", filter_horiz_perf, 10000);
+ //PERF_BLOCK_END("filter horiz sse2", filter_horiz_perf, 10000);
#endif
}
Modified: branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c 2006-06-29 21:50:53 UTC (rev 11674)
+++ branches/theora-playtime/lib/x86_32_vs/dsp_mmx.c 2006-06-29 22:08:27 UTC (rev 11675)
@@ -32,9 +32,7 @@
static const ogg_int64_t V128 = 0x0080008000800080LL;
-static unsigned __int64 perf_sad8x8_time;
-static unsigned __int64 perf_sad8x8_count;
-static unsigned __int64 perf_sad8x8_min;
+static perf_info sub8x8_mmx_perf;
static void sub8x8__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr,
ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine,
@@ -62,6 +60,7 @@
DctInputPtr += 8;
}
#else
+ PERF_BLOCK_START();
__asm {
align 16
@@ -245,6 +244,8 @@
};
+
+ PERF_BLOCK_END("sub8x8 mmx", sub8x8_mmx_perf, 10000);
#endif
}
@@ -1611,9 +1612,7 @@
funcs->inter8x8_err = inter8x8_err__mmx;
funcs->inter8x8_err_xy2 = inter8x8_err_xy2__mmx;
- perf_sad8x8_time = 0;
- perf_sad8x8_count = 0;
-perf_sad8x8_min = -1;
+ ClearPerfData(&sub8x8_mmx_perf);
}
Modified: branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c 2006-06-29 21:50:53 UTC (rev 11674)
+++ branches/theora-playtime/lib/x86_32_vs/dsp_sse2.c 2006-06-29 22:08:27 UTC (rev 11675)
@@ -26,9 +26,7 @@
-//static unsigned __int64 perf_sad8x8_time;
-//static unsigned __int64 perf_sad8x8_count;
-//static unsigned __int64 perf_sad8x8_min;
+static perf_info sub8x8_sse2_perf;
@@ -63,7 +61,7 @@
DctInputPtr += 8;
}
#else
-
+ PERF_BLOCK_START();
__asm {
align 16
@@ -194,7 +192,9 @@
};
+ PERF_BLOCK_END("sub8x8 sse2", sub8x8_sse2_perf, 10000);
+
#endif
}
@@ -1636,7 +1636,7 @@
void dsp_sse2_init(DspFunctions *funcs)
{
TH_DEBUG("enabling accelerated x86_32 sse2 dsp functions.\n");
- //funcs->sub8x8 = sub8x8__sse2;
+ funcs->sub8x8 = sub8x8__sse2;
//funcs->sub8x8_128 = sub8x8_128__sse2;
//funcs->sub8x8avg2 = sub8x8avg2__sse2;
//funcs->row_sad8 = row_sad8__sse2;
@@ -1654,8 +1654,10 @@
//funcs->inter8x8_err = inter8x8_err__sse2;
//funcs->inter8x8_err_xy2 = inter8x8_err_xy2__sse2;
+ ClearPerfData(&sub8x8_sse2_perf);
+
}
Modified: branches/theora-playtime/lib/x86_32_vs/perf_helper.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/perf_helper.c 2006-06-29 21:50:53 UTC (rev 11674)
+++ branches/theora-playtime/lib/x86_32_vs/perf_helper.c 2006-06-29 22:08:27 UTC (rev 11675)
@@ -24,5 +24,4 @@
inoutData->sum = 0;
inoutData->count = 0;
inoutData->min = (unsigned __int64)-1;
- inoutData->max = 0;
}
Modified: branches/theora-playtime/lib/x86_32_vs/perf_helper.h
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/perf_helper.h 2006-06-29 21:50:53 UTC (rev 11674)
+++ branches/theora-playtime/lib/x86_32_vs/perf_helper.h 2006-06-29 22:08:27 UTC (rev 11675)
@@ -11,8 +11,8 @@
unsigned __int64 sum;
unsigned __int64 count;
unsigned __int64 min;
- unsigned __int64 max;
+
} perf_info;
@@ -27,12 +27,11 @@
#define PERF_BLOCK_END(s, perf, z) \
perf_temp = (GetCPUTime() - perf_start_time[--depth]); \
(perf.min) = ((perf.min) > perf_temp) ? perf_temp : (perf.min); \
- (perf.max) = ((perf.max) > perf_temp) ? (perf.max) : perf_temp; \
perf.sum += perf_temp; \
(perf.count)++; \
if (((perf.count) % (z)) == 0) \
{ \
- printf(s " - %lld from %lld iterations -- @%lld cycles -- min(%lld) -- max(%lld)\n", perf.sum, perf.count, (perf.sum) / (perf.count), perf.min, perf.max); \
+ printf(s " - %lld from %lld iterations -- @%lld cycles -- min(%lld)\n", perf.sum, perf.count, (perf.sum) / (perf.count), perf.min); \
}
#else
More information about the commits mailing list