[xiph-commits] r14839 - in branches/theora-thusnelda/lib: . enc
xiphmont at svn.xiph.org
xiphmont at svn.xiph.org
Tue May 6 00:50:30 PDT 2008
Author: xiphmont
Date: 2008-05-06 00:50:29 -0700 (Tue, 06 May 2008)
New Revision: 14839
Modified:
branches/theora-thusnelda/lib/cpu.c
branches/theora-thusnelda/lib/cpu.h
branches/theora-thusnelda/lib/enc/mode.c
Log:
Quit dicking around with the rho optimization; function call overhead
swamps everything.
Modified: branches/theora-thusnelda/lib/cpu.c
===================================================================
--- branches/theora-thusnelda/lib/cpu.c 2008-05-06 03:04:19 UTC (rev 14838)
+++ branches/theora-thusnelda/lib/cpu.c 2008-05-06 07:50:29 UTC (rev 14839)
@@ -5,14 +5,14 @@
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
-
+
CPU capability detection for x86 processors.
Originally written by Rudolf Marek.
-
+
function:
last mod: $Id$
@@ -20,91 +20,181 @@
#include "cpu.h"
+#if !defined(USE_ASM)
+ogg_uint32_t oc_cpu_flags_get(void){
+ return 0;
+}
+#else
-ogg_uint32_t oc_cpu_flags_get(void){
- ogg_uint32_t flags = 0;
-#if defined(USE_ASM)
- ogg_uint32_t eax;
- ogg_uint32_t ebx;
- ogg_uint32_t ecx;
- ogg_uint32_t edx;
-#if (defined(__amd64__) || defined(__x86_64__))
-# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
+# if !defined(_MSC_VER)
+# if defined(__amd64__)||defined(__x86_64__)
+/*On x86-64, gcc seems to be able to figure out how to save %rbx for us when
+ compiling with -fPIC.*/
+# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
__asm__ __volatile__( \
- "push %%rbx\n\t" \
"cpuid\n\t" \
- "movl %%ebx,%1\n\t" \
- "pop %%rbx\n\t" \
- :"=a" (_eax), \
- "=r" (_ebx), \
- "=c" (_ecx), \
- "=d" (_edx) \
- :"a" (_op) \
+ :[eax]"=a"(_eax),[ebx]"=b"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
+ :"a"(_op) \
:"cc" \
)
-#else
-# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
+# else
+/*On x86-32, not so much.*/
+# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
__asm__ __volatile__( \
- "pushl %%ebx\n\t" \
+ "xchgl %%ebx,%[ebx]\n\t" \
"cpuid\n\t" \
- "movl %%ebx,%1\n\t" \
- "popl %%ebx\n\t" \
- :"=a" (_eax), \
- "=r" (_ebx), \
- "=c" (_ecx), \
- "=d" (_edx) \
- :"a" (_op) \
+ "xchgl %%ebx,%[ebx]\n\t" \
+ :[eax]"=a"(_eax),[ebx]"=r"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
+ :"a"(_op) \
:"cc" \
)
+# endif
+# else
+/*Why does MSVC need this complicated rigamarole?
+ At this point I honestly do not care.*/
+
+/*Visual C cpuid helper function.
+ For VS2005 we could as well use the _cpuid builtin, but that wouldn't work
+ for VS2003 users, so we do it in inline assembler.
+ Executes the cpuid instruction with eax set to _op and stores the resulting
+ eax, ebx, ecx and edx register values through the _cpu_info pointer at byte
+ offsets 0, 4, 8 and 12, respectively.
+ The cpuid() macro defined after this function wraps it: it calls the helper
+ with a local 4-entry array and copies the entries, in the same order, into
+ the four lvalues passed as _eax, _ebx, _ecx and _edx.*/
+static void oc_cpuid_helper(ogg_uint32_t _cpu_info[4],ogg_uint32_t _op){
+ _asm{
+ mov eax,[_op]
+ mov esi,_cpu_info
+ cpuid
+ mov [esi+0],eax
+ mov [esi+4],ebx
+ mov [esi+8],ecx
+ mov [esi+12],edx
+ }
+}
+
+# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
+ do{ \
+ ogg_uint32_t cpu_info[4]; \
+ oc_cpuid_helper(cpu_info,_op); \
+ (_eax) = cpu_info[0]; \
+ (_ebx) = cpu_info[1]; \
+ (_ecx) = cpu_info[2]; \
+ (_edx) = cpu_info[3]; \
+ }while(0)
+
+/*Visual C helper that probes for cpuid support by trying to toggle bit
+ 0x200000 (the ID flag) of EFLAGS.
+ _eax: Receives EFLAGS as read back after the toggle attempt.
+ _ebx: Receives the original EFLAGS value.
+ The two values are equal when the bit could not be changed, which the caller
+ treats as "no cpuid".
+ The first pushfd/last popfd pair restores the original EFLAGS before the
+ results are stored.*/
+static void oc_detect_cpuid_helper(ogg_uint32_t *_eax,ogg_uint32_t *_ebx){
+ _asm{
+ pushfd
+ pushfd
+ pop eax
+ mov ebx,eax
+ xor eax,200000h
+ push eax
+ popfd
+ pushfd
+ pop eax
+ popfd
+ /*_eax and _ebx are pointers.
+ In MSVC inline assembly [_eax] names the pointer variable itself, so a
+ direct "mov [_eax],eax" would overwrite the argument instead of the
+ caller's storage.
+ Load each pointer into a scratch register and store through that.*/
+ mov ecx,_eax
+ mov [ecx],eax
+ mov ecx,_ebx
+ mov [ecx],ebx
+ }
+}
+# endif
+
+ogg_uint32_t oc_cpu_flags_get(void){
+ ogg_uint32_t flags;
+ ogg_uint32_t eax;
+ ogg_uint32_t ebx;
+ ogg_uint32_t ecx;
+ ogg_uint32_t edx;
+# if !defined(__amd64__)&&!defined(__x86_64__)
+ /*Not all x86-32 chips support cpuid, so we have to check.*/
+# if !defined(_MSC_VER)
__asm__ __volatile__(
"pushfl\n\t"
"pushfl\n\t"
- "popl %0\n\t"
- "movl %0,%1\n\t"
- "xorl $0x200000,%0\n\t"
- "pushl %0\n\t"
+ "popl %[a]\n\t"
+ "movl %[a],%[b]\n\t"
+ "xorl $0x200000,%[a]\n\t"
+ "pushl %[a]\n\t"
"popfl\n\t"
"pushfl\n\t"
- "popl %0\n\t"
+ "popl %[a]\n\t"
"popfl\n\t"
- :"=r" (eax),
- "=r" (ebx)
+ :[a]"=r"(eax),[b]"=r"(ebx)
:
:"cc"
);
+# else
+ oc_detect_cpuid_helper(&eax,&ebx);
+# endif
/*No cpuid.*/
if(eax==ebx)return 0;
-#endif
+# endif
cpuid(0,eax,ebx,ecx,edx);
- if(ebx==0x756e6547&&edx==0x49656e69&&ecx==0x6c65746e){
- /*Intel:*/
-inteltest:
+ /* l e t n I e n i u n e G*/
+ if(ecx==0x6C65746E&&edx==0x49656E69&&ebx==0x756E6547||
+ /* 6 8 x M T e n i u n e G*/
+ ecx==0x3638784D&&edx==0x54656E69&&ebx==0x756E6547){
+ /*Intel, Transmeta (tested with Crusoe TM5800):*/
cpuid(1,eax,ebx,ecx,edx);
- if((edx&0x00800000)==0)return 0;
+ /*If there isn't even MMX, give up.*/
+ if(!(edx&0x00800000))return 0;
flags=OC_CPU_X86_MMX;
if(edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE;
if(edx&0x04000000)flags|=OC_CPU_X86_SSE2;
+ if(ecx&0x00000001)flags|=OC_CPU_X86_PNI;
}
- else if(ebx==0x68747541&&edx==0x69746e65&&ecx==0x444d4163 ||
- ebx==0x646f6547&&edx==0x79622065&&ecx==0x43534e20){
- /*AMD:*/
- /*Geode:*/
+ /* D M A c i t n e h t u A*/
+ else if(ecx==0x444D4163&&edx==0x69746E65&&ebx==0x68747541||
+ /* C S N y b e d o e G*/
+ ecx==0x43534e20&&edx==0x79622065&&ebx==0x646f6547){
+ /*AMD, Geode:*/
cpuid(0x80000000,eax,ebx,ecx,edx);
- if(eax<0x80000001)goto inteltest;
+ if(eax<0x80000001){
+ /*No extended functions supported.
+ Use normal cpuid flags.*/
+ cpuid(1,eax,ebx,ecx,edx);
+ /*If there isn't even MMX, give up.*/
+ if(!(edx&0x00800000))return 0;
+ flags=OC_CPU_X86_MMX;
+ if(edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE;
+ }
+ else{
+ cpuid(0x80000001,eax,ebx,ecx,edx);
+ /*If there isn't even MMX, give up.*/
+ if(!(edx&0x00800000))return 0;
+ flags=OC_CPU_X86_MMX;
+ if(edx&0x80000000)flags|=OC_CPU_X86_3DNOW;
+ if(edx&0x40000000)flags|=OC_CPU_X86_3DNOWEXT;
+ if(edx&0x00400000)flags|=OC_CPU_X86_MMXEXT;
+ /*Also check for SSE.*/
+ cpuid(1,eax,ebx,ecx,edx);
+ if(edx&0x02000000)flags|=OC_CPU_X86_SSE;
+ }
+ if(edx&0x04000000)flags|=OC_CPU_X86_SSE2;
+ if(ecx&0x00000001)flags|=OC_CPU_X86_PNI;
+ }
+ /* s l u a H r u a t n e C*/
+ else if(ecx==0x736C7561&&edx==0x48727561&&ebx==0x746E6543){
+ /*VIA:*/
+ /*The C7 (and later?) processors support Intel-like cpuid info.*/
+ /*The C3-2 (Nehemiah) cores appear to, as well.*/
+ cpuid(1,eax,ebx,ecx,edx);
+ if(edx&0x00800000){
+ flags=OC_CPU_X86_MMX;
+ if(edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE;
+ if(edx&0x04000000)flags|=OC_CPU_X86_SSE2;
+ if(ecx&0x00000001)flags|=OC_CPU_X86_PNI;
+ }
+ else flags=0;
+ /*The (non-Nehemiah) C3 processors support AMD-like cpuid info.
+ We need to check this even if the Intel test succeeds to pick up 3dnow!
+ support on these processors.*/
+ /*TODO: How about earlier chips?*/
cpuid(0x80000001,eax,ebx,ecx,edx);
- if((edx&0x00800000)==0)return 0;
- flags=OC_CPU_X86_MMX;
+ if(edx&0x00800000)flags|=OC_CPU_X86_MMX;
if(edx&0x80000000)flags|=OC_CPU_X86_3DNOW;
- if(edx&0x40000000)flags|=OC_CPU_X86_3DNOWEXT;
- if(edx&0x00400000)flags|=OC_CPU_X86_MMXEXT;
}
else{
/*Implement me.*/
flags=0;
}
-
-#endif
-
return flags;
}
-
+#endif
Modified: branches/theora-thusnelda/lib/cpu.h
===================================================================
--- branches/theora-thusnelda/lib/cpu.h 2008-05-06 03:04:19 UTC (rev 14838)
+++ branches/theora-thusnelda/lib/cpu.h 2008-05-06 07:50:29 UTC (rev 14839)
@@ -24,6 +24,7 @@
#define OC_CPU_X86_MMXEXT (1<<3)
#define OC_CPU_X86_SSE (1<<4)
#define OC_CPU_X86_SSE2 (1<<5)
+#define OC_CPU_X86_PNI (1<<6)
ogg_uint32_t oc_cpu_flags_get(void);
Modified: branches/theora-thusnelda/lib/enc/mode.c
===================================================================
--- branches/theora-thusnelda/lib/enc/mode.c 2008-05-06 03:04:19 UTC (rev 14838)
+++ branches/theora-thusnelda/lib/enc/mode.c 2008-05-06 07:50:29 UTC (rev 14839)
@@ -488,57 +488,7 @@
}
#include "quant_lookup.h"
-static int find_nonzero_transition(ogg_int16_t *q, ogg_int16_t in){
- int i;
- int val = (abs((int)in)<<1);
- for(i=63;i>=0;i--)
- if( val < q[i])break;
- return i+1;
-}
-/* Don't use it... it is tripping a GCC bug */
-#include<stdio.h>
-int find_nonzero_transition(ogg_int16_t *q, ogg_int16_t in){
- int ret;
- __asm__ (
- ".balign 16 \n"
- "mov $64,%[ret]\n"
- "movd %[in],%%mm0\n"
- "punpcklwd %%mm0,%%mm0\n"
- "punpcklwd %%mm0,%%mm0\n"
- "jmp %=2f\n"
-
- "%=1:\n"
- "sub $8,%[quant]\n"
- "sub $4,%[ret]\n"
- "jz %=3f\n"
-
- "%=2:\n"
- "movq (%[quant]),%%mm1\n"
- "pcmpgtw %%mm0,%%mm1\n"
- "packsswb %%mm1,%%mm1\n"
- "movd %%mm1,%%ecx\n"
- "jecxz %=1b\n"
-
- "not %%ecx\n"
- "jecxz %=3f\n"
- "dec %[ret]\n"
- "shl $8,%%ecx\n"
- "jecxz %=3f\n"
- "dec %[ret]\n"
- "shl $8,%%ecx\n"
- "jecxz %=3f\n"
- "dec %[ret]\n"
-
- "%=3:\n"
- "emms\n"
- :[ret]"=&r"(ret)
- :[quant]"r"(q+60),[in]"r"(abs(in)<<1)
- :"%ecx"
- );
- return ret;
-}
-*/
static void TQB (CP_INSTANCE *cpi, int mode, int fi, mv_t mv, int plane, ogg_int16_t re_q[2][3][64], long *rho_count){
if ( cpi->frag_coded[fi] ) {
int qi = cpi->BaseQ; /* temporary */;
@@ -606,15 +556,19 @@
NXZ; adds one nonzero token; +1
NXN; replaces a zero run with a nonzero; +0 */
int i;
-
+ quant_tables *qq = &(cpi->quant_tables[inter][plane]);
+
for(i=0;i<64;i++){
int ii = dezigzag_index[i];
- int pos = find_nonzero_transition(cpi->quant_tables[inter][plane][i],buffer[ii]);
+ int pos;
+ int val = abs(buffer[ii])<<1;
+ ogg_int16_t *qqq = (*qq)[i];
+ for(pos=64;pos>0;pos--)
+ if(val < qqq[pos-1])break;
/* rho-domain distribution */
rho_count[pos]++;
-
if(qi<pos){
data[i] = 0;
}else{
More information about the commits
mailing list