[xiph-commits] r14814 - trunk/theora/lib

tterribe at svn.xiph.org tterribe at svn.xiph.org
Tue Apr 29 09:54:40 PDT 2008


Author: tterribe
Date: 2008-04-29 09:54:37 -0700 (Tue, 29 Apr 2008)
New Revision: 14814

Modified:
   trunk/theora/lib/cpu.c
   trunk/theora/lib/cpu.h
Log:
Lots of clean-up to the CPU detection code.
VIA detection reworked (should behave properly even with late model C3-2's,
 please complain if it doesn't).
Win32 now properly tries to detect the cpuid instruction, assuming that code
 even works.
Support for Prescott New Instructions (SSE3) detection added.


Modified: trunk/theora/lib/cpu.c
===================================================================
--- trunk/theora/lib/cpu.c	2008-04-29 16:38:03 UTC (rev 14813)
+++ trunk/theora/lib/cpu.c	2008-04-29 16:54:37 UTC (rev 14814)
@@ -9,10 +9,10 @@
  * by the Xiph.Org Foundation http://www.xiph.org/                  *
  *                                                                  *
  ********************************************************************
- 
+
  CPU capability detection for x86 processors.
   Originally written by Rudolf Marek.
-  
+
  function:
   last mod: $Id$
 
@@ -21,163 +21,193 @@
 #include "cpu.h"
 
 #if !defined(USE_ASM)
-
 ogg_uint32_t oc_cpu_flags_get(void){
   return 0;
 }
+#else
 
-#else /* USE_ASM */
-
-# if defined(_MSC_VER)
-/*  Visual C cpuid helper function. For VS2005 we could
-    as well use the _cpuid builtin, but that wouldn't work
-    for VS2003 users, so we do it in inline assembler */
-
-static void oc_cpuid_helper (ogg_uint32_t * CpuInfo, ogg_uint32_t op){
-  _asm {
-    mov eax, [op]
-    mov esi, CpuInfo
-    cpuid
-    mov [esi + 0], eax
-    mov [esi + 4], ebx
-    mov [esi + 8], ecx
-    mov [esi +12], edx
-  }
-}
-
-#  define cpuid(_op,_eax,_ebx,_ecx,_edx) \
-  {                                    \
-    ogg_uint32_t nfo[4];               \
-    oc_cpuid_helper (nfo, (_op));      \
-    (_eax) = nfo[0],(_ebx) = nfo[1];   \
-    (_ecx) = nfo[2],(_edx) = nfo[3];   \
-  }
-
-# elif (defined(__amd64__) || defined(__x86_64__))
-
-#  define cpuid(_op,_eax,_ebx,_ecx,_edx) \
+# if !defined(_MSC_VER)
+#  if defined(__amd64__)||defined(__x86_64__)
+/*On x86-64, gcc seems to be able to figure out how to save %rbx for us when
+   compiling with -fPIC.*/
+#   define cpuid(_op,_eax,_ebx,_ecx,_edx) \
   __asm__ __volatile__( \
-   "push %%rbx\n\t" \
    "cpuid\n\t" \
-   "movl %%ebx,%1\n\t" \
-   "pop  %%rbx\n\t" \
-   :"=a" (_eax), \
-    "=r" (_ebx), \
-    "=c" (_ecx), \
-    "=d" (_edx) \
-   :"a" (_op) \
+   :[eax]"=a"(_eax),[ebx]"=b"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
+   :"a"(_op) \
    :"cc" \
   )
-# else /* x86_32, GCC */
-
-#  define cpuid(_op,_eax,_ebx,_ecx,_edx) \
+#  else
+/*On x86-32, not so much.*/
+#   define cpuid(_op,_eax,_ebx,_ecx,_edx) \
   __asm__ __volatile__( \
-   "pushl %%ebx\n\t" \
+   "xchgl %%ebx,%[ebx]\n\t" \
    "cpuid\n\t" \
-   "movl  %%ebx,%1\n\t" \
-   "popl  %%ebx\n\t" \
-   :"=a" (_eax), \
-    "=r" (_ebx), \
-    "=c" (_ecx), \
-    "=d" (_edx) \
-   :"a" (_op) \
+   "xchgl %%ebx,%[ebx]\n\t" \
+   :[eax]"=a"(_eax),[ebx]"=r"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
+   :"a"(_op) \
    :"cc" \
   )
+#  endif
+# else
+/*Why does MSVC need this complicated rigamarole?
+  At this point I honestly do not care.*/
 
-# endif /* arch switch */
+/*Visual C cpuid helper function.
+  For VS2005 we could as well use the _cpuid builtin, but that wouldn't work
+   for VS2003 users, so we do it in inline assembler.*/
+static void oc_cpuid_helper(ogg_uint32_t _cpu_info[4],ogg_uint32_t _op){
+  _asm{
+    mov eax,[_op]
+    mov esi,_cpu_info
+    cpuid
+    mov [esi+0],eax
+    mov [esi+4],ebx
+    mov [esi+8],ecx
+    mov [esi+12],edx
+  }
+}
 
+#  define cpuid(_op,_eax,_ebx,_ecx,_edx) \
+  do{ \
+    ogg_uint32_t cpu_info[4]; \
+    oc_cpuid_helper(cpu_info,_op); \
+    (_eax) = cpu_info[0]; \
+    (_ebx) = cpu_info[1]; \
+    (_ecx) = cpu_info[2]; \
+    (_edx) = cpu_info[3]; \
+  }while(0)
+
+static void oc_detect_cpuid_helper(ogg_uint32_t *_eax,ogg_uint32_t *_ebx){
+  _asm{
+    pushfd
+    pushfd
+    pop eax
+    mov ebx,eax
+    xor eax,200000h
+    push eax
+    popfd
+    pushfd
+    pop eax
+    popfd
+    mov *_eax,eax
+    mov *_ebx,ebx
+  }
+}
+# endif
+
 ogg_uint32_t oc_cpu_flags_get(void){
-  ogg_uint32_t flags = 0;
+  ogg_uint32_t flags;
   ogg_uint32_t eax;
   ogg_uint32_t ebx;
   ogg_uint32_t ecx;
   ogg_uint32_t edx;
-
-# if !defined(_MSC_VER) && !defined(__amd64__) && !defined(__x86_64__)
-  /* check for cpuid */
+# if !defined(__amd64__)&&!defined(__x86_64__)
+  /*Not all x86-32 chips support cpuid, so we have to check.*/
+#  if !defined(_MSC_VER)
   __asm__ __volatile__(
    "pushfl\n\t"
    "pushfl\n\t"
-   "popl          %0\n\t"
-   "movl          %0,%1\n\t"
-   "xorl   $0x200000,%0\n\t"
-   "pushl         %0\n\t"
+   "popl %[a]\n\t"
+   "movl %[a],%[b]\n\t"
+   "xorl $0x200000,%[a]\n\t"
+   "pushl %[a]\n\t"
    "popfl\n\t"
    "pushfl\n\t"
-   "popl          %0\n\t"
+   "popl %[a]\n\t"
    "popfl\n\t"
-   :"=r" (eax),
-    "=r" (ebx)
+   :[a]"=r"(eax),[b]"=r"(ebx)
    :
    :"cc"
   );
+#  else
+  oc_cpuid_detect_helper(&eax,&ebx);
+#  endif
   /*No cpuid.*/
   if(eax==ebx)return 0;
-# endif /* GCC, x86_32 */
-
+# endif
   cpuid(0,eax,ebx,ecx,edx);
-  if(ebx==0x756e6547&&edx==0x49656e69&&ecx==0x6c65746e ||
-     ebx==0x756e6547&&edx==0x54656e69&&ecx==0x3638784d) {
-    /*Intel:*/
-    /*Transmeta (tested with Crusoe TM5800): */
-inteltest:
+  /*         l e t n          I e n i          u n e G*/
+  if(ecx==0x6C65746E&&edx==0x49656E69&&ebx==0x756E6547||
+   /*      6 8 x M          T e n i          u n e G*/
+   ecx==0x3638784D&&edx==0x54656E69&&ebx==0x756E6547){
+    /*Intel, Transmeta (tested with Crusoe TM5800):*/
     cpuid(1,eax,ebx,ecx,edx);
-    if((edx&0x00800000)==0)return 0;
+    /*If there isn't even MMX, give up.*/
+    if(!(edx&0x00800000))return 0;
     flags=OC_CPU_X86_MMX;
     if(edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE;
     if(edx&0x04000000)flags|=OC_CPU_X86_SSE2;
+    if(ecx&0x00000001)flags|=OC_CPU_X86_PNI;
   }
-  else if(ebx==0x68747541&&edx==0x69746e65&&ecx==0x444d4163 ||
-          ebx==0x646f6547&&edx==0x79622065&&ecx==0x43534e20){
-    /*AMD:*/
-    /*Geode:*/
+  /*              D M A c          i t n e          h t u A*/
+  else if(ecx==0x444D4163&&edx==0x69746E65&&ebx==0x68747541||
+   /*      C S N            y b   e          d o e G*/
+   ecx==0x43534e20&&edx==0x79622065&&ebx==0x646f6547){
+    /*AMD, Geode:*/
     cpuid(0x80000000,eax,ebx,ecx,edx);
-    if(eax<0x80000001)goto inteltest;
-    cpuid(0x80000001,eax,ebx,ecx,edx);
-    if((edx&0x00800000)==0)return 0;
-    flags=OC_CPU_X86_MMX;
-    if(edx&0x80000000)flags|=OC_CPU_X86_3DNOW;
-    if(edx&0x40000000)flags|=OC_CPU_X86_3DNOWEXT;
-    if(edx&0x00400000)flags|=OC_CPU_X86_MMXEXT;
-  } else if (ebx == 0x746e6543&&edx==0x48727561&&ecx==0x736c7561) {
-    /* VIA: */
-
-    /* VIA C7 supports Intel-like cpuid info */
+    if(eax<0x80000001){
+      /*No extended functions supported.
+        Use normal cpuid flags.*/
+      cpuid(1,eax,ebx,ecx,edx);
+      /*If there isn't even MMX, give up.*/
+      if(!(edx&0x00800000))return 0;
+      flags=OC_CPU_X86_MMX;
+      if(edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE;
+    }
+    else{
+      cpuid(0x80000001,eax,ebx,ecx,edx);
+      /*If there isn't even MMX, give up.*/
+      if((edx&0x00800000)==0)return 0;
+      flags=OC_CPU_X86_MMX;
+      if(edx&0x80000000)flags|=OC_CPU_X86_3DNOW;
+      if(edx&0x40000000)flags|=OC_CPU_X86_3DNOWEXT;
+      if(edx&0x00400000)flags|=OC_CPU_X86_MMXEXT;
+      /*Also check for SSE.*/
+      cpuid(1,eax,ebx,ecx,edx);
+      if(edx&0x02000000)flags|=OC_CPU_X86_SSE;
+    }
+    if(edx&0x04000000)flags|=OC_CPU_X86_SSE2;
+    if(ecx&0x00000001)flags|=OC_CPU_X86_PNI;
+  }
+  /*              s l u a          H r u a          t n e C*/
+  else if(ecx==0x736C7561&&edx==0x48727561&&ebx==0x746E6543){
+    /*VIA:*/
+    /*The C7 (and later?) processors support Intel-like cpuid info.*/
+    /*The C3-2 (Nehemiah) cores appear to, as well.*/
     cpuid(1,eax,ebx,ecx,edx);
-    if((edx&0x00800000)) {
+    if(edx&0x00800000){
       flags=OC_CPU_X86_MMX;
       if(edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE;
       if(edx&0x04000000)flags|=OC_CPU_X86_SSE2;
+      if(ecx&0x00000001)flags|=OC_CPU_X86_PNI;
     }
-
-    /* VIA C3 (Samuel, without PadLock) has AMD-like cpuid info */
-    cpuid(0x80000001,eax,ebx,ecx,edx);
-    if((edx&0x00800000)) {
-      flags=OC_CPU_X86_MMX;
-      if(edx&0x80000000) flags|=OC_CPU_X86_3DNOW;
+    else{
+      /*The (non-Nehemiah) C3 processors support AMD-like cpuid info.*/
+      /*How about earlier chips?*/
+      cpuid(0x80000001,eax,ebx,ecx,edx);
+      if(edx&0x00800000){
+        flags=OC_CPU_X86_MMX;
+        if(edx&0x80000000)flags|=OC_CPU_X86_3DNOW;
+      }
     }
-
-    /* What about C3-2 ? don't own one :( */
   }
   else{
     /*Implement me.*/
     flags=0;
   }
-
-# ifdef DEBUG
-  if (flags) {
+# if defined(DEBUG)
+  if(flags){
     TH_DEBUG("vectorized instruction sets supported:");
-    if (flags & OC_CPU_X86_MMX)      TH_DEBUG(" mmx");
-    if (flags & OC_CPU_X86_MMXEXT)   TH_DEBUG(" mmxext");
-    if (flags & OC_CPU_X86_SSE)      TH_DEBUG(" sse");
-    if (flags & OC_CPU_X86_SSE2)     TH_DEBUG(" sse2");
-    if (flags & OC_CPU_X86_3DNOW)    TH_DEBUG(" 3dnow");
-    if (flags & OC_CPU_X86_3DNOWEXT) TH_DEBUG(" 3dnowext");
+    if(flags&OC_CPU_X86_MMX)TH_DEBUG(" mmx");
+    if(flags&OC_CPU_X86_MMXEXT)TH_DEBUG(" mmxext");
+    if(flags&OC_CPU_X86_SSE)TH_DEBUG(" sse");
+    if(flags&OC_CPU_X86_SSE2)TH_DEBUG(" sse2");
+    if(flags&OC_CPU_X86_3DNOW)TH_DEBUG(" 3dnow");
+    if(flags&OC_CPU_X86_3DNOWEXT)TH_DEBUG(" 3dnowext");
     TH_DEBUG("\n");
   }
 # endif
-
   return flags;
 }
-
-#endif /* USE_ASM */
+#endif

Modified: trunk/theora/lib/cpu.h
===================================================================
--- trunk/theora/lib/cpu.h	2008-04-29 16:38:03 UTC (rev 14813)
+++ trunk/theora/lib/cpu.h	2008-04-29 16:54:37 UTC (rev 14814)
@@ -24,6 +24,7 @@
 #define OC_CPU_X86_MMXEXT (1<<3)
 #define OC_CPU_X86_SSE    (1<<4)
 #define OC_CPU_X86_SSE2   (1<<5)
+#define OC_CPU_X86_PNI    (1<<6)
 
 ogg_uint32_t oc_cpu_flags_get(void);
 



More information about the commits mailing list