[xiph-commits] r11451 - in trunk/theora: examples lib lib/x86_32_vs win32 win32/VS2005 win32/VS2005/dump_video win32/VS2005/encoder_example win32/VS2005/libtheora

illiminable at svn.xiph.org illiminable at svn.xiph.org
Sun May 28 11:18:47 PDT 2006


Author: illiminable
Date: 2006-05-28 11:18:26 -0700 (Sun, 28 May 2006)
New Revision: 11451

Added:
   trunk/theora/lib/x86_32_vs/
   trunk/theora/lib/x86_32_vs/cpu.c
   trunk/theora/lib/x86_32_vs/dsp_mmx.c
   trunk/theora/lib/x86_32_vs/fdct_mmx.c
   trunk/theora/lib/x86_32_vs/recon_mmx.c
   trunk/theora/win32/VS2005/dump_video/
   trunk/theora/win32/VS2005/dump_video/dump_video.vcproj
   trunk/theora/win32/VS2005/encoder_example/
   trunk/theora/win32/VS2005/encoder_example/encoder_example.vcproj
   trunk/theora/win32/getopt.c
   trunk/theora/win32/getopt1.c
Modified:
   trunk/theora/examples/dump_video.c
   trunk/theora/examples/encoder_example.c
   trunk/theora/win32/VS2005/libtheora/libtheora.vcproj
Log:
* Make dump_video build on win32
* Make encoder_example build on win32
* Add converted intel/inline masm mmx code
* Change some <> to "" conditionally for Windows when including getopt, since the default copy in libtheora doesn't build on Windows.
* Put a copy of getopt that actually builds on Windows in win32 - probably no reason we can't use this version as the main version (I copied it from libspeex, since I knew that one worked)
* Add a dump_video vs2005 project
* Add an encoder example vs2005 project
* Add a macro for performance data logging (currently only win32)


Modified: trunk/theora/examples/dump_video.c
===================================================================
--- trunk/theora/examples/dump_video.c	2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/examples/dump_video.c	2006-05-28 18:18:26 UTC (rev 11451)
@@ -28,7 +28,9 @@
 #define _FILE_OFFSET_BITS 64
 
 #include <stdio.h>
+#ifndef WIN32 
 #include <unistd.h>
+#endif
 #include <stdlib.h>
 #include <string.h>
 #include <sys/types.h>

Modified: trunk/theora/examples/encoder_example.c
===================================================================
--- trunk/theora/examples/encoder_example.c	2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/examples/encoder_example.c	2006-05-28 18:18:26 UTC (rev 11451)
@@ -21,8 +21,15 @@
 #define _LARGEFILE64_SOURCE
 #define _FILE_OFFSET_BITS 64
 
+/* Define to give performance data win32 only*/
+//#define THEORA_PERF_DATA 
+#ifdef THEORA_PERF_DATA
+#include <windows.h>
+#endif
+
 #ifdef HAVE_CONFIG_H
 # include <config.h>
+
 #endif
 
 #ifndef _REENTRANT
@@ -30,10 +37,16 @@
 #endif
 
 #include <stdio.h>
+#ifndef WIN32
 #include <unistd.h>
+#endif
 #include <stdlib.h>
 #include <string.h>
+#ifndef WIN32
 #include <getopt.h>
+#else
+#include "getopt.h"
+#endif
 #include <time.h>
 #include <math.h>
 #include "theora/theora.h"
@@ -537,15 +550,30 @@
   ogg_int64_t video_bytesout=0;
   double timebase;
 
+
   FILE* outfile = stdout;
 
-#ifdef _WIN32 /* We need to set stdin/stdout to binary mode. Damn windows. */
+#ifdef _WIN32 
+# ifdef THEORA_PERF_DATA
+    LARGE_INTEGER start_time;
+    LARGE_INTEGER final_time;
+
+    LONGLONG elapsed_ticks;
+    LARGE_INTEGER ticks_per_second;
+    
+    LONGLONG elapsed_secs;
+    LONGLONG elapsed_sec_mod;
+    double elapsed_secs_dbl ;
+# endif
+  /* We need to set stdin/stdout to binary mode. Damn windows. */
   /* if we were reading/writing a file, it would also need to in
      binary mode, eg, fopen("file.wav","wb"); */
   /* Beware the evil ifdef. We avoid these where we can, but this one we
      cannot. Don't add any more, you'll probably go to hell if you do. */
   _setmode( _fileno( stdin ), _O_BINARY );
   _setmode( _fileno( stdout ), _O_BINARY );
+
+
 #endif
 
   while((c=getopt_long(argc,argv,optstring,options,&long_option_index))!=EOF){
@@ -621,6 +649,15 @@
     optind++;
   }
 
+
+
+#ifdef THEORA_PERF_DATA
+# ifdef WIN32
+    QueryPerformanceCounter(&start_time);
+# endif
+#endif
+
+
   /* yayness.  Set up Ogg output stream */
   srand(time(NULL));
   {
@@ -848,6 +885,23 @@
 
   fprintf(stderr,"\r   \ndone.\n\n");
 
+#ifdef THEORA_PERF_DATA
+# ifdef WIN32
+    /* Report the encode time on stderr, like the other status output: stdout is the default outfile and may carry the encoded stream. */
+    QueryPerformanceCounter(&final_time);
+    elapsed_ticks = final_time.QuadPart - start_time.QuadPart;
+    QueryPerformanceFrequency(&ticks_per_second);
+    elapsed_secs = elapsed_ticks / ticks_per_second.QuadPart;
+    elapsed_sec_mod = elapsed_ticks % ticks_per_second.QuadPart;
+    elapsed_secs_dbl = (double)elapsed_secs;
+    elapsed_secs_dbl += ((double)elapsed_sec_mod / (double)ticks_per_second.QuadPart);
+    fprintf(stderr, "Encode time = %lld ticks\n", elapsed_ticks);
+    fprintf(stderr, "~%lld and %lld / %lld seconds\n", elapsed_secs, elapsed_sec_mod, ticks_per_second.QuadPart);
+    fprintf(stderr, "~%f seconds\n", elapsed_secs_dbl); /* elapsed_secs_dbl is a double, so %f (not %Lf) */
+# endif
+
+#endif 
+
   return(0);
 
 }

Added: trunk/theora/lib/x86_32_vs/cpu.c
===================================================================
--- trunk/theora/lib/x86_32_vs/cpu.c	2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/lib/x86_32_vs/cpu.c	2006-05-28 18:18:26 UTC (rev 11451)
@@ -0,0 +1,215 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: mcomp.c,v 1.8 2003/12/03 08:59:41 arc Exp $
+
+ ********************************************************************/
+
+#include "cpu.h"
+
+/* Execute the x86 CPUID instruction for leaf `op` and return the four
+ * result registers through the out_* pointers.
+ *
+ * When THEORA_USE_ASM is not defined no instruction is executed and all
+ * four outputs are set to 0, so callers never read indeterminate values. */
+void
+cpuid(ogg_int32_t op, ogg_uint32_t *out_eax, ogg_uint32_t *out_ebx, ogg_uint32_t *out_ecx, ogg_uint32_t *out_edx)
+{
+#if defined(THEORA_USE_ASM)
+    ogg_uint32_t my_eax, my_ebx, my_ecx, my_edx;
+    /* Capture the registers in locals inside the asm block; the stores
+     * through the out_* pointers are done in C afterwards. */
+    __asm {
+        mov     eax, op
+        cpuid
+        mov     my_eax, eax
+        mov     my_ebx, ebx
+        mov     my_ecx, ecx
+        mov     my_edx, edx
+    };
+    *out_eax = my_eax;
+    *out_ebx = my_ebx;
+    *out_ecx = my_ecx;
+    *out_edx = my_edx;
+#else
+    (void)op;
+    *out_eax = *out_ebx = *out_ecx = *out_edx = 0;
+#endif
+//#if defined(__x86_64__)
+//  asm volatile ("pushq %%rbx   \n\t"
+//                "cpuid         \n\t"
+//                "movl %%ebx,%1 \n\t"
+//                "popq %%rbx"        
+//              : "=a" (*eax),         
+//                "=r" (*ebx),         
+//                "=c" (*ecx),         
+//                "=d" (*edx)          
+//              : "a" (op)            
+//              : "cc");
+//#elif defined(__i386__)
+//  asm volatile ("pushl %%ebx   \n\t"
+//                "cpuid         \n\t"
+//                "movl %%ebx,%1 \n\t"
+//                "popl %%ebx"        
+//              : "=a" (*eax),         
+//                "=r" (*ebx),         
+//                "=c" (*ecx),         
+//                "=d" (*edx)          
+//              : "a" (op)            
+//              : "cc");
+//#endif
+}
+
+//#if defined(__x86_64__) || defined(__i386__)
+
+/* Query CPUID and translate the reported features into CPU_X86_* flags.
+ *
+ * Returns 0 when THEORA_USE_ASM is not defined, or when the standard
+ * feature leaf does not report MMX; otherwise a bitmask of the detected
+ * extensions. */
+static ogg_uint32_t cpu_get_flags (void)
+{
+#if !defined(THEORA_USE_ASM)
+    return 0;
+#else
+    ogg_uint32_t reg_eax, reg_ebx, reg_ecx, reg_edx;
+    ogg_uint32_t features = 0;
+
+    /* The probe for CPUID support itself (toggling EFLAGS bit 0x200000 and
+     * comparing) is still disabled in this port, so CPUID is executed
+     * unconditionally.  The intended inline-asm check was:
+     *
+     *     pushf
+     *     pushf
+     *     pop     eax
+     *     mov     ebx, eax
+     *     xor     eax, 200000h
+     *     push    eax
+     *     popf
+     *     pushf
+     *     pop     eax
+     *     popf
+     *     mov     my_eax, eax
+     *     mov     my_ebx, ebx
+     *
+     * followed by "return 0" when the two values are equal. */
+
+    /* Standard feature leaf (the original labels this section "Intel"). */
+    cpuid(1, &reg_eax, &reg_ebx, &reg_ecx, &reg_edx);
+    if (!(reg_edx & 0x00800000)) {
+        /* No MMX: nothing else is considered. */
+        return 0;
+    }
+    features |= CPU_X86_MMX;
+    if (reg_edx & 0x02000000) {
+        features |= CPU_X86_MMXEXT | CPU_X86_SSE;
+    }
+    if (reg_edx & 0x04000000) {
+        features |= CPU_X86_SSE2;
+    }
+
+    /* Extended leaves (labelled "AMD" in the original): only read
+     * 0x80000001 when leaf 0x80000000 says it exists. */
+    cpuid(0x80000000, &reg_eax, &reg_ebx, &reg_ecx, &reg_edx);
+    if (reg_eax < 0x80000001) {
+        return features;
+    }
+    cpuid(0x80000001, &reg_eax, &reg_ebx, &reg_ecx, &reg_edx);
+    if (reg_edx & 0x00800000) {
+        features |= CPU_X86_MMX;
+        if (reg_edx & 0x80000000) features |= CPU_X86_3DNOW;
+        if (reg_edx & 0x40000000) features |= CPU_X86_3DNOWEXT;
+        if (reg_edx & 0x00400000) features |= CPU_X86_MMXEXT;
+    }
+    return features;
+#endif
+}
+
+//  ogg_uint32_t eax, ebx, ecx, edx;
+//  ogg_uint32_t flags = 0;
+//
+//  /* check for cpuid support on i386 */
+//#if defined(__i386__)
+//  asm volatile ("pushfl              \n\t"
+//                "pushfl              \n\t"
+//                "popl %0             \n\t"
+//                "movl %0,%1          \n\t"
+//                "xorl $0x200000,%0   \n\t"
+//                "pushl %0            \n\t"
+//                "popfl               \n\t"
+//                "pushfl              \n\t"
+//                "popl %0             \n\t"
+//                "popfl"
+//              : "=r" (eax),
+//                "=r" (ebx)
+//              :
+//              : "cc");
+//
+//  if (eax == ebx)             /* no cpuid */
+//    return 0;
+//#endif
+//
+//  /*cpuid(0, &eax, &ebx, &ecx, &edx); */
+//  /* Intel */
+//  cpuid(1, &eax, &ebx, &ecx, &edx);
+//  if ((edx & 0x00800000) == 0)
+//    return 0;
+//  flags |= CPU_X86_MMX;
+//  if (edx & 0x02000000)
+//    flags |= CPU_X86_MMXEXT | CPU_X86_SSE;
+//  if (edx & 0x04000000)
+//    flags |= CPU_X86_SSE2;
+//
+//  /* AMD */
+//  cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
+//  if(eax >= 0x80000001) {
+//    cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+//    if ((edx & 0x00800000) != 0) {
+//      flags |= CPU_X86_MMX;
+//      if (edx & 0x80000000)
+//        flags |= CPU_X86_3DNOW;
+//      if (edx & 0x40000000)
+//        flags |= CPU_X86_3DNOWEXT;
+//      if (edx & 0x00400000)
+//        flags |= CPU_X86_MMXEXT;
+//    }
+//  }
+//
+//  return flags;
+//}
+//
+//#else /* not an i386 or x86_64 */
+//
+//static ogg_uint32_t cpu_get_flags (void) {
+//  return 0;
+//}
+//
+//#endif
+
+/* Run cpu_get_flags(), list every detected instruction-set extension
+ * through TH_DEBUG, and hand the flag mask back to the caller. */
+ogg_uint32_t cpu_init (void)
+{
+  const ogg_uint32_t detected = cpu_get_flags();
+  if (detected != 0) {
+    TH_DEBUG("vectorized instruction sets supported:");
+    if (detected & CPU_X86_MMX)      { TH_DEBUG(" mmx"); }
+    if (detected & CPU_X86_MMXEXT)   { TH_DEBUG(" mmxext"); }
+    if (detected & CPU_X86_SSE)      { TH_DEBUG(" sse"); }
+    if (detected & CPU_X86_SSE2)     { TH_DEBUG(" sse2"); }
+    if (detected & CPU_X86_3DNOW)    { TH_DEBUG(" 3dnow"); }
+    if (detected & CPU_X86_3DNOWEXT) { TH_DEBUG(" 3dnowext"); }
+    TH_DEBUG("\n");
+  }
+  return detected;
+}

Added: trunk/theora/lib/x86_32_vs/dsp_mmx.c
===================================================================
--- trunk/theora/lib/x86_32_vs/dsp_mmx.c	2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/lib/x86_32_vs/dsp_mmx.c	2006-05-28 18:18:26 UTC (rev 11451)
@@ -0,0 +1,1606 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: mcomp.c,v 1.8 2003/12/03 08:59:41 arc Exp $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+
+#include "codec_internal.h"
+#include "dsp.h"
+
+#if 0
+//These are to let me selectively enable the C versions, these are needed
+#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2)
+#define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b)))
+#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b))))
+#endif
+
+
+static const ogg_int64_t V128 = 0x0080008000800080LL;
+
+static void sub8x8__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr,
+                  ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine,
+                  ogg_uint32_t ReconPixelsPerLine) 
+{
+    /* For 8 rows of 8 bytes: widen FiltPtr and ReconPtr bytes to 16-bit words and store FiltPtr - ReconPtr as 8 words per row in DctInputPtr. */
+    /* Change the "#if 0" below to non-zero to build the reference C version instead of the MMX one. */
+#if 0
+  int i;
+
+  /* For each block row */
+  for (i=8; i; i--) {
+    DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], ReconPtr[0]);
+    DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], ReconPtr[1]);
+    DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], ReconPtr[2]);
+    DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], ReconPtr[3]);
+    DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], ReconPtr[4]);
+    DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], ReconPtr[5]);
+    DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], ReconPtr[6]);
+    DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], ReconPtr[7]);
+
+    /* Start next row */
+    FiltPtr += PixelsPerLine;
+    ReconPtr += ReconPixelsPerLine;
+    DctInputPtr += 8;
+  }
+#else
+    __asm {
+        align 16
+
+        pxor		mm7, mm7	
+        /* mm7 is left at zero for the whole block; the unpack instructions interleave it as the high byte of each word. */
+        mov     eax, FiltPtr
+        mov     ebx, ReconPtr
+        mov     edx, DctInputPtr
+        /* eax walks the FiltPtr rows, ebx the ReconPtr rows, edx the 16-bit output (16 bytes per row). */
+     /* You can't use rept in inline masm and macro parsing seems screwed with inline asm*/		
+     
+     /* ITERATION 1 */
+        movq		mm0, [eax]		/* mm0 = FiltPtr */
+        movq		mm1, [ebx]		/* mm1 = ReconPtr */
+        movq		mm2, mm0		/* dup to prepare for up conversion */
+        movq		mm3, mm1		/* dup to prepare for up conversion */
+        /* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
+        punpcklbw		mm1, mm7		/* mm1 = INT16(ReconPtr) */
+        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
+        punpckhbw		mm3, mm7		/* mm3 = INT16(ReconPtr) */
+        /* start calculation */
+        psubw		mm0, mm1		/* mm0 = FiltPtr - ReconPtr */
+        psubw		mm2, mm3		/* mm2 = FiltPtr - ReconPtr */
+        movq		[edx], mm0		/* write answer out */
+        movq		[8 + edx], mm2		/* write answer out */
+        /* Increment pointers */
+        add		edx, 16		
+        add		eax, PixelsPerLine		
+        add		ebx, ReconPixelsPerLine
+
+
+     /* ITERATION 2 */
+        movq		mm0, [eax]		/* mm0 = FiltPtr */
+        movq		mm1, [ebx]		/* mm1 = ReconPtr */
+        movq		mm2, mm0		/* dup to prepare for up conversion */
+        movq		mm3, mm1		/* dup to prepare for up conversion */
+        /* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
+        punpcklbw		mm1, mm7		/* mm1 = INT16(ReconPtr) */
+        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
+        punpckhbw		mm3, mm7		/* mm3 = INT16(ReconPtr) */
+        /* start calculation */
+        psubw		mm0, mm1		/* mm0 = FiltPtr - ReconPtr */
+        psubw		mm2, mm3		/* mm2 = FiltPtr - ReconPtr */
+        movq		[edx], mm0		/* write answer out */
+        movq		[8 + edx], mm2		/* write answer out */
+        /* Increment pointers */
+        add		edx, 16		
+        add		eax, PixelsPerLine		
+        add		ebx, ReconPixelsPerLine
+
+
+     /* ITERATION 3 */
+        movq		mm0, [eax]		/* mm0 = FiltPtr */
+        movq		mm1, [ebx]		/* mm1 = ReconPtr */
+        movq		mm2, mm0		/* dup to prepare for up conversion */
+        movq		mm3, mm1		/* dup to prepare for up conversion */
+        /* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
+        punpcklbw		mm1, mm7		/* mm1 = INT16(ReconPtr) */
+        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
+        punpckhbw		mm3, mm7		/* mm3 = INT16(ReconPtr) */
+        /* start calculation */
+        psubw		mm0, mm1		/* mm0 = FiltPtr - ReconPtr */
+        psubw		mm2, mm3		/* mm2 = FiltPtr - ReconPtr */
+        movq		[edx], mm0		/* write answer out */
+        movq		[8 + edx], mm2		/* write answer out */
+        /* Increment pointers */
+        add		edx, 16		
+        add		eax, PixelsPerLine		
+        add		ebx, ReconPixelsPerLine
+
+
+     /* ITERATION 4 */
+        movq		mm0, [eax]		/* mm0 = FiltPtr */
+        movq		mm1, [ebx]		/* mm1 = ReconPtr */
+        movq		mm2, mm0		/* dup to prepare for up conversion */
+        movq		mm3, mm1		/* dup to prepare for up conversion */
+        /* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
+        punpcklbw		mm1, mm7		/* mm1 = INT16(ReconPtr) */
+        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
+        punpckhbw		mm3, mm7		/* mm3 = INT16(ReconPtr) */
+        /* start calculation */
+        psubw		mm0, mm1		/* mm0 = FiltPtr - ReconPtr */
+        psubw		mm2, mm3		/* mm2 = FiltPtr - ReconPtr */
+        movq		[edx], mm0		/* write answer out */
+        movq		[8 + edx], mm2		/* write answer out */
+        /* Increment pointers */
+        add		edx, 16		
+        add		eax, PixelsPerLine		
+        add		ebx, ReconPixelsPerLine
+
+
+     /* ITERATION 5 */
+        movq		mm0, [eax]		/* mm0 = FiltPtr */
+        movq		mm1, [ebx]		/* mm1 = ReconPtr */
+        movq		mm2, mm0		/* dup to prepare for up conversion */
+        movq		mm3, mm1		/* dup to prepare for up conversion */
+        /* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
+        punpcklbw		mm1, mm7		/* mm1 = INT16(ReconPtr) */
+        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
+        punpckhbw		mm3, mm7		/* mm3 = INT16(ReconPtr) */
+        /* start calculation */
+        psubw		mm0, mm1		/* mm0 = FiltPtr - ReconPtr */
+        psubw		mm2, mm3		/* mm2 = FiltPtr - ReconPtr */
+        movq		[edx], mm0		/* write answer out */
+        movq		[8 + edx], mm2		/* write answer out */
+        /* Increment pointers */
+        add		edx, 16		
+        add		eax, PixelsPerLine		
+        add		ebx, ReconPixelsPerLine
+
+
+     /* ITERATION 6 */
+        movq		mm0, [eax]		/* mm0 = FiltPtr */
+        movq		mm1, [ebx]		/* mm1 = ReconPtr */
+        movq		mm2, mm0		/* dup to prepare for up conversion */
+        movq		mm3, mm1		/* dup to prepare for up conversion */
+        /* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
+        punpcklbw		mm1, mm7		/* mm1 = INT16(ReconPtr) */
+        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
+        punpckhbw		mm3, mm7		/* mm3 = INT16(ReconPtr) */
+        /* start calculation */
+        psubw		mm0, mm1		/* mm0 = FiltPtr - ReconPtr */
+        psubw		mm2, mm3		/* mm2 = FiltPtr - ReconPtr */
+        movq		[edx], mm0		/* write answer out */
+        movq		[8 + edx], mm2		/* write answer out */
+        /* Increment pointers */
+        add		edx, 16		
+        add		eax, PixelsPerLine		
+        add		ebx, ReconPixelsPerLine
+
+
+     /* ITERATION 7 */
+        movq		mm0, [eax]		/* mm0 = FiltPtr */
+        movq		mm1, [ebx]		/* mm1 = ReconPtr */
+        movq		mm2, mm0		/* dup to prepare for up conversion */
+        movq		mm3, mm1		/* dup to prepare for up conversion */
+        /* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
+        punpcklbw		mm1, mm7		/* mm1 = INT16(ReconPtr) */
+        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
+        punpckhbw		mm3, mm7		/* mm3 = INT16(ReconPtr) */
+        /* start calculation */
+        psubw		mm0, mm1		/* mm0 = FiltPtr - ReconPtr */
+        psubw		mm2, mm3		/* mm2 = FiltPtr - ReconPtr */
+        movq		[edx], mm0		/* write answer out */
+        movq		[8 + edx], mm2		/* write answer out */
+        /* Increment pointers */
+        add		edx, 16		
+        add		eax, PixelsPerLine		
+        add		ebx, ReconPixelsPerLine
+
+
+     /* ITERATION 8 */
+        movq		mm0, [eax]		/* mm0 = FiltPtr */
+        movq		mm1, [ebx]		/* mm1 = ReconPtr */
+        movq		mm2, mm0		/* dup to prepare for up conversion */
+        movq		mm3, mm1		/* dup to prepare for up conversion */
+        /* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
+        punpcklbw		mm1, mm7		/* mm1 = INT16(ReconPtr) */
+        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
+        punpckhbw		mm3, mm7		/* mm3 = INT16(ReconPtr) */
+        /* start calculation */
+        psubw		mm0, mm1		/* mm0 = FiltPtr - ReconPtr */
+        psubw		mm2, mm3		/* mm2 = FiltPtr - ReconPtr */
+        movq		[edx], mm0		/* write answer out */
+        movq		[8 + edx], mm2		/* write answer out */
+        /* Increment pointers */
+        add		edx, 16		
+        add		eax, PixelsPerLine		
+        add		ebx, ReconPixelsPerLine
+        /* NOTE(review): the three pointer increments after the eighth row are never used. */
+        /* NOTE(review): no emms is executed in this function; presumably a caller clears the MMX state -- confirm. */
+
+     
+
+    };
+ 
+#endif
+}
+
+static void sub8x8_128__mmx (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
+                      ogg_uint32_t PixelsPerLine) 
+{
+    /* For 8 rows of 8 bytes: widen FiltPtr bytes to 16-bit words, subtract 128 from each, and store 8 words per row in DctInputPtr. */
+#if 0
+  int i;
+  /* For each block row */
+  for (i=8; i; i--) {
+    /* INTRA mode so code raw image data */
+    /* We convert the data to 8 bit signed (by subtracting 128) as
+       this reduces the internal precision requirements in the DCT
+       transform. */
+    DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], 128);
+    DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], 128);
+    DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], 128);
+    DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], 128);
+    DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], 128);
+    DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], 128);
+    DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], 128);
+    DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], 128);
+
+    /* Start next row */
+    FiltPtr += PixelsPerLine;
+    DctInputPtr += 8;
+  }
+
+#else
+    __asm {
+        align 16
+
+        pxor		mm7, mm7		
+        /* mm7 is left at zero for the whole block; the unpack instructions interleave it as the high byte of each word. */
+        mov         eax, FiltPtr
+        mov         ebx, DctInputPtr
+        /* eax walks the FiltPtr rows (PixelsPerLine bytes apart), ebx the 16-bit output (16 bytes per row). */
+        movq		mm1, V128
+        /* mm1 = V128 (0x0080008000800080): the value 128 in each of the four 16-bit words; it is not modified below. */
+        /*  ITERATION 1 */		
+        movq		mm0, [eax]		/* mm0 = FiltPtr */
+        movq		mm2, mm0		/* dup to prepare for up conversion */
+        /* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
+        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
+        /* start calculation */
+        psubw		mm0, mm1		/* mm0 = FiltPtr - 128 */
+        psubw		mm2, mm1		/* mm2 = FiltPtr - 128 */
+        movq		[ebx], mm0		/* write answer out */
+        movq		[8 + ebx], mm2		/* write answer out */
+        /* Increment pointers */
+        add		ebx, 16		
+        add		eax, PixelsPerLine	
+
+
+        /*  ITERATION 2 */		
+        movq		mm0, [eax]		/* mm0 = FiltPtr */
+        movq		mm2, mm0		/* dup to prepare for up conversion */
+        /* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
+        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
+        /* start calculation */
+        psubw		mm0, mm1		/* mm0 = FiltPtr - 128 */
+        psubw		mm2, mm1		/* mm2 = FiltPtr - 128 */
+        movq		[ebx], mm0		/* write answer out */
+        movq		[8 + ebx], mm2		/* write answer out */
+        /* Increment pointers */
+        add		ebx, 16		
+        add		eax, PixelsPerLine	
+
+
+        /*  ITERATION 3 */		
+        movq		mm0, [eax]		/* mm0 = FiltPtr */
+        movq		mm2, mm0		/* dup to prepare for up conversion */
+        /* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
+        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
+        /* start calculation */
+        psubw		mm0, mm1		/* mm0 = FiltPtr - 128 */
+        psubw		mm2, mm1		/* mm2 = FiltPtr - 128 */
+        movq		[ebx], mm0		/* write answer out */
+        movq		[8 + ebx], mm2		/* write answer out */
+        /* Increment pointers */
+        add		ebx, 16		
+        add		eax, PixelsPerLine	
+
+
+        /*  ITERATION 4 */		
+        movq		mm0, [eax]		/* mm0 = FiltPtr */
+        movq		mm2, mm0		/* dup to prepare for up conversion */
+        /* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
+        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
+        /* start calculation */
+        psubw		mm0, mm1		/* mm0 = FiltPtr - 128 */
+        psubw		mm2, mm1		/* mm2 = FiltPtr - 128 */
+        movq		[ebx], mm0		/* write answer out */
+        movq		[8 + ebx], mm2		/* write answer out */
+        /* Increment pointers */
+        add		ebx, 16		
+        add		eax, PixelsPerLine	
+
+
+        /*  ITERATION 5 */		
+        movq		mm0, [eax]		/* mm0 = FiltPtr */
+        movq		mm2, mm0		/* dup to prepare for up conversion */
+        /* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
+        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
+        /* start calculation */
+        psubw		mm0, mm1		/* mm0 = FiltPtr - 128 */
+        psubw		mm2, mm1		/* mm2 = FiltPtr - 128 */
+        movq		[ebx], mm0		/* write answer out */
+        movq		[8 + ebx], mm2		/* write answer out */
+        /* Increment pointers */
+        add		ebx, 16		
+        add		eax, PixelsPerLine	
+
+
+        /*  ITERATION 6 */		
+        movq		mm0, [eax]		/* mm0 = FiltPtr */
+        movq		mm2, mm0		/* dup to prepare for up conversion */
+        /* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
+        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
+        /* start calculation */
+        psubw		mm0, mm1		/* mm0 = FiltPtr - 128 */
+        psubw		mm2, mm1		/* mm2 = FiltPtr - 128 */
+        movq		[ebx], mm0		/* write answer out */
+        movq		[8 + ebx], mm2		/* write answer out */
+        /* Increment pointers */
+        add		ebx, 16		
+        add		eax, PixelsPerLine	
+
+
+        /*  ITERATION 7 */		
+        movq		mm0, [eax]		/* mm0 = FiltPtr */
+        movq		mm2, mm0		/* dup to prepare for up conversion */
+        /* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
+        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
+        /* start calculation */
+        psubw		mm0, mm1		/* mm0 = FiltPtr - 128 */
+        psubw		mm2, mm1		/* mm2 = FiltPtr - 128 */
+        movq		[ebx], mm0		/* write answer out */
+        movq		[8 + ebx], mm2		/* write answer out */
+        /* Increment pointers */
+        add		ebx, 16		
+        add		eax, PixelsPerLine	
+
+
+        /*  ITERATION 8 */		
+        movq		mm0, [eax]		/* mm0 = FiltPtr */
+        movq		mm2, mm0		/* dup to prepare for up conversion */
+        /* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
+        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
+        /* start calculation */
+        psubw		mm0, mm1		/* mm0 = FiltPtr - 128 */
+        psubw		mm2, mm1		/* mm2 = FiltPtr - 128 */
+        movq		[ebx], mm0		/* write answer out */
+        movq		[8 + ebx], mm2		/* write answer out */
+        /* Increment pointers */
+        add		ebx, 16		
+        add		eax, PixelsPerLine	
+        /* NOTE(review): the increments after the eighth row are unused, and no emms is executed here; presumably a caller clears the MMX state -- confirm. */
+    };
+ 
+#endif
+}
+
+
+
+
+static void sub8x8avg2__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr1,
+                     unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr,
+                     ogg_uint32_t PixelsPerLine,
+                     ogg_uint32_t ReconPixelsPerLine) 
+{
+
+#if 0
+  int i;
+
+  /* For each block row */
+  for (i=8; i; i--) {
+    DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], DSP_OP_AVG (ReconPtr1[0], ReconPtr2[0]));
+    DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], DSP_OP_AVG (ReconPtr1[1], ReconPtr2[1]));
+    DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], DSP_OP_AVG (ReconPtr1[2], ReconPtr2[2]));
+    DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], DSP_OP_AVG (ReconPtr1[3], ReconPtr2[3]));
+    DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], DSP_OP_AVG (ReconPtr1[4], ReconPtr2[4]));
+    DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], DSP_OP_AVG (ReconPtr1[5], ReconPtr2[5]));
+    DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], DSP_OP_AVG (ReconPtr1[6], ReconPtr2[6]));
+    DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], DSP_OP_AVG (ReconPtr1[7], ReconPtr2[7]));
+
+    /* Start next row */
+    FiltPtr += PixelsPerLine;
+    ReconPtr1 += ReconPixelsPerLine;
+    ReconPtr2 += ReconPixelsPerLine;
+    DctInputPtr += 8;
+  }
+#else
+
+    __asm {
+        align 16
+
+            pxor        mm7, mm7
+
+        mov         eax, FiltPtr
+        mov         ebx, ReconPtr1
+        mov         ecx, ReconPtr2
+        mov         edx, DctInputPtr
+
+        /*  ITERATION 1 */	
+        movq		mm0, [eax]		;	/* mm0 = FiltPtr */
+        movq		mm1, [ebx]		;	/* mm1 = ReconPtr1 */
+        movq		mm4, [ecx]		;	/* mm1 = ReconPtr2 */
+        movq		mm2, mm0		;	/* dup to prepare for up conversion */
+        movq		mm3, mm1		;	/* dup to prepare for up conversion */
+        movq		mm5, mm4		;	/* dup to prepare for up conversion */
+	        ;	/* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		;	/* mm0 = INT16(FiltPtr) */
+        punpcklbw		mm1, mm7		;	/* mm1 = INT16(ReconPtr1) */
+        punpcklbw		mm4, mm7		;	/* mm1 = INT16(ReconPtr2) */
+        punpckhbw		mm2, mm7		;	/* mm2 = INT16(FiltPtr) */
+        punpckhbw		mm3, mm7		;	/* mm3 = INT16(ReconPtr1) */
+        punpckhbw		mm5, mm7		;	/* mm3 = INT16(ReconPtr2) */
+	        ;	/* average ReconPtr1 and ReconPtr2 */
+        paddw		mm1, mm4		;	/* mm1 = ReconPtr1 + ReconPtr2 */
+        paddw		mm3, mm5		;	/* mm3 = ReconPtr1 + ReconPtr2 */
+        psrlw		mm1, 1		;	/* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
+        psrlw		mm3, 1		;	/* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
+        psubw		mm0, mm1		;	/* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+        psubw		mm2, mm3		;	/* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+        movq		[edx], mm0		;	/* write answer out */
+        movq		[8 + edx], mm2		;	/* write answer out */
+	        ;	/* Increment pointers */
+        add		edx, 16		;	
+        add		eax, PixelsPerLine		;	
+        add		ebx, ReconPixelsPerLine		;	
+        add		ecx, ReconPixelsPerLine		;	
+	
+
+        /*  ITERATION 2 */	
+        movq		mm0, [eax]		;	/* mm0 = FiltPtr */
+        movq		mm1, [ebx]		;	/* mm1 = ReconPtr1 */
+        movq		mm4, [ecx]		;	/* mm1 = ReconPtr2 */
+        movq		mm2, mm0		;	/* dup to prepare for up conversion */
+        movq		mm3, mm1		;	/* dup to prepare for up conversion */
+        movq		mm5, mm4		;	/* dup to prepare for up conversion */
+	        ;	/* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		;	/* mm0 = INT16(FiltPtr) */
+        punpcklbw		mm1, mm7		;	/* mm1 = INT16(ReconPtr1) */
+        punpcklbw		mm4, mm7		;	/* mm1 = INT16(ReconPtr2) */
+        punpckhbw		mm2, mm7		;	/* mm2 = INT16(FiltPtr) */
+        punpckhbw		mm3, mm7		;	/* mm3 = INT16(ReconPtr1) */
+        punpckhbw		mm5, mm7		;	/* mm3 = INT16(ReconPtr2) */
+	        ;	/* average ReconPtr1 and ReconPtr2 */
+        paddw		mm1, mm4		;	/* mm1 = ReconPtr1 + ReconPtr2 */
+        paddw		mm3, mm5		;	/* mm3 = ReconPtr1 + ReconPtr2 */
+        psrlw		mm1, 1		;	/* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
+        psrlw		mm3, 1		;	/* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
+        psubw		mm0, mm1		;	/* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+        psubw		mm2, mm3		;	/* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+        movq		[edx], mm0		;	/* write answer out */
+        movq		[8 + edx], mm2		;	/* write answer out */
+	        ;	/* Increment pointers */
+        add		edx, 16		;	
+        add		eax, PixelsPerLine		;	
+        add		ebx, ReconPixelsPerLine		;	
+        add		ecx, ReconPixelsPerLine		;	
+
+
+        /*  ITERATION 3 */	
+        movq		mm0, [eax]		;	/* mm0 = FiltPtr */
+        movq		mm1, [ebx]		;	/* mm1 = ReconPtr1 */
+        movq		mm4, [ecx]		;	/* mm1 = ReconPtr2 */
+        movq		mm2, mm0		;	/* dup to prepare for up conversion */
+        movq		mm3, mm1		;	/* dup to prepare for up conversion */
+        movq		mm5, mm4		;	/* dup to prepare for up conversion */
+	        ;	/* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		;	/* mm0 = INT16(FiltPtr) */
+        punpcklbw		mm1, mm7		;	/* mm1 = INT16(ReconPtr1) */
+        punpcklbw		mm4, mm7		;	/* mm1 = INT16(ReconPtr2) */
+        punpckhbw		mm2, mm7		;	/* mm2 = INT16(FiltPtr) */
+        punpckhbw		mm3, mm7		;	/* mm3 = INT16(ReconPtr1) */
+        punpckhbw		mm5, mm7		;	/* mm3 = INT16(ReconPtr2) */
+	        ;	/* average ReconPtr1 and ReconPtr2 */
+        paddw		mm1, mm4		;	/* mm1 = ReconPtr1 + ReconPtr2 */
+        paddw		mm3, mm5		;	/* mm3 = ReconPtr1 + ReconPtr2 */
+        psrlw		mm1, 1		;	/* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
+        psrlw		mm3, 1		;	/* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
+        psubw		mm0, mm1		;	/* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+        psubw		mm2, mm3		;	/* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+        movq		[edx], mm0		;	/* write answer out */
+        movq		[8 + edx], mm2		;	/* write answer out */
+	        ;	/* Increment pointers */
+        add		edx, 16		;	
+        add		eax, PixelsPerLine		;	
+        add		ebx, ReconPixelsPerLine		;	
+        add		ecx, ReconPixelsPerLine		;	
+
+
+        /*  ITERATION 4 */	
+        movq		mm0, [eax]		;	/* mm0 = FiltPtr */
+        movq		mm1, [ebx]		;	/* mm1 = ReconPtr1 */
+        movq		mm4, [ecx]		;	/* mm1 = ReconPtr2 */
+        movq		mm2, mm0		;	/* dup to prepare for up conversion */
+        movq		mm3, mm1		;	/* dup to prepare for up conversion */
+        movq		mm5, mm4		;	/* dup to prepare for up conversion */
+	        ;	/* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		;	/* mm0 = INT16(FiltPtr) */
+        punpcklbw		mm1, mm7		;	/* mm1 = INT16(ReconPtr1) */
+        punpcklbw		mm4, mm7		;	/* mm1 = INT16(ReconPtr2) */
+        punpckhbw		mm2, mm7		;	/* mm2 = INT16(FiltPtr) */
+        punpckhbw		mm3, mm7		;	/* mm3 = INT16(ReconPtr1) */
+        punpckhbw		mm5, mm7		;	/* mm3 = INT16(ReconPtr2) */
+	        ;	/* average ReconPtr1 and ReconPtr2 */
+        paddw		mm1, mm4		;	/* mm1 = ReconPtr1 + ReconPtr2 */
+        paddw		mm3, mm5		;	/* mm3 = ReconPtr1 + ReconPtr2 */
+        psrlw		mm1, 1		;	/* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
+        psrlw		mm3, 1		;	/* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
+        psubw		mm0, mm1		;	/* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+        psubw		mm2, mm3		;	/* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+        movq		[edx], mm0		;	/* write answer out */
+        movq		[8 + edx], mm2		;	/* write answer out */
+	        ;	/* Increment pointers */
+        add		edx, 16		;	
+        add		eax, PixelsPerLine		;	
+        add		ebx, ReconPixelsPerLine		;	
+        add		ecx, ReconPixelsPerLine		;	
+
+
+        /*  ITERATION 5 */	
+        movq		mm0, [eax]		;	/* mm0 = FiltPtr */
+        movq		mm1, [ebx]		;	/* mm1 = ReconPtr1 */
+        movq		mm4, [ecx]		;	/* mm1 = ReconPtr2 */
+        movq		mm2, mm0		;	/* dup to prepare for up conversion */
+        movq		mm3, mm1		;	/* dup to prepare for up conversion */
+        movq		mm5, mm4		;	/* dup to prepare for up conversion */
+	        ;	/* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		;	/* mm0 = INT16(FiltPtr) */
+        punpcklbw		mm1, mm7		;	/* mm1 = INT16(ReconPtr1) */
+        punpcklbw		mm4, mm7		;	/* mm1 = INT16(ReconPtr2) */
+        punpckhbw		mm2, mm7		;	/* mm2 = INT16(FiltPtr) */
+        punpckhbw		mm3, mm7		;	/* mm3 = INT16(ReconPtr1) */
+        punpckhbw		mm5, mm7		;	/* mm3 = INT16(ReconPtr2) */
+	        ;	/* average ReconPtr1 and ReconPtr2 */
+        paddw		mm1, mm4		;	/* mm1 = ReconPtr1 + ReconPtr2 */
+        paddw		mm3, mm5		;	/* mm3 = ReconPtr1 + ReconPtr2 */
+        psrlw		mm1, 1		;	/* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
+        psrlw		mm3, 1		;	/* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
+        psubw		mm0, mm1		;	/* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+        psubw		mm2, mm3		;	/* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+        movq		[edx], mm0		;	/* write answer out */
+        movq		[8 + edx], mm2		;	/* write answer out */
+	        ;	/* Increment pointers */
+        add		edx, 16		;	
+        add		eax, PixelsPerLine		;	
+        add		ebx, ReconPixelsPerLine		;	
+        add		ecx, ReconPixelsPerLine		;	
+
+
+        /*  ITERATION 6 */	
+        movq		mm0, [eax]		;	/* mm0 = FiltPtr */
+        movq		mm1, [ebx]		;	/* mm1 = ReconPtr1 */
+        movq		mm4, [ecx]		;	/* mm1 = ReconPtr2 */
+        movq		mm2, mm0		;	/* dup to prepare for up conversion */
+        movq		mm3, mm1		;	/* dup to prepare for up conversion */
+        movq		mm5, mm4		;	/* dup to prepare for up conversion */
+	        ;	/* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		;	/* mm0 = INT16(FiltPtr) */
+        punpcklbw		mm1, mm7		;	/* mm1 = INT16(ReconPtr1) */
+        punpcklbw		mm4, mm7		;	/* mm1 = INT16(ReconPtr2) */
+        punpckhbw		mm2, mm7		;	/* mm2 = INT16(FiltPtr) */
+        punpckhbw		mm3, mm7		;	/* mm3 = INT16(ReconPtr1) */
+        punpckhbw		mm5, mm7		;	/* mm3 = INT16(ReconPtr2) */
+	        ;	/* average ReconPtr1 and ReconPtr2 */
+        paddw		mm1, mm4		;	/* mm1 = ReconPtr1 + ReconPtr2 */
+        paddw		mm3, mm5		;	/* mm3 = ReconPtr1 + ReconPtr2 */
+        psrlw		mm1, 1		;	/* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
+        psrlw		mm3, 1		;	/* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
+        psubw		mm0, mm1		;	/* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+        psubw		mm2, mm3		;	/* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+        movq		[edx], mm0		;	/* write answer out */
+        movq		[8 + edx], mm2		;	/* write answer out */
+	        ;	/* Increment pointers */
+        add		edx, 16		;	
+        add		eax, PixelsPerLine		;	
+        add		ebx, ReconPixelsPerLine		;	
+        add		ecx, ReconPixelsPerLine		;	
+
+
+        /*  ITERATION 7 */	
+        movq		mm0, [eax]		;	/* mm0 = FiltPtr */
+        movq		mm1, [ebx]		;	/* mm1 = ReconPtr1 */
+        movq		mm4, [ecx]		;	/* mm1 = ReconPtr2 */
+        movq		mm2, mm0		;	/* dup to prepare for up conversion */
+        movq		mm3, mm1		;	/* dup to prepare for up conversion */
+        movq		mm5, mm4		;	/* dup to prepare for up conversion */
+	        ;	/* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		;	/* mm0 = INT16(FiltPtr) */
+        punpcklbw		mm1, mm7		;	/* mm1 = INT16(ReconPtr1) */
+        punpcklbw		mm4, mm7		;	/* mm1 = INT16(ReconPtr2) */
+        punpckhbw		mm2, mm7		;	/* mm2 = INT16(FiltPtr) */
+        punpckhbw		mm3, mm7		;	/* mm3 = INT16(ReconPtr1) */
+        punpckhbw		mm5, mm7		;	/* mm3 = INT16(ReconPtr2) */
+	        ;	/* average ReconPtr1 and ReconPtr2 */
+        paddw		mm1, mm4		;	/* mm1 = ReconPtr1 + ReconPtr2 */
+        paddw		mm3, mm5		;	/* mm3 = ReconPtr1 + ReconPtr2 */
+        psrlw		mm1, 1		;	/* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
+        psrlw		mm3, 1		;	/* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
+        psubw		mm0, mm1		;	/* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+        psubw		mm2, mm3		;	/* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+        movq		[edx], mm0		;	/* write answer out */
+        movq		[8 + edx], mm2		;	/* write answer out */
+	        ;	/* Increment pointers */
+        add		edx, 16		;	
+        add		eax, PixelsPerLine		;	
+        add		ebx, ReconPixelsPerLine		;	
+        add		ecx, ReconPixelsPerLine		;	
+
+
+        /*  ITERATION 8 */	
+        movq		mm0, [eax]		;	/* mm0 = FiltPtr */
+        movq		mm1, [ebx]		;	/* mm1 = ReconPtr1 */
+        movq		mm4, [ecx]		;	/* mm1 = ReconPtr2 */
+        movq		mm2, mm0		;	/* dup to prepare for up conversion */
+        movq		mm3, mm1		;	/* dup to prepare for up conversion */
+        movq		mm5, mm4		;	/* dup to prepare for up conversion */
+	        ;	/* convert from UINT8 to INT16 */
+        punpcklbw		mm0, mm7		;	/* mm0 = INT16(FiltPtr) */
+        punpcklbw		mm1, mm7		;	/* mm1 = INT16(ReconPtr1) */
+        punpcklbw		mm4, mm7		;	/* mm1 = INT16(ReconPtr2) */
+        punpckhbw		mm2, mm7		;	/* mm2 = INT16(FiltPtr) */
+        punpckhbw		mm3, mm7		;	/* mm3 = INT16(ReconPtr1) */
+        punpckhbw		mm5, mm7		;	/* mm3 = INT16(ReconPtr2) */
+	        ;	/* average ReconPtr1 and ReconPtr2 */
+        paddw		mm1, mm4		;	/* mm1 = ReconPtr1 + ReconPtr2 */
+        paddw		mm3, mm5		;	/* mm3 = ReconPtr1 + ReconPtr2 */
+        psrlw		mm1, 1		;	/* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
+        psrlw		mm3, 1		;	/* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
+        psubw		mm0, mm1		;	/* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+        psubw		mm2, mm3		;	/* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+        movq		[edx], mm0		;	/* write answer out */
+        movq		[8 + edx], mm2		;	/* write answer out */
+	        ;	/* Increment pointers */
+        add		edx, 16		;	
+        add		eax, PixelsPerLine		;	
+        add		ebx, ReconPixelsPerLine		;	
+        add		ecx, ReconPixelsPerLine		;	
+
+    };
+
+
+
+
+ 
+#endif
+}
+
+static ogg_uint32_t row_sad8__mmx (unsigned char *Src1, unsigned char *Src2)
+{
+  /* Row SAD over 8 bytes, taken as two groups of four: sums the absolute
+   * differences of bytes 0..3 and of bytes 4..7 separately and returns the
+   * larger of the two sums.
+   *
+   * The #if 0 branch is the scalar reference; the #else branch performs the
+   * same computation with MSVC inline MMX assembly.  The assembly overwrites
+   * eax, ebx, ecx and mm0-mm3, mm6, mm7, and does not execute emms.
+   */
+#if 0
+  ogg_uint32_t SadValue;
+  ogg_uint32_t SadValue1;
+
+  SadValue    = DSP_OP_ABS_DIFF (Src1[0], Src2[0]) + 
+	        DSP_OP_ABS_DIFF (Src1[1], Src2[1]) +
+	        DSP_OP_ABS_DIFF (Src1[2], Src2[2]) +
+	        DSP_OP_ABS_DIFF (Src1[3], Src2[3]);
+
+  SadValue1   = DSP_OP_ABS_DIFF (Src1[4], Src2[4]) + 
+	        DSP_OP_ABS_DIFF (Src1[5], Src2[5]) +
+	        DSP_OP_ABS_DIFF (Src1[6], Src2[6]) +
+	        DSP_OP_ABS_DIFF (Src1[7], Src2[7]);
+
+  SadValue = ( SadValue > SadValue1 ) ? SadValue : SadValue1;
+
+  return SadValue;
+
+#else
+  ogg_uint32_t MaxSad;
+
+  
+  __asm {
+    align       16
+    mov         ebx, Src1
+    mov         ecx, Src2
+
+
+    pxor		mm6, mm6		;	/* zero out mm6 for unpack */
+    pxor		mm7, mm7		;	/* zero out mm7 for unpack */
+    movq		mm0, [ebx]		;	/* take 8 bytes */
+    movq		mm1, [ecx]		;	/* take the 8 bytes to compare against */
+
+    movq		mm2, mm0		;	/* keep a copy of A */
+    psubusb		mm0, mm1		;	/* A - B */
+    psubusb		mm1, mm2		;	/* B - A */
+    por		mm0, mm1		;	/* one saturated difference is 0, so OR gives the abs difference */
+
+    movq		mm1, mm0		;	/* second copy so both halves can be unpacked */
+
+    punpcklbw		mm0, mm6		;	/* ; unpack low four bytes to higher precision */
+    punpckhbw		mm1, mm7		;	/* ; unpack high four bytes to higher precision */
+
+    movq		mm2, mm0		;	
+    movq		mm3, mm1		;	
+    psrlq		mm2, 32		;	/* fold and add */
+    psrlq		mm3, 32		;	
+    paddw		mm0, mm2		;	/* word 0 += word 2, word 1 += word 3 */
+    paddw		mm1, mm3		;	
+    movq		mm2, mm0		;	
+    movq		mm3, mm1		;	
+    psrlq		mm2, 16		;	
+    psrlq		mm3, 16		;	
+    paddw		mm0, mm2		;	/* word 0 of mm0 = sum of the low four differences */
+    paddw		mm1, mm3		;	/* word 0 of mm1 = sum of the high four differences */
+
+    psubusw		mm1, mm0		;	
+    paddw		mm1, mm0		;	/* mm1 = max(mm1, mm0) */
+    movd		eax, mm1		;	/* low 32 bits; only word 0 is meaningful */
+
+    and         eax, 0xffff
+    mov         MaxSad, eax
+  };
+   return MaxSad;
+  
+  
+  
+ 
+
+#endif
+}
+
+
+
+
+static ogg_uint32_t col_sad8x8__mmx (unsigned char *Src1, unsigned char *Src2,
+		                    ogg_uint32_t stride)
+{
+  /* Column SAD over an 8x8 block, taken as two halves of four rows each:
+   * for every one of the 8 columns the absolute differences are summed over
+   * rows 0..3 and, separately, over rows 4..7.  The return value is the
+   * largest of those 16 column sums.  Both Src1 and Src2 advance by the
+   * same stride between rows.
+   *
+   * The #if 0 branch is the scalar reference; the #else branch is the MSVC
+   * inline MMX version.  The assembly overwrites eax, ebx, ecx, edi and
+   * mm0-mm7, and does not execute emms.
+   */
+#if 0
+  ogg_uint32_t SadValue[8] = {0,0,0,0,0,0,0,0};
+  ogg_uint32_t SadValue2[8] = {0,0,0,0,0,0,0,0};
+  ogg_uint32_t MaxSad = 0;
+  ogg_uint32_t i;
+
+  for ( i = 0; i < 4; i++ ){
+    SadValue[0] += abs(Src1[0] - Src2[0]);
+    SadValue[1] += abs(Src1[1] - Src2[1]);
+    SadValue[2] += abs(Src1[2] - Src2[2]);
+    SadValue[3] += abs(Src1[3] - Src2[3]);
+    SadValue[4] += abs(Src1[4] - Src2[4]);
+    SadValue[5] += abs(Src1[5] - Src2[5]);
+    SadValue[6] += abs(Src1[6] - Src2[6]);
+    SadValue[7] += abs(Src1[7] - Src2[7]);
+    
+    Src1 += stride;
+    Src2 += stride;
+  }
+
+  for ( i = 0; i < 4; i++ ){
+    SadValue2[0] += abs(Src1[0] - Src2[0]);
+    SadValue2[1] += abs(Src1[1] - Src2[1]);
+    SadValue2[2] += abs(Src1[2] - Src2[2]);
+    SadValue2[3] += abs(Src1[3] - Src2[3]);
+    SadValue2[4] += abs(Src1[4] - Src2[4]);
+    SadValue2[5] += abs(Src1[5] - Src2[5]);
+    SadValue2[6] += abs(Src1[6] - Src2[6]);
+    SadValue2[7] += abs(Src1[7] - Src2[7]);
+    
+    Src1 += stride;
+    Src2 += stride;
+  }
+    
+  for ( i = 0; i < 8; i++ ){
+    if ( SadValue[i] > MaxSad )
+      MaxSad = SadValue[i];
+    if ( SadValue2[i] > MaxSad )
+      MaxSad = SadValue2[i];
+  }
+    
+  return MaxSad;
+#else
+  ogg_uint32_t MaxSad;
+
+
+    __asm {
+        align       16
+        mov         ebx, Src1
+        mov         ecx, Src2
+
+        pxor		mm3, mm3		;	/* zero out mm3 for unpack */
+        pxor		mm4, mm4		;	/* mm4 low sum (rows 0..3, columns 0..3) */
+        pxor		mm5, mm5		;	/* mm5 high sum (rows 0..3, columns 4..7) */
+        pxor		mm6, mm6		;	/* mm6 low sum (rows 4..7, columns 0..3) */
+        pxor		mm7, mm7		;	/* mm7 high sum (rows 4..7, columns 4..7) */
+        mov		edi, 4		;	/* 4 rows */
+        label_1:				;	
+        movq		mm0, [ebx]		;	/* take 8 bytes */
+        movq		mm1, [ecx]		;	/* take 8 bytes */
+
+        movq		mm2, mm0		;	/* keep a copy of A */
+        psubusb		mm0, mm1		;	/* A - B */
+        psubusb		mm1, mm2		;	/* B - A */
+        por		mm0, mm1		;	/* one saturated difference is 0, so OR gives the abs difference */
+        movq		mm1, mm0		;	
+
+        punpcklbw		mm0, mm3		;	/* unpack to higher precision for accumulation */
+        paddw		mm4, mm0		;	/* accumulate difference... */
+        punpckhbw		mm1, mm3		;	/* unpack high four bytes to higher precision */
+        paddw		mm5, mm1		;	/* accumulate difference... */
+        add		ebx, stride		;	/* Inc pointer into the new data */
+        add		ecx, stride		;	/* Inc pointer into the new data */
+
+        dec		edi		;	
+        jnz		label_1		;	
+
+        mov		edi, 4		;	/* 4 rows */
+        label_2:				;	
+        movq		mm0, [ebx]		;	/* take 8 bytes */
+        movq		mm1, [ecx]		;	/* take 8 bytes */
+
+        movq		mm2, mm0		;	
+        psubusb		mm0, mm1		;	/* A - B */
+        psubusb		mm1, mm2		;	/* B - A */
+        por		mm0, mm1		;	/* one saturated difference is 0, so OR gives the abs difference */
+        movq		mm1, mm0		;	
+
+        punpcklbw		mm0, mm3		;	/* unpack to higher precision for accumulation */
+        paddw		mm6, mm0		;	/* accumulate difference... */
+        punpckhbw		mm1, mm3		;	/* unpack high four bytes to higher precision */
+        paddw		mm7, mm1		;	/* accumulate difference... */
+        add		ebx, stride		;	/* Inc pointer into the new data */
+        add		ecx, stride		;	/* Inc pointer into the new data */
+
+        dec		edi		;	
+        jnz		label_2		;	
+
+        psubusw		mm7, mm6		;	
+        paddw		mm7, mm6		;	/* mm7 = max(mm7, mm6) */
+        psubusw		mm5, mm4		;	
+        paddw		mm5, mm4		;	/* mm5 = max(mm5, mm4) */
+        psubusw		mm7, mm5		;	
+        paddw		mm7, mm5		;	/* mm7 = max(mm5, mm7) */
+        movq		mm6, mm7		;	
+        psrlq		mm6, 32		;	
+        psubusw		mm7, mm6		;	
+        paddw		mm7, mm6		;	/* words 0 and 1 = max with words 2 and 3 */
+        movq		mm6, mm7		;	
+        psrlq		mm6, 16		;	
+        psubusw		mm7, mm6		;	
+        paddw		mm7, mm6		;	/* word 0 = max of all four words */
+        movd		eax, mm7		;	
+        and		    eax, 0xffff		;	/* keep word 0 only */
+
+        mov         MaxSad, eax
+    };
+
+    return MaxSad;
+
+
+#endif
+}
+
+static ogg_uint32_t sad8x8__mmx (unsigned char *ptr1, ogg_uint32_t stride1,
+		       	    unsigned char *ptr2, ogg_uint32_t stride2)
+{
+  /* Sum of absolute differences between two 8x8 byte blocks.  ptr1 advances
+   * by stride1 and ptr2 by stride2 after each row.
+   *
+   * The #if 0 branch is the scalar reference.  The #else branch is MSVC
+   * inline MMX assembly with the 8 rows fully unrolled: mm7 holds four
+   * 16-bit partial sums which are added together at the end, and the low
+   * 16 bits are returned (the largest possible total, 64 * 255 = 16320,
+   * fits in 16 bits).  The assembly overwrites eax, ebx, edx and
+   * mm0-mm2, mm6, mm7, and does not execute emms.
+   */
+#if 0
+  ogg_uint32_t  i;
+  ogg_uint32_t  sad = 0;
+
+  for (i=8; i; i--) {
+    sad += DSP_OP_ABS_DIFF(ptr1[0], ptr2[0]);
+    sad += DSP_OP_ABS_DIFF(ptr1[1], ptr2[1]);
+    sad += DSP_OP_ABS_DIFF(ptr1[2], ptr2[2]);
+    sad += DSP_OP_ABS_DIFF(ptr1[3], ptr2[3]);
+    sad += DSP_OP_ABS_DIFF(ptr1[4], ptr2[4]);
+    sad += DSP_OP_ABS_DIFF(ptr1[5], ptr2[5]);
+    sad += DSP_OP_ABS_DIFF(ptr1[6], ptr2[6]);
+    sad += DSP_OP_ABS_DIFF(ptr1[7], ptr2[7]);
+
+    /* Step to next row of block. */
+    ptr1 += stride1;
+    ptr2 += stride2;
+  }
+
+  return sad;
+#else
+  ogg_uint32_t  DiffVal;
+
+  __asm {
+    align  16
+
+    mov         ebx, ptr1
+    mov         edx, ptr2
+
+    pxor		mm6, mm6		;	/* zero out mm6 for unpack */
+    pxor		mm7, mm7		;	/* mm7 contains the result */
+    
+    ; /* ITERATION 1 */
+    movq		mm0, [ebx]		;	/* take 8 bytes */
+    movq		mm1, [edx]		;	/* take the 8 bytes to compare against */
+    movq		mm2, mm0		;	/* keep a copy of A */
+
+    psubusb		mm0, mm1		;	/* A - B */
+    psubusb		mm1, mm2		;	/* B - A */
+    por		mm0, mm1		;	/* one saturated difference is 0, so OR gives the abs difference */
+    movq		mm1, mm0		;	
+
+    punpcklbw		mm0, mm6		;	/* unpack to higher precision for accumulation */
+    paddw		mm7, mm0		;	/* accumulate difference... */
+    punpckhbw		mm1, mm6		;	/* unpack high four bytes to higher precision */
+    add		ebx, stride1		;	/* Inc pointer into the new data */
+    paddw		mm7, mm1		;	/* accumulate difference... */
+    add		edx, stride2		;	/* Inc pointer into ref data */
+
+    ; /* ITERATION 2 */
+    movq		mm0, [ebx]		;	/* take 8 bytes */
+    movq		mm1, [edx]		;	
+    movq		mm2, mm0		;	
+
+    psubusb		mm0, mm1		;	/* A - B */
+    psubusb		mm1, mm2		;	/* B - A */
+    por		mm0, mm1		;	/* and or gives abs difference */
+    movq		mm1, mm0		;	
+
+    punpcklbw		mm0, mm6		;	/* unpack to higher precision for accumulation */
+    paddw		mm7, mm0		;	/* accumulate difference... */
+    punpckhbw		mm1, mm6		;	/* unpack high four bytes to higher precision */
+    add		ebx, stride1		;	/* Inc pointer into the new data */
+    paddw		mm7, mm1		;	/* accumulate difference... */
+    add		edx, stride2		;	/* Inc pointer into ref data */
+
+
+    ; /* ITERATION 3 */
+    movq		mm0, [ebx]		;	/* take 8 bytes */
+    movq		mm1, [edx]		;	
+    movq		mm2, mm0		;	
+
+    psubusb		mm0, mm1		;	/* A - B */
+    psubusb		mm1, mm2		;	/* B - A */
+    por		mm0, mm1		;	/* and or gives abs difference */
+    movq		mm1, mm0		;	
+
+    punpcklbw		mm0, mm6		;	/* unpack to higher precision for accumulation */
+    paddw		mm7, mm0		;	/* accumulate difference... */
+    punpckhbw		mm1, mm6		;	/* unpack high four bytes to higher precision */
+    add		ebx, stride1		;	/* Inc pointer into the new data */
+    paddw		mm7, mm1		;	/* accumulate difference... */
+    add		edx, stride2		;	/* Inc pointer into ref data */
+
+    ; /* ITERATION 4 */
+    movq		mm0, [ebx]		;	/* take 8 bytes */
+    movq		mm1, [edx]		;	
+    movq		mm2, mm0		;	
+
+    psubusb		mm0, mm1		;	/* A - B */
+    psubusb		mm1, mm2		;	/* B - A */
+    por		mm0, mm1		;	/* and or gives abs difference */
+    movq		mm1, mm0		;	
+
+    punpcklbw		mm0, mm6		;	/* unpack to higher precision for accumulation */
+    paddw		mm7, mm0		;	/* accumulate difference... */
+    punpckhbw		mm1, mm6		;	/* unpack high four bytes to higher precision */
+    add		ebx, stride1		;	/* Inc pointer into the new data */
+    paddw		mm7, mm1		;	/* accumulate difference... */
+    add		edx, stride2		;	/* Inc pointer into ref data */
+
+
+    ; /* ITERATION 5 */
+    movq		mm0, [ebx]		;	/* take 8 bytes */
+    movq		mm1, [edx]		;	
+    movq		mm2, mm0		;	
+
+    psubusb		mm0, mm1		;	/* A - B */
+    psubusb		mm1, mm2		;	/* B - A */
+    por		mm0, mm1		;	/* and or gives abs difference */
+    movq		mm1, mm0		;	
+
+    punpcklbw		mm0, mm6		;	/* unpack to higher precision for accumulation */
+    paddw		mm7, mm0		;	/* accumulate difference... */
+    punpckhbw		mm1, mm6		;	/* unpack high four bytes to higher precision */
+    add		ebx, stride1		;	/* Inc pointer into the new data */
+    paddw		mm7, mm1		;	/* accumulate difference... */
+    add		edx, stride2		;	/* Inc pointer into ref data */
+
+
+    ; /* ITERATION 6 */
+    movq		mm0, [ebx]		;	/* take 8 bytes */
+    movq		mm1, [edx]		;	
+    movq		mm2, mm0		;	
+
+    psubusb		mm0, mm1		;	/* A - B */
+    psubusb		mm1, mm2		;	/* B - A */
+    por		mm0, mm1		;	/* and or gives abs difference */
+    movq		mm1, mm0		;	
+
+    punpcklbw		mm0, mm6		;	/* unpack to higher precision for accumulation */
+    paddw		mm7, mm0		;	/* accumulate difference... */
+    punpckhbw		mm1, mm6		;	/* unpack high four bytes to higher precision */
+    add		ebx, stride1		;	/* Inc pointer into the new data */
+    paddw		mm7, mm1		;	/* accumulate difference... */
+    add		edx, stride2		;	/* Inc pointer into ref data */
+
+
+    ; /* ITERATION 7 */
+    movq		mm0, [ebx]		;	/* take 8 bytes */
+    movq		mm1, [edx]		;	
+    movq		mm2, mm0		;	
+
+    psubusb		mm0, mm1		;	/* A - B */
+    psubusb		mm1, mm2		;	/* B - A */
+    por		mm0, mm1		;	/* and or gives abs difference */
+    movq		mm1, mm0		;	
+
+    punpcklbw		mm0, mm6		;	/* unpack to higher precision for accumulation */
+    paddw		mm7, mm0		;	/* accumulate difference... */
+    punpckhbw		mm1, mm6		;	/* unpack high four bytes to higher precision */
+    add		ebx, stride1		;	/* Inc pointer into the new data */
+    paddw		mm7, mm1		;	/* accumulate difference... */
+    add		edx, stride2		;	/* Inc pointer into ref data */
+
+
+
+    ; /* ITERATION 8 */
+    movq		mm0, [ebx]		;	/* take 8 bytes */
+    movq		mm1, [edx]		;	
+    movq		mm2, mm0		;	
+
+    psubusb		mm0, mm1		;	/* A - B */
+    psubusb		mm1, mm2		;	/* B - A */
+    por		mm0, mm1		;	/* and or gives abs difference */
+    movq		mm1, mm0		;	
+
+    punpcklbw		mm0, mm6		;	/* unpack to higher precision for accumulation */
+    paddw		mm7, mm0		;	/* accumulate difference... */
+    punpckhbw		mm1, mm6		;	/* unpack high four bytes to higher precision */
+    add		ebx, stride1		;	/* Inc pointer into the new data */
+    paddw		mm7, mm1		;	/* accumulate difference... */
+    add		edx, stride2		;	/* Inc pointer into ref data */
+
+
+
+    ; /* ------ */
+
+    movq		mm0, mm7		;	/* horizontal add of the four 16-bit partial sums */
+    psrlq		mm7, 32		;	
+    paddw		mm7, mm0		;	/* word 0 += word 2, word 1 += word 3 */
+    movq		mm0, mm7		;	
+    psrlq		mm7, 16		;	
+    paddw		mm7, mm0		;	/* word 0 = total */
+    movd		eax, mm7		;	
+    and		    eax, 0xffff		;	/* keep word 0 only */
+
+    mov         DiffVal, eax
+  };
+
+  return DiffVal;
+
+ 
+
+#endif
+}
+
+static ogg_uint32_t sad8x8_thres__mmx (unsigned char *ptr1, ogg_uint32_t stride1,
+		       		  unsigned char *ptr2, ogg_uint32_t stride2, 
+			   	  ogg_uint32_t thres)
+{ /* 8x8 sum of absolute differences with a threshold argument.
+   *
+   * The scalar reference in the #if 0 branch stops accumulating as soon as
+   * the running sum exceeds thres and returns that partial sum.  The
+   * compiled #else branch does not look at thres at all: it forwards to
+   * sad8x8__mmx and always returns the SAD of the whole block.
+   */
+#if 0
+  ogg_uint32_t  i;
+  ogg_uint32_t  sad = 0;
+
+  for (i=8; i; i--) {
+    sad += DSP_OP_ABS_DIFF(ptr1[0], ptr2[0]);
+    sad += DSP_OP_ABS_DIFF(ptr1[1], ptr2[1]);
+    sad += DSP_OP_ABS_DIFF(ptr1[2], ptr2[2]);
+    sad += DSP_OP_ABS_DIFF(ptr1[3], ptr2[3]);
+    sad += DSP_OP_ABS_DIFF(ptr1[4], ptr2[4]);
+    sad += DSP_OP_ABS_DIFF(ptr1[5], ptr2[5]);
+    sad += DSP_OP_ABS_DIFF(ptr1[6], ptr2[6]);
+    sad += DSP_OP_ABS_DIFF(ptr1[7], ptr2[7]);
+
+    if (sad > thres )
+      break;
+
+    /* Step to next row of block. */
+    ptr1 += stride1;
+    ptr2 += stride2;
+  }
+
+  return sad;
+#else
+  return sad8x8__mmx (ptr1, stride1, ptr2, stride2); /* thres is ignored: no early exit */
+#endif
+}
+
+
+static ogg_uint32_t sad8x8_xy2_thres__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
+		                      unsigned char *RefDataPtr1,
+			              unsigned char *RefDataPtr2, ogg_uint32_t RefStride,
+			              ogg_uint32_t thres)
+{ /* 8x8 sum of absolute differences between SrcData and the per-byte
+   * average of the two reference blocks RefDataPtr1 and RefDataPtr2.
+   * SrcData advances by SrcStride per row, both references by RefStride.
+   *
+   * The #if 0 scalar reference stops once the running sum exceeds thres.
+   * The compiled MMX branch never reads thres and always processes all
+   * 8 rows.  It forms each average as (a & b) + (((a ^ b) & 0xfe) >> 1),
+   * which equals (a + b) / 2 rounded down without needing 9 bits.
+   * The assembly overwrites eax, ebx, ecx, edx, edi and mm0-mm3, mm5-mm7,
+   * and does not execute emms.
+   */
+#if 0
+  ogg_uint32_t  i;
+  ogg_uint32_t  sad = 0;
+
+  for (i=8; i; i--) {
+    sad += DSP_OP_ABS_DIFF(SrcData[0], DSP_OP_AVG (RefDataPtr1[0], RefDataPtr2[0]));
+    sad += DSP_OP_ABS_DIFF(SrcData[1], DSP_OP_AVG (RefDataPtr1[1], RefDataPtr2[1]));
+    sad += DSP_OP_ABS_DIFF(SrcData[2], DSP_OP_AVG (RefDataPtr1[2], RefDataPtr2[2]));
+    sad += DSP_OP_ABS_DIFF(SrcData[3], DSP_OP_AVG (RefDataPtr1[3], RefDataPtr2[3]));
+    sad += DSP_OP_ABS_DIFF(SrcData[4], DSP_OP_AVG (RefDataPtr1[4], RefDataPtr2[4]));
+    sad += DSP_OP_ABS_DIFF(SrcData[5], DSP_OP_AVG (RefDataPtr1[5], RefDataPtr2[5]));
+    sad += DSP_OP_ABS_DIFF(SrcData[6], DSP_OP_AVG (RefDataPtr1[6], RefDataPtr2[6]));
+    sad += DSP_OP_ABS_DIFF(SrcData[7], DSP_OP_AVG (RefDataPtr1[7], RefDataPtr2[7]));
+
+    if ( sad > thres )
+      break;
+
+    /* Step to next row of block. */
+    SrcData += SrcStride;
+    RefDataPtr1 += RefStride;
+    RefDataPtr2 += RefStride;
+  }
+
+  return sad;
+#else
+  ogg_uint32_t  DiffVal;
+
+  __asm {
+    align 16
+
+        mov     ebx, SrcData
+        mov     ecx, RefDataPtr1
+        mov     edx, RefDataPtr2
+
+
+    pcmpeqd		mm5, mm5		;	/* fefefefefefefefe in mm5 */
+    paddb		mm5, mm5		;	/* 0xff + 0xff wraps to 0xfe in every byte */
+				    ;	
+    pxor		mm6, mm6		;	/* zero out mm6 for unpack */
+    pxor		mm7, mm7		;	/* mm7 contains the result */
+    mov		edi, 8		;	/* 8 rows */
+    loop_start:				;	
+    movq		mm0, [ebx]		;	/* take 8 bytes */
+
+    movq		mm2, [ecx]		;	
+    movq		mm3, [edx]		;	/* take average of mm2 and mm3 */
+    movq		mm1, mm2		;	
+    pand		mm1, mm3		;	/* bits set in both inputs */
+    pxor		mm3, mm2		;	/* bits set in exactly one input */
+    pand		mm3, mm5		;	/* clear bit 0 of each byte so the 64-bit shift cannot leak into the next byte */
+    psrlq		mm3, 1		;	/* halve the differing bits */
+    paddb		mm1, mm3		;	/* mm1 = per-byte average, rounded down */
+
+    movq		mm2, mm0		;	/* keep a copy of A */
+
+    psubusb		mm0, mm1		;	/* A - B */
+    psubusb		mm1, mm2		;	/* B - A */
+    por		mm0, mm1		;	/* one saturated difference is 0, so OR gives the abs difference */
+    movq		mm1, mm0		;	
+
+    punpcklbw		mm0, mm6		;	/* unpack to higher precision for accumulation */
+    paddw		mm7, mm0		;	/* accumulate difference... */
+    punpckhbw		mm1, mm6		;	/* unpack high four bytes to higher precision */
+    add		ebx, SrcStride		;	/* Inc pointer into the new data */
+    paddw		mm7, mm1		;	/* accumulate difference... */
+    add		ecx, RefStride		;	/* Inc pointer into ref data */
+    add		edx, RefStride		;	/* Inc pointer into ref data */
+
+    dec		edi		;	
+    jnz		loop_start		;	
+
+    movq		mm0, mm7		;	/* horizontal add of the four 16-bit partial sums */
+    psrlq		mm7, 32		;	
+    paddw		mm7, mm0		;	
+    movq		mm0, mm7		;	
+    psrlq		mm7, 16		;	
+    paddw		mm7, mm0		;	/* word 0 = total */
+    movd		eax, mm7		;	
+    and		eax, 0xffff		;	/* keep word 0 only */
+
+    mov DiffVal, eax
+  };
+
+  return DiffVal;
+
+ 
+
+#endif
+}
+
+static ogg_uint32_t intra8x8_err__mmx (unsigned char *DataPtr, ogg_uint32_t Stride)
+{ /* Returns (XXSum << 6) - XSum * XSum for the 8x8 byte block at DataPtr
+   * (rows are Stride bytes apart), where XSum is the sum of the 64 pixel
+   * values and XXSum is the sum of their squares.
+   *
+   * The #if 0 branch is the scalar reference; the #else branch gathers the
+   * two sums with MSVC inline MMX assembly and applies the same final
+   * formula in C.  The assembly overwrites eax, ebx, ecx, edi and
+   * mm0, mm2, mm5-mm7, and does not execute emms.
+   */
+#if 0
+  ogg_uint32_t  i;
+  ogg_uint32_t  XSum=0;
+  ogg_uint32_t  XXSum=0;
+
+  for (i=8; i; i--) {
+     /* Accumulate every pixel of the row (value and squared value). */
+     XSum += DataPtr[0];
+     XXSum += DataPtr[0]*DataPtr[0];
+     XSum += DataPtr[1];
+     XXSum += DataPtr[1]*DataPtr[1];
+     XSum += DataPtr[2];
+     XXSum += DataPtr[2]*DataPtr[2];
+     XSum += DataPtr[3];
+     XXSum += DataPtr[3]*DataPtr[3];
+     XSum += DataPtr[4];
+     XXSum += DataPtr[4]*DataPtr[4];
+     XSum += DataPtr[5];
+     XXSum += DataPtr[5]*DataPtr[5];
+     XSum += DataPtr[6];
+     XXSum += DataPtr[6]*DataPtr[6];
+     XSum += DataPtr[7];
+     XXSum += DataPtr[7]*DataPtr[7];
+
+     /* Step to next row of block. */
+     DataPtr += Stride;
+   }
+
+   /* Compute population variance as mis-match metric. */
+   return (( (XXSum<<6) - XSum*XSum ) );
+#else
+  ogg_uint32_t  XSum;
+  ogg_uint32_t  XXSum;
+
+  __asm {
+    align 16
+
+        mov     ecx, DataPtr
+
+    pxor		mm5, mm5		;	/* mm5 = running pixel sum, four 16-bit words */
+    pxor		mm6, mm6		;	/* mm6 = zero, used for unpacking */
+    pxor		mm7, mm7		;	/* mm7 = running sum of squares, two 32-bit dwords */
+    mov		edi, 8		;	/* 8 rows */
+    loop_start:		
+    movq		mm0, [ecx]		;	/* take 8 bytes */
+    movq		mm2, mm0		;	
+
+    punpcklbw		mm0, mm6		;	/* low four pixels as 16-bit words */
+    punpckhbw		mm2, mm6		;	/* high four pixels as 16-bit words */
+
+    paddw		mm5, mm0		;	/* accumulate pixel values */
+    paddw		mm5, mm2		;	
+
+    pmaddwd		mm0, mm0		;	/* square each word, adjacent pairs summed into dwords */
+    pmaddwd		mm2, mm2		;	
+				    ;	
+    paddd		mm7, mm0		;	/* accumulate squares */
+    paddd		mm7, mm2		;	
+
+    add		ecx, Stride		;	/* Inc pointer into src data */
+
+    dec		edi		;	
+    jnz		loop_start		;	
+
+    movq		mm0, mm5		;	/* horizontal add of the four 16-bit sums */
+    psrlq		mm5, 32		;	
+    paddw		mm5, mm0		;	
+    movq		mm0, mm5		;	
+    psrlq		mm5, 16		;	
+    paddw		mm5, mm0		;	/* word 0 = total pixel sum */
+    movd		edi, mm5		;	
+    movsx		edi, di		;	/* keep word 0 only, sign-extended to 32 bits */
+    mov		eax, edi		;	
+
+    movq		mm0, mm7		;	/* add the two 32-bit sums of squares */
+    psrlq		mm7, 32		;	
+    paddd		mm7, mm0		;	
+    movd		ebx, mm7		;	/* ebx = total sum of squares */
+
+        mov         XSum, eax
+        mov         XXSum, ebx;
+
+  };
+    /* Compute population variance as mis-match metric. */
+    return (( (XXSum<<6) - XSum*XSum ) );
+
+ 
+
+#endif
+}
+
+static ogg_uint32_t inter8x8_err__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
+		                 unsigned char *RefDataPtr, ogg_uint32_t RefStride)
+{
+  /* Returns (XXSum << 6) - XSum * XSum over the 64 per-pixel differences
+   * between the 8x8 block at SrcData (rows SrcStride apart) and the 8x8
+   * block at RefDataPtr (rows RefStride apart).  XSum is the sum of the
+   * differences and XXSum is the sum of their squares.
+   *
+   * The #if 0 branch is the scalar reference; the #else branch gathers the
+   * two sums with MSVC inline MMX assembly and applies the same final
+   * formula in C.  The assembly overwrites eax, ebx, ecx, edx, edi and
+   * mm0-mm3, mm5-mm7, and does not execute emms.
+   */
+#if 0
+  ogg_uint32_t  i;
+  ogg_uint32_t  XSum=0;
+  ogg_uint32_t  XXSum=0;
+  ogg_int32_t   DiffVal;
+
+  for (i=8; i; i--) {
+    DiffVal = DSP_OP_DIFF (SrcData[0], RefDataPtr[0]);
+    XSum += DiffVal;
+    XXSum += DiffVal*DiffVal;
+
+    DiffVal = DSP_OP_DIFF (SrcData[1], RefDataPtr[1]);
+    XSum += DiffVal;
+    XXSum += DiffVal*DiffVal;
+
+    DiffVal = DSP_OP_DIFF (SrcData[2], RefDataPtr[2]);
+    XSum += DiffVal;
+    XXSum += DiffVal*DiffVal;
+
+    DiffVal = DSP_OP_DIFF (SrcData[3], RefDataPtr[3]);
+    XSum += DiffVal;
+    XXSum += DiffVal*DiffVal;
+        
+    DiffVal = DSP_OP_DIFF (SrcData[4], RefDataPtr[4]);
+    XSum += DiffVal;
+    XXSum += DiffVal*DiffVal;
+        
+    DiffVal = DSP_OP_DIFF (SrcData[5], RefDataPtr[5]);
+    XSum += DiffVal;
+    XXSum += DiffVal*DiffVal;
+        
+    DiffVal = DSP_OP_DIFF (SrcData[6], RefDataPtr[6]);
+    XSum += DiffVal;
+    XXSum += DiffVal*DiffVal;
+        
+    DiffVal = DSP_OP_DIFF (SrcData[7], RefDataPtr[7]);
+    XSum += DiffVal;
+    XXSum += DiffVal*DiffVal;
+        
+    /* Step to next row of block. */
+    SrcData += SrcStride;
+    RefDataPtr += RefStride;
+  }
+
+  /* Compute and return population variance as mis-match metric. */
+  return (( (XXSum<<6) - XSum*XSum ));
+#else
+  ogg_uint32_t  XSum;
+  ogg_uint32_t  XXSum;
+
+
+  __asm {
+    align 16
+
+        mov     ecx, SrcData
+        mov     edx, RefDataPtr
+
+    pxor		mm5, mm5		;	/* mm5 = running sum of differences, four 16-bit words */
+    pxor		mm6, mm6		;	/* mm6 = zero, used for unpacking */
+    pxor		mm7, mm7		;	/* mm7 = running sum of squares, two 32-bit dwords */
+    mov		edi, 8		;	/* 8 rows */
+    loop_start:				;	
+    movq		mm0, [ecx]		;	/* take 8 bytes */
+    movq		mm1, [edx]		;	/* take 8 reference bytes */
+    movq		mm2, mm0		;	
+    movq		mm3, mm1		;	
+
+    punpcklbw		mm0, mm6		;	/* low four source bytes as words */
+    punpcklbw		mm1, mm6		;	/* low four reference bytes as words */
+    punpckhbw		mm2, mm6		;	/* high four source bytes as words */
+    punpckhbw		mm3, mm6		;	/* high four reference bytes as words */
+
+    psubsw		mm0, mm1		;	/* signed difference, low four */
+    psubsw		mm2, mm3		;	/* signed difference, high four */
+
+    paddw		mm5, mm0		;	/* accumulate differences */
+    paddw		mm5, mm2		;	
+
+    pmaddwd		mm0, mm0		;	/* square each word, adjacent pairs summed into dwords */
+    pmaddwd		mm2, mm2		;	
+				    ;	
+    paddd		mm7, mm0		;	/* accumulate squares */
+    paddd		mm7, mm2		;	
+
+    add		ecx, SrcStride		;	/* Inc pointer into src data */
+    add		edx, RefStride		;	/* Inc pointer into ref data */
+
+    dec		edi		;	
+    jnz		loop_start		;	
+
+    movq		mm0, mm5		;	/* horizontal add of the four 16-bit sums */
+    psrlq		mm5, 32		;	
+    paddw		mm5, mm0		;	
+    movq		mm0, mm5		;	
+    psrlq		mm5, 16		;	
+    paddw		mm5, mm0		;	/* word 0 = total of the differences */
+    movd		edi, mm5		;	
+    movsx		edi, di		;	/* keep word 0 only, sign-extended to 32 bits */
+    mov		eax, edi		;	
+
+    movq		mm0, mm7		;	/* add the two 32-bit sums of squares */
+    psrlq		mm7, 32		;	
+    paddd		mm7, mm0		;	
+    movd		ebx, mm7		;	/* ebx = total sum of squares */
+
+        mov     XSum, eax
+        mov     XXSum, ebx
+
+  };
+
+  /* Compute and return population variance as mis-match metric. */
+  return (( (XXSum<<6) - XSum*XSum ));
+
+ 
+#endif
+}
+
+static ogg_uint32_t inter8x8_err_xy2__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
+		                     unsigned char *RefDataPtr1,
+				     unsigned char *RefDataPtr2, ogg_uint32_t RefStride)
+{ /* Returns (XXSum << 6) - XSum * XSum over the 64 per-pixel differences
+   * between the 8x8 block at SrcData and the per-byte average of the two
+   * reference blocks RefDataPtr1 and RefDataPtr2.  SrcData advances by
+   * SrcStride per row, both references by RefStride.  XSum is the sum of
+   * the differences and XXSum is the sum of their squares.
+   *
+   * The #if 0 branch is the scalar reference.  The #else MMX branch forms
+   * each average as (a & b) + (((a ^ b) & 0xfe) >> 1), which equals
+   * (a + b) / 2 rounded down.  The assembly overwrites ebx, ecx, edx, edi
+   * and mm0-mm7, and does not execute emms.
+   */
+#if 0
+  ogg_uint32_t  i;
+  ogg_uint32_t  XSum=0;
+  ogg_uint32_t  XXSum=0;
+  ogg_int32_t   DiffVal;
+
+  for (i=8; i; i--) {
+    DiffVal = DSP_OP_DIFF(SrcData[0], DSP_OP_AVG (RefDataPtr1[0], RefDataPtr2[0]));
+    XSum += DiffVal;
+    XXSum += DiffVal*DiffVal;
+
+    DiffVal = DSP_OP_DIFF(SrcData[1], DSP_OP_AVG (RefDataPtr1[1], RefDataPtr2[1]));
+    XSum += DiffVal;
+    XXSum += DiffVal*DiffVal;
+
+    DiffVal = DSP_OP_DIFF(SrcData[2], DSP_OP_AVG (RefDataPtr1[2], RefDataPtr2[2]));
+    XSum += DiffVal;
+    XXSum += DiffVal*DiffVal;
+
+    DiffVal = DSP_OP_DIFF(SrcData[3], DSP_OP_AVG (RefDataPtr1[3], RefDataPtr2[3]));
+    XSum += DiffVal;
+    XXSum += DiffVal*DiffVal;
+
+    DiffVal = DSP_OP_DIFF(SrcData[4], DSP_OP_AVG (RefDataPtr1[4], RefDataPtr2[4]));
+    XSum += DiffVal;
+    XXSum += DiffVal*DiffVal;
+
+    DiffVal = DSP_OP_DIFF(SrcData[5], DSP_OP_AVG (RefDataPtr1[5], RefDataPtr2[5]));
+    XSum += DiffVal;
+    XXSum += DiffVal*DiffVal;
+
+    DiffVal = DSP_OP_DIFF(SrcData[6], DSP_OP_AVG (RefDataPtr1[6], RefDataPtr2[6]));
+    XSum += DiffVal;
+    XXSum += DiffVal*DiffVal;
+
+    DiffVal = DSP_OP_DIFF(SrcData[7], DSP_OP_AVG (RefDataPtr1[7], RefDataPtr2[7]));
+    XSum += DiffVal;
+    XXSum += DiffVal*DiffVal;
+
+    /* Step to next row of block. */
+    SrcData += SrcStride;
+    RefDataPtr1 += RefStride;
+    RefDataPtr2 += RefStride;
+  }
+
+  /* Compute and return population variance as mis-match metric. */
+  return (( (XXSum<<6) - XSum*XSum ));
+#else
+  ogg_uint32_t XSum;
+  ogg_uint32_t XXSum;
+
+  __asm {
+    align 16
+
+        mov ebx, SrcData
+        mov ecx, RefDataPtr1
+        mov edx, RefDataPtr2
+
+    pcmpeqd		mm4, mm4		;	/* fefefefefefefefe in mm4 */
+    paddb		mm4, mm4		;	/* 0xff + 0xff wraps to 0xfe in every byte */
+    pxor		mm5, mm5		;	/* mm5 = running sum of differences, four 16-bit words */
+    pxor		mm6, mm6		;	/* mm6 = zero, used for unpacking */
+    pxor		mm7, mm7		;	/* mm7 = running sum of squares, two 32-bit dwords */
+    mov		edi, 8		;	/* 8 rows */
+    loop_start:				;	
+    movq		mm0, [ebx]		;	/* take 8 bytes */
+
+    movq		mm2, [ecx]		;	
+    movq		mm3, [edx]		;	/* take average of mm2 and mm3 */
+    movq		mm1, mm2		;	
+    pand		mm1, mm3		;	/* bits set in both inputs */
+    pxor		mm3, mm2		;	/* bits set in exactly one input */
+    pand		mm3, mm4		;	/* clear bit 0 of each byte so the 64-bit shift cannot leak into the next byte */
+    psrlq		mm3, 1		;	/* halve the differing bits */
+    paddb		mm1, mm3		;	/* mm1 = per-byte average, rounded down */
+
+    movq		mm2, mm0		;	
+    movq		mm3, mm1		;	
+
+    punpcklbw		mm0, mm6		;	/* low four source bytes as words */
+    punpcklbw		mm1, mm6		;	/* low four averaged reference bytes as words */
+    punpckhbw		mm2, mm6		;	/* high four source bytes as words */
+    punpckhbw		mm3, mm6		;	/* high four averaged reference bytes as words */
+
+    psubsw		mm0, mm1		;	/* signed difference, low four */
+    psubsw		mm2, mm3		;	/* signed difference, high four */
+
+    paddw		mm5, mm0		;	/* accumulate differences */
+    paddw		mm5, mm2		;	
+
+    pmaddwd		mm0, mm0		;	/* square each word, adjacent pairs summed into dwords */
+    pmaddwd		mm2, mm2		;	
+				    ;	
+    paddd		mm7, mm0		;	/* accumulate squares */
+    paddd		mm7, mm2		;	
+
+    add		ebx, SrcStride		;	/* Inc pointer into src data */
+    add		ecx, RefStride		;	/* Inc pointer into ref data */
+    add		edx, RefStride		;	/* Inc pointer into ref data */
+
+    dec		edi		;	
+    jnz		loop_start		;	
+
+    movq		mm0, mm5		;	/* horizontal add of the four 16-bit sums */
+    psrlq		mm5, 32		;	
+    paddw		mm5, mm0		;	
+    movq		mm0, mm5		;	
+    psrlq		mm5, 16		;	
+    paddw		mm5, mm0		;	/* word 0 = total of the differences */
+    movd		edi, mm5		;	
+    movsx		edi, di		;	/* keep word 0 only, sign-extended to 32 bits */
+    mov         XSum, edi   ; /* movl		eax, edi		;	Modified for vc to reuse eax*/
+
+    movq		mm0, mm7		;	/* add the two 32-bit sums of squares */
+    psrlq		mm7, 32		;	
+    paddd		mm7, mm0		;	
+    movd        XXSum, mm7 ; /*movd		eax, mm7		; Modified for vc to reuse eax */
+  };
+
+    return (( (XXSum<<6) - XSum*XSum ));
+
+#endif
+}
+
+static void restore_fpu (void)
+{
+  /* Executes the emms instruction (empty MMX state).  None of the MMX
+   * routines in this part of the file issue emms themselves, so this is
+   * the hook exported as funcs->restore_fpu by dsp_mmx_init.
+   */
+    __asm {
+        emms
+    }
+
+}
+
+void dsp_mmx_init(DspFunctions *funcs)
+{
+  /* Point every slot of the DSP dispatch table that has an MMX
+   * implementation in this file at that implementation.  Slots not listed
+   * here are left as the caller set them. */
+  TH_DEBUG("enabling accelerated x86_32 mmx dsp functions.\n");
+
+  /* Block subtraction. */
+  funcs->sub8x8avg2 = sub8x8avg2__mmx;
+  funcs->sub8x8_128 = sub8x8_128__mmx;
+  funcs->sub8x8 = sub8x8__mmx;
+
+  /* Sums of absolute differences. */
+  funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__mmx;
+  funcs->sad8x8_thres = sad8x8_thres__mmx;
+  funcs->sad8x8 = sad8x8__mmx;
+  funcs->col_sad8x8 = col_sad8x8__mmx;
+  funcs->row_sad8 = row_sad8__mmx;
+
+  /* Variance-style error metrics. */
+  funcs->inter8x8_err_xy2 = inter8x8_err_xy2__mmx;
+  funcs->inter8x8_err = inter8x8_err__mmx;
+  funcs->intra8x8_err = intra8x8_err__mmx;
+
+  /* emms hook, to be called once the MMX routines are done. */
+  funcs->restore_fpu = restore_fpu;
+}
+

Added: trunk/theora/lib/x86_32_vs/fdct_mmx.c
===================================================================
--- trunk/theora/lib/x86_32_vs/fdct_mmx.c	2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/lib/x86_32_vs/fdct_mmx.c	2006-05-28 18:18:26 UTC (rev 11451)
@@ -0,0 +1,334 @@
+;//==========================================================================
+;//
+;//  THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+;//  KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+;//  IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+;//  PURPOSE.
+;//
+;//  Copyright (c) 1999 - 2001  On2 Technologies Inc. All Rights Reserved.
+;//
+;//--------------------------------------------------------------------------
+
+#include "theora/theora.h"
+#include "codec_internal.h"
+#include "dsp.h"
+
+
+/* Multiplier constants read by the pmulhw instructions in Fdct_mmx.
+   Each 64-bit value is a single 16-bit word repeated in all four lanes.
+   Words >= 0x8000 (xC1S7 .. xC5S3) are negative as signed 16-bit operands,
+   which is why Fdct_mmx adds the multiplicand back after those multiplies;
+   xC6S2 and xC7S1 are below 0x8000 and need no such correction.
+   NOTE(review): the names suggest xCnSm ~ 65536*cos(n*pi/16) = 65536*sin(m*pi/16)
+   -- confirm against the C fdct implementation. */
+static const  ogg_int64_t xC1S7 = 0x0fb15fb15fb15fb15LL;
+static const  ogg_int64_t xC2S6 = 0x0ec83ec83ec83ec83LL;
+static const  ogg_int64_t xC3S5 = 0x0d4dbd4dbd4dbd4dbLL;
+static const  ogg_int64_t xC4S4 = 0x0b505b505b505b505LL;
+static const  ogg_int64_t xC5S3 = 0x08e3a8e3a8e3a8e3aLL;
+static const  ogg_int64_t xC6S2 = 0x061f861f861f861f8LL;
+static const  ogg_int64_t xC7S1 = 0x031f131f131f131f1LL;
+
+
+/* Transpose two independent 4x4 tiles of 16-bit words.
+ *
+ * InputData1 / InputData2 each address the first of four rows of four
+ * words; consecutive rows are 16 bytes apart (offsets 0, 16, 32, 48).
+ * The transpose of the InputData1 tile is written to OutputData1 and the
+ * transpose of the InputData2 tile to OutputData2, using the same 16-byte
+ * row pitch.  OutputData1 is also used as spill space for rows a and b
+ * while the second tile is being processed.
+ *
+ * The routine leaves the CPU in MMX state; it does not execute emms.
+ */
+static __inline void Transpose_mmx( ogg_int16_t *InputData1, ogg_int16_t *OutputData1,
+                                 ogg_int16_t *InputData2, ogg_int16_t *OutputData2)
+{
+
+    __asm {
+        align 16
+            mov     eax, InputData1
+            mov     ebx, InputData2
+            mov     ecx, OutputData1
+            mov     edx, OutputData2
+
+
+        movq		mm0, [eax]		;	/* mm0 = a0 a1 a2 a3 */
+        movq		mm4, [ebx]		;	/* mm4 = e4 e5 e6 e7 */
+        movq		mm1, [16 + eax]		;	/* mm1 = b0 b1 b2 b3 */
+        movq		mm5, [16 + ebx]		;	/* mm5 = f4 f5 f6 f7 */
+        movq		mm2, [32 + eax]		;	/* mm2 = c0 c1 c2 c3 */
+        movq		mm6, [32 + ebx]		;	/* mm6 = g4 g5 g6 g7 */
+        movq		mm3, [48 + eax]		;	/* mm3 = d0 d1 d2 d3 */
+        movq		[16 + ecx], mm1		;	/* save  b0 b1 b2 b3 (spilled to the output to free mm1) */
+        movq		mm7, [48 + ebx]		;	/* mm7 = fourth row of the second tile (h) */
+	        ;	/* Transpose 2x8 block */
+        movq		mm1, mm4		;	/* mm1 = e3 e2 e1 e0 */
+        punpcklwd		mm4, mm5		;	/* mm4 = f1 e1 f0 e0 */
+        movq		[ecx], mm0		;	/* save a3 a2 a1 a0 (spilled to the output to free mm0) */
+        punpckhwd		mm1, mm5		;	/* mm1 = f3 e3 f2 e2 */
+        movq		mm0, mm6		;	/* mm0 = g3 g2 g1 g0 */
+        punpcklwd		mm6, mm7		;	/* mm6 = h1 g1 h0 g0 */
+        movq		mm5, mm4		;	/* mm5 = f1 e1 f0 e0 */
+        punpckldq		mm4, mm6		;	/* mm4 = h0 g0 f0 e0 = MM4 */
+        punpckhdq		mm5, mm6		;	/* mm5 = h1 g1 f1 e1 = MM5 */
+        movq		mm6, mm1		;	/* mm6 = f3 e3 f2 e2 */
+        movq		[edx], mm4		;	
+        punpckhwd		mm0, mm7		;	/* mm0 = h3 g3 h2 g2 */
+        movq		[16 + edx], mm5		;	
+        punpckhdq		mm6, mm0		;	/* mm6 = h3 g3 f3 e3 = MM7 */
+        movq		mm4, [ecx]		;	/* mm4 = a3 a2 a1 a0 */
+        punpckldq		mm1, mm0		;	/* mm1 = h2 g2 f2 e2 = MM6 */
+        movq		mm5, [16 + ecx]		;	/* mm5 = b3 b2 b1 b0 */
+        movq		mm0, mm4		;	/* mm0 = a3 a2 a1 a0 */
+        movq		[48 + edx], mm6		;	
+        punpcklwd		mm0, mm5		;	/* mm0 = b1 a1 b0 a0 */
+        movq		[32 + edx], mm1		;	
+        punpckhwd		mm4, mm5		;	/* mm4 = b3 a3 b2 a2 */
+        movq		mm5, mm2		;	/* mm5 = c3 c2 c1 c0 */
+        punpcklwd		mm2, mm3		;	/* mm2 = d1 c1 d0 c0 */
+        movq		mm1, mm0		;	/* mm1 = b1 a1 b0 a0 */
+        punpckldq		mm0, mm2		;	/* mm0 = d0 c0 b0 a0 = MM0 */
+        punpckhdq		mm1, mm2		;	/* mm1 = d1 c1 b1 a1 = MM1 */
+        movq		mm2, mm4		;	/* mm2 = b3 a3 b2 a2 */
+        movq		[ecx], mm0		;	
+        punpckhwd		mm5, mm3		;	/* mm5 = d3 c3 d2 c2 */
+        movq		[16 + ecx], mm1		;	
+        punpckhdq		mm4, mm5		;	/* mm4 = d3 c3 b3 a3 = MM3 */
+        punpckldq		mm2, mm5		;	/* mm2 = d2 c2 b2 a2 = MM2 */
+        movq		[48 + ecx], mm4		;	
+        movq		[32 + ecx], mm2		;	
+
+    };
+
+
+}
+
+/* One 8-point forward-DCT butterfly pass over four 16-bit lanes at once.
+ *
+ * Inputs ip0..ip3 are the quadwords at InputData1 + 0/16/32/48 bytes and
+ * ip4..ip7 the quadwords at InputData2 + 0/16/32/48 bytes.  The eight
+ * results are written back over the inputs: [eax], [16+eax], [32+eax],
+ * [48+eax] in InputData1 and [ebx], [16+ebx], [32+ebx], [48+ebx] in
+ * InputData2.  `temp` provides one quadword of scratch storage at [ecx].
+ *
+ * Multiplication idiom used throughout: pmulhw keeps the high 16 bits of
+ * a signed 16x16 product.  For the constants whose word is >= 0x8000 the
+ * operand is effectively (c - 65536), so the multiplicand is added back
+ * afterwards (the "- irot_input" comments below).  "Truncated" marks the
+ * addition of the multiplicand's sign bit (psrlw 15), which rounds
+ * negative results toward zero.
+ *
+ * The routine leaves the CPU in MMX state; it does not execute emms.
+ */
+static __inline void Fdct_mmx( ogg_int16_t *InputData1, ogg_int16_t *InputData2, ogg_int16_t *temp)
+{
+
+    __asm {
+        align 16
+
+
+                mov     eax, InputData1
+                mov     ebx, InputData2
+                mov     ecx, temp
+        movq		mm0, [eax]		;	
+        movq		mm1, [16 + eax]		;	
+        movq		mm2, [48 + eax]		;	
+        movq		mm3, [16 + ebx]		;	
+        movq		mm4, mm0		;	
+        movq		mm5, mm1		;	
+        movq		mm6, mm2		;	
+        movq		mm7, mm3		;	
+				        ;	
+        paddsw		mm0, [48 + ebx]		;	/* mm0 = ip0 + ip7 = is07 */
+        paddsw		mm1, [32 + eax]		;	/* mm1 = ip1 + ip2 = is12 */
+        paddsw		mm2, [ebx]		;	/* mm2 = ip3 + ip4 = is34 */
+        paddsw		mm3, [32 + ebx]		;	/* mm3 = ip5 + ip6 = is56 */
+        psubsw		mm4, [48 + ebx]		;	/* mm4 = ip0 - ip7 = id07 */
+        psubsw		mm5, [32 + eax]		;	/* mm5 = ip1 - ip2 = id12 */
+				        ;	
+        psubsw		mm0, mm2		;	/* mm0 = is07 - is34 */
+				        ;	
+        paddsw		mm2, mm2		;	
+				        ;	
+        psubsw		mm6, [ebx]		;	/* mm6 = ip3 - ip4 = id34 */
+				        ;	
+        paddsw		mm2, mm0		;	/* mm2 = is07 + is34 = is0734 */
+        psubsw		mm1, mm3		;	/* mm1 = is12 - is56 */
+        movq		[ecx], mm0		;	/* Save is07 - is34 to free mm0; */
+        paddsw		mm3, mm3		;	
+        paddsw		mm3, mm1		;	/* mm3 = is12 + is56	= is1256 */
+				        ;	
+        psubsw		mm7, [32 + ebx]		;	/* mm7 = ip5 - ip6 = id56 */
+	        ;	/* ------------------------------------------------------------------- */
+        psubsw		mm5, mm7		;	/* mm5 = id12 - id56 */
+        paddsw		mm7, mm7		;	
+        paddsw		mm7, mm5		;	/* mm7 = id12 + id56 */
+	        ;	/* ------------------------------------------------------------------- */
+        psubsw		mm2, mm3		;	/* mm2 = is0734 - is1256 */
+        paddsw		mm3, mm3		;	
+				        ;	
+        movq		mm0, mm2		;	/* make a copy */
+        paddsw		mm3, mm2		;	/* mm3 = is0734 + is1256 */
+				        ;	
+        pmulhw		mm0, xC4S4		;	/* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */
+        paddw		mm0, mm2		;	/* mm0 = xC4S4 * ( is0734 - is1256 ) */
+        psrlw		mm2, 15		;	
+        paddw		mm0, mm2		;	/* Truncate mm0, now it is op[4] */
+				        ;	
+        movq		mm2, mm3		;	
+        movq		[ebx], mm0		;	/* save ip4, now mm0,mm2 are free */
+				        ;	
+        movq		mm0, mm3		;	
+        pmulhw		mm3, xC4S4		;	/* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */
+				        ;	
+        psrlw		mm2, 15		;	
+        paddw		mm3, mm0		;	/* mm3 = xC4S4 * ( is0734 +is1256 )	 */
+        paddw		mm3, mm2		;	/* Truncate mm3, now it is op[0] */
+				        ;	
+        movq		[eax], mm3		;	
+	        ;	/* ------------------------------------------------------------------- */
+        movq		mm3, [ecx]		;	/* mm3 = irot_input_y */
+        pmulhw		mm3, xC2S6	;	/* mm3 = xC2S6 * irot_input_y - irot_input_y */
+				        ;	
+        movq		mm2, [ecx]		;	
+        movq		mm0, mm2		;	
+				        ;	
+        psrlw		mm2, 15		;	/* mm2 = sign bit of irot_input_y */
+        paddw		mm3, mm0		;	/* mm3 = xC2S6 * irot_input_y */
+				        ;	
+        paddw		mm3, mm2		;	/* Truncated */
+        movq		mm0, mm5		;	
+				        ;	
+        movq		mm2, mm5		;	
+        pmulhw		mm0, xC6S2		;	/* mm0 = xC6S2 * irot_input_x */
+				        ;	
+        psrlw		mm2, 15		;	
+        paddw		mm0, mm2		;	/* Truncated */
+				        ;	
+        paddsw		mm3, mm0		;	/* ip[2] */
+        movq		[32 + eax], mm3		;	/* Save ip2 */
+				        ;	
+        movq		mm0, mm5		;	
+        movq		mm2, mm5		;	
+				        ;	
+        pmulhw		mm5, xC2S6		;	/* mm5 = xC2S6 * irot_input_x - irot_input_x */
+        psrlw		mm2, 15		;	
+				        ;	
+        movq		mm3, [ecx]		;	
+        paddw		mm5, mm0		;	/* mm5 = xC2S6 * irot_input_x */
+				        ;	
+        paddw		mm5, mm2		;	/* Truncated */
+        movq		mm2, mm3		;	
+				        ;	
+        pmulhw		mm3, xC6S2		;	/* mm3 = xC6S2 * irot_input_y */
+        psrlw		mm2, 15		;	
+				        ;	
+        paddw		mm3, mm2		;	/* Truncated */
+        psubsw		mm3, mm5		;	
+				        ;	
+        movq		[32 + ebx], mm3		;	
+	        ;	/* ------------------------------------------------------------------- */
+        movq		mm0, xC4S4		;	
+        movq		mm2, mm1		;	
+        movq		mm3, mm1		;	
+				        ;	
+        pmulhw		mm1, mm0		;	/* mm1 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */
+        psrlw		mm2, 15		;	
+				        ;	
+        paddw		mm1, mm3		;	/* mm1 = xC4S4 * ( is12 - is56 ) */
+        paddw		mm1, mm2		;	/* Truncate mm1, now it is icommon_product1 */
+				        ;	
+        movq		mm2, mm7		;	
+        movq		mm3, mm7		;	
+				        ;	
+        pmulhw		mm7, mm0		;	/* mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */
+        psrlw		mm2, 15		;	
+				        ;	
+        paddw		mm7, mm3		;	/* mm7 = xC4S4 * ( id12 + id56 ) */
+        paddw		mm7, mm2		;	/* Truncate mm7, now it is icommon_product2 */
+	        ;	/* ------------------------------------------------------------------- */
+        pxor		mm0, mm0		;	/* Clear mm0 */
+        psubsw		mm0, mm6		;	/* mm0 = - id34 */
+				        ;	
+        psubsw		mm0, mm7		;	/* mm0 = - ( id34 + idcommon_product2 ) */
+        paddsw		mm6, mm6		;	
+        paddsw		mm6, mm0		;	/* mm6 = id34 - icommon_product2 */
+				        ;	
+        psubsw		mm4, mm1		;	/* mm4 = id07 - icommon_product1 */
+        paddsw		mm1, mm1		;	
+        paddsw		mm1, mm4		;	/* mm1 = id07 + icommon_product1 */
+	        ;	/* ------------------------------------------------------------------- */
+        movq		mm7, xC1S7		;	
+        movq		mm2, mm1		;	
+				        ;	
+        movq		mm3, mm1		;	
+        pmulhw		mm1, mm7		;	/* mm1 = xC1S7 * irot_input_x - irot_input_x */
+				        ;	
+        movq		mm7, xC7S1		;	
+        psrlw		mm2, 15		;	
+				        ;	
+        paddw		mm1, mm3		;	/* mm1 = xC1S7 * irot_input_x */
+        paddw		mm1, mm2		;	/* Truncated */
+				        ;	
+        pmulhw		mm3, mm7		;	/* mm3 = xC7S1 * irot_input_x */
+        paddw		mm3, mm2		;	/* Truncated */
+				        ;	
+        movq		mm5, mm0		;	
+        movq		mm2, mm0		;	
+				        ;	
+        movq		mm7, xC1S7		;	
+        pmulhw		mm0, mm7		;	/* mm0 = xC1S7 * irot_input_y - irot_input_y */
+				        ;	
+        movq		mm7, xC7S1		;	
+        psrlw		mm2, 15		;	
+				        ;	
+        paddw		mm0, mm5		;	/* mm0 = xC1S7 * irot_input_y */
+        paddw		mm0, mm2		;	/* Truncated */
+				        ;	
+        pmulhw		mm5, mm7		;	/* mm5 = xC7S1 * irot_input_y */
+        paddw		mm5, mm2		;	/* Truncated */
+				        ;	
+        psubsw		mm1, mm5		;	/* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = ip1 */
+        paddsw		mm3, mm0		;	/* mm3 = xC7S1 * irot_input_x + xC1S7 * irot_input_y = ip7 (irot_input_y is the negated sum from above) */
+				        ;	
+        movq		[16 + eax], mm1		;	
+        movq		[48 + ebx], mm3		;	
+	        ;	/* ------------------------------------------------------------------- */
+        movq		mm0, xC3S5		;	
+        movq		mm1, xC5S3		;	
+				        ;	
+        movq		mm5, mm6		;	
+        movq		mm7, mm6		;	
+				        ;	
+        movq		mm2, mm4		;	
+        movq		mm3, mm4		;	
+				        ;	
+        pmulhw		mm4, mm0		;	/* mm4 = xC3S5 * irot_input_x - irot_input_x */
+        pmulhw		mm6, mm1		;	/* mm6 = xC5S3 * irot_input_y - irot_input_y */
+				        ;	
+        psrlw		mm2, 15		;	
+        psrlw		mm5, 15		;	
+				        ;	
+        paddw		mm4, mm3		;	/* mm4 = xC3S5 * irot_input_x */
+        paddw		mm6, mm7		;	/* mm6 = xC5S3 * irot_input_y */
+				        ;	
+        paddw		mm4, mm2		;	/* Truncated */
+        paddw		mm6, mm5		;	/* Truncated */
+				        ;	
+        psubsw		mm4, mm6		;	/* ip3 */
+        movq		[48 + eax], mm4		;	
+				        ;	
+        movq		mm4, mm3		;	
+        movq		mm6, mm7		;	
+				        ;	
+        pmulhw		mm3, mm1		;	/* mm3 = xC5S3 * irot_input_x - irot_input_x */
+        pmulhw		mm7, mm0		;	/* mm7 = xC3S5 * irot_input_y - irot_input_y */
+				        ;	
+        paddw		mm4, mm2		;	
+        paddw		mm6, mm5		;	
+				        ;	
+        paddw		mm3, mm4		;	/* mm3 = xC5S3 * irot_input_x */
+        paddw		mm7, mm6		;	/* mm7 = xC3S5 * irot_input_y */
+				        ;	
+        paddw		mm3, mm7		;	/* ip5 */
+        movq		[16 + ebx], mm3		;	
+
+};
+
+}
+
+
+/* MMX forward DCT of one 8x8 block of 16-bit values.
+ *
+ * InputData  : 64 coefficients, rows 8 words (16 bytes) apart.
+ * OutputData : receives the 64 results, same layout.
+ *
+ * The work is done as four Transpose_mmx + Fdct_mmx pairs.  The first two
+ * read InputData (word offsets 0/4 and 32/36) and write OutputData; the
+ * last two run in place on OutputData.  emms is executed before returning
+ * so the caller may use x87 floating point afterwards.
+ */
+static void fdct_short__mmx ( ogg_int16_t *InputData, ogg_int16_t *OutputData)
+{
+
+  /* Scratch storage handed to Fdct_mmx.  It has automatic storage so that
+     each call owns its own copy and the function is safe to call from
+     several threads at once; nothing in it is needed across calls. */
+  ogg_int16_t tmp[32];
+  /* Advance to the next 16-byte boundary: (16 - addr) & 15 is 0 when tmp is
+     already aligned and at most 15 otherwise, which stays well inside the
+     64-byte buffer for the single quadword Fdct_mmx stores there. */
+  ogg_int16_t* align_tmp = (ogg_int16_t*)((unsigned char*)tmp + ((16 - (int)tmp) & 15));
+
+
+  Transpose_mmx(InputData, OutputData, InputData + 4, OutputData + 4);
+  Fdct_mmx(OutputData, OutputData + 4, align_tmp);
+
+  Transpose_mmx(InputData + 32, OutputData + 32, InputData + 36, OutputData + 36);
+  Fdct_mmx(OutputData+32, OutputData + 36, align_tmp);
+
+  Transpose_mmx(OutputData, OutputData, OutputData + 32, OutputData + 32);
+  Fdct_mmx(OutputData, OutputData + 32, align_tmp);
+
+  Transpose_mmx(OutputData + 4, OutputData + 4, OutputData + 36, OutputData + 36);
+  Fdct_mmx(OutputData + 4, OutputData + 36, align_tmp);
+
+  __asm     emms
+
+}
+
+/* Install the MMX forward DCT (fdct_short__mmx) into funcs->fdct_short.
+   No other entry of *funcs is touched. */
+void dsp_mmx_fdct_init(DspFunctions *funcs)
+{
+  TH_DEBUG("enabling accelerated x86_32 mmx fdct function.\n");
+  funcs->fdct_short = fdct_short__mmx;
+}

Added: trunk/theora/lib/x86_32_vs/recon_mmx.c
===================================================================
--- trunk/theora/lib/x86_32_vs/recon_mmx.c	2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/lib/x86_32_vs/recon_mmx.c	2006-05-28 18:18:26 UTC (rev 11451)
@@ -0,0 +1,198 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: reconstruct.c,v 1.6 2003/12/03 08:59:41 arc Exp $
+
+ ********************************************************************/
+
+#include "codec_internal.h"
+
+
+/* 0x80 in each of the eight byte lanes; recon_intra8x8__mmx XORs it into
+   packed signed bytes to add a bias of 128. */
+static const unsigned __int64 V128 = 0x8080808080808080LL;
+
+/* Copy an 8x8 block of bytes from src to dest.
+ *
+ * Eight rows of eight bytes are moved; both src and dest advance by
+ * `stride` bytes from one row to the next.  The rows are handled as two
+ * groups of four (row offsets 0, stride, 2*stride, 3*stride).
+ * The routine leaves the CPU in MMX state; it does not execute emms.
+ */
+static void copy8x8__mmx (unsigned char *src,
+	                unsigned char *dest,
+	                unsigned int stride)
+{
+
+    //Is this even the fastest way to do this?
+    __asm {
+        align 16        
+
+        mov         eax, src
+        mov         ebx, dest
+        mov         ecx, stride
+
+        lea		    edi, [ecx + ecx * 2]    /* edi = 3 * stride */
+        movq		mm0, [eax]
+        movq		mm1, [eax + ecx]
+        movq		mm2, [eax + ecx * 2]
+        movq		mm3, [eax + edi]
+        lea		    eax, [eax + ecx * 4]    /* src += 4 rows */
+        movq		[ebx], mm0
+        movq		[ebx + ecx], mm1
+        movq		[ebx + ecx * 2], mm2
+        movq		[ebx + edi], mm3
+        lea		    ebx, [ebx + ecx * 4]    /* dest += 4 rows */
+        movq		mm0, [eax]
+        movq		mm1, [eax + ecx]
+        movq		mm2, [eax + ecx * 2]
+        movq		mm3, [eax + edi]
+        movq		[ebx], mm0
+        movq		[ebx + ecx], mm1
+        movq		[ebx + ecx * 2], mm2
+        movq		[ebx + edi], mm3
+
+    };
+
+}
+
+/* Reconstruct an intra 8x8 block from 16-bit residuals.
+ *
+ * For each of 8 rows: the 8 words at ChangePtr are packed to bytes with
+ * signed saturation, XORed with V128 (0x80 per byte, i.e. biased by 128)
+ * and the 8 resulting bytes are stored at ReconPtr.  ChangePtr advances
+ * 16 bytes per row and ReconPtr advances LineStep bytes per row; the loop
+ * ends when ChangePtr has advanced 128 bytes.
+ * The routine leaves the CPU in MMX state; it does not execute emms.
+ */
+static void recon_intra8x8__mmx (unsigned char *ReconPtr, ogg_int16_t *ChangePtr,
+		      ogg_uint32_t LineStep)
+{
+
+    __asm {
+        align 16
+
+        mov         eax, ReconPtr
+        mov         ebx, ChangePtr
+        mov         ecx, LineStep
+
+        movq		mm0, V128
+
+        lea		    edi, [128 + ebx]    /* edi = end of the 64 residual words */
+    loop_start:	
+        movq		mm2, [ebx]
+
+        packsswb	mm2, [8 + ebx]    /* 8 words -> 8 signed-saturated bytes */
+        por		    mm0, mm0    /* leaves mm0 unchanged */
+        pxor		mm2, mm0    /* flip the top bit of every byte: adds 128 */
+        lea		    ebx, [16 + ebx]
+        cmp		    ebx, edi    /* sets the carry flag tested by jc below; movq and lea do not alter flags */
+
+        movq		[eax], mm2
+
+
+
+        lea		    eax, [eax + ecx]
+        jc		    loop_start    /* loop while ebx < edi (unsigned) */
+
+
+    };
+    
+}
+
+
+
+
+
+/* Reconstruct an inter 8x8 block: reference pixels plus 16-bit residuals.
+ *
+ * For each of 8 rows: 8 bytes are read from RefPtr and zero-extended to
+ * words, the 8 words at ChangePtr are added with signed saturation, and
+ * the sums are packed to bytes with unsigned saturation and stored at
+ * ReconPtr.  RefPtr and ReconPtr both advance LineStep bytes per row;
+ * ChangePtr advances 16 bytes per row.
+ * The routine leaves the CPU in MMX state; it does not execute emms.
+ */
+static void recon_inter8x8__mmx (unsigned char *ReconPtr, unsigned char *RefPtr,
+		      ogg_int16_t *ChangePtr, ogg_uint32_t LineStep)
+{
+
+    __asm {
+
+        align 16
+
+        mov         eax, ReconPtr
+        mov         ebx, ChangePtr
+        mov         ecx, LineStep
+        mov         edx, RefPtr
+    
+        pxor		mm0, mm0    /* zero, used to widen bytes to words */
+        lea		    edi, [128 + ebx]    /* edi = end of the 64 residual words */
+
+    loop_start:
+        movq		mm2, [edx]
+
+        movq		mm4, [ebx]
+        movq		mm3, mm2
+        movq		mm5, [8 + ebx]
+        punpcklbw	mm2, mm0
+        paddsw		mm2, mm4
+        punpckhbw	mm3, mm0
+        paddsw		mm3, mm5
+        add		    edx, ecx
+        packuswb	mm2, mm3    /* clamp the 8 sums to 0..255 */
+        lea		    ebx, [16 + ebx]
+        cmp		    ebx, edi    /* sets the carry flag tested by jc below; movq and lea do not alter flags */
+
+        movq		[eax], mm2
+
+        lea		    eax, [eax + ecx]
+        jc		    loop_start    /* loop while ebx < edi (unsigned) */
+
+    };
+}
+
+
+
+
+/* Reconstruct an inter 8x8 block from the average of two references.
+ *
+ * For each of 8 rows: 8 bytes from RefPtr1 and 8 bytes from RefPtr2 are
+ * zero-extended to words, summed and shifted right by one (average,
+ * rounded down); the 8 words at ChangePtr are then added with wrapping
+ * paddw, and the result is packed to bytes with unsigned saturation and
+ * stored at ReconPtr.  ReconPtr, RefPtr1 and RefPtr2 all advance LineStep
+ * bytes per row; ChangePtr advances 16 bytes per row.
+ * The routine leaves the CPU in MMX state; it does not execute emms.
+ */
+static void recon_inter8x8_half__mmx (unsigned char *ReconPtr, unsigned char *RefPtr1,
+		           unsigned char *RefPtr2, ogg_int16_t *ChangePtr,
+			   ogg_uint32_t LineStep)
+{
+    __asm {
+        align 16
+
+        mov     eax, ReconPtr
+        mov     ebx, ChangePtr
+        mov     ecx, RefPtr1
+        mov     edx, RefPtr2
+                
+        pxor		mm0, mm0    /* zero, used to widen bytes to words */
+        lea		edi, [128 + ebx]    /* edi = end of the 64 residual words */
+
+    loop_start:
+        movq		mm2, [ecx]
+        movq		mm4, [edx]
+
+        movq		mm3, mm2
+        punpcklbw		mm2, mm0
+        movq		mm5, mm4
+        movq		mm6, [ebx]
+        punpckhbw		mm3, mm0
+        movq		mm7, [8 + ebx]
+        punpcklbw		mm4, mm0
+        punpckhbw		mm5, mm0
+        paddw		mm2, mm4
+        paddw		mm3, mm5
+        psrlw		mm2, 1    /* (ref1 + ref2) >> 1 */
+        psrlw		mm3, 1
+        paddw		mm2, mm6
+        paddw		mm3, mm7
+        lea		ebx, [16 + ebx]
+        packuswb		mm2, mm3    /* clamp the 8 results to 0..255 */
+        add		ecx, LineStep
+        add		edx, LineStep
+        movq		[eax], mm2
+        add		eax, LineStep
+        cmp		ebx, edi
+        jc		loop_start    /* loop while ebx < edi (unsigned) */
+
+    };
+
+}
+
+
+
+
+/* Point the block copy and reconstruction entries of *funcs at the MMX
+   implementations defined in this file. */
+void dsp_mmx_recon_init(DspFunctions *funcs)
+{
+  TH_DEBUG("enabling accelerated x86_32 mmx recon functions.\n");
+
+  /* reconstruction from residuals */
+  funcs->recon_intra8x8      = recon_intra8x8__mmx;
+  funcs->recon_inter8x8      = recon_inter8x8__mmx;
+  funcs->recon_inter8x8_half = recon_inter8x8_half__mmx;
+
+  /* plain block copy */
+  funcs->copy8x8             = copy8x8__mmx;
+}
+

Added: trunk/theora/win32/VS2005/dump_video/dump_video.vcproj
===================================================================
--- trunk/theora/win32/VS2005/dump_video/dump_video.vcproj	2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/win32/VS2005/dump_video/dump_video.vcproj	2006-05-28 18:18:26 UTC (rev 11451)
@@ -0,0 +1,215 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="8.00"
+	Name="dump_video"
+	ProjectGUID="{1A8CA99D-B6C7-48CB-B263-6CECDADF5FBF}"
+	RootNamespace="dump_video"
+	Keyword="Win32Proj"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include"
+				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
+				MinimalRebuild="true"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="1"
+				UsePrecompiledHeader="0"
+				WarningLevel="3"
+				Detect64BitPortabilityProblems="true"
+				DebugInformationFormat="4"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				LinkIncremental="2"
+				GenerateDebugInformation="true"
+				SubSystem="1"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCWebDeploymentTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="1"
+			CharacterSet="1"
+			WholeProgramOptimization="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include"
+				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
+				RuntimeLibrary="0"
+				UsePrecompiledHeader="0"
+				WarningLevel="3"
+				Detect64BitPortabilityProblems="true"
+				DebugInformationFormat="3"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				LinkIncremental="1"
+				GenerateDebugInformation="true"
+				SubSystem="1"
+				OptimizeReferences="2"
+				EnableCOMDATFolding="2"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCWebDeploymentTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
+			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
+			>
+			<File
+				RelativePath="..\..\..\examples\dump_video.c"
+				>
+			</File>
+			<File
+				RelativePath="..\..\getopt.c"
+				>
+			</File>
+			<File
+				RelativePath="..\..\getopt1.c"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;inc;xsd"
+			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
+			>
+			<File
+				RelativePath="..\..\..\examples\getopt.h"
+				>
+			</File>
+			<File
+				RelativePath="..\..\getopt_win.h"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Resource Files"
+			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
+			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
+			>
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>

Added: trunk/theora/win32/VS2005/encoder_example/encoder_example.vcproj
===================================================================
--- trunk/theora/win32/VS2005/encoder_example/encoder_example.vcproj	2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/win32/VS2005/encoder_example/encoder_example.vcproj	2006-05-28 18:18:26 UTC (rev 11451)
@@ -0,0 +1,211 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="8.00"
+	Name="encoder_example"
+	ProjectGUID="{AD710263-EBFA-4388-BAA9-AD73C32AFF26}"
+	RootNamespace="encoder_example"
+	Keyword="Win32Proj"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include;..\..\..\..\libvorbis\include"
+				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
+				MinimalRebuild="true"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="1"
+				UsePrecompiledHeader="0"
+				WarningLevel="3"
+				Detect64BitPortabilityProblems="true"
+				DebugInformationFormat="4"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				LinkIncremental="2"
+				GenerateDebugInformation="true"
+				SubSystem="1"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCWebDeploymentTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+			IntermediateDirectory="$(ConfigurationName)"
+			ConfigurationType="1"
+			CharacterSet="1"
+			WholeProgramOptimization="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				InlineFunctionExpansion="2"
+				EnableIntrinsicFunctions="true"
+				FavorSizeOrSpeed="1"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include;..\..\..\..\libvorbis\include"
+				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
+				RuntimeLibrary="0"
+				UsePrecompiledHeader="0"
+				WarningLevel="4"
+				Detect64BitPortabilityProblems="true"
+				DebugInformationFormat="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				LinkIncremental="1"
+				GenerateDebugInformation="false"
+				SubSystem="1"
+				OptimizeReferences="2"
+				EnableCOMDATFolding="2"
+				OptimizeForWindows98="1"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCWebDeploymentTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
+			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
+			>
+			<File
+				RelativePath="..\..\..\examples\encoder_example.c"
+				>
+			</File>
+			<File
+				RelativePath="..\..\getopt.c"
+				>
+			</File>
+			<File
+				RelativePath="..\..\getopt1.c"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;inc;xsd"
+			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
+			>
+		</Filter>
+		<Filter
+			Name="Resource Files"
+			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
+			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
+			>
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>

Modified: trunk/theora/win32/VS2005/libtheora/libtheora.vcproj
===================================================================
--- trunk/theora/win32/VS2005/libtheora/libtheora.vcproj	2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/win32/VS2005/libtheora/libtheora.vcproj	2006-05-28 18:18:26 UTC (rev 11451)
@@ -41,8 +41,8 @@
 			<Tool
 				Name="VCCLCompilerTool"
 				Optimization="0"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include"
-				PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include;..\..\..\lib"
+				PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS; THEORA_USE_ASM"
 				MinimalRebuild="true"
 				BasicRuntimeChecks="3"
 				RuntimeLibrary="1"
@@ -122,12 +122,12 @@
 			/>
 			<Tool
 				Name="VCCLCompilerTool"
-				Optimization="3"
+				Optimization="2"
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include"
-				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include;..\..\..\lib"
+				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS; THEORA_USE_ASM"
 				StringPooling="true"
 				ExceptionHandling="0"
 				RuntimeLibrary="0"
@@ -135,7 +135,7 @@
 				UsePrecompiledHeader="0"
 				WarningLevel="4"
 				Detect64BitPortabilityProblems="true"
-				DebugInformationFormat="3"
+				DebugInformationFormat="0"
 				CompileAs="1"
 				DisableSpecificWarnings="4244;4267;4057;4100;4245"
 			/>
@@ -153,7 +153,7 @@
 				OutputFile="$(OutDir)/libtheora.dll"
 				LinkIncremental="1"
 				ModuleDefinitionFile="..\..\libtheora.def"
-				GenerateDebugInformation="true"
+				GenerateDebugInformation="false"
 				SubSystem="2"
 				OptimizeReferences="2"
 				EnableCOMDATFolding="2"
@@ -216,8 +216,8 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include"
-				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include;..\..\..\lib"
+				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS; THEORA_USE_ASM"
 				StringPooling="true"
 				ExceptionHandling="0"
 				RuntimeLibrary="0"
@@ -307,8 +307,8 @@
 				InlineFunctionExpansion="2"
 				EnableIntrinsicFunctions="true"
 				FavorSizeOrSpeed="1"
-				AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include"
-				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS"
+				AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include;..\..\..\lib"
+				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS; THEORA_USE_ASM"
 				StringPooling="true"
 				ExceptionHandling="0"
 				RuntimeLibrary="0"
@@ -410,6 +410,10 @@
 				>
 			</File>
 			<File
+				RelativePath="..\..\..\lib\x86_32_vs\dsp_mmx.c"
+				>
+			</File>
+			<File
 				RelativePath="..\..\..\lib\encode.c"
 				>
 			</File>
@@ -418,6 +422,10 @@
 				>
 			</File>
 			<File
+				RelativePath="..\..\..\lib\x86_32_vs\fdct_mmx.c"
+				>
+			</File>
+			<File
 				RelativePath="..\..\..\lib\frarray.c"
 				>
 			</File>
@@ -458,6 +466,10 @@
 				>
 			</File>
 			<File
+				RelativePath="..\..\..\lib\x86_32_vs\recon_mmx.c"
+				>
+			</File>
+			<File
 				RelativePath="..\..\..\lib\reconstruct.c"
 				>
 			</File>

Added: trunk/theora/win32/getopt.c
===================================================================
--- trunk/theora/win32/getopt.c	2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/win32/getopt.c	2006-05-28 18:18:26 UTC (rev 11451)
@@ -0,0 +1,1047 @@
+/* Getopt for GNU.
+   NOTE: getopt is now part of the C library, so if you don't know what
+   "Keep this file name-space clean" means, talk to drepper at gnu.org
+   before changing it!
+
+   Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99
+   	Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
+   Ditto for AIX 3.2 and <stdlib.h>.  */
+#ifndef _NO_PROTO
+# define _NO_PROTO
+#endif
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#if !defined __STDC__ || !__STDC__
+/* This is a separate conditional since some stdc systems
+   reject `defined (const)'.  */
+# ifndef const
+#  define const
+# endif
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+   actually compiling the library itself.  This code is part of the GNU C
+   Library, but also included in many other GNU distributions.  Compiling
+   and linking in this code is a waste when using the GNU C library
+   (especially if it is a shared library).  Rather than having every GNU
+   program understand `configure --with-gnu-libc' and omit the object files,
+   it is simpler to just do this in the source for each such file.  */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
+# include <gnu-versions.h>
+# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+#  define ELIDE_CODE
+# endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
+/* This needs to come after some library #include
+   to get __GNU_LIBRARY__ defined.  */
+#ifdef	__GNU_LIBRARY__
+/* Don't include stdlib.h for non-GNU C libraries because some of them
+   contain conflicting prototypes for getopt.  */
+# include <stdlib.h>
+# include <unistd.h>
+#endif	/* GNU C library.  */
+
+#ifdef VMS
+# include <unixlib.h>
+# if HAVE_STRING_H - 0
+#  include <string.h>
+# endif
+#endif
+
+#ifndef _
+/* This is for other GNU distributions with internationalized messages.
+   When compiling libc, the _ macro is predefined.  */
+# ifdef HAVE_LIBINTL_H
+#  include <libintl.h>
+#  define _(msgid)	gettext (msgid)
+# else
+#  define _(msgid)	(msgid)
+# endif
+#endif
+
+/* This version of `getopt' appears to the caller like standard Unix `getopt'
+   but it behaves differently for the user, since it allows the user
+   to intersperse the options with the other arguments.
+
+   As `getopt' works, it permutes the elements of ARGV so that,
+   when it is done, all the options precede everything else.  Thus
+   all application programs are extended to handle flexible argument order.
+
+   Setting the environment variable POSIXLY_CORRECT disables permutation.
+   Then the behavior is completely standard.
+
+   GNU application programs can use a third alternative mode in which
+   they can distinguish the relative order of options and other arguments.  */
+
+#include "getopt_win.h"
+
+/* For communication from `getopt' to the caller.
+   When `getopt' finds an option that takes an argument,
+   the argument value is returned here.
+   Also, when `ordering' is RETURN_IN_ORDER,
+   each non-option ARGV-element is returned here.  */
+
+char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+   This is used for communication to and from the caller
+   and for communication between successive calls to `getopt'.
+
+   On entry to `getopt', zero means this is the first call; initialize.
+
+   When `getopt' returns -1, this is the index of the first of the
+   non-option elements that the caller should itself scan.
+
+   Otherwise, `optind' communicates from one call to the next
+   how much of ARGV has been scanned so far.  */
+
+/* 1003.2 says this must be 1 before any call.  */
+int optind = 1;
+
+/* Formerly, initialization of getopt depended on optind==0, which
+   causes problems with re-calling getopt as programs generally don't
+   know that. */
+
+int __getopt_initialized;
+
+/* The next char to be scanned in the option-element
+   in which the last option character we returned was found.
+   This allows us to pick up the scan where we left off.
+
+   If this is zero, or a null string, it means resume the scan
+   by advancing to the next ARGV-element.  */
+
+static char *nextchar;
+
+/* Callers store zero here to inhibit the error message
+   for unrecognized options.  */
+
+int opterr = 1;
+
+/* Set to an option character which was unrecognized.
+   This must be initialized on some systems to avoid linking in the
+   system's own getopt implementation.  */
+
+int optopt = '?';
+
+/* Describe how to deal with options that follow non-option ARGV-elements.
+
+   If the caller did not specify anything,
+   the default is REQUIRE_ORDER if the environment variable
+   POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+   REQUIRE_ORDER means don't recognize them as options;
+   stop option processing when the first non-option is seen.
+   This is what Unix does.
+   This mode of operation is selected by either setting the environment
+   variable POSIXLY_CORRECT, or using `+' as the first character
+   of the list of option characters.
+
+   PERMUTE is the default.  We permute the contents of ARGV as we scan,
+   so that eventually all the non-options are at the end.  This allows options
+   to be given in any order, even with programs that were not written to
+   expect this.
+
+   RETURN_IN_ORDER is an option available to programs that were written
+   to expect options and other ARGV-elements in any order and that care about
+   the ordering of the two.  We describe each non-option ARGV-element
+   as if it were the argument of an option with character code 1.
+   Using `-' as the first character of the list of option characters
+   selects this mode of operation.
+
+   The special argument `--' forces an end of option-scanning regardless
+   of the value of `ordering'.  In the case of RETURN_IN_ORDER, only
+   `--' can cause `getopt' to return -1 with `optind' != ARGC.  */
+
+static enum
+{
+  REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+} ordering;
+
+/* Value of POSIXLY_CORRECT environment variable.  */
+static char *posixly_correct;
+
+#ifdef	__GNU_LIBRARY__
+/* We want to avoid inclusion of string.h with non-GNU libraries
+   because there are many ways it can cause trouble.
+   On some systems, it contains special magic macros that don't work
+   in GCC.  */
+# include <string.h>
+# define my_index	strchr
+#else
+
+#include <string.h>
+
+/* Avoid depending on library functions or files
+   whose names are inconsistent.  */
+
+#ifndef getenv
+extern char *getenv ();
+#endif
+
+static char *	/* Local stand-in for strchr, used when not building against
+		   the GNU C library: return a pointer to the first character
+		   of STR that equals CHR, or 0 if there is none.  The scan
+		   stops at the terminating NUL without testing it, so a CHR
+		   of 0 yields 0.  */
+my_index (str, chr)
+     const char *str;
+     int chr;
+{
+  while (*str)
+    {
+      if (*str == chr)
+	return (char *) str;	/* Cast drops the const qualifier for the caller.  */
+      str++;
+    }
+  return 0;
+}
+
+/* If using GCC, we can safely declare strlen this way.
+   If not using GCC, it is ok not to declare it.  */
+#ifdef __GNUC__
+/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h.
+   That was relevant to code that was here before.  */
+# if (!defined __STDC__ || !__STDC__) && !defined strlen
+/* gcc with -traditional declares the built-in strlen to return int,
+   and has done so at least since version 2.4.5. -- rms.  */
+extern int strlen (const char *);
+# endif /* not __STDC__ */
+#endif /* __GNUC__ */
+
+#endif /* not __GNU_LIBRARY__ */
+
+/* Handle permutation of arguments.  */
+
+/* Describe the part of ARGV that contains non-options that have
+   been skipped.  `first_nonopt' is the index in ARGV of the first of them;
+   `last_nonopt' is the index after the last of them.  */
+
+static int first_nonopt;
+static int last_nonopt;
+
+#ifdef _LIBC
+/* Bash 2.0 gives us an environment variable containing flags
+   indicating ARGV elements that should not be considered arguments.  */
+
+/* Defined in getopt_init.c  */
+extern char *__getopt_nonoption_flags;
+
+static int nonoption_flags_max_len;
+static int nonoption_flags_len;
+
+static int original_argc;
+static char *const *original_argv;
+
+/* To make sure the environment variable bash 2.0 puts in the environment
+   is valid for the getopt call, we must make sure that the ARGV passed
+   to getopt is the one passed to the process.  This function records
+   ARGC and ARGV in `original_argc' and `original_argv' so that
+   `_getopt_initialize' can compare them with its own arguments.  */
+static void
+__attribute__ ((unused))
+store_args_and_env (int argc, char *const *argv)
+{
+  /* XXX This is no good solution.  We should rather copy the args so
+     that we can compare them later.  But we must not use malloc(3).  */
+  original_argc = argc;
+  original_argv = argv;
+}
+# ifdef text_set_element
+text_set_element (__libc_subinit, store_args_and_env);
+# endif /* text_set_element */
+
+# define SWAP_FLAGS(ch1, ch2) \
+  if (nonoption_flags_len > 0)						      \
+    {									      \
+      char __tmp = __getopt_nonoption_flags[ch1];			      \
+      __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2];	      \
+      __getopt_nonoption_flags[ch2] = __tmp;				      \
+    }
+#else	/* !_LIBC */
+# define SWAP_FLAGS(ch1, ch2)
+#endif	/* _LIBC */
+
+/* Exchange two adjacent subsequences of ARGV.
+   One subsequence is elements [first_nonopt,last_nonopt)
+   which contains all the non-options that have been skipped so far.
+   The other is elements [last_nonopt,optind), which contains all
+   the options processed since those non-options were skipped.
+
+   `first_nonopt' and `last_nonopt' are relocated so that they describe
+   the new indices of the non-options in ARGV after they are moved.
+
+   ARGV is rearranged in place by swapping elements pairwise; nothing is
+   returned.  `optind' is read but not modified.  */
+
+#if defined __STDC__ && __STDC__
+static void exchange (char **);
+#endif
+
+static void
+exchange (argv)
+     char **argv;
+{
+  int bottom = first_nonopt;	/* Start of the skipped non-options.  */
+  int middle = last_nonopt;	/* Boundary between the two segments.  */
+  int top = optind;		/* One past the last processed option.  */
+  char *tem;
+
+  /* Exchange the shorter segment with the far end of the longer segment.
+     That puts the shorter segment into the right place.
+     It leaves the longer segment in the right place overall,
+     but it consists of two parts that need to be swapped next.  */
+
+#ifdef _LIBC
+  /* First make sure the handling of the `__getopt_nonoption_flags'
+     string can work normally.  Our top argument must be in the range
+     of the string.  */
+  if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len)
+    {
+      /* We must extend the array.  The user plays games with us and
+	 presents new arguments.  */
+      char *new_str = malloc (top + 1);
+      if (new_str == NULL)
+	nonoption_flags_len = nonoption_flags_max_len = 0;
+      else
+	{
+	  memset (__mempcpy (new_str, __getopt_nonoption_flags,
+			     nonoption_flags_max_len),
+		  '\0', top + 1 - nonoption_flags_max_len);
+	  nonoption_flags_max_len = top + 1;
+	  __getopt_nonoption_flags = new_str;
+	}
+    }
+#endif
+
+  while (top > middle && middle > bottom)	/* Until one segment is empty.  */
+    {
+      if (top - middle > middle - bottom)
+	{
+	  /* Bottom segment is the short one.  */
+	  int len = middle - bottom;
+	  register int i;
+
+	  /* Swap it with the top part of the top segment.  */
+	  for (i = 0; i < len; i++)
+	    {
+	      tem = argv[bottom + i];
+	      argv[bottom + i] = argv[top - (middle - bottom) + i];
+	      argv[top - (middle - bottom) + i] = tem;
+	      SWAP_FLAGS (bottom + i, top - (middle - bottom) + i);
+	    }
+	  /* Exclude the moved bottom segment from further swapping.  */
+	  top -= len;
+	}
+      else
+	{
+	  /* Top segment is the short one.  */
+	  int len = top - middle;
+	  register int i;
+
+	  /* Swap it with the bottom part of the bottom segment.  */
+	  for (i = 0; i < len; i++)
+	    {
+	      tem = argv[bottom + i];
+	      argv[bottom + i] = argv[middle + i];
+	      argv[middle + i] = tem;
+	      SWAP_FLAGS (bottom + i, middle + i);
+	    }
+	  /* Exclude the moved top segment from further swapping.  */
+	  bottom += len;
+	}
+    }
+
+  /* Update records for the slots the non-options now occupy.  */
+
+  first_nonopt += (optind - last_nonopt);
+  last_nonopt = optind;
+}
+
+/* Initialize the internal data when the first call is made.
+
+   Sets `first_nonopt' and `last_nonopt' to `optind', clears `nextchar',
+   reads POSIXLY_CORRECT from the environment into `posixly_correct', and
+   chooses `ordering' from a leading `-' or `+' in OPTSTRING or, failing
+   that, from whether POSIXLY_CORRECT is set.
+
+   Returns OPTSTRING with a leading `-' or `+' skipped.  ARGC and ARGV are
+   only examined when this file is compiled with _LIBC defined.  */
+
+#if defined __STDC__ && __STDC__
+static const char *_getopt_initialize (int, char *const *, const char *);
+#endif
+static const char *
+_getopt_initialize (argc, argv, optstring)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+{
+  /* Start processing options with ARGV-element 1 (since ARGV-element 0
+     is the program name); the sequence of previously skipped
+     non-option ARGV-elements is empty.  */
+
+  first_nonopt = last_nonopt = optind;
+
+  nextchar = NULL;
+
+  posixly_correct = getenv ("POSIXLY_CORRECT");
+
+  /* Determine how to handle the ordering of options and nonoptions.  */
+
+  if (optstring[0] == '-')
+    {
+      ordering = RETURN_IN_ORDER;
+      ++optstring;
+    }
+  else if (optstring[0] == '+')
+    {
+      ordering = REQUIRE_ORDER;
+      ++optstring;
+    }
+  else if (posixly_correct != NULL)
+    ordering = REQUIRE_ORDER;
+  else
+    ordering = PERMUTE;
+
+#ifdef _LIBC
+  if (posixly_correct == NULL
+      && argc == original_argc && argv == original_argv)
+    {
+      if (nonoption_flags_max_len == 0)
+	{
+	  if (__getopt_nonoption_flags == NULL
+	      || __getopt_nonoption_flags[0] == '\0')
+	    nonoption_flags_max_len = -1;
+	  else
+	    {
+	      const char *orig_str = __getopt_nonoption_flags;
+	      int len = nonoption_flags_max_len = strlen (orig_str);
+	      if (nonoption_flags_max_len < argc)
+		nonoption_flags_max_len = argc;
+	      __getopt_nonoption_flags =
+		(char *) malloc (nonoption_flags_max_len);
+	      if (__getopt_nonoption_flags == NULL)
+		nonoption_flags_max_len = -1;
+	      else
+		memset (__mempcpy (__getopt_nonoption_flags, orig_str, len),
+			'\0', nonoption_flags_max_len - len);
+	    }
+	}
+      nonoption_flags_len = nonoption_flags_max_len;
+    }
+  else
+    nonoption_flags_len = 0;
+#endif
+
+  return optstring;	/* Past any `-' or `+' ordering prefix.  */
+}
+
+/* Scan elements of ARGV (whose length is ARGC) for option characters
+   given in OPTSTRING.
+
+   If an element of ARGV starts with '-', and is not exactly "-" or "--",
+   then it is an option element.  The characters of this element
+   (aside from the initial '-') are option characters.  If `getopt'
+   is called repeatedly, it returns successively each of the option characters
+   from each of the option elements.
+
+   If `getopt' finds another option character, it returns that character,
+   updating `optind' and `nextchar' so that the next call to `getopt' can
+   resume the scan with the following option character or ARGV-element.
+
+   If there are no more option characters, `getopt' returns -1.
+   Then `optind' is the index in ARGV of the first ARGV-element
+   that is not an option.  (The ARGV-elements have been permuted
+   so that those that are not options now come last.)
+
+   OPTSTRING is a string containing the legitimate option characters.
+   If an option character is seen that is not listed in OPTSTRING,
+   return '?' after printing an error message.  If you set `opterr' to
+   zero, the error message is suppressed but we still return '?'.
+
+   If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+   so the following text in the same ARGV-element, or the text of the following
+   ARGV-element, is returned in `optarg'.  Two colons mean an option that
+   wants an optional arg; if there is text in the current ARGV-element,
+   it is returned in `optarg', otherwise `optarg' is set to zero.
+
+   If OPTSTRING starts with `-' or `+', it requests different methods of
+   handling the non-option ARGV-elements.
+   See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+   Long-named options begin with `--' instead of `-'.
+   Their names may be abbreviated as long as the abbreviation is unique
+   or is an exact match for some defined option.  If they have an
+   argument, it follows the option name in the same ARGV-element, separated
+   from the option name by a `=', or else in the next ARGV-element.
+   When `getopt' finds a long-named option, it returns 0 if that option's
+   `flag' field is nonzero, the value of the option's `val' field
+   if the `flag' field is zero.
+
+   The elements of ARGV aren't really const, because we permute them.
+   But we pretend they're const in the prototype to be compatible
+   with other systems.
+
+   LONGOPTS is a vector of `struct option' terminated by an
+   element containing a name which is zero.
+
+   LONGIND returns the index in LONGOPT of the long-named option found.
+   It is only valid when a long-named option has been found by the most
+   recent call.
+
+   If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+   long-named options.  */
+
+/* Scanner behind `getopt'; the comment above gives the full contract.
+   Returns the option character found (for a long option, 0 when its
+   `flag' is set, else its `val'), 1 for a non-option ARGV-element that is
+   not skipped or permuted, `?' (or `:' when OPTSTRING starts with `:' and
+   an argument is missing) on error, and -1 when scanning is finished.  */
+int
+_getopt_internal (argc, argv, optstring, longopts, longind, long_only)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+     const struct option *longopts;
+     int *longind;
+     int long_only;
+{
+  optarg = NULL;
+
+  /* With no ARGV-elements at all there is nothing to scan; looking at
+     ARGV[optind] below would read past the end of the vector.  */
+  if (argc < 1)
+    return -1;
+
+  if (optind == 0 || !__getopt_initialized)
+    {
+      if (optind == 0)
+	optind = 1;	/* Don't scan ARGV[0], the program name.  */
+      optstring = _getopt_initialize (argc, argv, optstring);
+      __getopt_initialized = 1;
+    }
+  else if (optstring[0] == '-' || optstring[0] == '+')
+    /* _getopt_initialize skips the ordering prefix only on the first
+       call; skip it on later calls too, so that it is never matched as
+       an option character and a following `:' is still seen at
+       optstring[0].  */
+    optstring++;
+
+  /* Test whether ARGV[optind] points to a non-option argument.
+     Either it does not have option syntax, or there is an environment flag
+     from the shell indicating it is not an option.  The latter information
+     is only used when this file is built as part of the GNU libc.  */
+#ifdef _LIBC
+# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0'	      \
+		      || (optind < nonoption_flags_len			      \
+			  && __getopt_nonoption_flags[optind] == '1'))
+#else
+# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0')
+#endif
+
+  if (nextchar == NULL || *nextchar == '\0')
+    {
+      /* Advance to the next ARGV-element.  */
+
+      /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
+	 moved back by the user (who may also have changed the arguments).  */
+      if (last_nonopt > optind)
+	last_nonopt = optind;
+      if (first_nonopt > optind)
+	first_nonopt = optind;
+
+      if (ordering == PERMUTE)
+	{
+	  /* If we have just processed some options following some non-options,
+	     exchange them so that the options come first.  */
+
+	  if (first_nonopt != last_nonopt && last_nonopt != optind)
+	    exchange ((char **) argv);
+	  else if (last_nonopt != optind)
+	    first_nonopt = optind;
+
+	  /* Skip any additional non-options
+	     and extend the range of non-options previously skipped.  */
+
+	  while (optind < argc && NONOPTION_P)
+	    optind++;
+	  last_nonopt = optind;
+	}
+
+      /* The special ARGV-element `--' means premature end of options.
+	 Skip it like a null option,
+	 then exchange with previous non-options as if it were an option,
+	 then skip everything else like a non-option.  */
+
+      if (optind != argc && !strcmp (argv[optind], "--"))
+	{
+	  optind++;
+
+	  if (first_nonopt != last_nonopt && last_nonopt != optind)
+	    exchange ((char **) argv);
+	  else if (first_nonopt == last_nonopt)
+	    first_nonopt = optind;
+	  last_nonopt = argc;
+
+	  optind = argc;
+	}
+
+      /* If we have done all the ARGV-elements, stop the scan
+	 and back over any non-options that we skipped and permuted.  */
+
+      if (optind == argc)
+	{
+	  /* Set the next-arg-index to point at the non-options
+	     that we previously skipped, so the caller will digest them.  */
+	  if (first_nonopt != last_nonopt)
+	    optind = first_nonopt;
+	  return -1;
+	}
+
+      /* If we have come to a non-option and did not permute it,
+	 either stop the scan or describe it to the caller and pass it by.  */
+
+      if (NONOPTION_P)
+	{
+	  if (ordering == REQUIRE_ORDER)
+	    return -1;
+	  optarg = argv[optind++];
+	  return 1;
+	}
+
+      /* We have found another option-ARGV-element.
+	 Skip the initial punctuation.  */
+
+      nextchar = (argv[optind] + 1
+		  + (longopts != NULL && argv[optind][1] == '-'));
+    }
+
+  /* Decode the current option-ARGV-element.  */
+
+  /* Check whether the ARGV-element is a long option.
+
+     If long_only and the ARGV-element has the form "-f", where f is
+     a valid short option, don't consider it an abbreviated form of
+     a long option that starts with f.  Otherwise there would be no
+     way to give the -f short option.
+
+     On the other hand, if there's a long option "fubar" and
+     the ARGV-element is "-fu", do consider that an abbreviation of
+     the long option, just like "--fu", and not "-f" with arg "u".
+
+     This distinction seems to be the most useful approach.  */
+
+  if (longopts != NULL
+      && (argv[optind][1] == '-'
+	  || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1])))))
+    {
+      char *nameend;
+      const struct option *p;
+      const struct option *pfound = NULL;
+      int exact = 0;
+      int ambig = 0;
+      int indfound = -1;
+      int option_index;
+
+      for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
+	/* Do nothing.  */ ;
+
+      /* Test all long options for either exact match
+	 or abbreviated matches.  */
+      for (p = longopts, option_index = 0; p->name; p++, option_index++)
+	if (!strncmp (p->name, nextchar, nameend - nextchar))
+	  {
+	    if ((unsigned int) (nameend - nextchar)
+		== (unsigned int) strlen (p->name))
+	      {
+		/* Exact match found.  */
+		pfound = p;
+		indfound = option_index;
+		exact = 1;
+		break;
+	      }
+	    else if (pfound == NULL)
+	      {
+		/* First nonexact match found.  */
+		pfound = p;
+		indfound = option_index;
+	      }
+	    else
+	      /* Second or later nonexact match found.  */
+	      ambig = 1;
+	  }
+
+      if (ambig && !exact)
+	{
+	  if (opterr)
+	    fprintf (stderr, _("%s: option `%s' is ambiguous\n"),
+		     argv[0], argv[optind]);
+	  nextchar += strlen (nextchar);
+	  optind++;
+	  optopt = 0;
+	  return '?';
+	}
+
+      if (pfound != NULL)
+	{
+	  option_index = indfound;
+	  optind++;
+	  if (*nameend)
+	    {
+	      /* Don't test has_arg with >, because some C compilers don't
+		 allow it to be used on enums.  */
+	      if (pfound->has_arg)
+		optarg = nameend + 1;
+	      else
+		{
+		  if (opterr)
+		    {
+		      if (argv[optind - 1][1] == '-')
+			/* --option */
+			fprintf (stderr,
+				 _("%s: option `--%s' doesn't allow an argument\n"),
+				 argv[0], pfound->name);
+		      else
+			/* +option or -option */
+			fprintf (stderr,
+				 _("%s: option `%c%s' doesn't allow an argument\n"),
+				 argv[0], argv[optind - 1][0], pfound->name);
+		    }
+
+		  nextchar += strlen (nextchar);
+
+		  optopt = pfound->val;
+		  return '?';
+		}
+	    }
+	  else if (pfound->has_arg == 1)
+	    {
+	      if (optind < argc)
+		optarg = argv[optind++];
+	      else
+		{
+		  if (opterr)
+		    fprintf (stderr,
+			   _("%s: option `%s' requires an argument\n"),
+			   argv[0], argv[optind - 1]);
+		  nextchar += strlen (nextchar);
+		  optopt = pfound->val;
+		  return optstring[0] == ':' ? ':' : '?';
+		}
+	    }
+	  nextchar += strlen (nextchar);
+	  if (longind != NULL)
+	    *longind = option_index;
+	  if (pfound->flag)
+	    {
+	      *(pfound->flag) = pfound->val;
+	      return 0;
+	    }
+	  return pfound->val;
+	}
+
+      /* Can't find it as a long option.  If this is not getopt_long_only,
+	 or the option starts with '--' or is not a valid short
+	 option, then it's an error.
+	 Otherwise interpret it as a short option.  */
+      if (!long_only || argv[optind][1] == '-'
+	  || my_index (optstring, *nextchar) == NULL)
+	{
+	  if (opterr)
+	    {
+	      if (argv[optind][1] == '-')
+		/* --option */
+		fprintf (stderr, _("%s: unrecognized option `--%s'\n"),
+			 argv[0], nextchar);
+	      else
+		/* +option or -option */
+		fprintf (stderr, _("%s: unrecognized option `%c%s'\n"),
+			 argv[0], argv[optind][0], nextchar);
+	    }
+	  nextchar = (char *) "";
+	  optind++;
+	  optopt = 0;
+	  return '?';
+	}
+    }
+
+  /* Look at and handle the next short option-character.  */
+
+  {
+    char c = *nextchar++;
+    char *temp = my_index (optstring, c);
+
+    /* Increment `optind' when we start to process its last character.  */
+    if (*nextchar == '\0')
+      ++optind;
+
+    if (temp == NULL || c == ':')
+      {
+	if (opterr)
+	  {
+	    if (posixly_correct)
+	      /* 1003.2 specifies the format of this message.  */
+	      fprintf (stderr, _("%s: illegal option -- %c\n"),
+		       argv[0], c);
+	    else
+	      fprintf (stderr, _("%s: invalid option -- %c\n"),
+		       argv[0], c);
+	  }
+	optopt = c;
+	return '?';
+      }
+    /* Convenience. Treat POSIX -W foo same as long option --foo.
+       Without a long-option table there is nothing to match against, so
+       `W' then falls through and is returned like any other option
+       character instead of dereferencing a null LONGOPTS.  */
+    if (temp[0] == 'W' && temp[1] == ';' && longopts != NULL)
+      {
+	char *nameend;
+	const struct option *p;
+	const struct option *pfound = NULL;
+	int exact = 0;
+	int ambig = 0;
+	int indfound = 0;
+	int option_index;
+
+	/* This is an option that requires an argument.  */
+	if (*nextchar != '\0')
+	  {
+	    optarg = nextchar;
+	    /* If we end this ARGV-element by taking the rest as an arg,
+	       we must advance to the next element now.  */
+	    optind++;
+	  }
+	else if (optind == argc)
+	  {
+	    if (opterr)
+	      {
+		/* 1003.2 specifies the format of this message.  */
+		fprintf (stderr, _("%s: option requires an argument -- %c\n"),
+			 argv[0], c);
+	      }
+	    optopt = c;
+	    if (optstring[0] == ':')
+	      c = ':';
+	    else
+	      c = '?';
+	    return c;
+	  }
+	else
+	  /* We already incremented `optind' once;
+	     increment it again when taking next ARGV-elt as argument.  */
+	  optarg = argv[optind++];
+
+	/* optarg is now the argument, see if it's in the
+	   table of longopts.  */
+
+	for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++)
+	  /* Do nothing.  */ ;
+
+	/* Test all long options for either exact match
+	   or abbreviated matches.  */
+	for (p = longopts, option_index = 0; p->name; p++, option_index++)
+	  if (!strncmp (p->name, nextchar, nameend - nextchar))
+	    {
+	      if ((unsigned int) (nameend - nextchar) == strlen (p->name))
+		{
+		  /* Exact match found.  */
+		  pfound = p;
+		  indfound = option_index;
+		  exact = 1;
+		  break;
+		}
+	      else if (pfound == NULL)
+		{
+		  /* First nonexact match found.  */
+		  pfound = p;
+		  indfound = option_index;
+		}
+	      else
+		/* Second or later nonexact match found.  */
+		ambig = 1;
+	    }
+	if (ambig && !exact)
+	  {
+	    /* OPTIND already points past the argument of -W, so report the
+	       argument itself and do not advance OPTIND again: ARGV[optind]
+	       may be the terminating null pointer, and a second increment
+	       could move OPTIND beyond ARGC.  */
+	    if (opterr)
+	      fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"),
+		       argv[0], optarg);
+	    nextchar += strlen (nextchar);
+	    return '?';
+	  }
+	if (pfound != NULL)
+	  {
+	    option_index = indfound;
+	    if (*nameend)
+	      {
+		/* Don't test has_arg with >, because some C compilers don't
+		   allow it to be used on enums.  */
+		if (pfound->has_arg)
+		  optarg = nameend + 1;
+		else
+		  {
+		    if (opterr)
+		      fprintf (stderr, _("\
+%s: option `-W %s' doesn't allow an argument\n"),
+			       argv[0], pfound->name);
+
+		    nextchar += strlen (nextchar);
+		    return '?';
+		  }
+	      }
+	    else if (pfound->has_arg == 1)
+	      {
+		if (optind < argc)
+		  optarg = argv[optind++];
+		else
+		  {
+		    if (opterr)
+		      fprintf (stderr,
+			       _("%s: option `%s' requires an argument\n"),
+			       argv[0], argv[optind - 1]);
+		    nextchar += strlen (nextchar);
+		    return optstring[0] == ':' ? ':' : '?';
+		  }
+	      }
+	    nextchar += strlen (nextchar);
+	    if (longind != NULL)
+	      *longind = option_index;
+	    if (pfound->flag)
+	      {
+		*(pfound->flag) = pfound->val;
+		return 0;
+	      }
+	    return pfound->val;
+	  }
+	  nextchar = NULL;
+	  return 'W';	/* Let the application handle it.   */
+      }
+    if (temp[1] == ':')
+      {
+	if (temp[2] == ':')
+	  {
+	    /* This is an option that accepts an argument optionally.  */
+	    if (*nextchar != '\0')
+	      {
+		optarg = nextchar;
+		optind++;
+	      }
+	    else
+	      optarg = NULL;
+	    nextchar = NULL;
+	  }
+	else
+	  {
+	    /* This is an option that requires an argument.  */
+	    if (*nextchar != '\0')
+	      {
+		optarg = nextchar;
+		/* If we end this ARGV-element by taking the rest as an arg,
+		   we must advance to the next element now.  */
+		optind++;
+	      }
+	    else if (optind == argc)
+	      {
+		if (opterr)
+		  {
+		    /* 1003.2 specifies the format of this message.  */
+		    fprintf (stderr,
+			   _("%s: option requires an argument -- %c\n"),
+			   argv[0], c);
+		  }
+		optopt = c;
+		if (optstring[0] == ':')
+		  c = ':';
+		else
+		  c = '?';
+	      }
+	    else
+	      /* We already incremented `optind' once;
+		 increment it again when taking next ARGV-elt as argument.  */
+	      optarg = argv[optind++];
+	    nextchar = NULL;
+	  }
+      }
+    return c;
+  }
+}
+
+int	/* Short-option entry point: forwards ARGC, ARGV and OPTSTRING to
+	   `_getopt_internal' with a null long-option table, a null LONGIND
+	   and LONG_ONLY zero, and returns its result unchanged.  */
+getopt (argc, argv, optstring)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+{
+  return _getopt_internal (argc, argv, optstring,
+			   (const struct option *) 0,
+			   (int *) 0,
+			   0);
+}
+
+#endif	/* Not ELIDE_CODE.  */
+
+#ifdef TEST
+
+/* Compile with -DTEST to make an executable for use in testing
+   the above definition of `getopt'.
+
+   The driver parses its command line with the option string
+   "abc:d:0123456789", prints one line per option returned, and then
+   prints any remaining non-option ARGV-elements.  Note that `d' is
+   accepted by the option string but has no case of its own, so it is
+   reported by the default branch.  */
+
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  int digit_optind = 0;	/* ARGV index where a digit option was last seen.  */
+
+  while (1)
+    {
+      int this_option_optind = optind ? optind : 1;
+
+      c = getopt (argc, argv, "abc:d:0123456789");
+      if (c == -1)
+	break;
+
+      switch (c)
+	{
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+	case '8':
+	case '9':
+	  if (digit_optind != 0 && digit_optind != this_option_optind)
+	    printf ("digits occur in two different argv-elements.\n");
+	  digit_optind = this_option_optind;
+	  printf ("option %c\n", c);
+	  break;
+
+	case 'a':
+	  printf ("option a\n");
+	  break;
+
+	case 'b':
+	  printf ("option b\n");
+	  break;
+
+	case 'c':
+	  printf ("option c with value `%s'\n", optarg);
+	  break;
+
+	case '?':	/* getopt has already reported the error.  */
+	  break;
+
+	default:
+	  printf ("?? getopt returned character code 0%o ??\n", c);
+	}
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      while (optind < argc)
+	printf ("%s ", argv[optind++]);
+      printf ("\n");
+    }
+
+  exit (0);
+}
+
+#endif /* TEST */

Added: trunk/theora/win32/getopt1.c
===================================================================
--- trunk/theora/win32/getopt1.c	2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/win32/getopt1.c	2006-05-28 18:18:26 UTC (rev 11451)
@@ -0,0 +1,188 @@
+/* getopt_long and getopt_long_only entry points for GNU getopt.
+   Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98
+     Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "getopt_win.h"
+
+#if !defined __STDC__ || !__STDC__
+/* This is a separate conditional since some stdc systems
+   reject `defined (const)'.  */
+#ifndef const
+#define const
+#endif
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+   actually compiling the library itself.  This code is part of the GNU C
+   Library, but also included in many other GNU distributions.  Compiling
+   and linking in this code is a waste when using the GNU C library
+   (especially if it is a shared library).  Rather than having every GNU
+   program understand `configure --with-gnu-libc' and omit the object files,
+   it is simpler to just do this in the source for each such file.  */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
+#include <gnu-versions.h>
+#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+#define ELIDE_CODE
+#endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
+/* This needs to come after some library #include
+   to get __GNU_LIBRARY__ defined.  */
+#ifdef __GNU_LIBRARY__
+#include <stdlib.h>
+#endif
+
+#ifndef	NULL
+#define NULL 0
+#endif
+
+/* Long-option entry point.  Passes all five arguments straight through
+   to _getopt_internal with a final flag of 0 (getopt_long_only passes 1
+   there) and returns its result unchanged.  */
+int
+getopt_long (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+{
+  /* 0 here is the only difference from getopt_long_only.  */
+  int long_only = 0;
+  int result = _getopt_internal (argc, argv, options, long_options,
+				 opt_index, long_only);
+
+  return result;
+}
+
+/* Variant of getopt_long in which a single '-' may introduce a long
+   option as well as '--'.  An argument that starts with '-' (not '--')
+   and matches no long option, but does match a short option, is parsed
+   as that short option.  Implemented by calling _getopt_internal with
+   its final flag set to 1; the result is returned unchanged.  */
+
+int
+getopt_long_only (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+{
+  /* 1 selects the long-only behaviour described above.  */
+  int long_only = 1;
+  int result = _getopt_internal (argc, argv, options, long_options,
+				 opt_index, long_only);
+
+  return result;
+}
+
+
+#endif	/* Not ELIDE_CODE.  */
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/* Test driver for `getopt_long' (only built with -DTEST).  Parses ARGV
+   against the short option string "abc:d:0123456789" and a fixed table
+   of long options, prints one line per option recognised, then lists
+   the ARGV elements left over once getopt_long returns -1.
+   Returns 0.  */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  /* this_option_optind recorded for the last digit option; 0 = none yet.  */
+  int digit_optind = 0;
+
+  while (1)
+    {
+      /* optind as it stands before this call, with 0 mapped to 1.  */
+      int this_option_optind = optind ? optind : 1;
+      /* Passed by address to getopt_long; used below to index
+	 long_options when it returns 0.  */
+      int option_index = 0;
+      /* Long options accepted by the driver; the all-zero entry ends
+	 the table.  */
+      static struct option long_options[] =
+      {
+	{"add", 1, 0, 0},
+	{"append", 0, 0, 0},
+	{"delete", 1, 0, 0},
+	{"verbose", 0, 0, 0},
+	{"create", 0, 0, 0},
+	{"file", 1, 0, 0},
+	{0, 0, 0, 0}
+      };
+
+      c = getopt_long (argc, argv, "abc:d:0123456789",
+		       long_options, &option_index);
+      if (c == -1)
+	break;
+
+      switch (c)
+	{
+	case 0:
+	  /* Print the table entry selected by option_index.  */
+	  printf ("option %s", long_options[option_index].name);
+	  if (optarg)
+	    printf (" with arg %s", optarg);
+	  printf ("\n");
+	  break;
+
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+	case '8':
+	case '9':
+	  /* Report digit options whose recorded optind differs from the
+	     previous digit option's.  */
+	  if (digit_optind != 0 && digit_optind != this_option_optind)
+	    printf ("digits occur in two different argv-elements.\n");
+	  digit_optind = this_option_optind;
+	  printf ("option %c\n", c);
+	  break;
+
+	case 'a':
+	  printf ("option a\n");
+	  break;
+
+	case 'b':
+	  printf ("option b\n");
+	  break;
+
+	case 'c':
+	  printf ("option c with value `%s'\n", optarg);
+	  break;
+
+	case 'd':
+	  printf ("option d with value `%s'\n", optarg);
+	  break;
+
+	case '?':
+	  /* Nothing is printed here for this return value.  */
+	  break;
+
+	default:
+	  printf ("?? getopt returned character code 0%o ??\n", c);
+	}
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      while (optind < argc)
+	printf ("%s ", argv[optind++]);
+      printf ("\n");
+    }
+
+  /* <stdlib.h> is only included under __GNU_LIBRARY__ and when the code
+     above is not elided, so `exit' may have no declaration in scope.
+     Returning from main is equivalent and needs none.  */
+  return 0;
+}
+
+#endif /* TEST */



More information about the commits mailing list