[xiph-commits] r11451 - in trunk/theora: examples lib lib/x86_32_vs
win32 win32/VS2005 win32/VS2005/dump_video
win32/VS2005/encoder_example win32/VS2005/libtheora
illiminable at svn.xiph.org
illiminable at svn.xiph.org
Sun May 28 11:18:47 PDT 2006
Author: illiminable
Date: 2006-05-28 11:18:26 -0700 (Sun, 28 May 2006)
New Revision: 11451
Added:
trunk/theora/lib/x86_32_vs/
trunk/theora/lib/x86_32_vs/cpu.c
trunk/theora/lib/x86_32_vs/dsp_mmx.c
trunk/theora/lib/x86_32_vs/fdct_mmx.c
trunk/theora/lib/x86_32_vs/recon_mmx.c
trunk/theora/win32/VS2005/dump_video/
trunk/theora/win32/VS2005/dump_video/dump_video.vcproj
trunk/theora/win32/VS2005/encoder_example/
trunk/theora/win32/VS2005/encoder_example/encoder_example.vcproj
trunk/theora/win32/getopt.c
trunk/theora/win32/getopt1.c
Modified:
trunk/theora/examples/dump_video.c
trunk/theora/examples/encoder_example.c
trunk/theora/win32/VS2005/libtheora/libtheora.vcproj
Log:
* Make dump_video build on win32
* Make encoder_example build on win32
* Add converted intel/inline masm mmx code
* Change some <> to "" conditionally for windows when including getopt, since the default copy in libtheora doesn't build on windows.
* Put a copy of getopt that actually builds on windows in win32 - probably no reason we can't use this version as the main version (I copied it from libspeex, since I knew that one worked)
* Add a dump_video vs2005 project
* Add an encoder example vs2005 project
* Add a macro for performance data logging (currently only win32)
Modified: trunk/theora/examples/dump_video.c
===================================================================
--- trunk/theora/examples/dump_video.c 2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/examples/dump_video.c 2006-05-28 18:18:26 UTC (rev 11451)
@@ -28,7 +28,9 @@
#define _FILE_OFFSET_BITS 64
#include <stdio.h>
+#ifndef WIN32
#include <unistd.h>
+#endif
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
Modified: trunk/theora/examples/encoder_example.c
===================================================================
--- trunk/theora/examples/encoder_example.c 2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/examples/encoder_example.c 2006-05-28 18:18:26 UTC (rev 11451)
@@ -21,8 +21,15 @@
#define _LARGEFILE64_SOURCE
#define _FILE_OFFSET_BITS 64
+/* Define to give performance data win32 only*/
+//#define THEORA_PERF_DATA
+#ifdef THEORA_PERF_DATA
+#include <windows.h>
+#endif
+
#ifdef HAVE_CONFIG_H
# include <config.h>
+
#endif
#ifndef _REENTRANT
@@ -30,10 +37,16 @@
#endif
#include <stdio.h>
+#ifndef WIN32
#include <unistd.h>
+#endif
#include <stdlib.h>
#include <string.h>
+#ifndef WIN32
#include <getopt.h>
+#else
+#include "getopt.h"
+#endif
#include <time.h>
#include <math.h>
#include "theora/theora.h"
@@ -537,15 +550,30 @@
ogg_int64_t video_bytesout=0;
double timebase;
+
FILE* outfile = stdout;
-#ifdef _WIN32 /* We need to set stdin/stdout to binary mode. Damn windows. */
+#ifdef _WIN32
+# ifdef THEORA_PERF_DATA
+ LARGE_INTEGER start_time;
+ LARGE_INTEGER final_time;
+
+ LONGLONG elapsed_ticks;
+ LARGE_INTEGER ticks_per_second;
+
+ LONGLONG elapsed_secs;
+ LONGLONG elapsed_sec_mod;
+ double elapsed_secs_dbl ;
+# endif
+ /* We need to set stdin/stdout to binary mode. Damn windows. */
/* if we were reading/writing a file, it would also need to in
binary mode, eg, fopen("file.wav","wb"); */
/* Beware the evil ifdef. We avoid these where we can, but this one we
cannot. Don't add any more, you'll probably go to hell if you do. */
_setmode( _fileno( stdin ), _O_BINARY );
_setmode( _fileno( stdout ), _O_BINARY );
+
+
#endif
while((c=getopt_long(argc,argv,optstring,options,&long_option_index))!=EOF){
@@ -621,6 +649,15 @@
optind++;
}
+
+
+#ifdef THEORA_PERF_DATA
+# ifdef WIN32
+ QueryPerformanceCounter(&start_time);
+# endif
+#endif
+
+
/* yayness. Set up Ogg output stream */
srand(time(NULL));
{
@@ -848,6 +885,23 @@
fprintf(stderr,"\r \ndone.\n\n");
+#ifdef THEORA_PERF_DATA
+# ifdef WIN32
+ /* Report the elapsed encode time on stderr: outfile defaults to stdout, so stdout may be carrying the Ogg stream. */
+ QueryPerformanceCounter(&final_time);
+ QueryPerformanceFrequency(&ticks_per_second);
+ elapsed_ticks = final_time.QuadPart - start_time.QuadPart;
+ elapsed_secs = elapsed_ticks / ticks_per_second.QuadPart;
+ elapsed_sec_mod = elapsed_ticks % ticks_per_second.QuadPart;
+ elapsed_secs_dbl = (double)elapsed_secs;
+ elapsed_secs_dbl += ((double)elapsed_sec_mod / (double)ticks_per_second.QuadPart);
+ fprintf(stderr, "Encode time = %lld ticks\n", elapsed_ticks);
+ fprintf(stderr, "~%lld and %lld / %lld seconds\n", elapsed_secs, elapsed_sec_mod, ticks_per_second.QuadPart);
+ fprintf(stderr, "~%f seconds\n", elapsed_secs_dbl); /* %f matches double; %Lf would require long double */
+# endif
+
+#endif
+
return(0);
}
Added: trunk/theora/lib/x86_32_vs/cpu.c
===================================================================
--- trunk/theora/lib/x86_32_vs/cpu.c 2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/lib/x86_32_vs/cpu.c 2006-05-28 18:18:26 UTC (rev 11451)
@@ -0,0 +1,215 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
+ * by the Xiph.Org Foundation http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function:
+ last mod: $Id: mcomp.c,v 1.8 2003/12/03 08:59:41 arc Exp $
+
+ ********************************************************************/
+
+#include "cpu.h"
+
+void
+cpuid(ogg_int32_t op, ogg_uint32_t *out_eax, ogg_uint32_t *out_ebx, ogg_uint32_t *out_ecx, ogg_uint32_t *out_edx)
+{
+  /* Runs the x86 CPUID instruction with EAX = op and stores the resulting EAX, EBX, ECX and EDX through the four out_* pointers. */
+  /* When THEORA_USE_ASM is not defined no instruction is issued and all four outputs are set to 0 instead of being left unwritten. */
+#if defined(THEORA_USE_ASM)
+
+  /* The __asm block stores into these locals by name; they are copied out through the pointers afterwards. */
+  ogg_uint32_t my_eax, my_ebx, my_ecx, my_edx;
+  __asm {
+    //push ebx
+    mov eax, op
+    cpuid
+    mov my_eax, eax
+    mov my_ebx, ebx
+    mov my_ecx, ecx
+    mov my_edx, edx
+
+  };
+
+  *out_eax = my_eax;
+  *out_ebx = my_ebx;
+  *out_ecx = my_ecx;
+  *out_edx = my_edx;
+#else
+  (void)op;
+  *out_eax = *out_ebx = *out_ecx = *out_edx = 0;
+#endif
+//#if defined(__x86_64__)
+// asm volatile ("pushq %%rbx \n\t"
+// "cpuid \n\t"
+// "movl %%ebx,%1 \n\t"
+// "popq %%rbx"
+// : "=a" (*eax),
+// "=r" (*ebx),
+// "=c" (*ecx),
+// "=d" (*edx)
+// : "a" (op)
+// : "cc");
+//#elif defined(__i386__)
+// asm volatile ("pushl %%ebx \n\t"
+// "cpuid \n\t"
+// "movl %%ebx,%1 \n\t"
+// "popl %%ebx"
+// : "=a" (*eax),
+// "=r" (*ebx),
+// "=c" (*ecx),
+// "=d" (*edx)
+// : "a" (op)
+// : "cc");
+//#endif
+}
+
+//#if defined(__x86_64__) || defined(__i386__)
+
+static ogg_uint32_t cpu_get_flags (void)
+{
+  /* Builds the CPU_X86_* capability mask from CPUID; always 0 when THEORA_USE_ASM is not defined. */
+  /* If leaf 1 does not report bit 0x00800000 in EDX, 0 is returned before the extended leaves are examined. */
+#if defined(THEORA_USE_ASM)
+  ogg_uint32_t reg_a, reg_b, reg_c, reg_d;
+  ogg_uint32_t detected = 0;
+
+  /* Disabled probe: flips bit 0x200000 of EFLAGS and compares before/after to check for cpuid support. */
+  //__asm {
+
+  // pushf
+  // pushf
+  // pop eax
+  // mov ebx, eax
+  // xor eax, 200000h
+  // push eax
+  // popf
+  // pushf
+  // pop eax
+  // popf
+  // mov reg_a, eax
+  // mov reg_b, ebx
+  //};
+
+  //if (reg_a == reg_b)
+  // return 0;
+
+  /*cpuid(0, &reg_a, &reg_b, &reg_c, &reg_d); */
+
+  /* Intel: feature bits come back in EDX of leaf 1 */
+  cpuid(1, &reg_a, &reg_b, &reg_c, &reg_d);
+  if (!(reg_d & 0x00800000))
+    return 0;
+  detected |= CPU_X86_MMX;
+  if (reg_d & 0x02000000)
+    detected |= CPU_X86_MMXEXT | CPU_X86_SSE;
+  if (reg_d & 0x04000000)
+    detected |= CPU_X86_SSE2;
+
+  /* AMD: leaf 0x80000000 returns the highest extended leaf in EAX; */
+  /* leaf 0x80000001 is queried only when it is reported as available. */
+  cpuid(0x80000000, &reg_a, &reg_b, &reg_c, &reg_d);
+  if (reg_a < 0x80000001)
+    return detected;
+
+  cpuid(0x80000001, &reg_a, &reg_b, &reg_c, &reg_d);
+  if (reg_d & 0x00800000) {
+    detected |= CPU_X86_MMX;
+    if (reg_d & 0x80000000)
+      detected |= CPU_X86_3DNOW;
+    if (reg_d & 0x40000000)
+      detected |= CPU_X86_3DNOWEXT;
+    if (reg_d & 0x00400000)
+      detected |= CPU_X86_MMXEXT;
+  }
+
+  return detected;
+#else
+  return 0;
+#endif
+}
+
+// ogg_uint32_t eax, ebx, ecx, edx;
+// ogg_uint32_t flags = 0;
+//
+// /* check for cpuid support on i386 */
+//#if defined(__i386__)
+// asm volatile ("pushfl \n\t"
+// "pushfl \n\t"
+// "popl %0 \n\t"
+// "movl %0,%1 \n\t"
+// "xorl $0x200000,%0 \n\t"
+// "pushl %0 \n\t"
+// "popfl \n\t"
+// "pushfl \n\t"
+// "popl %0 \n\t"
+// "popfl"
+// : "=r" (eax),
+// "=r" (ebx)
+// :
+// : "cc");
+//
+// if (eax == ebx) /* no cpuid */
+// return 0;
+//#endif
+//
+// /*cpuid(0, &eax, &ebx, &ecx, &edx); */
+// /* Intel */
+// cpuid(1, &eax, &ebx, &ecx, &edx);
+// if ((edx & 0x00800000) == 0)
+// return 0;
+// flags |= CPU_X86_MMX;
+// if (edx & 0x02000000)
+// flags |= CPU_X86_MMXEXT | CPU_X86_SSE;
+// if (edx & 0x04000000)
+// flags |= CPU_X86_SSE2;
+//
+// /* AMD */
+// cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
+// if(eax >= 0x80000001) {
+// cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+// if ((edx & 0x00800000) != 0) {
+// flags |= CPU_X86_MMX;
+// if (edx & 0x80000000)
+// flags |= CPU_X86_3DNOW;
+// if (edx & 0x40000000)
+// flags |= CPU_X86_3DNOWEXT;
+// if (edx & 0x00400000)
+// flags |= CPU_X86_MMXEXT;
+// }
+// }
+//
+// return flags;
+//}
+//
+//#else /* not an i386 or x86_64 */
+//
+//static ogg_uint32_t cpu_get_flags (void) {
+// return 0;
+//}
+//
+//#endif
+
+ogg_uint32_t cpu_init (void)
+{
+  /* Queries cpu_get_flags() once, reports every detected instruction set through TH_DEBUG, and returns the mask. */
+  const ogg_uint32_t features = cpu_get_flags();
+
+  if (!features) return 0; /* nothing detected: nothing to report */
+
+  TH_DEBUG("vectorized instruction sets supported:");
+  if (features & CPU_X86_MMX) { TH_DEBUG(" mmx"); }
+  if (features & CPU_X86_MMXEXT) { TH_DEBUG(" mmxext"); }
+  if (features & CPU_X86_SSE) { TH_DEBUG(" sse"); }
+  if (features & CPU_X86_SSE2) { TH_DEBUG(" sse2"); }
+  if (features & CPU_X86_3DNOW) { TH_DEBUG(" 3dnow"); }
+  if (features & CPU_X86_3DNOWEXT) { TH_DEBUG(" 3dnowext"); }
+  TH_DEBUG("\n");
+  return features;
+}
Added: trunk/theora/lib/x86_32_vs/dsp_mmx.c
===================================================================
--- trunk/theora/lib/x86_32_vs/dsp_mmx.c 2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/lib/x86_32_vs/dsp_mmx.c 2006-05-28 18:18:26 UTC (rev 11451)
@@ -0,0 +1,1606 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
+ * by the Xiph.Org Foundation http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function:
+ last mod: $Id: mcomp.c,v 1.8 2003/12/03 08:59:41 arc Exp $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+
+#include "codec_internal.h"
+#include "dsp.h"
+
+#if 0
+//These are to let me selectively enable the C versions, these are needed
+#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2)
+#define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b)))
+#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b))))
+#endif
+
+
+static const ogg_int64_t V128 = 0x0080008000800080LL; /* four packed 16-bit words, each 0x0080 (128) */
+
+static void sub8x8__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr,
+ ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine,
+ ogg_uint32_t ReconPixelsPerLine)
+{
+ /* Stores FiltPtr[x] - ReconPtr[x] as 16-bit values for an 8x8 block: 8 values per row into DctInputPtr, stepping FiltPtr by PixelsPerLine and ReconPtr by ReconPixelsPerLine after each row. */
+ //Make non-zero to use the C-version
+#if 0
+ int i;
+
+ /* For each block row */
+ for (i=8; i; i--) {
+ DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], ReconPtr[0]);
+ DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], ReconPtr[1]);
+ DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], ReconPtr[2]);
+ DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], ReconPtr[3]);
+ DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], ReconPtr[4]);
+ DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], ReconPtr[5]);
+ DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], ReconPtr[6]);
+ DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], ReconPtr[7]);
+
+ /* Start next row */
+ FiltPtr += PixelsPerLine;
+ ReconPtr += ReconPixelsPerLine;
+ DctInputPtr += 8;
+ }
+#else
+ __asm {
+ align 16
+
+ pxor mm7, mm7
+ /* mm7 = 0: unpacking a byte register against it zero-extends each byte to a 16-bit word */
+ mov eax, FiltPtr
+ mov ebx, ReconPtr
+ mov edx, DctInputPtr
+ /* eax walks the FiltPtr rows, ebx the ReconPtr rows, edx the DctInputPtr output (16 bytes per row) */
+ /* You can't use rept in inline masm and macro parsing seems screwed with inline asm*/
+
+ /* ITERATION 1 */
+ movq mm0, [eax] /* mm0 = FiltPtr */
+ movq mm1, [ebx] /* mm1 = ReconPtr */
+ movq mm2, mm0 /* dup to prepare for up conversion */
+ movq mm3, mm1 /* dup to prepare for up conversion */
+ /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
+ punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
+ punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
+ punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
+ /* start calculation */
+ psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
+ psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
+ movq [edx], mm0 /* write answer out */
+ movq [8 + edx], mm2 /* write answer out */
+ /* Increment pointers */
+ add edx, 16
+ add eax, PixelsPerLine
+ add ebx, ReconPixelsPerLine
+
+
+ /* ITERATION 2 */
+ movq mm0, [eax] /* mm0 = FiltPtr */
+ movq mm1, [ebx] /* mm1 = ReconPtr */
+ movq mm2, mm0 /* dup to prepare for up conversion */
+ movq mm3, mm1 /* dup to prepare for up conversion */
+ /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
+ punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
+ punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
+ punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
+ /* start calculation */
+ psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
+ psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
+ movq [edx], mm0 /* write answer out */
+ movq [8 + edx], mm2 /* write answer out */
+ /* Increment pointers */
+ add edx, 16
+ add eax, PixelsPerLine
+ add ebx, ReconPixelsPerLine
+
+
+ /* ITERATION 3 */
+ movq mm0, [eax] /* mm0 = FiltPtr */
+ movq mm1, [ebx] /* mm1 = ReconPtr */
+ movq mm2, mm0 /* dup to prepare for up conversion */
+ movq mm3, mm1 /* dup to prepare for up conversion */
+ /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
+ punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
+ punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
+ punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
+ /* start calculation */
+ psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
+ psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
+ movq [edx], mm0 /* write answer out */
+ movq [8 + edx], mm2 /* write answer out */
+ /* Increment pointers */
+ add edx, 16
+ add eax, PixelsPerLine
+ add ebx, ReconPixelsPerLine
+
+
+ /* ITERATION 4 */
+ movq mm0, [eax] /* mm0 = FiltPtr */
+ movq mm1, [ebx] /* mm1 = ReconPtr */
+ movq mm2, mm0 /* dup to prepare for up conversion */
+ movq mm3, mm1 /* dup to prepare for up conversion */
+ /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
+ punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
+ punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
+ punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
+ /* start calculation */
+ psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
+ psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
+ movq [edx], mm0 /* write answer out */
+ movq [8 + edx], mm2 /* write answer out */
+ /* Increment pointers */
+ add edx, 16
+ add eax, PixelsPerLine
+ add ebx, ReconPixelsPerLine
+
+
+ /* ITERATION 5 */
+ movq mm0, [eax] /* mm0 = FiltPtr */
+ movq mm1, [ebx] /* mm1 = ReconPtr */
+ movq mm2, mm0 /* dup to prepare for up conversion */
+ movq mm3, mm1 /* dup to prepare for up conversion */
+ /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
+ punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
+ punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
+ punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
+ /* start calculation */
+ psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
+ psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
+ movq [edx], mm0 /* write answer out */
+ movq [8 + edx], mm2 /* write answer out */
+ /* Increment pointers */
+ add edx, 16
+ add eax, PixelsPerLine
+ add ebx, ReconPixelsPerLine
+
+
+ /* ITERATION 6 */
+ movq mm0, [eax] /* mm0 = FiltPtr */
+ movq mm1, [ebx] /* mm1 = ReconPtr */
+ movq mm2, mm0 /* dup to prepare for up conversion */
+ movq mm3, mm1 /* dup to prepare for up conversion */
+ /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
+ punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
+ punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
+ punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
+ /* start calculation */
+ psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
+ psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
+ movq [edx], mm0 /* write answer out */
+ movq [8 + edx], mm2 /* write answer out */
+ /* Increment pointers */
+ add edx, 16
+ add eax, PixelsPerLine
+ add ebx, ReconPixelsPerLine
+
+
+ /* ITERATION 7 */
+ movq mm0, [eax] /* mm0 = FiltPtr */
+ movq mm1, [ebx] /* mm1 = ReconPtr */
+ movq mm2, mm0 /* dup to prepare for up conversion */
+ movq mm3, mm1 /* dup to prepare for up conversion */
+ /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
+ punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
+ punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
+ punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
+ /* start calculation */
+ psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
+ psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
+ movq [edx], mm0 /* write answer out */
+ movq [8 + edx], mm2 /* write answer out */
+ /* Increment pointers */
+ add edx, 16
+ add eax, PixelsPerLine
+ add ebx, ReconPixelsPerLine
+
+
+ /* ITERATION 8 */
+ movq mm0, [eax] /* mm0 = FiltPtr */
+ movq mm1, [ebx] /* mm1 = ReconPtr */
+ movq mm2, mm0 /* dup to prepare for up conversion */
+ movq mm3, mm1 /* dup to prepare for up conversion */
+ /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
+ punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
+ punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
+ punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
+ /* start calculation */
+ psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
+ psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
+ movq [edx], mm0 /* write answer out */
+ movq [8 + edx], mm2 /* write answer out */
+ /* Increment pointers */
+ add edx, 16
+ add eax, PixelsPerLine
+ add ebx, ReconPixelsPerLine
+ /* NOTE(review): the three pointer increments after iteration 8 have no later use inside this block. */
+ /* NOTE(review): no emms is issued before returning; presumably the caller resets the MMX/FPU state - confirm. */
+
+
+
+ };
+
+#endif
+}
+
+static void sub8x8_128__mmx (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
+ ogg_uint32_t PixelsPerLine)
+{
+ /* Stores FiltPtr[x] - 128 as 16-bit values for an 8x8 block: 8 values per row into DctInputPtr, stepping FiltPtr by PixelsPerLine after each row. */
+#if 0
+ int i;
+ /* For each block row */
+ for (i=8; i; i--) {
+ /* INTRA mode so code raw image data */
+ /* We convert the data to 8 bit signed (by subtracting 128) as
+ this reduces the internal precision requirements in the DCT
+ transform. */
+ DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], 128);
+ DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], 128);
+ DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], 128);
+ DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], 128);
+ DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], 128);
+ DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], 128);
+ DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], 128);
+ DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], 128);
+
+ /* Start next row */
+ FiltPtr += PixelsPerLine;
+ DctInputPtr += 8;
+ }
+
+#else
+ __asm {
+ align 16
+
+ pxor mm7, mm7
+ /* mm7 = 0: unpacking a byte register against it zero-extends each byte to a 16-bit word */
+ mov eax, FiltPtr
+ mov ebx, DctInputPtr
+ /* eax walks the FiltPtr rows, ebx the DctInputPtr output (16 bytes per row) */
+ movq mm1, V128
+ /* mm1 = V128, i.e. the value 128 in each of the four 16-bit words; it is subtracted from every pixel below */
+ /* ITERATION 1 */
+ movq mm0, [eax] /* mm0 = FiltPtr */
+ movq mm2, mm0 /* dup to prepare for up conversion */
+ /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
+ punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
+ /* start calculation */
+ psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
+ psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
+ movq [ebx], mm0 /* write answer out */
+ movq [8 + ebx], mm2 /* write answer out */
+ /* Increment pointers */
+ add ebx, 16
+ add eax, PixelsPerLine
+
+
+ /* ITERATION 2 */
+ movq mm0, [eax] /* mm0 = FiltPtr */
+ movq mm2, mm0 /* dup to prepare for up conversion */
+ /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
+ punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
+ /* start calculation */
+ psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
+ psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
+ movq [ebx], mm0 /* write answer out */
+ movq [8 + ebx], mm2 /* write answer out */
+ /* Increment pointers */
+ add ebx, 16
+ add eax, PixelsPerLine
+
+
+ /* ITERATION 3 */
+ movq mm0, [eax] /* mm0 = FiltPtr */
+ movq mm2, mm0 /* dup to prepare for up conversion */
+ /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
+ punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
+ /* start calculation */
+ psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
+ psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
+ movq [ebx], mm0 /* write answer out */
+ movq [8 + ebx], mm2 /* write answer out */
+ /* Increment pointers */
+ add ebx, 16
+ add eax, PixelsPerLine
+
+
+ /* ITERATION 4 */
+ movq mm0, [eax] /* mm0 = FiltPtr */
+ movq mm2, mm0 /* dup to prepare for up conversion */
+ /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
+ punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
+ /* start calculation */
+ psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
+ psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
+ movq [ebx], mm0 /* write answer out */
+ movq [8 + ebx], mm2 /* write answer out */
+ /* Increment pointers */
+ add ebx, 16
+ add eax, PixelsPerLine
+
+
+ /* ITERATION 5 */
+ movq mm0, [eax] /* mm0 = FiltPtr */
+ movq mm2, mm0 /* dup to prepare for up conversion */
+ /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
+ punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
+ /* start calculation */
+ psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
+ psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
+ movq [ebx], mm0 /* write answer out */
+ movq [8 + ebx], mm2 /* write answer out */
+ /* Increment pointers */
+ add ebx, 16
+ add eax, PixelsPerLine
+
+
+ /* ITERATION 6 */
+ movq mm0, [eax] /* mm0 = FiltPtr */
+ movq mm2, mm0 /* dup to prepare for up conversion */
+ /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
+ punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
+ /* start calculation */
+ psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
+ psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
+ movq [ebx], mm0 /* write answer out */
+ movq [8 + ebx], mm2 /* write answer out */
+ /* Increment pointers */
+ add ebx, 16
+ add eax, PixelsPerLine
+
+
+ /* ITERATION 7 */
+ movq mm0, [eax] /* mm0 = FiltPtr */
+ movq mm2, mm0 /* dup to prepare for up conversion */
+ /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
+ punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
+ /* start calculation */
+ psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
+ psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
+ movq [ebx], mm0 /* write answer out */
+ movq [8 + ebx], mm2 /* write answer out */
+ /* Increment pointers */
+ add ebx, 16
+ add eax, PixelsPerLine
+
+
+ /* ITERATION 8 */
+ movq mm0, [eax] /* mm0 = FiltPtr */
+ movq mm2, mm0 /* dup to prepare for up conversion */
+ /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
+ punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
+ /* start calculation */
+ psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
+ psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
+ movq [ebx], mm0 /* write answer out */
+ movq [8 + ebx], mm2 /* write answer out */
+ /* Increment pointers */
+ add ebx, 16
+ add eax, PixelsPerLine
+ /* NOTE(review): the two pointer increments after iteration 8 have no later use inside this block. */
+ };
+ /* NOTE(review): no emms is issued before returning; presumably the caller resets the MMX/FPU state - confirm. */
+#endif
+}
+
+
+
+
+static void sub8x8avg2__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr1,
+ unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr,
+ ogg_uint32_t PixelsPerLine,
+ ogg_uint32_t ReconPixelsPerLine)
+{
+
+#if 0
+ int i;
+
+ /* For each block row */
+ for (i=8; i; i--) {
+ DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], DSP_OP_AVG (ReconPtr1[0], ReconPtr2[0]));
+ DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], DSP_OP_AVG (ReconPtr1[1], ReconPtr2[1]));
+ DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], DSP_OP_AVG (ReconPtr1[2], ReconPtr2[2]));
+ DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], DSP_OP_AVG (ReconPtr1[3], ReconPtr2[3]));
+ DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], DSP_OP_AVG (ReconPtr1[4], ReconPtr2[4]));
+ DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], DSP_OP_AVG (ReconPtr1[5], ReconPtr2[5]));
+ DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], DSP_OP_AVG (ReconPtr1[6], ReconPtr2[6]));
+ DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], DSP_OP_AVG (ReconPtr1[7], ReconPtr2[7]));
+
+ /* Start next row */
+ FiltPtr += PixelsPerLine;
+ ReconPtr1 += ReconPixelsPerLine;
+ ReconPtr2 += ReconPixelsPerLine;
+ DctInputPtr += 8;
+ }
+#else
+
+ __asm {
+ align 16
+
+ pxor mm7, mm7
+
+ mov eax, FiltPtr
+ mov ebx, ReconPtr1
+ mov ecx, ReconPtr2
+ mov edx, DctInputPtr
+
+ /* ITERATION 1 */
+ movq mm0, [eax] ; /* mm0 = FiltPtr */
+ movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
+ movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
+ movq mm2, mm0 ; /* dup to prepare for up conversion */
+ movq mm3, mm1 ; /* dup to prepare for up conversion */
+ movq mm5, mm4 ; /* dup to prepare for up conversion */
+ ; /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
+ punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
+ punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
+ punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
+ punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
+ punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
+ ; /* average ReconPtr1 and ReconPtr2 */
+ paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
+ paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
+ psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
+ psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
+ psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+ psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+ movq [edx], mm0 ; /* write answer out */
+ movq [8 + edx], mm2 ; /* write answer out */
+ ; /* Increment pointers */
+ add edx, 16 ;
+ add eax, PixelsPerLine ;
+ add ebx, ReconPixelsPerLine ;
+ add ecx, ReconPixelsPerLine ;
+
+
+ /* ITERATION 2 */
+ movq mm0, [eax] ; /* mm0 = FiltPtr */
+ movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
+ movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
+ movq mm2, mm0 ; /* dup to prepare for up conversion */
+ movq mm3, mm1 ; /* dup to prepare for up conversion */
+ movq mm5, mm4 ; /* dup to prepare for up conversion */
+ ; /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
+ punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
+ punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
+ punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
+ punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
+ punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
+ ; /* average ReconPtr1 and ReconPtr2 */
+ paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
+ paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
+ psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
+ psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
+ psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+ psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+ movq [edx], mm0 ; /* write answer out */
+ movq [8 + edx], mm2 ; /* write answer out */
+ ; /* Increment pointers */
+ add edx, 16 ;
+ add eax, PixelsPerLine ;
+ add ebx, ReconPixelsPerLine ;
+ add ecx, ReconPixelsPerLine ;
+
+
+ /* ITERATION 3 */
+ movq mm0, [eax] ; /* mm0 = FiltPtr */
+ movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
+ movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
+ movq mm2, mm0 ; /* dup to prepare for up conversion */
+ movq mm3, mm1 ; /* dup to prepare for up conversion */
+ movq mm5, mm4 ; /* dup to prepare for up conversion */
+ ; /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
+ punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
+ punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
+ punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
+ punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
+ punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
+ ; /* average ReconPtr1 and ReconPtr2 */
+ paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
+ paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
+ psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
+ psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
+ psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+ psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+ movq [edx], mm0 ; /* write answer out */
+ movq [8 + edx], mm2 ; /* write answer out */
+ ; /* Increment pointers */
+ add edx, 16 ;
+ add eax, PixelsPerLine ;
+ add ebx, ReconPixelsPerLine ;
+ add ecx, ReconPixelsPerLine ;
+
+
+ /* ITERATION 4 */
+ movq mm0, [eax] ; /* mm0 = FiltPtr */
+ movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
+ movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
+ movq mm2, mm0 ; /* dup to prepare for up conversion */
+ movq mm3, mm1 ; /* dup to prepare for up conversion */
+ movq mm5, mm4 ; /* dup to prepare for up conversion */
+ ; /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
+ punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
+ punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
+ punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
+ punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
+ punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
+ ; /* average ReconPtr1 and ReconPtr2 */
+ paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
+ paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
+ psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
+ psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
+ psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+ psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+ movq [edx], mm0 ; /* write answer out */
+ movq [8 + edx], mm2 ; /* write answer out */
+ ; /* Increment pointers */
+ add edx, 16 ;
+ add eax, PixelsPerLine ;
+ add ebx, ReconPixelsPerLine ;
+ add ecx, ReconPixelsPerLine ;
+
+
+ /* ITERATION 5 */
+ movq mm0, [eax] ; /* mm0 = FiltPtr */
+ movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
+ movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
+ movq mm2, mm0 ; /* dup to prepare for up conversion */
+ movq mm3, mm1 ; /* dup to prepare for up conversion */
+ movq mm5, mm4 ; /* dup to prepare for up conversion */
+ ; /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
+ punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
+ punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
+ punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
+ punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
+ punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
+ ; /* average ReconPtr1 and ReconPtr2 */
+ paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
+ paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
+ psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
+ psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
+ psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+ psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+ movq [edx], mm0 ; /* write answer out */
+ movq [8 + edx], mm2 ; /* write answer out */
+ ; /* Increment pointers */
+ add edx, 16 ;
+ add eax, PixelsPerLine ;
+ add ebx, ReconPixelsPerLine ;
+ add ecx, ReconPixelsPerLine ;
+
+
+ /* ITERATION 6 */
+ movq mm0, [eax] ; /* mm0 = FiltPtr */
+ movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
+ movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
+ movq mm2, mm0 ; /* dup to prepare for up conversion */
+ movq mm3, mm1 ; /* dup to prepare for up conversion */
+ movq mm5, mm4 ; /* dup to prepare for up conversion */
+ ; /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
+ punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
+ punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
+ punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
+ punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
+ punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
+ ; /* average ReconPtr1 and ReconPtr2 */
+ paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
+ paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
+ psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
+ psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
+ psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+ psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+ movq [edx], mm0 ; /* write answer out */
+ movq [8 + edx], mm2 ; /* write answer out */
+ ; /* Increment pointers */
+ add edx, 16 ;
+ add eax, PixelsPerLine ;
+ add ebx, ReconPixelsPerLine ;
+ add ecx, ReconPixelsPerLine ;
+
+
+ /* ITERATION 7 */
+ movq mm0, [eax] ; /* mm0 = FiltPtr */
+ movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
+ movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
+ movq mm2, mm0 ; /* dup to prepare for up conversion */
+ movq mm3, mm1 ; /* dup to prepare for up conversion */
+ movq mm5, mm4 ; /* dup to prepare for up conversion */
+ ; /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
+ punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
+ punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
+ punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
+ punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
+ punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
+ ; /* average ReconPtr1 and ReconPtr2 */
+ paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
+ paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
+ psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
+ psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
+ psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+ psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+ movq [edx], mm0 ; /* write answer out */
+ movq [8 + edx], mm2 ; /* write answer out */
+ ; /* Increment pointers */
+ add edx, 16 ;
+ add eax, PixelsPerLine ;
+ add ebx, ReconPixelsPerLine ;
+ add ecx, ReconPixelsPerLine ;
+
+
+ /* ITERATION 8 */
+ movq mm0, [eax] ; /* mm0 = FiltPtr */
+ movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
+ movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
+ movq mm2, mm0 ; /* dup to prepare for up conversion */
+ movq mm3, mm1 ; /* dup to prepare for up conversion */
+ movq mm5, mm4 ; /* dup to prepare for up conversion */
+ ; /* convert from UINT8 to INT16 */
+ punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
+ punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
+ punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
+ punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
+ punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
+ punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
+ ; /* average ReconPtr1 and ReconPtr2 */
+ paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
+ paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
+ psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
+ psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
+ psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+ psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
+ movq [edx], mm0 ; /* write answer out */
+ movq [8 + edx], mm2 ; /* write answer out */
+ ; /* Increment pointers */
+ add edx, 16 ;
+ add eax, PixelsPerLine ;
+ add ebx, ReconPixelsPerLine ;
+ add ecx, ReconPixelsPerLine ;
+
+ };
+
+
+
+
+
+#endif
+}
+
+static ogg_uint32_t row_sad8__mmx (unsigned char *Src1, unsigned char *Src2)
+{
+ /* Row SAD over 8 pixels, evaluated as two independent 4-pixel halves.
+  *
+  * Sums |Src1[i] - Src2[i]| for bytes 0..3 and, separately, for bytes 4..7
+  * and returns the larger of the two sums.  The disabled C branch is the
+  * reference implementation; the __asm branch is the MSVC inline MMX
+  * version that is actually compiled.
+  *
+  * No emms is executed here, so the MMX state is left active on return.
+  */
+#if 0
+ ogg_uint32_t SadValue;
+ ogg_uint32_t SadValue1;
+
+ SadValue = DSP_OP_ABS_DIFF (Src1[0], Src2[0]) +
+ DSP_OP_ABS_DIFF (Src1[1], Src2[1]) +
+ DSP_OP_ABS_DIFF (Src1[2], Src2[2]) +
+ DSP_OP_ABS_DIFF (Src1[3], Src2[3]);
+
+ SadValue1 = DSP_OP_ABS_DIFF (Src1[4], Src2[4]) +
+ DSP_OP_ABS_DIFF (Src1[5], Src2[5]) +
+ DSP_OP_ABS_DIFF (Src1[6], Src2[6]) +
+ DSP_OP_ABS_DIFF (Src1[7], Src2[7]);
+
+ SadValue = ( SadValue > SadValue1 ) ? SadValue : SadValue1;
+
+ return SadValue;
+
+#else
+ ogg_uint32_t MaxSad;
+
+
+ __asm {
+ align 16
+ mov ebx, Src1
+ mov ecx, Src2
+
+
+ pxor mm6, mm6 ; /* zero out mm6 for unpack */
+ pxor mm7, mm7 ; /* zero out mm7 for unpack */
+ movq mm0, [ebx] ; /* take 8 bytes */
+ movq mm1, [ecx] ;
+
+ movq mm2, mm0 ;
+ psubusb mm0, mm1 ; /* A - B (unsigned saturating: 0 where B > A) */
+ psubusb mm1, mm2 ; /* B - A (unsigned saturating: 0 where A > B) */
+ por mm0, mm1 ; /* OR of the two gives the abs difference per byte */
+
+ movq mm1, mm0 ;
+
+ punpcklbw mm0, mm6 ; /* unpack low four bytes to 16-bit words */
+ punpckhbw mm1, mm7 ; /* unpack high four bytes to 16-bit words */
+
+ movq mm2, mm0 ;
+ movq mm3, mm1 ;
+ psrlq mm2, 32 ; /* fold and add */
+ psrlq mm3, 32 ;
+ paddw mm0, mm2 ;
+ paddw mm1, mm3 ;
+ movq mm2, mm0 ;
+ movq mm3, mm1 ;
+ psrlq mm2, 16 ;
+ psrlq mm3, 16 ;
+ paddw mm0, mm2 ; /* low word of mm0 = sum of the low four differences */
+ paddw mm1, mm3 ; /* low word of mm1 = sum of the high four differences */
+
+ psubusw mm1, mm0 ; /* saturating mm1 - mm0: 0 where mm0 is the larger */
+ paddw mm1, mm0 ; /* mm1 = max(mm1, mm0) */
+ movd eax, mm1 ;
+
+ and eax, 0xffff
+ mov MaxSad, eax
+ };
+ return MaxSad;
+
+
+
+
+
+#endif
+}
+
+
+
+
+static ogg_uint32_t col_sad8x8__mmx (unsigned char *Src1, unsigned char *Src2,
+ ogg_uint32_t stride)
+{
+ /* Column SAD of an 8x8 block, evaluated as a top and a bottom half.
+  *
+  * For each of the 8 columns, sums |Src1 - Src2| over rows 0..3 and,
+  * separately, over rows 4..7, then returns the largest of those sixteen
+  * 4-row sums.  Both sources advance by `stride` bytes per row.  The
+  * disabled C branch is the reference; the __asm branch is compiled.
+  *
+  * No emms is executed here, so the MMX state is left active on return.
+  */
+#if 0
+ ogg_uint32_t SadValue[8] = {0,0,0,0,0,0,0,0};
+ ogg_uint32_t SadValue2[8] = {0,0,0,0,0,0,0,0};
+ ogg_uint32_t MaxSad = 0;
+ ogg_uint32_t i;
+
+ for ( i = 0; i < 4; i++ ){
+ SadValue[0] += abs(Src1[0] - Src2[0]);
+ SadValue[1] += abs(Src1[1] - Src2[1]);
+ SadValue[2] += abs(Src1[2] - Src2[2]);
+ SadValue[3] += abs(Src1[3] - Src2[3]);
+ SadValue[4] += abs(Src1[4] - Src2[4]);
+ SadValue[5] += abs(Src1[5] - Src2[5]);
+ SadValue[6] += abs(Src1[6] - Src2[6]);
+ SadValue[7] += abs(Src1[7] - Src2[7]);
+
+ Src1 += stride;
+ Src2 += stride;
+ }
+
+ for ( i = 0; i < 4; i++ ){
+ SadValue2[0] += abs(Src1[0] - Src2[0]);
+ SadValue2[1] += abs(Src1[1] - Src2[1]);
+ SadValue2[2] += abs(Src1[2] - Src2[2]);
+ SadValue2[3] += abs(Src1[3] - Src2[3]);
+ SadValue2[4] += abs(Src1[4] - Src2[4]);
+ SadValue2[5] += abs(Src1[5] - Src2[5]);
+ SadValue2[6] += abs(Src1[6] - Src2[6]);
+ SadValue2[7] += abs(Src1[7] - Src2[7]);
+
+ Src1 += stride;
+ Src2 += stride;
+ }
+
+ for ( i = 0; i < 8; i++ ){
+ if ( SadValue[i] > MaxSad )
+ MaxSad = SadValue[i];
+ if ( SadValue2[i] > MaxSad )
+ MaxSad = SadValue2[i];
+ }
+
+ return MaxSad;
+#else
+ ogg_uint32_t MaxSad;
+
+
+ __asm {
+ align 16
+ mov ebx, Src1
+ mov ecx, Src2
+
+ pxor mm3, mm3 ; /* zero out mm3 for unpack */
+ pxor mm4, mm4 ; /* mm4 low sum (rows 0..3, columns 0..3) */
+ pxor mm5, mm5 ; /* mm5 high sum (rows 0..3, columns 4..7) */
+ pxor mm6, mm6 ; /* mm6 low sum (rows 4..7, columns 0..3) */
+ pxor mm7, mm7 ; /* mm7 high sum (rows 4..7, columns 4..7) */
+ mov edi, 4 ; /* 4 rows */
+ label_1: ;
+ movq mm0, [ebx] ; /* take 8 bytes */
+ movq mm1, [ecx] ; /* take 8 bytes */
+
+ movq mm2, mm0 ;
+ psubusb mm0, mm1 ; /* A - B (unsigned saturating) */
+ psubusb mm1, mm2 ; /* B - A (unsigned saturating) */
+ por mm0, mm1 ; /* OR of the two gives the abs difference per byte */
+ movq mm1, mm0 ;
+
+ punpcklbw mm0, mm3 ; /* unpack to higher precision for accumulation */
+ paddw mm4, mm0 ; /* accumulate difference... */
+ punpckhbw mm1, mm3 ; /* unpack high four bytes to higher precision */
+ paddw mm5, mm1 ; /* accumulate difference... */
+ add ebx, stride ; /* Inc pointer into the new data */
+ add ecx, stride ; /* Inc pointer into the new data */
+
+ dec edi ;
+ jnz label_1 ;
+
+ mov edi, 4 ; /* 4 rows */
+ label_2: ;
+ movq mm0, [ebx] ; /* take 8 bytes */
+ movq mm1, [ecx] ; /* take 8 bytes */
+
+ movq mm2, mm0 ;
+ psubusb mm0, mm1 ; /* A - B (unsigned saturating) */
+ psubusb mm1, mm2 ; /* B - A (unsigned saturating) */
+ por mm0, mm1 ; /* OR of the two gives the abs difference per byte */
+ movq mm1, mm0 ;
+
+ punpcklbw mm0, mm3 ; /* unpack to higher precision for accumulation */
+ paddw mm6, mm0 ; /* accumulate difference... */
+ punpckhbw mm1, mm3 ; /* unpack high four bytes to higher precision */
+ paddw mm7, mm1 ; /* accumulate difference... */
+ add ebx, stride ; /* Inc pointer into the new data */
+ add ecx, stride ; /* Inc pointer into the new data */
+
+ dec edi ;
+ jnz label_2 ;
+
+ psubusw mm7, mm6 ;
+ paddw mm7, mm6 ; /* mm7 = max(mm7, mm6) */
+ psubusw mm5, mm4 ;
+ paddw mm5, mm4 ; /* mm5 = max(mm5, mm4) */
+ psubusw mm7, mm5 ;
+ paddw mm7, mm5 ; /* mm7 = max(mm5, mm7) */
+ movq mm6, mm7 ;
+ psrlq mm6, 32 ;
+ psubusw mm7, mm6 ;
+ paddw mm7, mm6 ; /* mm7 = max(mm7, mm7 >> 32): fold upper two words onto lower two */
+ movq mm6, mm7 ;
+ psrlq mm6, 16 ;
+ psubusw mm7, mm6 ;
+ paddw mm7, mm6 ; /* mm7 = max(mm7, mm7 >> 16): low word now holds the overall max */
+ movd eax, mm7 ;
+ and eax, 0xffff ;
+
+ mov MaxSad, eax
+ };
+
+ return MaxSad;
+
+
+#endif
+}
+
+static ogg_uint32_t sad8x8__mmx (unsigned char *ptr1, ogg_uint32_t stride1,
+ unsigned char *ptr2, ogg_uint32_t stride2)
+{
+ /* Sum of absolute differences over an 8x8 block.
+  *
+  * Reads 8 rows of 8 bytes from ptr1 (rows stride1 bytes apart) and from
+  * ptr2 (rows stride2 bytes apart) and returns the sum of |ptr1 - ptr2|
+  * over all 64 pixels.  The disabled C branch is the reference; the __asm
+  * branch is a fully unrolled MMX version.  The total is accumulated in
+  * four 16-bit lanes of mm7 and only the low 16 bits of the folded result
+  * are returned (the maximum possible total, 64 * 255, fits in 16 bits).
+  *
+  * No emms is executed here, so the MMX state is left active on return.
+  */
+#if 0
+ ogg_uint32_t i;
+ ogg_uint32_t sad = 0;
+
+ for (i=8; i; i--) {
+ sad += DSP_OP_ABS_DIFF(ptr1[0], ptr2[0]);
+ sad += DSP_OP_ABS_DIFF(ptr1[1], ptr2[1]);
+ sad += DSP_OP_ABS_DIFF(ptr1[2], ptr2[2]);
+ sad += DSP_OP_ABS_DIFF(ptr1[3], ptr2[3]);
+ sad += DSP_OP_ABS_DIFF(ptr1[4], ptr2[4]);
+ sad += DSP_OP_ABS_DIFF(ptr1[5], ptr2[5]);
+ sad += DSP_OP_ABS_DIFF(ptr1[6], ptr2[6]);
+ sad += DSP_OP_ABS_DIFF(ptr1[7], ptr2[7]);
+
+ /* Step to next row of block. */
+ ptr1 += stride1;
+ ptr2 += stride2;
+ }
+
+ return sad;
+#else
+ ogg_uint32_t DiffVal;
+
+ __asm {
+ align 16
+
+ mov ebx, ptr1
+ mov edx, ptr2
+
+ pxor mm6, mm6 ; /* zero out mm6 for unpack */
+ pxor mm7, mm7 ; /* mm7 contains the result */
+
+ ; /* ITERATION 1 */
+ movq mm0, [ebx] ; /* take 8 bytes */
+ movq mm1, [edx] ;
+ movq mm2, mm0 ;
+
+ psubusb mm0, mm1 ; /* A - B */
+ psubusb mm1, mm2 ; /* B - A */
+ por mm0, mm1 ; /* OR of the two saturated differences gives abs difference */
+ movq mm1, mm0 ;
+
+ punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */
+ paddw mm7, mm0 ; /* accumulate difference... */
+ punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */
+ add ebx, stride1 ; /* Inc pointer into the new data */
+ paddw mm7, mm1 ; /* accumulate difference... */
+ add edx, stride2 ; /* Inc pointer into ref data */
+
+ ; /* ITERATION 2 */
+ movq mm0, [ebx] ; /* take 8 bytes */
+ movq mm1, [edx] ;
+ movq mm2, mm0 ;
+
+ psubusb mm0, mm1 ; /* A - B */
+ psubusb mm1, mm2 ; /* B - A */
+ por mm0, mm1 ; /* OR of the two saturated differences gives abs difference */
+ movq mm1, mm0 ;
+
+ punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */
+ paddw mm7, mm0 ; /* accumulate difference... */
+ punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */
+ add ebx, stride1 ; /* Inc pointer into the new data */
+ paddw mm7, mm1 ; /* accumulate difference... */
+ add edx, stride2 ; /* Inc pointer into ref data */
+
+
+ ; /* ITERATION 3 */
+ movq mm0, [ebx] ; /* take 8 bytes */
+ movq mm1, [edx] ;
+ movq mm2, mm0 ;
+
+ psubusb mm0, mm1 ; /* A - B */
+ psubusb mm1, mm2 ; /* B - A */
+ por mm0, mm1 ; /* OR of the two saturated differences gives abs difference */
+ movq mm1, mm0 ;
+
+ punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */
+ paddw mm7, mm0 ; /* accumulate difference... */
+ punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */
+ add ebx, stride1 ; /* Inc pointer into the new data */
+ paddw mm7, mm1 ; /* accumulate difference... */
+ add edx, stride2 ; /* Inc pointer into ref data */
+
+ ; /* ITERATION 4 */
+ movq mm0, [ebx] ; /* take 8 bytes */
+ movq mm1, [edx] ;
+ movq mm2, mm0 ;
+
+ psubusb mm0, mm1 ; /* A - B */
+ psubusb mm1, mm2 ; /* B - A */
+ por mm0, mm1 ; /* OR of the two saturated differences gives abs difference */
+ movq mm1, mm0 ;
+
+ punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */
+ paddw mm7, mm0 ; /* accumulate difference... */
+ punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */
+ add ebx, stride1 ; /* Inc pointer into the new data */
+ paddw mm7, mm1 ; /* accumulate difference... */
+ add edx, stride2 ; /* Inc pointer into ref data */
+
+
+ ; /* ITERATION 5 */
+ movq mm0, [ebx] ; /* take 8 bytes */
+ movq mm1, [edx] ;
+ movq mm2, mm0 ;
+
+ psubusb mm0, mm1 ; /* A - B */
+ psubusb mm1, mm2 ; /* B - A */
+ por mm0, mm1 ; /* OR of the two saturated differences gives abs difference */
+ movq mm1, mm0 ;
+
+ punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */
+ paddw mm7, mm0 ; /* accumulate difference... */
+ punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */
+ add ebx, stride1 ; /* Inc pointer into the new data */
+ paddw mm7, mm1 ; /* accumulate difference... */
+ add edx, stride2 ; /* Inc pointer into ref data */
+
+
+ ; /* ITERATION 6 */
+ movq mm0, [ebx] ; /* take 8 bytes */
+ movq mm1, [edx] ;
+ movq mm2, mm0 ;
+
+ psubusb mm0, mm1 ; /* A - B */
+ psubusb mm1, mm2 ; /* B - A */
+ por mm0, mm1 ; /* OR of the two saturated differences gives abs difference */
+ movq mm1, mm0 ;
+
+ punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */
+ paddw mm7, mm0 ; /* accumulate difference... */
+ punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */
+ add ebx, stride1 ; /* Inc pointer into the new data */
+ paddw mm7, mm1 ; /* accumulate difference... */
+ add edx, stride2 ; /* Inc pointer into ref data */
+
+
+ ; /* ITERATION 7 */
+ movq mm0, [ebx] ; /* take 8 bytes */
+ movq mm1, [edx] ;
+ movq mm2, mm0 ;
+
+ psubusb mm0, mm1 ; /* A - B */
+ psubusb mm1, mm2 ; /* B - A */
+ por mm0, mm1 ; /* OR of the two saturated differences gives abs difference */
+ movq mm1, mm0 ;
+
+ punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */
+ paddw mm7, mm0 ; /* accumulate difference... */
+ punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */
+ add ebx, stride1 ; /* Inc pointer into the new data */
+ paddw mm7, mm1 ; /* accumulate difference... */
+ add edx, stride2 ; /* Inc pointer into ref data */
+
+
+
+ ; /* ITERATION 8 */
+ movq mm0, [ebx] ; /* take 8 bytes */
+ movq mm1, [edx] ;
+ movq mm2, mm0 ;
+
+ psubusb mm0, mm1 ; /* A - B */
+ psubusb mm1, mm2 ; /* B - A */
+ por mm0, mm1 ; /* OR of the two saturated differences gives abs difference */
+ movq mm1, mm0 ;
+
+ punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */
+ paddw mm7, mm0 ; /* accumulate difference... */
+ punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */
+ add ebx, stride1 ; /* Inc pointer into the new data (result unused after the last row) */
+ paddw mm7, mm1 ; /* accumulate difference... */
+ add edx, stride2 ; /* Inc pointer into ref data (result unused after the last row) */
+
+
+
+ ; /* ------ horizontal fold: add the four word lanes of mm7 together ------ */
+
+ movq mm0, mm7 ;
+ psrlq mm7, 32 ;
+ paddw mm7, mm0 ;
+ movq mm0, mm7 ;
+ psrlq mm7, 16 ;
+ paddw mm7, mm0 ; /* low word of mm7 = total SAD */
+ movd eax, mm7 ;
+ and eax, 0xffff ;
+
+ mov DiffVal, eax
+ };
+
+ return DiffVal;
+
+
+
+#endif
+}
+
+static ogg_uint32_t sad8x8_thres__mmx (unsigned char *ptr1, ogg_uint32_t stride1,
+ unsigned char *ptr2, ogg_uint32_t stride2,
+ ogg_uint32_t thres)
+{ /* 8x8 SAD with an early-out threshold in the reference version.
+   *
+   * The disabled C branch stops accumulating as soon as the running sum
+   * exceeds `thres` and returns that partial sum.  The compiled branch
+   * ignores `thres` and always returns the full 8x8 SAD from sad8x8__mmx.
+   * Both results fall on the same side of `thres`: a partial sum that
+   * already exceeds it can only grow.
+   * NOTE(review): presumably callers only compare the result with `thres`
+   * and never rely on the exact partial sum -- confirm against callers.
+   */
+#if 0
+ ogg_uint32_t i;
+ ogg_uint32_t sad = 0;
+
+ for (i=8; i; i--) {
+ sad += DSP_OP_ABS_DIFF(ptr1[0], ptr2[0]);
+ sad += DSP_OP_ABS_DIFF(ptr1[1], ptr2[1]);
+ sad += DSP_OP_ABS_DIFF(ptr1[2], ptr2[2]);
+ sad += DSP_OP_ABS_DIFF(ptr1[3], ptr2[3]);
+ sad += DSP_OP_ABS_DIFF(ptr1[4], ptr2[4]);
+ sad += DSP_OP_ABS_DIFF(ptr1[5], ptr2[5]);
+ sad += DSP_OP_ABS_DIFF(ptr1[6], ptr2[6]);
+ sad += DSP_OP_ABS_DIFF(ptr1[7], ptr2[7]);
+
+ if (sad > thres )
+ break;
+
+ /* Step to next row of block. */
+ ptr1 += stride1;
+ ptr2 += stride2;
+ }
+
+ return sad;
+#else
+ return sad8x8__mmx (ptr1, stride1, ptr2, stride2);
+#endif
+}
+
+
+static ogg_uint32_t sad8x8_xy2_thres__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
+ unsigned char *RefDataPtr1,
+ unsigned char *RefDataPtr2, ogg_uint32_t RefStride,
+ ogg_uint32_t thres)
+{ /* 8x8 SAD between SrcData and the per-pixel average of two references.
+   *
+   * For each of 8 rows, averages RefDataPtr1 and RefDataPtr2 byte-wise and
+   * accumulates |SrcData - average|.  SrcData rows are SrcStride bytes
+   * apart; both reference pointers advance by RefStride.  The disabled C
+   * branch stops early once the running sum exceeds `thres`; the compiled
+   * __asm branch never reads `thres` and always processes all 8 rows.
+   *
+   * The byte average is computed without widening as
+   *   (a & b) + (((a ^ b) & 0xfe) >> 1)
+   * which rounds down.  The 0xfe mask stops the 64-bit shift from moving
+   * a bit across a byte boundary.
+   *
+   * No emms is executed here, so the MMX state is left active on return.
+   */
+#if 0
+ ogg_uint32_t i;
+ ogg_uint32_t sad = 0;
+
+ for (i=8; i; i--) {
+ sad += DSP_OP_ABS_DIFF(SrcData[0], DSP_OP_AVG (RefDataPtr1[0], RefDataPtr2[0]));
+ sad += DSP_OP_ABS_DIFF(SrcData[1], DSP_OP_AVG (RefDataPtr1[1], RefDataPtr2[1]));
+ sad += DSP_OP_ABS_DIFF(SrcData[2], DSP_OP_AVG (RefDataPtr1[2], RefDataPtr2[2]));
+ sad += DSP_OP_ABS_DIFF(SrcData[3], DSP_OP_AVG (RefDataPtr1[3], RefDataPtr2[3]));
+ sad += DSP_OP_ABS_DIFF(SrcData[4], DSP_OP_AVG (RefDataPtr1[4], RefDataPtr2[4]));
+ sad += DSP_OP_ABS_DIFF(SrcData[5], DSP_OP_AVG (RefDataPtr1[5], RefDataPtr2[5]));
+ sad += DSP_OP_ABS_DIFF(SrcData[6], DSP_OP_AVG (RefDataPtr1[6], RefDataPtr2[6]));
+ sad += DSP_OP_ABS_DIFF(SrcData[7], DSP_OP_AVG (RefDataPtr1[7], RefDataPtr2[7]));
+
+ if ( sad > thres )
+ break;
+
+ /* Step to next row of block. */
+ SrcData += SrcStride;
+ RefDataPtr1 += RefStride;
+ RefDataPtr2 += RefStride;
+ }
+
+ return sad;
+#else
+ ogg_uint32_t DiffVal;
+
+ __asm {
+ align 16
+
+ mov ebx, SrcData
+ mov ecx, RefDataPtr1
+ mov edx, RefDataPtr2
+
+
+ pcmpeqd mm5, mm5 ; /* all ones... */
+ paddb mm5, mm5 ; /* ...doubled per byte: fefefefefefefefe in mm5 */
+ ;
+ pxor mm6, mm6 ; /* zero out mm6 for unpack */
+ pxor mm7, mm7 ; /* mm7 contains the result */
+ mov edi, 8 ; /* 8 rows */
+ loop_start: ;
+ movq mm0, [ebx] ; /* take 8 bytes */
+
+ movq mm2, [ecx] ;
+ movq mm3, [edx] ; /* take average of mm2 and mm3 */
+ movq mm1, mm2 ;
+ pand mm1, mm3 ; /* bits common to both references */
+ pxor mm3, mm2 ; /* bits that differ */
+ pand mm3, mm5 ; /* drop each byte's low bit before the 64-bit shift */
+ psrlq mm3, 1 ; /* halve the differing bits */
+ paddb mm1, mm3 ; /* mm1 = byte-wise average, rounded down */
+
+ movq mm2, mm0 ;
+
+ psubusb mm0, mm1 ; /* A - B */
+ psubusb mm1, mm2 ; /* B - A */
+ por mm0, mm1 ; /* OR of the two saturated differences gives abs difference */
+ movq mm1, mm0 ;
+
+ punpcklbw mm0, mm6 ; /* unpack to higher precision for accumulation */
+ paddw mm7, mm0 ; /* accumulate difference... */
+ punpckhbw mm1, mm6 ; /* unpack high four bytes to higher precision */
+ add ebx, SrcStride ; /* Inc pointer into the new data */
+ paddw mm7, mm1 ; /* accumulate difference... */
+ add ecx, RefStride ; /* Inc pointer into ref data */
+ add edx, RefStride ; /* Inc pointer into ref data */
+
+ dec edi ;
+ jnz loop_start ;
+
+ movq mm0, mm7 ; /* horizontal fold: add the four word lanes together */
+ psrlq mm7, 32 ;
+ paddw mm7, mm0 ;
+ movq mm0, mm7 ;
+ psrlq mm7, 16 ;
+ paddw mm7, mm0 ;
+ movd eax, mm7 ;
+ and eax, 0xffff ;
+
+ mov DiffVal, eax
+ };
+
+ return DiffVal;
+
+
+
+#endif
+}
+
+static ogg_uint32_t intra8x8_err__mmx (unsigned char *DataPtr, ogg_uint32_t Stride)
+{ /* Variance-style error metric of one 8x8 block of pixels.
+   *
+   * Reads 8 rows of 8 bytes (rows Stride bytes apart), accumulates
+   * XSum = sum(p) and XXSum = sum(p*p) over all 64 pixels, and returns
+   * 64*XXSum - XSum*XSum in unsigned 32-bit arithmetic.  The disabled C
+   * branch is the reference; the __asm branch is compiled.
+   *
+   * In the MMX version XSum is kept in four 16-bit lanes of mm5 and XXSum
+   * in two 32-bit lanes of mm7 (pmaddwd squares and pairs the words).
+   *
+   * No emms is executed here, so the MMX state is left active on return.
+   */
+#if 0
+ ogg_uint32_t i;
+ ogg_uint32_t XSum=0;
+ ogg_uint32_t XXSum=0;
+
+ for (i=8; i; i--) {
+ /* Accumulate every pixel of the row. */
+ XSum += DataPtr[0];
+ XXSum += DataPtr[0]*DataPtr[0];
+ XSum += DataPtr[1];
+ XXSum += DataPtr[1]*DataPtr[1];
+ XSum += DataPtr[2];
+ XXSum += DataPtr[2]*DataPtr[2];
+ XSum += DataPtr[3];
+ XXSum += DataPtr[3]*DataPtr[3];
+ XSum += DataPtr[4];
+ XXSum += DataPtr[4]*DataPtr[4];
+ XSum += DataPtr[5];
+ XXSum += DataPtr[5]*DataPtr[5];
+ XSum += DataPtr[6];
+ XXSum += DataPtr[6]*DataPtr[6];
+ XSum += DataPtr[7];
+ XXSum += DataPtr[7]*DataPtr[7];
+
+ /* Step to next row of block. */
+ DataPtr += Stride;
+ }
+
+ /* Compute population variance as mis-match metric. */
+ return (( (XXSum<<6) - XSum*XSum ) );
+#else
+ ogg_uint32_t XSum;
+ ogg_uint32_t XXSum;
+
+ __asm {
+ align 16
+
+ mov ecx, DataPtr
+
+ pxor mm5, mm5 ; /* mm5 = running XSum (4 x 16-bit lanes) */
+ pxor mm6, mm6 ; /* mm6 = zero, used for unpacking */
+ pxor mm7, mm7 ; /* mm7 = running XXSum (2 x 32-bit lanes) */
+ mov edi, 8 ; /* 8 rows */
+ loop_start:
+ movq mm0, [ecx] ; /* take 8 bytes */
+ movq mm2, mm0 ;
+
+ punpcklbw mm0, mm6 ; /* low four pixels as words */
+ punpckhbw mm2, mm6 ; /* high four pixels as words */
+
+ paddw mm5, mm0 ;
+ paddw mm5, mm2 ;
+
+ pmaddwd mm0, mm0 ; /* square each word and add adjacent pairs */
+ pmaddwd mm2, mm2 ;
+ ;
+ paddd mm7, mm0 ;
+ paddd mm7, mm2 ;
+
+ add ecx, Stride ; /* Inc pointer into src data */
+
+ dec edi ;
+ jnz loop_start ;
+
+ movq mm0, mm5 ; /* fold the four XSum lanes into the low word */
+ psrlq mm5, 32 ;
+ paddw mm5, mm0 ;
+ movq mm0, mm5 ;
+ psrlq mm5, 16 ;
+ paddw mm5, mm0 ;
+ movd edi, mm5 ;
+ movsx edi, di ; /* keep the low word only, sign-extended to 32 bits */
+ mov eax, edi ;
+
+ movq mm0, mm7 ; /* fold the two XXSum lanes into the low dword */
+ psrlq mm7, 32 ;
+ paddd mm7, mm0 ;
+ movd ebx, mm7 ;
+
+ mov XSum, eax
+ mov XXSum, ebx;
+
+ };
+ /* Compute population variance as mis-match metric. */
+ return (( (XXSum<<6) - XSum*XSum ) );
+
+
+
+#endif
+}
+
+static ogg_uint32_t inter8x8_err__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
+ unsigned char *RefDataPtr, ogg_uint32_t RefStride)
+{
+ /* Variance-style error metric of the difference between two 8x8 blocks.
+  *
+  * For each of the 64 pixel positions takes d = SrcData - RefDataPtr
+  * (signed), accumulates XSum = sum(d) and XXSum = sum(d*d), and returns
+  * 64*XXSum - XSum*XSum in unsigned 32-bit arithmetic.  SrcData rows are
+  * SrcStride bytes apart, RefDataPtr rows RefStride bytes apart.  The
+  * disabled C branch is the reference; the __asm branch is compiled.
+  *
+  * No emms is executed here, so the MMX state is left active on return.
+  */
+#if 0
+ ogg_uint32_t i;
+ ogg_uint32_t XSum=0;
+ ogg_uint32_t XXSum=0;
+ ogg_int32_t DiffVal;
+
+ for (i=8; i; i--) {
+ DiffVal = DSP_OP_DIFF (SrcData[0], RefDataPtr[0]);
+ XSum += DiffVal;
+ XXSum += DiffVal*DiffVal;
+
+ DiffVal = DSP_OP_DIFF (SrcData[1], RefDataPtr[1]);
+ XSum += DiffVal;
+ XXSum += DiffVal*DiffVal;
+
+ DiffVal = DSP_OP_DIFF (SrcData[2], RefDataPtr[2]);
+ XSum += DiffVal;
+ XXSum += DiffVal*DiffVal;
+
+ DiffVal = DSP_OP_DIFF (SrcData[3], RefDataPtr[3]);
+ XSum += DiffVal;
+ XXSum += DiffVal*DiffVal;
+
+ DiffVal = DSP_OP_DIFF (SrcData[4], RefDataPtr[4]);
+ XSum += DiffVal;
+ XXSum += DiffVal*DiffVal;
+
+ DiffVal = DSP_OP_DIFF (SrcData[5], RefDataPtr[5]);
+ XSum += DiffVal;
+ XXSum += DiffVal*DiffVal;
+
+ DiffVal = DSP_OP_DIFF (SrcData[6], RefDataPtr[6]);
+ XSum += DiffVal;
+ XXSum += DiffVal*DiffVal;
+
+ DiffVal = DSP_OP_DIFF (SrcData[7], RefDataPtr[7]);
+ XSum += DiffVal;
+ XXSum += DiffVal*DiffVal;
+
+ /* Step to next row of block. */
+ SrcData += SrcStride;
+ RefDataPtr += RefStride;
+ }
+
+ /* Compute and return population variance as mis-match metric. */
+ return (( (XXSum<<6) - XSum*XSum ));
+#else
+ ogg_uint32_t XSum;
+ ogg_uint32_t XXSum;
+
+
+ __asm {
+ align 16
+
+ mov ecx, SrcData
+ mov edx, RefDataPtr
+
+ pxor mm5, mm5 ; /* mm5 = running XSum (4 x 16-bit lanes) */
+ pxor mm6, mm6 ; /* mm6 = zero, used for unpacking */
+ pxor mm7, mm7 ; /* mm7 = running XXSum (2 x 32-bit lanes) */
+ mov edi, 8 ; /* 8 rows */
+ loop_start: ;
+ movq mm0, [ecx] ; /* take 8 bytes */
+ movq mm1, [edx] ;
+ movq mm2, mm0 ;
+ movq mm3, mm1 ;
+
+ punpcklbw mm0, mm6 ; /* widen all four half-rows to 16-bit words */
+ punpcklbw mm1, mm6 ;
+ punpckhbw mm2, mm6 ;
+ punpckhbw mm3, mm6 ;
+
+ psubsw mm0, mm1 ; /* signed difference, low four pixels */
+ psubsw mm2, mm3 ; /* signed difference, high four pixels */
+
+ paddw mm5, mm0 ;
+ paddw mm5, mm2 ;
+
+ pmaddwd mm0, mm0 ; /* square each difference and add adjacent pairs */
+ pmaddwd mm2, mm2 ;
+ ;
+ paddd mm7, mm0 ;
+ paddd mm7, mm2 ;
+
+ add ecx, SrcStride ; /* Inc pointer into src data */
+ add edx, RefStride ; /* Inc pointer into ref data */
+
+ dec edi ;
+ jnz loop_start ;
+
+ movq mm0, mm5 ; /* fold the four XSum lanes into the low word */
+ psrlq mm5, 32 ;
+ paddw mm5, mm0 ;
+ movq mm0, mm5 ;
+ psrlq mm5, 16 ;
+ paddw mm5, mm0 ;
+ movd edi, mm5 ;
+ movsx edi, di ; /* sign-extend: the sum of differences may be negative */
+ mov eax, edi ;
+
+ movq mm0, mm7 ; /* fold the two XXSum lanes into the low dword */
+ psrlq mm7, 32 ;
+ paddd mm7, mm0 ;
+ movd ebx, mm7 ;
+
+ mov XSum, eax
+ mov XXSum, ebx
+
+ };
+
+ /* Compute and return population variance as mis-match metric. */
+ return (( (XXSum<<6) - XSum*XSum ));
+
+
+#endif
+}
+
+static ogg_uint32_t inter8x8_err_xy2__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
+ unsigned char *RefDataPtr1,
+ unsigned char *RefDataPtr2, ogg_uint32_t RefStride)
+{ /* Variance-style error metric of SrcData against the average of two
+   * reference blocks.
+   *
+   * For each of the 64 pixel positions takes
+   *   d = SrcData - avg(RefDataPtr1, RefDataPtr2)   (signed)
+   * accumulates XSum = sum(d) and XXSum = sum(d*d), and returns
+   * 64*XXSum - XSum*XSum in unsigned 32-bit arithmetic.  SrcData rows are
+   * SrcStride bytes apart; both reference pointers advance by RefStride.
+   * The disabled C branch is the reference; the __asm branch is compiled.
+   *
+   * The byte average is computed as (a & b) + (((a ^ b) & 0xfe) >> 1),
+   * which rounds down.
+   *
+   * No emms is executed here, so the MMX state is left active on return.
+   */
+#if 0
+ ogg_uint32_t i;
+ ogg_uint32_t XSum=0;
+ ogg_uint32_t XXSum=0;
+ ogg_int32_t DiffVal;
+
+ for (i=8; i; i--) {
+ DiffVal = DSP_OP_DIFF(SrcData[0], DSP_OP_AVG (RefDataPtr1[0], RefDataPtr2[0]));
+ XSum += DiffVal;
+ XXSum += DiffVal*DiffVal;
+
+ DiffVal = DSP_OP_DIFF(SrcData[1], DSP_OP_AVG (RefDataPtr1[1], RefDataPtr2[1]));
+ XSum += DiffVal;
+ XXSum += DiffVal*DiffVal;
+
+ DiffVal = DSP_OP_DIFF(SrcData[2], DSP_OP_AVG (RefDataPtr1[2], RefDataPtr2[2]));
+ XSum += DiffVal;
+ XXSum += DiffVal*DiffVal;
+
+ DiffVal = DSP_OP_DIFF(SrcData[3], DSP_OP_AVG (RefDataPtr1[3], RefDataPtr2[3]));
+ XSum += DiffVal;
+ XXSum += DiffVal*DiffVal;
+
+ DiffVal = DSP_OP_DIFF(SrcData[4], DSP_OP_AVG (RefDataPtr1[4], RefDataPtr2[4]));
+ XSum += DiffVal;
+ XXSum += DiffVal*DiffVal;
+
+ DiffVal = DSP_OP_DIFF(SrcData[5], DSP_OP_AVG (RefDataPtr1[5], RefDataPtr2[5]));
+ XSum += DiffVal;
+ XXSum += DiffVal*DiffVal;
+
+ DiffVal = DSP_OP_DIFF(SrcData[6], DSP_OP_AVG (RefDataPtr1[6], RefDataPtr2[6]));
+ XSum += DiffVal;
+ XXSum += DiffVal*DiffVal;
+
+ DiffVal = DSP_OP_DIFF(SrcData[7], DSP_OP_AVG (RefDataPtr1[7], RefDataPtr2[7]));
+ XSum += DiffVal;
+ XXSum += DiffVal*DiffVal;
+
+ /* Step to next row of block. */
+ SrcData += SrcStride;
+ RefDataPtr1 += RefStride;
+ RefDataPtr2 += RefStride;
+ }
+
+ /* Compute and return population variance as mis-match metric. */
+ return (( (XXSum<<6) - XSum*XSum ));
+#else
+ ogg_uint32_t XSum;
+ ogg_uint32_t XXSum;
+
+ __asm {
+ align 16
+
+ mov ebx, SrcData
+ mov ecx, RefDataPtr1
+ mov edx, RefDataPtr2
+
+ pcmpeqd mm4, mm4 ; /* all ones... */
+ paddb mm4, mm4 ; /* ...doubled per byte: fefefefefefefefe in mm4 */
+ pxor mm5, mm5 ; /* mm5 = running XSum (4 x 16-bit lanes) */
+ pxor mm6, mm6 ; /* mm6 = zero, used for unpacking */
+ pxor mm7, mm7 ; /* mm7 = running XXSum (2 x 32-bit lanes) */
+ mov edi, 8 ; /* 8 rows */
+ loop_start: ;
+ movq mm0, [ebx] ; /* take 8 bytes */
+
+ movq mm2, [ecx] ;
+ movq mm3, [edx] ; /* take average of mm2 and mm3 */
+ movq mm1, mm2 ;
+ pand mm1, mm3 ; /* bits common to both references */
+ pxor mm3, mm2 ; /* bits that differ */
+ pand mm3, mm4 ; /* drop each byte's low bit before the 64-bit shift */
+ psrlq mm3, 1 ; /* halve the differing bits */
+ paddb mm1, mm3 ; /* mm1 = byte-wise average, rounded down */
+
+ movq mm2, mm0 ;
+ movq mm3, mm1 ;
+
+ punpcklbw mm0, mm6 ; /* widen all four half-rows to 16-bit words */
+ punpcklbw mm1, mm6 ;
+ punpckhbw mm2, mm6 ;
+ punpckhbw mm3, mm6 ;
+
+ psubsw mm0, mm1 ; /* signed difference, low four pixels */
+ psubsw mm2, mm3 ; /* signed difference, high four pixels */
+
+ paddw mm5, mm0 ;
+ paddw mm5, mm2 ;
+
+ pmaddwd mm0, mm0 ; /* square each difference and add adjacent pairs */
+ pmaddwd mm2, mm2 ;
+ ;
+ paddd mm7, mm0 ;
+ paddd mm7, mm2 ;
+
+ add ebx, SrcStride ; /* Inc pointer into src data */
+ add ecx, RefStride ; /* Inc pointer into ref data */
+ add edx, RefStride ; /* Inc pointer into ref data */
+
+ dec edi ;
+ jnz loop_start ;
+
+ movq mm0, mm5 ; /* fold the four XSum lanes into the low word */
+ psrlq mm5, 32 ;
+ paddw mm5, mm0 ;
+ movq mm0, mm5 ;
+ psrlq mm5, 16 ;
+ paddw mm5, mm0 ;
+ movd edi, mm5 ;
+ movsx edi, di ; /* sign-extend: the sum of differences may be negative */
+ mov XSum, edi ; /* movl eax, edi ; Modified for vc to reuse eax */
+
+ movq mm0, mm7 ; /* fold the two XXSum lanes into the low dword */
+ psrlq mm7, 32 ;
+ paddd mm7, mm0 ;
+ movd XXSum, mm7 ; /*movd eax, mm7 ; Modified for vc to reuse eax */
+ };
+
+ return (( (XXSum<<6) - XSum*XSum ));
+
+#endif
+}
+
+static void restore_fpu (void)
+{
+ /* Executes emms, which marks the MMX registers as empty so the shared
+  * x87 register file can be used for floating point again.  The other
+  * __mmx routines in this file do not execute emms themselves.
+  */
+ __asm {
+ emms
+ }
+
+}
+
+void dsp_mmx_init(DspFunctions *funcs)
+{
+ /* Point every DSP entry of *funcs at its MMX implementation from this
+  * file.  Each store targets a different member, so the order of the
+  * assignments is irrelevant; they are grouped by purpose below.
+  */
+ TH_DEBUG("enabling accelerated x86_32 mmx dsp functions.\n");
+
+ /* error metrics */
+ funcs->inter8x8_err_xy2 = inter8x8_err_xy2__mmx;
+ funcs->inter8x8_err = inter8x8_err__mmx;
+ funcs->intra8x8_err = intra8x8_err__mmx;
+
+ /* sums of absolute differences */
+ funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__mmx;
+ funcs->sad8x8_thres = sad8x8_thres__mmx;
+ funcs->sad8x8 = sad8x8__mmx;
+ funcs->col_sad8x8 = col_sad8x8__mmx;
+ funcs->row_sad8 = row_sad8__mmx;
+
+ /* block subtraction */
+ funcs->sub8x8avg2 = sub8x8avg2__mmx;
+ funcs->sub8x8_128 = sub8x8_128__mmx;
+ funcs->sub8x8 = sub8x8__mmx;
+
+ /* MMX state clean-up */
+ funcs->restore_fpu = restore_fpu;
+}
+
Added: trunk/theora/lib/x86_32_vs/fdct_mmx.c
===================================================================
--- trunk/theora/lib/x86_32_vs/fdct_mmx.c 2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/lib/x86_32_vs/fdct_mmx.c 2006-05-28 18:18:26 UTC (rev 11451)
@@ -0,0 +1,334 @@
+;//==========================================================================
+;//
+;// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
+;// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+;// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
+;// PURPOSE.
+;//
+;// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
+;//
+;//--------------------------------------------------------------------------
+
+#include "theora/theora.h"
+#include "codec_internal.h"
+#include "dsp.h"
+
+
+static const ogg_int64_t xC1S7 = 0x0fb15fb15fb15fb15LL; /* 4 x 0xfb15 (64277) ~ 65536*cos(1*pi/16) */
+static const ogg_int64_t xC2S6 = 0x0ec83ec83ec83ec83LL; /* 4 x 0xec83 (60547) ~ 65536*cos(2*pi/16) */
+static const ogg_int64_t xC3S5 = 0x0d4dbd4dbd4dbd4dbLL; /* 4 x 0xd4db (54491) ~ 65536*cos(3*pi/16) */
+static const ogg_int64_t xC4S4 = 0x0b505b505b505b505LL; /* 4 x 0xb505 (46341) ~ 65536*cos(4*pi/16) */
+static const ogg_int64_t xC5S3 = 0x08e3a8e3a8e3a8e3aLL; /* 4 x 0x8e3a (36410) ~ 65536*cos(5*pi/16) */
+static const ogg_int64_t xC6S2 = 0x061f861f861f861f8LL; /* 4 x 0x61f8 (25080) ~ 65536*cos(6*pi/16) */
+static const ogg_int64_t xC7S1 = 0x031f131f131f131f1LL; /* 4 x 0x31f1 (12785) ~ 65536*cos(7*pi/16)
+ *
+ * Each constant above repeats one 16-bit fixed-point cosine in all four
+ * word lanes and is used as a pmulhw operand in Fdct_mmx.  pmulhw treats
+ * its operands as signed, so the lanes that are >= 0x8000 (xC1S7..xC5S3)
+ * act as (c - 65536) and the product comes out as x*c - x; Fdct_mmx adds
+ * x back after each such multiply.  xC6S2 and xC7S1 are below 0x8000 and
+ * need no correction.
+ */
+
+
+static __inline void Transpose_mmx( ogg_int16_t *InputData1, ogg_int16_t *OutputData1,
+ ogg_int16_t *InputData2, ogg_int16_t *OutputData2)
+{
+ /* Transposes two independent 4x4 blocks of 16-bit values.
+  *
+  * Block 1 is read from InputData1 and written, transposed, to
+  * OutputData1; block 2 is read from InputData2 and written, transposed,
+  * to OutputData2.  In every buffer the four rows of a block are 16 bytes
+  * apart (offsets 0, 16, 32, 48), i.e. one row of an 8-column int16
+  * matrix.
+  *
+  * OutputData1 is also used as scratch: rows a and b of block 1 are
+  * parked there and reloaded after block 2 has been handled.  All four
+  * rows of block 1 are loaded before the first store, and the call sites
+  * in this file rely on that when they pass the same pointer as input
+  * and output.
+  *
+  * No emms is executed here; the caller is responsible for it.
+  */
+ __asm {
+ align 16
+ mov eax, InputData1
+ mov ebx, InputData2
+ mov ecx, OutputData1
+ mov edx, OutputData2
+
+
+ movq mm0, [eax] ; /* mm0 = a0 a1 a2 a3 */
+ movq mm4, [ebx] ; /* mm4 = e4 e5 e6 e7 */
+ movq mm1, [16 + eax] ; /* mm1 = b0 b1 b2 b3 */
+ movq mm5, [16 + ebx] ; /* mm5 = f4 f5 f6 f7 */
+ movq mm2, [32 + eax] ; /* mm2 = c0 c1 c2 c3 */
+ movq mm6, [32 + ebx] ; /* mm6 = g4 g5 g6 g7 */
+ movq mm3, [48 + eax] ; /* mm3 = d0 d1 d2 d3 */
+ movq [16 + ecx], mm1 ; /* save b0 b1 b2 b3 */
+ movq mm7, [48 + ebx] ; /* mm7 = h0 h1 h2 h3 */
+ ; /* Transpose 2x8 block */
+ movq mm1, mm4 ; /* mm1 = e3 e2 e1 e0 */
+ punpcklwd mm4, mm5 ; /* mm4 = f1 e1 f0 e0 */
+ movq [ecx], mm0 ; /* save a3 a2 a1 a0 */
+ punpckhwd mm1, mm5 ; /* mm1 = f3 e3 f2 e2 */
+ movq mm0, mm6 ; /* mm0 = g3 g2 g1 g0 */
+ punpcklwd mm6, mm7 ; /* mm6 = h1 g1 h0 g0 */
+ movq mm5, mm4 ; /* mm5 = f1 e1 f0 e0 */
+ punpckldq mm4, mm6 ; /* mm4 = h0 g0 f0 e0 = MM4 */
+ punpckhdq mm5, mm6 ; /* mm5 = h1 g1 f1 e1 = MM5 */
+ movq mm6, mm1 ; /* mm6 = f3 e3 f2 e2 */
+ movq [edx], mm4 ;
+ punpckhwd mm0, mm7 ; /* mm0 = h3 g3 h2 g2 */
+ movq [16 + edx], mm5 ;
+ punpckhdq mm6, mm0 ; /* mm6 = h3 g3 f3 e3 = MM7 */
+ movq mm4, [ecx] ; /* mm4 = a3 a2 a1 a0 */
+ punpckldq mm1, mm0 ; /* mm1 = h2 g2 f2 e2 = MM6 */
+ movq mm5, [16 + ecx] ; /* mm5 = b3 b2 b1 b0 */
+ movq mm0, mm4 ; /* mm0 = a3 a2 a1 a0 */
+ movq [48 + edx], mm6 ;
+ punpcklwd mm0, mm5 ; /* mm0 = b1 a1 b0 a0 */
+ movq [32 + edx], mm1 ;
+ punpckhwd mm4, mm5 ; /* mm4 = b3 a3 b2 a2 */
+ movq mm5, mm2 ; /* mm5 = c3 c2 c1 c0 */
+ punpcklwd mm2, mm3 ; /* mm2 = d1 c1 d0 c0 */
+ movq mm1, mm0 ; /* mm1 = b1 a1 b0 a0 */
+ punpckldq mm0, mm2 ; /* mm0 = d0 c0 b0 a0 = MM0 */
+ punpckhdq mm1, mm2 ; /* mm1 = d1 c1 b1 a1 = MM1 */
+ movq mm2, mm4 ; /* mm2 = b3 a3 b2 a2 */
+ movq [ecx], mm0 ;
+ punpckhwd mm5, mm3 ; /* mm5 = d3 c3 d2 c2 */
+ movq [16 + ecx], mm1 ;
+ punpckhdq mm4, mm5 ; /* mm4 = d3 c3 b3 a3 = MM3 */
+ punpckldq mm2, mm5 ; /* mm2 = d2 c2 b2 a2 = MM2 */
+ movq [48 + ecx], mm4 ;
+ movq [32 + ecx], mm2 ;
+
+ };
+
+
+}
+
+static __inline void Fdct_mmx( ogg_int16_t *InputData1, ogg_int16_t *InputData2, ogg_int16_t *temp)
+{
+ /* One 8-point forward DCT pass over four 16-bit lanes, done in place.
+  *
+  * Inputs ip0..ip3 are the four quadwords at InputData1 + 0/16/32/48
+  * bytes and ip4..ip7 the four quadwords at InputData2 + 0/16/32/48
+  * bytes.  Outputs overwrite the same slots: op0..op3 go to InputData1
+  * and op4..op7 to InputData2.  `temp` must point to at least 8 writable
+  * bytes; it holds the intermediate is07 - is34.
+  *
+  * Multiplies use pmulhw with the file-scope xC?S? constants.  Where the
+  * constant is >= 0x8000 the product is x*c - x, so the input is added
+  * back afterwards; the extra "psrlw 15 / paddw" pair adds 1 to negative
+  * inputs (the steps marked "Truncated").
+  *
+  * No emms is executed here; the caller is responsible for it.
+  */
+ __asm {
+ align 16
+
+
+ mov eax, InputData1
+ mov ebx, InputData2
+ mov ecx, temp
+ movq mm0, [eax] ;
+ movq mm1, [16 + eax] ;
+ movq mm2, [48 + eax] ;
+ movq mm3, [16 + ebx] ;
+ movq mm4, mm0 ;
+ movq mm5, mm1 ;
+ movq mm6, mm2 ;
+ movq mm7, mm3 ;
+ ;
+ paddsw mm0, [48 + ebx] ; /* mm0 = ip0 + ip7 = is07 */
+ paddsw mm1, [32 + eax] ; /* mm1 = ip1 + ip2 = is12 */
+ paddsw mm2, [ebx] ; /* mm2 = ip3 + ip4 = is34 */
+ paddsw mm3, [32 + ebx] ; /* mm3 = ip5 + ip6 = is56 */
+ psubsw mm4, [48 + ebx] ; /* mm4 = ip0 - ip7 = id07 */
+ psubsw mm5, [32 + eax] ; /* mm5 = ip1 - ip2 = id12 */
+ ;
+ psubsw mm0, mm2 ; /* mm0 = is07 - is34 */
+ ;
+ paddsw mm2, mm2 ;
+ ;
+ psubsw mm6, [ebx] ; /* mm6 = ip3 - ip4 = id34 */
+ ;
+ paddsw mm2, mm0 ; /* mm2 = is07 + is34 = is0734 */
+ psubsw mm1, mm3 ; /* mm1 = is12 - is56 */
+ movq [ecx], mm0 ; /* Save is07 - is34 to free mm0; */
+ paddsw mm3, mm3 ;
+ paddsw mm3, mm1 ; /* mm3 = is12 + is56 = is1256 */
+ ;
+ psubsw mm7, [32 + ebx] ; /* mm7 = ip5 - ip6 = id56 */
+ ; /* ------------------------------------------------------------------- */
+ psubsw mm5, mm7 ; /* mm5 = id12 - id56 */
+ paddsw mm7, mm7 ;
+ paddsw mm7, mm5 ; /* mm7 = id12 + id56 */
+ ; /* ------------------------------------------------------------------- */
+ psubsw mm2, mm3 ; /* mm2 = is0734 - is1256 */
+ paddsw mm3, mm3 ;
+ ;
+ movq mm0, mm2 ; /* make a copy */
+ paddsw mm3, mm2 ; /* mm3 = is0734 + is1256 */
+ ;
+ pmulhw mm0, xC4S4 ; /* mm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */
+ paddw mm0, mm2 ; /* mm0 = xC4S4 * ( is0734 - is1256 ) */
+ psrlw mm2, 15 ;
+ paddw mm0, mm2 ; /* Truncate mm0, now it is op[4] */
+ ;
+ movq mm2, mm3 ;
+ movq [ebx], mm0 ; /* save ip4, now mm0,mm2 are free */
+ ;
+ movq mm0, mm3 ;
+ pmulhw mm3, xC4S4 ; /* mm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */
+ ;
+ psrlw mm2, 15 ;
+ paddw mm3, mm0 ; /* mm3 = xC4S4 * ( is0734 +is1256 ) */
+ paddw mm3, mm2 ; /* Truncate mm3, now it is op[0] */
+ ;
+ movq [eax], mm3 ;
+ ; /* ------------------------------------------------------------------- */
+ movq mm3, [ecx] ; /* mm3 = irot_input_y */
+ pmulhw mm3, xC2S6 ; /* mm3 = xC2S6 * irot_input_y - irot_input_y */
+ ;
+ movq mm2, [ecx] ;
+ movq mm0, mm2 ;
+ ;
+ psrlw mm2, 15 ; /* mm3 = xC2S6 * irot_input_y */
+ paddw mm3, mm0 ;
+ ;
+ paddw mm3, mm2 ; /* Truncated */
+ movq mm0, mm5 ;
+ ;
+ movq mm2, mm5 ;
+ pmulhw mm0, xC6S2 ; /* mm0 = xC6S2 * irot_input_x */
+ ;
+ psrlw mm2, 15 ;
+ paddw mm0, mm2 ; /* Truncated */
+ ;
+ paddsw mm3, mm0 ; /* ip[2] */
+ movq [32 + eax], mm3 ; /* Save ip2 */
+ ;
+ movq mm0, mm5 ;
+ movq mm2, mm5 ;
+ ;
+ pmulhw mm5, xC2S6 ; /* mm5 = xC2S6 * irot_input_x - irot_input_x */
+ psrlw mm2, 15 ;
+ ;
+ movq mm3, [ecx] ;
+ paddw mm5, mm0 ; /* mm5 = xC2S6 * irot_input_x */
+ ;
+ paddw mm5, mm2 ; /* Truncated */
+ movq mm2, mm3 ;
+ ;
+ pmulhw mm3, xC6S2 ; /* mm3 = xC6S2 * irot_input_y */
+ psrlw mm2, 15 ;
+ ;
+ paddw mm3, mm2 ; /* Truncated */
+ psubsw mm3, mm5 ;
+ ;
+ movq [32 + ebx], mm3 ;
+ ; /* ------------------------------------------------------------------- */
+ movq mm0, xC4S4 ;
+ movq mm2, mm1 ;
+ movq mm3, mm1 ;
+ ;
+ pmulhw mm1, mm0 ; /* mm1 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */
+ psrlw mm2, 15 ;
+ ;
+ paddw mm1, mm3 ; /* mm1 = xC4S4 * ( is12 - is56 ) */
+ paddw mm1, mm2 ; /* Truncate mm1, now it is icommon_product1 */
+ ;
+ movq mm2, mm7 ;
+ movq mm3, mm7 ;
+ ;
+ pmulhw mm7, mm0 ; /* mm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */
+ psrlw mm2, 15 ;
+ ;
+ paddw mm7, mm3 ; /* mm7 = xC4S4 * ( id12 + id56 ) */
+ paddw mm7, mm2 ; /* Truncate mm7, now it is icommon_product2 */
+ ; /* ------------------------------------------------------------------- */
+ pxor mm0, mm0 ; /* Clear mm0 */
+ psubsw mm0, mm6 ; /* mm0 = - id34 */
+ ;
+ psubsw mm0, mm7 ; /* mm0 = - ( id34 + idcommon_product2 ) */
+ paddsw mm6, mm6 ;
+ paddsw mm6, mm0 ; /* mm6 = id34 - icommon_product2 */
+ ;
+ psubsw mm4, mm1 ; /* mm4 = id07 - icommon_product1 */
+ paddsw mm1, mm1 ;
+ paddsw mm1, mm4 ; /* mm1 = id07 + icommon_product1 */
+ ; /* ------------------------------------------------------------------- */
+ movq mm7, xC1S7 ;
+ movq mm2, mm1 ;
+ ;
+ movq mm3, mm1 ;
+ pmulhw mm1, mm7 ; /* mm1 = xC1S7 * irot_input_x - irot_input_x */
+ ;
+ movq mm7, xC7S1 ;
+ psrlw mm2, 15 ;
+ ;
+ paddw mm1, mm3 ; /* mm1 = xC1S7 * irot_input_x */
+ paddw mm1, mm2 ; /* Truncated */
+ ;
+ pmulhw mm3, mm7 ; /* mm3 = xC7S1 * irot_input_x */
+ paddw mm3, mm2 ; /* Truncated */
+ ;
+ movq mm5, mm0 ;
+ movq mm2, mm0 ;
+ ;
+ movq mm7, xC1S7 ;
+ pmulhw mm0, mm7 ; /* mm0 = xC1S7 * irot_input_y - irot_input_y */
+ ;
+ movq mm7, xC7S1 ;
+ psrlw mm2, 15 ;
+ ;
+ paddw mm0, mm5 ; /* mm0 = xC1S7 * irot_input_y */
+ paddw mm0, mm2 ; /* Truncated */
+ ;
+ pmulhw mm5, mm7 ; /* mm5 = xC7S1 * irot_input_y */
+ paddw mm5, mm2 ; /* Truncated */
+ ;
+ psubsw mm1, mm5 ; /* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = ip1 */
+ paddsw mm3, mm0 ; /* mm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = ip7 */
+ ;
+ movq [16 + eax], mm1 ;
+ movq [48 + ebx], mm3 ;
+ ; /* ------------------------------------------------------------------- */
+ movq mm0, xC3S5 ;
+ movq mm1, xC5S3 ;
+ ;
+ movq mm5, mm6 ;
+ movq mm7, mm6 ;
+ ;
+ movq mm2, mm4 ;
+ movq mm3, mm4 ;
+ ;
+ pmulhw mm4, mm0 ; /* mm4 = xC3S5 * irot_input_x - irot_input_x */
+ pmulhw mm6, mm1 ; /* mm6 = xC5S3 * irot_input_y - irot_input_y */
+ ;
+ psrlw mm2, 15 ;
+ psrlw mm5, 15 ;
+ ;
+ paddw mm4, mm3 ; /* mm4 = xC3S5 * irot_input_x */
+ paddw mm6, mm7 ; /* mm6 = xC5S3 * irot_input_y */
+ ;
+ paddw mm4, mm2 ; /* Truncated */
+ paddw mm6, mm5 ; /* Truncated */
+ ;
+ psubsw mm4, mm6 ; /* ip3 */
+ movq [48 + eax], mm4 ;
+ ;
+ movq mm4, mm3 ;
+ movq mm6, mm7 ;
+ ;
+ pmulhw mm3, mm1 ; /* mm3 = xC5S3 * irot_input_x - irot_input_x */
+ pmulhw mm7, mm0 ; /* mm7 = xC3S5 * irot_input_y - irot_input_y */
+ ;
+ paddw mm4, mm2 ;
+ paddw mm6, mm5 ;
+ ;
+ paddw mm3, mm4 ; /* mm3 = xC5S3 * irot_input_x */
+ paddw mm7, mm6 ; /* mm7 = xC3S5 * irot_input_y */
+ ;
+ paddw mm3, mm7 ; /* ip5 */
+ movq [16 + ebx], mm3 ;
+
+};
+
+}
+
+
+static void fdct_short__mmx ( ogg_int16_t *InputData, ogg_int16_t *OutputData)
+{
+ /* 8x8 forward DCT of 16-bit samples: InputData (64 values, 8 per row)
+  * is transformed into OutputData (64 values).
+  *
+  * The work is done as four transpose + 1-D DCT steps, each handling
+  * four lanes at a time:
+  *   1. rows 0..3 of InputData -> OutputData, then DCT in place;
+  *   2. rows 4..7 of InputData -> OutputData + 32, then DCT in place;
+  *   3. columns 0..3 of OutputData, transposed and DCT'd in place;
+  *   4. columns 4..7 of OutputData, transposed and DCT'd in place.
+  * emms is executed at the end so the caller can use the FPU again.
+  *
+  * The scratch buffer for Fdct_mmx lives on the stack.  It used to be a
+  * function-level static, which made concurrent calls from different
+  * threads share (and corrupt) the same scratch area.
+  */
+ ogg_int16_t tmp[32];
+ /* Round tmp up to the next 16-byte boundary (offset 0..15).  This is the
+  * value the previous expression "16 - ((int)tmp)&15" produced, because
+  * '-' binds tighter than '&'; it is now written out explicitly.  At most
+  * 15 of the 64 bytes are skipped and Fdct_mmx only uses 8 bytes. */
+ ogg_int16_t* align_tmp = (ogg_int16_t*)((unsigned char*)tmp + ((16 - (((int)tmp) & 15)) & 15));
+
+
+ Transpose_mmx(InputData, OutputData, InputData + 4, OutputData + 4);
+ Fdct_mmx(OutputData, OutputData + 4, align_tmp);
+
+ Transpose_mmx(InputData + 32, OutputData + 32, InputData + 36, OutputData + 36);
+ Fdct_mmx(OutputData+32, OutputData + 36, align_tmp);
+
+ Transpose_mmx(OutputData, OutputData, OutputData + 32, OutputData + 32);
+ Fdct_mmx(OutputData, OutputData + 32, align_tmp);
+
+ Transpose_mmx(OutputData + 4, OutputData + 4, OutputData + 36, OutputData + 36);
+ Fdct_mmx(OutputData + 4, OutputData + 36, align_tmp);
+
+ __asm emms
+
+}
+
+void dsp_mmx_fdct_init(DspFunctions *funcs)
+{ /* Points funcs->fdct_short at the MMX forward DCT defined in this file
+   * and emits a TH_DEBUG trace message.  No other member is touched. */
+ TH_DEBUG("enabling accelerated x86_32 mmx fdct function.\n");
+ funcs->fdct_short = fdct_short__mmx;
+}
Added: trunk/theora/lib/x86_32_vs/recon_mmx.c
===================================================================
--- trunk/theora/lib/x86_32_vs/recon_mmx.c 2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/lib/x86_32_vs/recon_mmx.c 2006-05-28 18:18:26 UTC (rev 11451)
@@ -0,0 +1,198 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
+ * by the Xiph.Org Foundation http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function:
+ last mod: $Id: reconstruct.c,v 1.6 2003/12/03 08:59:41 arc Exp $
+
+ ********************************************************************/
+
+#include "codec_internal.h"
+
+
+/* 0x80 in each of the eight byte lanes.  recon_intra8x8__mmx loads this
+   into mm0 and XORs it into every packed output row. */
+static const unsigned __int64 V128 = 0x8080808080808080LL;
+
+/* Copy eight rows of eight bytes from src to dest.
+ *
+ * src    - first byte of the source rows.
+ * dest   - first byte of the destination rows.
+ * stride - byte distance between consecutive rows; the same value is
+ *          used for both src and dest.
+ *
+ * The rows are moved in two groups of four through mm0..mm3.  No emms is
+ * issued here.
+ */
+static void copy8x8__mmx (unsigned char *src,
+ unsigned char *dest,
+ unsigned int stride)
+{
+
+ //Is this even the fastest way to do this?
+ __asm {
+ align 16
+
+ mov eax, src
+ mov ebx, dest
+ mov ecx, stride
+
+ ; edi = 3 * stride, used to address the fourth row of each group
+ lea edi, [ecx + ecx * 2]
+ ; load rows 0..3
+ movq mm0, [eax]
+ movq mm1, [eax + ecx]
+ movq mm2, [eax + ecx * 2]
+ movq mm3, [eax + edi]
+ ; advance src by four rows
+ lea eax, [eax + ecx * 4]
+ ; store rows 0..3
+ movq [ebx], mm0
+ movq [ebx + ecx], mm1
+ movq [ebx + ecx * 2], mm2
+ movq [ebx + edi], mm3
+ ; advance dest by four rows
+ lea ebx, [ebx + ecx * 4]
+ ; load rows 4..7
+ movq mm0, [eax]
+ movq mm1, [eax + ecx]
+ movq mm2, [eax + ecx * 2]
+ movq mm3, [eax + edi]
+ ; store rows 4..7
+ movq [ebx], mm0
+ movq [ebx + ecx], mm1
+ movq [ebx + ecx * 2], mm2
+ movq [ebx + edi], mm3
+
+ };
+
+}
+
+/* Write an 8x8 block of bytes built only from 16-bit values.
+ *
+ * ReconPtr  - destination; one 8-byte row is stored per iteration and the
+ *             pointer then advances by LineStep bytes.
+ * ChangePtr - 64 consecutive 16-bit inputs (128 bytes), read 16 bytes per
+ *             iteration.
+ * LineStep  - byte distance between destination rows.
+ *
+ * Each row is packed to bytes with signed saturation and then XORed with
+ * 0x80 per byte, which maps the saturated range -128..127 onto 0..255.
+ * The loop runs until the input pointer reaches ChangePtr + 128, i.e.
+ * eight times.  No emms is issued here.
+ */
+static void recon_intra8x8__mmx (unsigned char *ReconPtr, ogg_int16_t *ChangePtr,
+ ogg_uint32_t LineStep)
+{
+
+ __asm {
+ align 16
+
+ mov eax, ReconPtr
+ mov ebx, ChangePtr
+ mov ecx, LineStep
+
+ ; mm0 = 0x80 in every byte
+ movq mm0, V128
+
+ ; edi = end of the 128-byte input
+ lea edi, [128 + ebx]
+ loop_start:
+ ; first four 16-bit inputs of the row
+ movq mm2, [ebx]
+
+ ; pack them with the next four into eight signed-saturated bytes
+ packsswb mm2, [8 + ebx]
+ ; leaves mm0 unchanged
+ por mm0, mm0
+ ; flip the top bit of every byte (adds 128 modulo 256)
+ pxor mm2, mm0
+ lea ebx, [16 + ebx]
+ ; sets carry while ebx < edi; the movq and lea below do not modify
+ ; the flags, so the jc at the bottom still tests this compare
+ cmp ebx, edi
+
+ movq [eax], mm2
+
+
+
+ lea eax, [eax + ecx]
+ jc loop_start
+
+
+ };
+
+}
+
+
+
+
+
+/* Write an 8x8 block of bytes as reference bytes plus 16-bit values.
+ *
+ * ReconPtr  - destination; one 8-byte row is stored per iteration.
+ * RefPtr    - reference bytes; one 8-byte row is read per iteration.
+ * ChangePtr - 64 consecutive 16-bit values (128 bytes) added to the
+ *             reference, read 16 bytes per iteration.
+ * LineStep  - byte distance between rows; applied to both ReconPtr and
+ *             RefPtr.
+ *
+ * Reference bytes are zero-extended to 16 bits, the 16-bit values are
+ * added with signed saturation, and the sums are packed back to bytes
+ * with unsigned saturation.  The loop runs until the input pointer
+ * reaches ChangePtr + 128, i.e. eight times.  No emms is issued here.
+ */
+static void recon_inter8x8__mmx (unsigned char *ReconPtr, unsigned char *RefPtr,
+ ogg_int16_t *ChangePtr, ogg_uint32_t LineStep)
+{
+
+ __asm {
+
+ align 16
+
+ mov eax, ReconPtr
+ mov ebx, ChangePtr
+ mov ecx, LineStep
+ mov edx, RefPtr
+
+ ; mm0 = 0, used to zero-extend bytes to words
+ pxor mm0, mm0
+ ; edi = end of the 128-byte input
+ lea edi, [128 + ebx]
+
+ loop_start:
+ ; eight reference bytes
+ movq mm2, [edx]
+
+ movq mm4, [ebx]
+ movq mm3, mm2
+ movq mm5, [8 + ebx]
+ ; low four reference bytes -> words, plus first four 16-bit values
+ punpcklbw mm2, mm0
+ paddsw mm2, mm4
+ ; high four reference bytes -> words, plus next four 16-bit values
+ punpckhbw mm3, mm0
+ paddsw mm3, mm5
+ add edx, ecx
+ ; clamp to 0..255 and pack to eight bytes
+ packuswb mm2, mm3
+ lea ebx, [16 + ebx]
+ ; sets carry while ebx < edi; the movq and lea below do not modify
+ ; the flags, so the jc at the bottom still tests this compare
+ cmp ebx, edi
+
+ movq [eax], mm2
+
+ lea eax, [eax + ecx]
+ jc loop_start
+
+ };
+}
+
+
+
+
+/* Write an 8x8 block of bytes as the average of two references plus
+ * 16-bit values.
+ *
+ * ReconPtr  - destination; one 8-byte row is stored per iteration.
+ * RefPtr1   - first reference; one 8-byte row is read per iteration.
+ * RefPtr2   - second reference; one 8-byte row is read per iteration.
+ * ChangePtr - 64 consecutive 16-bit values (128 bytes) added to the
+ *             average, read 16 bytes per iteration.
+ * LineStep  - byte distance between rows; applied to ReconPtr, RefPtr1
+ *             and RefPtr2.
+ *
+ * Per element the result is ((ref1 + ref2) >> 1) + change, packed to a
+ * byte with unsigned saturation.  The shift truncates (no rounding term
+ * is added) and the 16-bit additions use paddw, which wraps rather than
+ * saturates.  The loop runs until the input pointer reaches
+ * ChangePtr + 128, i.e. eight times.  No emms is issued here.
+ */
+static void recon_inter8x8_half__mmx (unsigned char *ReconPtr, unsigned char *RefPtr1,
+ unsigned char *RefPtr2, ogg_int16_t *ChangePtr,
+ ogg_uint32_t LineStep)
+{
+ __asm {
+ align 16
+
+ mov eax, ReconPtr
+ mov ebx, ChangePtr
+ mov ecx, RefPtr1
+ mov edx, RefPtr2
+
+ ; mm0 = 0, used to zero-extend bytes to words
+ pxor mm0, mm0
+ ; edi = end of the 128-byte input
+ lea edi, [128 + ebx]
+
+ loop_start:
+ ; eight bytes from each reference
+ movq mm2, [ecx]
+ movq mm4, [edx]
+
+ ; widen both references to words: mm2/mm3 = ref1 low/high,
+ ; mm4/mm5 = ref2 low/high; mm6/mm7 = the eight 16-bit values
+ movq mm3, mm2
+ punpcklbw mm2, mm0
+ movq mm5, mm4
+ movq mm6, [ebx]
+ punpckhbw mm3, mm0
+ movq mm7, [8 + ebx]
+ punpcklbw mm4, mm0
+ punpckhbw mm5, mm0
+ ; (ref1 + ref2) >> 1
+ paddw mm2, mm4
+ paddw mm3, mm5
+ psrlw mm2, 1
+ psrlw mm3, 1
+ ; add the 16-bit values (wrapping add)
+ paddw mm2, mm6
+ paddw mm3, mm7
+ lea ebx, [16 + ebx]
+ ; clamp to 0..255 and pack to eight bytes
+ packuswb mm2, mm3
+ ; the row step is read from the LineStep argument each time
+ add ecx, LineStep
+ add edx, LineStep
+ movq [eax], mm2
+ add eax, LineStep
+ ; carry is set while ebx < edi
+ cmp ebx, edi
+ jc loop_start
+
+ };
+
+}
+
+
+
+
+/* Install the MMX reconstruction routines in a dispatch table.
+ *
+ * funcs - table to update; its copy8x8, recon_intra8x8, recon_inter8x8
+ *         and recon_inter8x8_half entries are pointed at the __mmx
+ *         functions defined in this file.  A TH_DEBUG message is emitted
+ *         first.
+ */
+void dsp_mmx_recon_init(DspFunctions *funcs)
+{
+ TH_DEBUG("enabling accelerated x86_32 mmx recon functions.\n");
+ funcs->copy8x8 = copy8x8__mmx;
+ funcs->recon_intra8x8 = recon_intra8x8__mmx;
+ funcs->recon_inter8x8 = recon_inter8x8__mmx;
+ funcs->recon_inter8x8_half = recon_inter8x8_half__mmx;
+}
+
Added: trunk/theora/win32/VS2005/dump_video/dump_video.vcproj
===================================================================
--- trunk/theora/win32/VS2005/dump_video/dump_video.vcproj 2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/win32/VS2005/dump_video/dump_video.vcproj 2006-05-28 18:18:26 UTC (rev 11451)
@@ -0,0 +1,215 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+ ProjectType="Visual C++"
+ Version="8.00"
+ Name="dump_video"
+ ProjectGUID="{1A8CA99D-B6C7-48CB-B263-6CECDADF5FBF}"
+ RootNamespace="dump_video"
+ Keyword="Win32Proj"
+ >
+ <Platforms>
+ <Platform
+ Name="Win32"
+ />
+ </Platforms>
+ <ToolFiles>
+ </ToolFiles>
+ <Configurations>
+ <Configuration
+ Name="Debug|Win32"
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="1"
+ CharacterSet="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include"
+ PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
+ MinimalRebuild="true"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="1"
+ UsePrecompiledHeader="0"
+ WarningLevel="3"
+ Detect64BitPortabilityProblems="true"
+ DebugInformationFormat="4"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLinkerTool"
+ LinkIncremental="2"
+ GenerateDebugInformation="true"
+ SubSystem="1"
+ TargetMachine="1"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCManifestTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCAppVerifierTool"
+ />
+ <Tool
+ Name="VCWebDeploymentTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Release|Win32"
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="1"
+ CharacterSet="1"
+ WholeProgramOptimization="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include"
+ PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
+ RuntimeLibrary="0"
+ UsePrecompiledHeader="0"
+ WarningLevel="3"
+ Detect64BitPortabilityProblems="true"
+ DebugInformationFormat="3"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLinkerTool"
+ LinkIncremental="1"
+ GenerateDebugInformation="true"
+ SubSystem="1"
+ OptimizeReferences="2"
+ EnableCOMDATFolding="2"
+ TargetMachine="1"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCManifestTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCAppVerifierTool"
+ />
+ <Tool
+ Name="VCWebDeploymentTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ </Configurations>
+ <References>
+ </References>
+ <Files>
+ <Filter
+ Name="Source Files"
+ Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
+ UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
+ >
+ <File
+ RelativePath="..\..\..\examples\dump_video.c"
+ >
+ </File>
+ <File
+ RelativePath="..\..\getopt.c"
+ >
+ </File>
+ <File
+ RelativePath="..\..\getopt1.c"
+ >
+ </File>
+ </Filter>
+ <Filter
+ Name="Header Files"
+ Filter="h;hpp;hxx;hm;inl;inc;xsd"
+ UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
+ >
+ <File
+ RelativePath="..\..\..\examples\getopt.h"
+ >
+ </File>
+ <File
+ RelativePath="..\..\getopt_win.h"
+ >
+ </File>
+ </Filter>
+ <Filter
+ Name="Resource Files"
+ Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
+ UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
+ >
+ </Filter>
+ </Files>
+ <Globals>
+ </Globals>
+</VisualStudioProject>
Added: trunk/theora/win32/VS2005/encoder_example/encoder_example.vcproj
===================================================================
--- trunk/theora/win32/VS2005/encoder_example/encoder_example.vcproj 2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/win32/VS2005/encoder_example/encoder_example.vcproj 2006-05-28 18:18:26 UTC (rev 11451)
@@ -0,0 +1,211 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+ ProjectType="Visual C++"
+ Version="8.00"
+ Name="encoder_example"
+ ProjectGUID="{AD710263-EBFA-4388-BAA9-AD73C32AFF26}"
+ RootNamespace="encoder_example"
+ Keyword="Win32Proj"
+ >
+ <Platforms>
+ <Platform
+ Name="Win32"
+ />
+ </Platforms>
+ <ToolFiles>
+ </ToolFiles>
+ <Configurations>
+ <Configuration
+ Name="Debug|Win32"
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="1"
+ CharacterSet="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ Optimization="0"
+ AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include;..\..\..\..\libvorbis\include"
+ PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
+ MinimalRebuild="true"
+ BasicRuntimeChecks="3"
+ RuntimeLibrary="1"
+ UsePrecompiledHeader="0"
+ WarningLevel="3"
+ Detect64BitPortabilityProblems="true"
+ DebugInformationFormat="4"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLinkerTool"
+ LinkIncremental="2"
+ GenerateDebugInformation="true"
+ SubSystem="1"
+ TargetMachine="1"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCManifestTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCAppVerifierTool"
+ />
+ <Tool
+ Name="VCWebDeploymentTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ <Configuration
+ Name="Release|Win32"
+ OutputDirectory="$(SolutionDir)$(ConfigurationName)"
+ IntermediateDirectory="$(ConfigurationName)"
+ ConfigurationType="1"
+ CharacterSet="1"
+ WholeProgramOptimization="1"
+ >
+ <Tool
+ Name="VCPreBuildEventTool"
+ />
+ <Tool
+ Name="VCCustomBuildTool"
+ />
+ <Tool
+ Name="VCXMLDataGeneratorTool"
+ />
+ <Tool
+ Name="VCWebServiceProxyGeneratorTool"
+ />
+ <Tool
+ Name="VCMIDLTool"
+ />
+ <Tool
+ Name="VCCLCompilerTool"
+ InlineFunctionExpansion="2"
+ EnableIntrinsicFunctions="true"
+ FavorSizeOrSpeed="1"
+ AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include;..\..\..\..\libvorbis\include"
+ PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
+ RuntimeLibrary="0"
+ UsePrecompiledHeader="0"
+ WarningLevel="4"
+ Detect64BitPortabilityProblems="true"
+ DebugInformationFormat="0"
+ />
+ <Tool
+ Name="VCManagedResourceCompilerTool"
+ />
+ <Tool
+ Name="VCResourceCompilerTool"
+ />
+ <Tool
+ Name="VCPreLinkEventTool"
+ />
+ <Tool
+ Name="VCLinkerTool"
+ LinkIncremental="1"
+ GenerateDebugInformation="false"
+ SubSystem="1"
+ OptimizeReferences="2"
+ EnableCOMDATFolding="2"
+ OptimizeForWindows98="1"
+ TargetMachine="1"
+ />
+ <Tool
+ Name="VCALinkTool"
+ />
+ <Tool
+ Name="VCManifestTool"
+ />
+ <Tool
+ Name="VCXDCMakeTool"
+ />
+ <Tool
+ Name="VCBscMakeTool"
+ />
+ <Tool
+ Name="VCFxCopTool"
+ />
+ <Tool
+ Name="VCAppVerifierTool"
+ />
+ <Tool
+ Name="VCWebDeploymentTool"
+ />
+ <Tool
+ Name="VCPostBuildEventTool"
+ />
+ </Configuration>
+ </Configurations>
+ <References>
+ </References>
+ <Files>
+ <Filter
+ Name="Source Files"
+ Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
+ UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
+ >
+ <File
+ RelativePath="..\..\..\examples\encoder_example.c"
+ >
+ </File>
+ <File
+ RelativePath="..\..\getopt.c"
+ >
+ </File>
+ <File
+ RelativePath="..\..\getopt1.c"
+ >
+ </File>
+ </Filter>
+ <Filter
+ Name="Header Files"
+ Filter="h;hpp;hxx;hm;inl;inc;xsd"
+ UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
+ >
+ </Filter>
+ <Filter
+ Name="Resource Files"
+ Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
+ UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
+ >
+ </Filter>
+ </Files>
+ <Globals>
+ </Globals>
+</VisualStudioProject>
Modified: trunk/theora/win32/VS2005/libtheora/libtheora.vcproj
===================================================================
--- trunk/theora/win32/VS2005/libtheora/libtheora.vcproj 2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/win32/VS2005/libtheora/libtheora.vcproj 2006-05-28 18:18:26 UTC (rev 11451)
@@ -41,8 +41,8 @@
<Tool
Name="VCCLCompilerTool"
Optimization="0"
- AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include"
- PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS"
+ AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include;..\..\..\lib"
+ PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS; THEORA_USE_ASM"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
@@ -122,12 +122,12 @@
/>
<Tool
Name="VCCLCompilerTool"
- Optimization="3"
+ Optimization="2"
InlineFunctionExpansion="2"
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="1"
- AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include"
- PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS"
+ AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include;..\..\..\lib"
+ PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS; THEORA_USE_ASM"
StringPooling="true"
ExceptionHandling="0"
RuntimeLibrary="0"
@@ -135,7 +135,7 @@
UsePrecompiledHeader="0"
WarningLevel="4"
Detect64BitPortabilityProblems="true"
- DebugInformationFormat="3"
+ DebugInformationFormat="0"
CompileAs="1"
DisableSpecificWarnings="4244;4267;4057;4100;4245"
/>
@@ -153,7 +153,7 @@
OutputFile="$(OutDir)/libtheora.dll"
LinkIncremental="1"
ModuleDefinitionFile="..\..\libtheora.def"
- GenerateDebugInformation="true"
+ GenerateDebugInformation="false"
SubSystem="2"
OptimizeReferences="2"
EnableCOMDATFolding="2"
@@ -216,8 +216,8 @@
InlineFunctionExpansion="2"
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="1"
- AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include"
- PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS"
+ AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include;..\..\..\lib"
+ PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS; THEORA_USE_ASM"
StringPooling="true"
ExceptionHandling="0"
RuntimeLibrary="0"
@@ -307,8 +307,8 @@
InlineFunctionExpansion="2"
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="1"
- AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include"
- PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS"
+ AdditionalIncludeDirectories="..\..\..\include;..\..\..\..\libogg\include;..\..\..\lib"
+ PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBTHEORA_EXPORTS; THEORA_USE_ASM"
StringPooling="true"
ExceptionHandling="0"
RuntimeLibrary="0"
@@ -410,6 +410,10 @@
>
</File>
<File
+ RelativePath="..\..\..\lib\x86_32_vs\dsp_mmx.c"
+ >
+ </File>
+ <File
RelativePath="..\..\..\lib\encode.c"
>
</File>
@@ -418,6 +422,10 @@
>
</File>
<File
+ RelativePath="..\..\..\lib\x86_32_vs\fdct_mmx.c"
+ >
+ </File>
+ <File
RelativePath="..\..\..\lib\frarray.c"
>
</File>
@@ -458,6 +466,10 @@
>
</File>
<File
+ RelativePath="..\..\..\lib\x86_32_vs\recon_mmx.c"
+ >
+ </File>
+ <File
RelativePath="..\..\..\lib\reconstruct.c"
>
</File>
Added: trunk/theora/win32/getopt.c
===================================================================
--- trunk/theora/win32/getopt.c 2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/win32/getopt.c 2006-05-28 18:18:26 UTC (rev 11451)
@@ -0,0 +1,1047 @@
+/* Getopt for GNU.
+ NOTE: getopt is now part of the C library, so if you don't know what
+ "Keep this file name-space clean" means, talk to drepper at gnu.org
+ before changing it!
+
+ Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99
+ Free Software Foundation, Inc.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
+ Ditto for AIX 3.2 and <stdlib.h>. */
+#ifndef _NO_PROTO
+# define _NO_PROTO
+#endif
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#if !defined __STDC__ || !__STDC__
+/* This is a separate conditional since some stdc systems
+ reject `defined (const)'. */
+# ifndef const
+# define const
+# endif
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
+# include <gnu-versions.h>
+# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+# define ELIDE_CODE
+# endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+/* Don't include stdlib.h for non-GNU C libraries because some of them
+ contain conflicting prototypes for getopt. */
+# include <stdlib.h>
+# include <unistd.h>
+#endif /* GNU C library. */
+
+#ifdef VMS
+# include <unixlib.h>
+# if HAVE_STRING_H - 0
+# include <string.h>
+# endif
+#endif
+
+#ifndef _
+/* This is for other GNU distributions with internationalized messages.
+ When compiling libc, the _ macro is predefined. */
+# ifdef HAVE_LIBINTL_H
+# include <libintl.h>
+# define _(msgid) gettext (msgid)
+# else
+# define _(msgid) (msgid)
+# endif
+#endif
+
+/* This version of `getopt' appears to the caller like standard Unix `getopt'
+ but it behaves differently for the user, since it allows the user
+ to intersperse the options with the other arguments.
+
+ As `getopt' works, it permutes the elements of ARGV so that,
+ when it is done, all the options precede everything else. Thus
+ all application programs are extended to handle flexible argument order.
+
+ Setting the environment variable POSIXLY_CORRECT disables permutation.
+ Then the behavior is completely standard.
+
+ GNU application programs can use a third alternative mode in which
+ they can distinguish the relative order of options and other arguments. */
+
+#include "getopt_win.h"
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns -1, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+/* 1003.2 says this must be 1 before any call. */
+int optind = 1;
+
+/* Formerly, initialization of getopt depended on optind==0, which
+ causes problems with re-calling getopt as programs generally don't
+ know that. */
+
+int __getopt_initialized;
+
+/* The next char to be scanned in the option-element
+ in which the last option character we returned was found.
+ This allows us to pick up the scan where we left off.
+
+ If this is zero, or a null string, it means resume the scan
+ by advancing to the next ARGV-element. */
+
+static char *nextchar;
+
+/* Callers store zero here to inhibit the error message
+ for unrecognized options. */
+
+int opterr = 1;
+
+/* Set to an option character which was unrecognized.
+ This must be initialized on some systems to avoid linking in the
+ system's own getopt implementation. */
+
+int optopt = '?';
+
+/* Describe how to deal with options that follow non-option ARGV-elements.
+
+ If the caller did not specify anything,
+ the default is REQUIRE_ORDER if the environment variable
+ POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+ REQUIRE_ORDER means don't recognize them as options;
+ stop option processing when the first non-option is seen.
+ This is what Unix does.
+ This mode of operation is selected by either setting the environment
+ variable POSIXLY_CORRECT, or using `+' as the first character
+ of the list of option characters.
+
+ PERMUTE is the default. We permute the contents of ARGV as we scan,
+ so that eventually all the non-options are at the end. This allows options
+ to be given in any order, even with programs that were not written to
+ expect this.
+
+ RETURN_IN_ORDER is an option available to programs that were written
+ to expect options and other ARGV-elements in any order and that care about
+ the ordering of the two. We describe each non-option ARGV-element
+ as if it were the argument of an option with character code 1.
+ Using `-' as the first character of the list of option characters
+ selects this mode of operation.
+
+ The special argument `--' forces an end of option-scanning regardless
+ of the value of `ordering'. In the case of RETURN_IN_ORDER, only
+ `--' can cause `getopt' to return -1 with `optind' != ARGC. */
+
+static enum
+{
+ REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+} ordering;
+
+/* Value of POSIXLY_CORRECT environment variable. */
+static char *posixly_correct;
+
+#ifdef __GNU_LIBRARY__
+/* We want to avoid inclusion of string.h with non-GNU libraries
+ because there are many ways it can cause trouble.
+ On some systems, it contains special magic macros that don't work
+ in GCC. */
+# include <string.h>
+# define my_index strchr
+#else
+
+#include <string.h>
+
+/* Avoid depending on library functions or files
+ whose names are inconsistent. */
+
+#ifndef getenv
+extern char *getenv ();
+#endif
+
+/* Return a pointer to the first occurrence of CHR in STR, or 0 if STR
+   does not contain it.  The terminating NUL is never matched, so
+   searching for 0 always yields 0. */
+static char *
+my_index (str, chr)
+     const char *str;
+     int chr;
+{
+  const char *cursor;
+
+  for (cursor = str; *cursor != '\0'; cursor++)
+    if (*cursor == chr)
+      return (char *) cursor;
+
+  return 0;
+}
+
+/* If using GCC, we can safely declare strlen this way.
+ If not using GCC, it is ok not to declare it. */
+#ifdef __GNUC__
+/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h.
+ That was relevant to code that was here before. */
+# if (!defined __STDC__ || !__STDC__) && !defined strlen
+/* gcc with -traditional declares the built-in strlen to return int,
+ and has done so at least since version 2.4.5. -- rms. */
+extern int strlen (const char *);
+# endif /* not __STDC__ */
+#endif /* __GNUC__ */
+
+#endif /* not __GNU_LIBRARY__ */
+
+/* Handle permutation of arguments. */
+
+/* Describe the part of ARGV that contains non-options that have
+ been skipped. `first_nonopt' is the index in ARGV of the first of them;
+ `last_nonopt' is the index after the last of them. */
+
+static int first_nonopt;
+static int last_nonopt;
+
+#ifdef _LIBC
+/* Bash 2.0 gives us an environment variable containing flags
+ indicating ARGV elements that should not be considered arguments. */
+
+/* Defined in getopt_init.c */
+extern char *__getopt_nonoption_flags;
+
+static int nonoption_flags_max_len;
+static int nonoption_flags_len;
+
+static int original_argc;
+static char *const *original_argv;
+
+/* Record the ARGC and ARGV values passed in, for later comparison.
+   The flags variable that bash 2.0 puts in the environment is only
+   meaningful for the ARGV that was passed to the process, so
+   _getopt_initialize compares its own arguments against the values
+   saved here before using those flags. */
+static void
+__attribute__ ((unused))
+store_args_and_env (int argc, char *const *argv)
+{
+ /* XXX This is no good solution. We should rather copy the args so
+ that we can compare them later. But we must not use malloc(3). */
+ original_argc = argc;
+ original_argv = argv;
+}
+# ifdef text_set_element
+text_set_element (__libc_subinit, store_args_and_env);
+# endif /* text_set_element */
+
+# define SWAP_FLAGS(ch1, ch2) \
+ if (nonoption_flags_len > 0) \
+ { \
+ char __tmp = __getopt_nonoption_flags[ch1]; \
+ __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \
+ __getopt_nonoption_flags[ch2] = __tmp; \
+ }
+#else /* !_LIBC */
+# define SWAP_FLAGS(ch1, ch2)
+#endif /* _LIBC */
+
+/* Exchange two adjacent subsequences of ARGV.
+ One subsequence is elements [first_nonopt,last_nonopt)
+ which contains all the non-options that have been skipped so far.
+ The other is elements [last_nonopt,optind), which contains all
+ the options processed since those non-options were skipped.
+
+ `first_nonopt' and `last_nonopt' are relocated so that they describe
+ the new indices of the non-options in ARGV after they are moved. */
+
+#if defined __STDC__ && __STDC__
+static void exchange (char **);
+#endif
+
+/* Rotate ARGV so that the options in [last_nonopt, optind) come before
+   the skipped non-options in [first_nonopt, last_nonopt), keeping the
+   relative order inside each group.  On return first_nonopt and
+   last_nonopt describe where the non-options now sit. */
+static void
+exchange (argv)
+     char **argv;
+{
+  int lo = first_nonopt;
+  int mid = last_nonopt;
+  int hi = optind;
+  char *held;
+
+  /* Each round swaps the shorter of the two segments with the far end of
+     the longer one.  That settles the shorter segment for good and leaves
+     a smaller instance of the same problem inside the longer one. */
+
+#ifdef _LIBC
+  /* First make sure the handling of the `__getopt_nonoption_flags'
+     string can work normally.  Our top argument must be in the range
+     of the string.  */
+  if (nonoption_flags_len > 0 && hi >= nonoption_flags_max_len)
+    {
+      /* We must extend the array.  The user plays games with us and
+	 presents new arguments.  */
+      char *new_str = malloc (hi + 1);
+      if (new_str == NULL)
+	nonoption_flags_len = nonoption_flags_max_len = 0;
+      else
+	{
+	  memset (__mempcpy (new_str, __getopt_nonoption_flags,
+			     nonoption_flags_max_len),
+		  '\0', hi + 1 - nonoption_flags_max_len);
+	  nonoption_flags_max_len = hi + 1;
+	  __getopt_nonoption_flags = new_str;
+	}
+    }
+#endif
+
+  while (hi > mid && mid > lo)
+    {
+      int upper_len = hi - mid;
+      int lower_len = mid - lo;
+      int lower_is_shorter = upper_len > lower_len;
+      /* Number of elements to swap this round.  */
+      int count = lower_is_shorter ? lower_len : upper_len;
+      /* Start of the run that trades places with argv[lo...]: the tail
+	 of the upper segment when the lower one is shorter, otherwise
+	 the whole upper segment.  */
+      int partner = lower_is_shorter ? hi - lower_len : mid;
+      int k;
+
+      for (k = 0; k < count; k++)
+	{
+	  held = argv[lo + k];
+	  argv[lo + k] = argv[partner + k];
+	  argv[partner + k] = held;
+	  SWAP_FLAGS (lo + k, partner + k);
+	}
+
+      /* Drop the segment that has reached its final position.  */
+      if (lower_is_shorter)
+	hi -= count;
+      else
+	lo += count;
+    }
+
+  /* Update records for the slots the non-options now occupy.  */
+
+  first_nonopt += (optind - last_nonopt);
+  last_nonopt = optind;
+}
+
+/* Initialize the internal data when the first call is made. */
+
+#if defined __STDC__ && __STDC__
+static const char *_getopt_initialize (int, char *const *, const char *);
+#endif
+/* Reset the scanner state before the first option is read.
+   Returns OPTSTRING, advanced past a leading `-' or `+' when one of
+   those ordering markers is present.  */
+static const char *
+_getopt_initialize (argc, argv, optstring)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+{
+  /* Scanning resumes at the current optind, and no non-option
+     ARGV-elements have been skipped yet.  */
+  first_nonopt = optind;
+  last_nonopt = optind;
+
+  nextchar = NULL;
+
+  posixly_correct = getenv ("POSIXLY_CORRECT");
+
+  /* Pick the ordering mode: an explicit marker in OPTSTRING wins,
+     otherwise POSIXLY_CORRECT decides.  */
+  switch (optstring[0])
+    {
+    case '-':
+      ordering = RETURN_IN_ORDER;
+      ++optstring;
+      break;
+
+    case '+':
+      ordering = REQUIRE_ORDER;
+      ++optstring;
+      break;
+
+    default:
+      if (posixly_correct != NULL)
+	ordering = REQUIRE_ORDER;
+      else
+	ordering = PERMUTE;
+      break;
+    }
+
+#ifdef _LIBC
+  if (posixly_correct == NULL
+      && argc == original_argc && argv == original_argv)
+    {
+      if (nonoption_flags_max_len == 0)
+	{
+	  if (__getopt_nonoption_flags == NULL
+	      || __getopt_nonoption_flags[0] == '\0')
+	    nonoption_flags_max_len = -1;
+	  else
+	    {
+	      const char *orig_str = __getopt_nonoption_flags;
+	      int len = nonoption_flags_max_len = strlen (orig_str);
+	      if (nonoption_flags_max_len < argc)
+		nonoption_flags_max_len = argc;
+	      __getopt_nonoption_flags =
+		(char *) malloc (nonoption_flags_max_len);
+	      if (__getopt_nonoption_flags == NULL)
+		nonoption_flags_max_len = -1;
+	      else
+		memset (__mempcpy (__getopt_nonoption_flags, orig_str, len),
+			'\0', nonoption_flags_max_len - len);
+	    }
+	}
+      nonoption_flags_len = nonoption_flags_max_len;
+    }
+  else
+    nonoption_flags_len = 0;
+#endif
+
+  return optstring;
+}
+
+/* Scan elements of ARGV (whose length is ARGC) for option characters
+ given in OPTSTRING.
+
+ If an element of ARGV starts with '-', and is not exactly "-" or "--",
+ then it is an option element. The characters of this element
+ (aside from the initial '-') are option characters. If `getopt'
+ is called repeatedly, it returns successively each of the option characters
+ from each of the option elements.
+
+ If `getopt' finds another option character, it returns that character,
+ updating `optind' and `nextchar' so that the next call to `getopt' can
+ resume the scan with the following option character or ARGV-element.
+
+ If there are no more option characters, `getopt' returns -1.
+ Then `optind' is the index in ARGV of the first ARGV-element
+ that is not an option. (The ARGV-elements have been permuted
+ so that those that are not options now come last.)
+
+ OPTSTRING is a string containing the legitimate option characters.
+ If an option character is seen that is not listed in OPTSTRING,
+ return '?' after printing an error message. If you set `opterr' to
+ zero, the error message is suppressed but we still return '?'.
+
+ If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+ so the following text in the same ARGV-element, or the text of the following
+ ARGV-element, is returned in `optarg'. Two colons mean an option that
+ wants an optional arg; if there is text in the current ARGV-element,
+ it is returned in `optarg', otherwise `optarg' is set to zero.
+
+ If OPTSTRING starts with `-' or `+', it requests different methods of
+ handling the non-option ARGV-elements.
+ See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+ Long-named options begin with `--' instead of `-'.
+ Their names may be abbreviated as long as the abbreviation is unique
+ or is an exact match for some defined option. If they have an
+ argument, it follows the option name in the same ARGV-element, separated
+ from the option name by a `=', or else in the next ARGV-element.
+ When `getopt' finds a long-named option, it returns 0 if that option's
+ `flag' field is nonzero, the value of the option's `val' field
+ if the `flag' field is zero.
+
+ The elements of ARGV aren't really const, because we permute them.
+ But we pretend they're const in the prototype to be compatible
+ with other systems.
+
+ LONGOPTS is a vector of `struct option' terminated by an
+ element containing a name which is zero.
+
+ LONGIND returns the index in LONGOPTS of the long-named option found.
+ It is only valid when a long-named option has been found by the most
+ recent call.
+
+ If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+ long-named options. */
+
+int
+_getopt_internal (argc, argv, optstring, longopts, longind, long_only)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+     const struct option *longopts;
+     int *longind;
+     int long_only;
+{
+  /* Outline of one call:
+       1. Reset `optarg' and, on the first call (or when `optind' is 0),
+          run `_getopt_initialize'.
+       2. If the current ARGV-element is used up, move on to the next one,
+          permuting or reporting non-options according to `ordering'.
+       3. Try to decode the element as a long option (LONGOPTS non-null).
+       4. Otherwise decode one short option character, including the
+          `W;' convenience form and required/optional arguments.
+     Return values produced below: -1 (scan finished), 1 (non-option
+     element handed back in `optarg'), 0 or the `val' field (long option),
+     the option character itself, and '?' or ':' for errors.  */
+
+  /* Every call starts with no argument; the paths below that accept one
+     overwrite this.  */
+  optarg = NULL;
+
+  if (optind == 0 || !__getopt_initialized)
+    {
+      if (optind == 0)
+        optind = 1;     /* Don't scan ARGV[0], the program name.  */
+      optstring = _getopt_initialize (argc, argv, optstring);
+      __getopt_initialized = 1;
+    }
+
+  /* Test whether ARGV[optind] points to a non-option argument.
+     Either it does not have option syntax, or there is an environment flag
+     from the shell indicating it is not an option.  The latter information
+     is only consulted when _LIBC is defined.  */
+#ifdef _LIBC
+# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \
+                      || (optind < nonoption_flags_len \
+                          && __getopt_nonoption_flags[optind] == '1'))
+#else
+# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0')
+#endif
+
+  if (nextchar == NULL || *nextchar == '\0')
+    {
+      /* Advance to the next ARGV-element.  */
+
+      /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
+         moved back by the user (who may also have changed the arguments).  */
+      if (last_nonopt > optind)
+        last_nonopt = optind;
+      if (first_nonopt > optind)
+        first_nonopt = optind;
+
+      if (ordering == PERMUTE)
+        {
+          /* If we have just processed some options following some non-options,
+             exchange them so that the options come first.  */
+
+          if (first_nonopt != last_nonopt && last_nonopt != optind)
+            exchange ((char **) argv);
+          else if (last_nonopt != optind)
+            first_nonopt = optind;
+
+          /* Skip any additional non-options
+             and extend the range of non-options previously skipped.  */
+
+          while (optind < argc && NONOPTION_P)
+            optind++;
+          last_nonopt = optind;
+        }
+
+      /* The special ARGV-element `--' means premature end of options.
+         Skip it like a null option,
+         then exchange with previous non-options as if it were an option,
+         then skip everything else like a non-option.  */
+
+      if (optind != argc && !strcmp (argv[optind], "--"))
+        {
+          optind++;
+
+          if (first_nonopt != last_nonopt && last_nonopt != optind)
+            exchange ((char **) argv);
+          else if (first_nonopt == last_nonopt)
+            first_nonopt = optind;
+          last_nonopt = argc;
+
+          optind = argc;
+        }
+
+      /* If we have done all the ARGV-elements, stop the scan
+         and back over any non-options that we skipped and permuted.  */
+
+      if (optind == argc)
+        {
+          /* Set the next-arg-index to point at the non-options
+             that we previously skipped, so the caller will digest them.  */
+          if (first_nonopt != last_nonopt)
+            optind = first_nonopt;
+          return -1;
+        }
+
+      /* If we have come to a non-option and did not permute it,
+         either stop the scan or describe it to the caller and pass it by.  */
+
+      if (NONOPTION_P)
+        {
+          if (ordering == REQUIRE_ORDER)
+            return -1;
+          optarg = argv[optind++];
+          return 1;
+        }
+
+      /* We have found another option-ARGV-element.
+         Skip the initial punctuation: one '-', or two when long options
+         are enabled and the element starts with "--".  */
+
+      nextchar = (argv[optind] + 1
+                  + (longopts != NULL && argv[optind][1] == '-'));
+    }
+
+  /* Decode the current option-ARGV-element.  */
+
+  /* Check whether the ARGV-element is a long option.
+
+     If long_only and the ARGV-element has the form "-f", where f is
+     a valid short option, don't consider it an abbreviated form of
+     a long option that starts with f.  Otherwise there would be no
+     way to give the -f short option.
+
+     On the other hand, if there's a long option "fubar" and
+     the ARGV-element is "-fu", do consider that an abbreviation of
+     the long option, just like "--fu", and not "-f" with arg "u".
+
+     This distinction seems to be the most useful approach.  */
+
+  if (longopts != NULL
+      && (argv[optind][1] == '-'
+          || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1])))))
+    {
+      char *nameend;
+      const struct option *p;
+      const struct option *pfound = NULL;
+      int exact = 0;
+      int ambig = 0;
+      int indfound = -1;
+      int option_index;
+
+      /* The option name ends at '=' or at the end of the element.  */
+      for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
+        /* Do nothing.  */ ;
+
+      /* Test all long options for either exact match
+         or abbreviated matches.  */
+      for (p = longopts, option_index = 0; p->name; p++, option_index++)
+        if (!strncmp (p->name, nextchar, nameend - nextchar))
+          {
+            if ((unsigned int) (nameend - nextchar)
+                == (unsigned int) strlen (p->name))
+              {
+                /* Exact match found.  */
+                pfound = p;
+                indfound = option_index;
+                exact = 1;
+                break;
+              }
+            else if (pfound == NULL)
+              {
+                /* First nonexact match found.  */
+                pfound = p;
+                indfound = option_index;
+              }
+            else
+              /* Second or later nonexact match found.  */
+              ambig = 1;
+          }
+
+      if (ambig && !exact)
+        {
+          if (opterr)
+            fprintf (stderr, _("%s: option `%s' is ambiguous\n"),
+                     argv[0], argv[optind]);
+          nextchar += strlen (nextchar);
+          optind++;
+          optopt = 0;
+          return '?';
+        }
+
+      if (pfound != NULL)
+        {
+          option_index = indfound;
+          optind++;
+          if (*nameend)
+            {
+              /* Don't test has_arg with >, because some C compilers don't
+                 allow it to be used on enums.  */
+              if (pfound->has_arg)
+                optarg = nameend + 1;
+              else
+                {
+                  if (opterr)
+                    {
+                      if (argv[optind - 1][1] == '-')
+                        /* --option */
+                        fprintf (stderr,
+                                 _("%s: option `--%s' doesn't allow an argument\n"),
+                                 argv[0], pfound->name);
+                      else
+                        /* +option or -option */
+                        fprintf (stderr,
+                                 _("%s: option `%c%s' doesn't allow an argument\n"),
+                                 argv[0], argv[optind - 1][0], pfound->name);
+                    }
+
+                  nextchar += strlen (nextchar);
+
+                  optopt = pfound->val;
+                  return '?';
+                }
+            }
+          else if (pfound->has_arg == 1)
+            {
+              /* Required argument not attached with '=': take the next
+                 ARGV-element if there is one.  */
+              if (optind < argc)
+                optarg = argv[optind++];
+              else
+                {
+                  if (opterr)
+                    fprintf (stderr,
+                             _("%s: option `%s' requires an argument\n"),
+                             argv[0], argv[optind - 1]);
+                  nextchar += strlen (nextchar);
+                  optopt = pfound->val;
+                  return optstring[0] == ':' ? ':' : '?';
+                }
+            }
+          nextchar += strlen (nextchar);
+          if (longind != NULL)
+            *longind = option_index;
+          if (pfound->flag)
+            {
+              *(pfound->flag) = pfound->val;
+              return 0;
+            }
+          return pfound->val;
+        }
+
+      /* Can't find it as a long option.  If this is not getopt_long_only,
+         or the option starts with '--' or is not a valid short
+         option, then it's an error.
+         Otherwise interpret it as a short option.  */
+      if (!long_only || argv[optind][1] == '-'
+          || my_index (optstring, *nextchar) == NULL)
+        {
+          if (opterr)
+            {
+              if (argv[optind][1] == '-')
+                /* --option */
+                fprintf (stderr, _("%s: unrecognized option `--%s'\n"),
+                         argv[0], nextchar);
+              else
+                /* +option or -option */
+                fprintf (stderr, _("%s: unrecognized option `%c%s'\n"),
+                         argv[0], argv[optind][0], nextchar);
+            }
+          nextchar = (char *) "";
+          optind++;
+          optopt = 0;
+          return '?';
+        }
+    }
+
+  /* Look at and handle the next short option-character.  */
+
+  {
+    char c = *nextchar++;
+    char *temp = my_index (optstring, c);
+
+    /* Increment `optind' when we start to process its last character.  */
+    if (*nextchar == '\0')
+      ++optind;
+
+    if (temp == NULL || c == ':')
+      {
+        if (opterr)
+          {
+            if (posixly_correct)
+              /* 1003.2 specifies the format of this message.  */
+              fprintf (stderr, _("%s: illegal option -- %c\n"),
+                       argv[0], c);
+            else
+              fprintf (stderr, _("%s: invalid option -- %c\n"),
+                       argv[0], c);
+          }
+        optopt = c;
+        return '?';
+      }
+    /* Convenience.  Treat POSIX -W foo same as long option --foo.
+       This needs a long-option table to search: when LONGOPTS is null
+       (as when called through plain `getopt') skip this branch, so that
+       `W' is returned below like any other option character instead of
+       walking a null table.  */
+    if (temp[0] == 'W' && temp[1] == ';' && longopts != NULL)
+      {
+        char *nameend;
+        const struct option *p;
+        const struct option *pfound = NULL;
+        int exact = 0;
+        int ambig = 0;
+        int indfound = 0;
+        int option_index;
+
+        /* This is an option that requires an argument.  */
+        if (*nextchar != '\0')
+          {
+            optarg = nextchar;
+            /* If we end this ARGV-element by taking the rest as an arg,
+               we must advance to the next element now.  */
+            optind++;
+          }
+        else if (optind == argc)
+          {
+            if (opterr)
+              {
+                /* 1003.2 specifies the format of this message.  */
+                fprintf (stderr, _("%s: option requires an argument -- %c\n"),
+                         argv[0], c);
+              }
+            optopt = c;
+            if (optstring[0] == ':')
+              c = ':';
+            else
+              c = '?';
+            return c;
+          }
+        else
+          /* We already incremented `optind' once;
+             increment it again when taking next ARGV-elt as argument.  */
+          optarg = argv[optind++];
+
+        /* optarg is now the argument, see if it's in the
+           table of longopts.  */
+
+        for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++)
+          /* Do nothing.  */ ;
+
+        /* Test all long options for either exact match
+           or abbreviated matches.  */
+        for (p = longopts, option_index = 0; p->name; p++, option_index++)
+          if (!strncmp (p->name, nextchar, nameend - nextchar))
+            {
+              if ((unsigned int) (nameend - nextchar) == strlen (p->name))
+                {
+                  /* Exact match found.  */
+                  pfound = p;
+                  indfound = option_index;
+                  exact = 1;
+                  break;
+                }
+              else if (pfound == NULL)
+                {
+                  /* First nonexact match found.  */
+                  pfound = p;
+                  indfound = option_index;
+                }
+              else
+                /* Second or later nonexact match found.  */
+                ambig = 1;
+            }
+        if (ambig && !exact)
+          {
+            /* Both branches above have already stepped `optind' past the
+               -W argument, so ARGV[optind] is the following element (or
+               the terminating null pointer).  Report `optarg', the text
+               that failed to resolve, and leave `optind' alone so the
+               next element is not skipped.  */
+            if (opterr)
+              fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"),
+                       argv[0], optarg);
+            nextchar += strlen (nextchar);
+            return '?';
+          }
+        if (pfound != NULL)
+          {
+            option_index = indfound;
+            if (*nameend)
+              {
+                /* Don't test has_arg with >, because some C compilers don't
+                   allow it to be used on enums.  */
+                if (pfound->has_arg)
+                  optarg = nameend + 1;
+                else
+                  {
+                    if (opterr)
+                      fprintf (stderr,
+                               _("%s: option `-W %s' doesn't allow an argument\n"),
+                               argv[0], pfound->name);
+
+                    nextchar += strlen (nextchar);
+                    return '?';
+                  }
+              }
+            else if (pfound->has_arg == 1)
+              {
+                if (optind < argc)
+                  optarg = argv[optind++];
+                else
+                  {
+                    if (opterr)
+                      fprintf (stderr,
+                               _("%s: option `%s' requires an argument\n"),
+                               argv[0], argv[optind - 1]);
+                    nextchar += strlen (nextchar);
+                    return optstring[0] == ':' ? ':' : '?';
+                  }
+              }
+            nextchar += strlen (nextchar);
+            if (longind != NULL)
+              *longind = option_index;
+            if (pfound->flag)
+              {
+                *(pfound->flag) = pfound->val;
+                return 0;
+              }
+            return pfound->val;
+          }
+        nextchar = NULL;
+        return 'W';     /* Let the application handle it.  */
+      }
+    if (temp[1] == ':')
+      {
+        if (temp[2] == ':')
+          {
+            /* This is an option that accepts an argument optionally.  */
+            if (*nextchar != '\0')
+              {
+                optarg = nextchar;
+                optind++;
+              }
+            else
+              optarg = NULL;
+            nextchar = NULL;
+          }
+        else
+          {
+            /* This is an option that requires an argument.  */
+            if (*nextchar != '\0')
+              {
+                optarg = nextchar;
+                /* If we end this ARGV-element by taking the rest as an arg,
+                   we must advance to the next element now.  */
+                optind++;
+              }
+            else if (optind == argc)
+              {
+                if (opterr)
+                  {
+                    /* 1003.2 specifies the format of this message.  */
+                    fprintf (stderr,
+                             _("%s: option requires an argument -- %c\n"),
+                             argv[0], c);
+                  }
+                optopt = c;
+                /* A leading ':' in OPTSTRING selects ':' rather than '?'
+                   as the missing-argument return value.  */
+                if (optstring[0] == ':')
+                  c = ':';
+                else
+                  c = '?';
+              }
+            else
+              /* We already incremented `optind' once;
+                 increment it again when taking next ARGV-elt as argument.  */
+              optarg = argv[optind++];
+            nextchar = NULL;
+          }
+      }
+    return c;
+  }
+}
+
+/* Short-option entry point.  Hands ARGC, ARGV and OPTSTRING straight to
+   `_getopt_internal' with a null long-option table, a null index pointer
+   and LONG_ONLY of 0, and returns whatever that call returns.  */
+int
+getopt (argc, argv, optstring)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+{
+  /* Typed null pointers, spelled with explicit casts as in the rest of
+     this file.  */
+  const struct option *no_long_options = (const struct option *) 0;
+  int *no_long_index = (int *) 0;
+
+  return _getopt_internal (argc, argv, optstring,
+                           no_long_options, no_long_index, 0);
+}
+
+#endif /* Not ELIDE_CODE. */
+
+#ifdef TEST
+
+/* Compile with -DTEST to make an executable for use in testing
+ the above definition of `getopt'. */
+
+/* Test driver for `getopt': parses the command line with the option
+   string "abc:d:0123456789", prints one line per option reported, then
+   lists any remaining non-option ARGV-elements.  Always returns 0.  */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  int digit_optind = 0;
+
+  while (1)
+    {
+      /* Index of the element about to be scanned; an `optind' of 0 is
+         treated as 1, matching what `_getopt_internal' does with it.  */
+      int this_option_optind = optind ? optind : 1;
+
+      c = getopt (argc, argv, "abc:d:0123456789");
+      if (c == -1)
+        break;
+
+      switch (c)
+        {
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+          /* Remember which element the previous digit came from and
+             complain when a digit shows up in a different one.  */
+          if (digit_optind != 0 && digit_optind != this_option_optind)
+            printf ("digits occur in two different argv-elements.\n");
+          digit_optind = this_option_optind;
+          printf ("option %c\n", c);
+          break;
+
+        case 'a':
+          printf ("option a\n");
+          break;
+
+        case 'b':
+          printf ("option b\n");
+          break;
+
+        case 'c':
+          printf ("option c with value `%s'\n", optarg);
+          break;
+
+        /* `d:' is part of the option string above, so report it like `c'
+           instead of letting it fall into the unknown-code default.  */
+        case 'd':
+          printf ("option d with value `%s'\n", optarg);
+          break;
+
+        case '?':
+          break;
+
+        default:
+          printf ("?? getopt returned character code 0%o ??\n", c);
+        }
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      while (optind < argc)
+        printf ("%s ", argv[optind++]);
+      printf ("\n");
+    }
+
+  /* Returning from main ends the program with status 0 and needs no
+     declaration of `exit' to be in scope.  */
+  return 0;
+}
+
+#endif /* TEST */
Added: trunk/theora/win32/getopt1.c
===================================================================
--- trunk/theora/win32/getopt1.c 2006-05-28 14:12:15 UTC (rev 11450)
+++ trunk/theora/win32/getopt1.c 2006-05-28 18:18:26 UTC (rev 11451)
@@ -0,0 +1,188 @@
+/* getopt_long and getopt_long_only entry points for GNU getopt.
+ Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "getopt_win.h"
+
+#if !defined __STDC__ || !__STDC__
+/* This is a separate conditional since some stdc systems
+ reject `defined (const)'. */
+#ifndef const
+#define const
+#endif
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
+#include <gnu-versions.h>
+#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+#define ELIDE_CODE
+#endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+#include <stdlib.h>
+#endif
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+/* Long-option entry point.  Forwards every argument unchanged to
+   `_getopt_internal' with LONG_ONLY of 0 and returns its result.  */
+int
+getopt_long (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+{
+  int long_only = 0;
+
+  return _getopt_internal (argc, argv, options, long_options, opt_index,
+                           long_only);
+}
+
+/* Variant of getopt_long in which a single '-' may introduce a long
+   option just as '--' does.  An element starting with '-' (not '--')
+   that matches no long option but does match a short option is parsed
+   as that short option.  Forwards every argument unchanged to
+   `_getopt_internal' with LONG_ONLY of 1 and returns its result.  */
+
+int
+getopt_long_only (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+{
+  int long_only = 1;
+
+  return _getopt_internal (argc, argv, options, long_options, opt_index,
+                           long_only);
+}
+
+
+#endif /* Not ELIDE_CODE. */
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/* Test driver for `getopt_long': parses the command line with the short
+   option string "abc:d:0123456789" plus the long options add, append,
+   delete, verbose, create and file, prints one line per option reported,
+   then lists any remaining non-option ARGV-elements.  Always returns 0.  */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  int digit_optind = 0;
+
+  while (1)
+    {
+      /* Index of the element about to be scanned; an `optind' of 0 is
+         treated as 1.  */
+      int this_option_optind = optind ? optind : 1;
+      int option_index = 0;
+      /* Fields are name, has_arg, flag, val.  Every entry has a null
+         `flag' and a `val' of 0, so each long option is reported through
+         `case 0' below.  */
+      static struct option long_options[] =
+      {
+        {"add", 1, 0, 0},
+        {"append", 0, 0, 0},
+        {"delete", 1, 0, 0},
+        {"verbose", 0, 0, 0},
+        {"create", 0, 0, 0},
+        {"file", 1, 0, 0},
+        {0, 0, 0, 0}
+      };
+
+      c = getopt_long (argc, argv, "abc:d:0123456789",
+                       long_options, &option_index);
+      if (c == -1)
+        break;
+
+      switch (c)
+        {
+        case 0:
+          printf ("option %s", long_options[option_index].name);
+          if (optarg)
+            printf (" with arg %s", optarg);
+          printf ("\n");
+          break;
+
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+          /* Remember which element the previous digit came from and
+             complain when a digit shows up in a different one.  */
+          if (digit_optind != 0 && digit_optind != this_option_optind)
+            printf ("digits occur in two different argv-elements.\n");
+          digit_optind = this_option_optind;
+          printf ("option %c\n", c);
+          break;
+
+        case 'a':
+          printf ("option a\n");
+          break;
+
+        case 'b':
+          printf ("option b\n");
+          break;
+
+        case 'c':
+          printf ("option c with value `%s'\n", optarg);
+          break;
+
+        case 'd':
+          printf ("option d with value `%s'\n", optarg);
+          break;
+
+        case '?':
+          break;
+
+        default:
+          printf ("?? getopt returned character code 0%o ??\n", c);
+        }
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      while (optind < argc)
+        printf ("%s ", argv[optind++]);
+      printf ("\n");
+    }
+
+  /* This file includes <stdlib.h> only when __GNU_LIBRARY__ is defined,
+     so `exit' may be undeclared here; returning from main ends the
+     program with the same status.  */
+  return 0;
+}
+
+#endif /* TEST */
More information about the commits
mailing list