[xiph-commits] r12840 - in experimental/j/theora-mashup: . lib lib/enc lib/enc/x86_32 lib/enc/x86_64

Mon Apr 9 11:09:02 PDT 2007

Author: j
Date: 2007-04-09 11:08:48 -0700 (Mon, 09 Apr 2007)
New Revision: 12840

Added:
   experimental/j/theora-mashup/lib/enc/x86_32/dct_decode_mmx.c
   experimental/j/theora-mashup/lib/enc/x86_32/idct_mmx.c
   experimental/j/theora-mashup/lib/enc/x86_64/dct_decode_mmx.c
   experimental/j/theora-mashup/lib/enc/x86_64/idct_mmx.c
Modified:
   experimental/j/theora-mashup/configure.ac
   experimental/j/theora-mashup/lib/Makefile.am
   experimental/j/theora-mashup/lib/enc/codec_internal.h
   experimental/j/theora-mashup/lib/enc/dct.c
   experimental/j/theora-mashup/lib/enc/dct_decode.c
   experimental/j/theora-mashup/lib/enc/dsp.c
   experimental/j/theora-mashup/lib/enc/dsp.h
   experimental/j/theora-mashup/lib/enc/encoder_idct.c
   experimental/j/theora-mashup/lib/enc/encoder_quant.c
   experimental/j/theora-mashup/lib/enc/mcomp.c
   experimental/j/theora-mashup/lib/enc/reconstruct.c
Log:
merge changes from trunk



Modified: experimental/j/theora-mashup/configure.ac
===================================================================

--- experimental/j/theora-mashup/configure.ac	2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/configure.ac	2007-04-09 18:08:48 UTC (rev 12840)
@@ -24,7 +24,7 @@
 AC_SUBST(V_LIB_AGE)
 
 dnl Extra linker options (for version script)
-SHLIB_VERSION_ARG=""
+THEORA_LDFLAGS=""
 
 dnl --------------------------------------------------  
 dnl Check for programs
@@ -115,6 +115,10 @@
 		cpu_x86_32=yes 
 		cpu_optimization="32 bit x86"
 		AC_DEFINE([USE_ASM], [],  [make use of asm optimization])
+		if test "x$target_vendor" = "xapple"; then
+			THEORA_LDFLAGS="$THEORA_LDFLAGS  -Wl,-read_only_relocs,suppress"
+		fi
+		
 		AC_DEFINE([OC_X86ASM], [], [enable x86 assambler optimization])
 		AM_CONDITIONAL(OC_X86ASM,true)
     	;;
@@ -146,9 +150,10 @@
 	*)
 		;;
    esac
+   THEORA_LDFLAGS="$THEORA_LDFLAGS $SHLIB_VERSION_ARG"
 fi
 
-AC_SUBST(SHLIB_VERSION_ARG)
+AC_SUBST(THEORA_LDFLAGS)
 
 dnl --------------------------------------------------
 dnl Checks for support libraries and headers

Modified: experimental/j/theora-mashup/lib/Makefile.am
===================================================================
--- experimental/j/theora-mashup/lib/Makefile.am	2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/Makefile.am	2007-04-09 18:08:48 UTC (rev 12840)
@@ -1,14 +1,17 @@
 INCLUDES = -I$(top_srcdir)/include -I$(top_srcdir)/lib -I$(top_srcdir)/lib/dec -I$(top_srcdir)/lib/enc
 
 EXTRA_DIST = Version_script.in \
+        enc/x86_32/dct_decode_mmx.c \
         enc/x86_32/dsp_mmx.c \
         enc/x86_32/dsp_mmxext.c \
         enc/x86_32/recon_mmx.c \
         enc/x86_32/fdct_mmx.c \
+        enc/x86_32/idct_mmx.c \
         enc/x86_64/dsp_mmx.c \
         enc/x86_64/dsp_mmxext.c \
         enc/x86_64/recon_mmx.c \
         enc/x86_64/fdct_mmx.c \
+        enc/x86_64/idct_mmx.c \
         enc/x86_32_vs/dsp_mmx.c \
         enc/x86_32_vs/fdct_mmx.c \
         enc/x86_32_vs/recon_mmx.c \
@@ -46,17 +49,21 @@
 if CPU_x86_64
 enc_arch_dir = enc/x86_64
 encoder_arch_sources= \
+	$(arch_dir)/dct_decode_mmx.c \
 	$(arch_dir)/dsp_mmx.c \
 	$(arch_dir)/dsp_mmxext.c \
 	$(arch_dir)/recon_mmx.c \
+	$(arch_dir)/idct_mmx.c \
 	$(arch_dir)/fdct_mmx.c
 else
 if CPU_x86_32
 arch_dir = enc/x86_32
 encoder_arch_sources= \
+	$(arch_dir)/dct_decode_mmx.c \
 	$(arch_dir)/dsp_mmx.c \
 	$(arch_dir)/dsp_mmxext.c \
 	$(arch_dir)/recon_mmx.c \
+	$(arch_dir)/idct_mmx.c \
 	$(arch_dir)/fdct_mmx.c
 endif
 endif
@@ -124,7 +131,7 @@
 	
 
 libtheora_la_CFLAGS = $(OGG_CFLAGS)
-libtheora_la_LDFLAGS = -version-info @V_LIB_CURRENT@:@V_LIB_REVISION@:@V_LIB_AGE@ @SHLIB_VERSION_ARG@
+libtheora_la_LDFLAGS = -version-info @V_LIB_CURRENT@:@V_LIB_REVISION@:@V_LIB_AGE@ @THEORA_LDFLAGS@
 libtheora_la_LIBADD = $(OGG_LIBS)
 
 debug:

Modified: experimental/j/theora-mashup/lib/enc/codec_internal.h
===================================================================
--- experimental/j/theora-mashup/lib/enc/codec_internal.h	2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/enc/codec_internal.h	2007-04-09 18:08:48 UTC (rev 12840)
@@ -453,7 +453,7 @@
 
   /* Loop filter bounding values */
   unsigned char  LoopFilterLimits[Q_TABLE_SIZE];
-  ogg_int32_t    FiltBoundingValue[512];
+  ogg_int16_t    FiltBoundingValue[256];
 
   /* Dequantiser and rounding tables */
   ogg_uint32_t   QThreshTable[Q_TABLE_SIZE];
@@ -706,15 +706,6 @@
 extern void InitPBInstance(PB_INSTANCE *pbi);
 extern void ClearPBInstance(PB_INSTANCE *pbi);
 
-
-extern void IDctSlow(  Q_LIST_ENTRY * InputData,
-                       ogg_int16_t *QuantMatrix,
-                       ogg_int16_t * OutputData ) ;
-
-extern void IDct10( Q_LIST_ENTRY * InputData,
-                    ogg_int16_t *QuantMatrix,
-                    ogg_int16_t * OutputData );
-
 extern void IDct1( Q_LIST_ENTRY * InputData,
                    ogg_int16_t *QuantMatrix,
                    ogg_int16_t * OutputData );

Modified: experimental/j/theora-mashup/lib/enc/dct.c
===================================================================
--- experimental/j/theora-mashup/lib/enc/dct.c	2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/enc/dct.c	2007-04-09 18:08:48 UTC (rev 12840)
@@ -257,6 +257,8 @@
 void dsp_dct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
 {
   funcs->fdct_short = fdct_short__c;
+  dsp_dct_decode_init(funcs, cpu_flags);
+  dsp_idct_init(funcs, cpu_flags);
 #if defined(USE_ASM)
   if (cpu_flags & OC_CPU_X86_MMX) {
     dsp_mmx_fdct_init(funcs);

Modified: experimental/j/theora-mashup/lib/enc/dct_decode.c
===================================================================
--- experimental/j/theora-mashup/lib/enc/dct_decode.c	2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/enc/dct_decode.c	2007-04-09 18:08:48 UTC (rev 12840)
@@ -18,7 +18,6 @@
 #include <stdlib.h>
 #include <string.h>
 #include "codec_internal.h"
-#include "dsp.h"
 
 
 #define GOLDEN_FRAME_THRESH_Q   50
@@ -28,7 +27,7 @@
 #define PL 1
 #define HIGHBITDUPPED(X) (((signed short) X)  >> 15)
 
-/* in-loop filter tables. one of these is used in dct_decode.c */
+/* in-loop filter tables. */
 
 static const unsigned char LoopFilterLimitValuesV1[Q_TABLE_SIZE] = {
   30, 25, 20, 20, 15, 15, 14, 14,
@@ -41,27 +40,16 @@
   0,  0,  0,  0,  0,  0,  0,  0
 };
 
-static const unsigned char LoopFilterLimitValuesV2[Q_TABLE_SIZE] = {
-  30, 25, 20, 20, 15, 15, 14, 14,
-  13, 13, 12, 12, 11, 11, 10, 10,
-  9,  9,  8,  8,  7,  7,  7,  7,
-  6,  6,  6,  6,  5,  5,  5,  5,
-  4,  4,  4,  4,  3,  3,  3,  3,
-  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,
-  1,  1,  1,  1,  1,  1,  1,  1
-};
-
 static const int ModeUsesMC[MAX_MODES] = { 0, 0, 1, 1, 1, 0, 1, 1 };
 
 static void SetupBoundingValueArray_Generic(PB_INSTANCE *pbi,
                                             ogg_int32_t FLimit){
 
-  ogg_int32_t * BoundingValuePtr = pbi->FiltBoundingValue+256;
+  ogg_int16_t * BoundingValuePtr = pbi->FiltBoundingValue+127;
   ogg_int32_t i;
 
   /* Set up the bounding value array. */
-  memset ( pbi->FiltBoundingValue, 0, (512*sizeof(*pbi->FiltBoundingValue)) );
+  memset ( pbi->FiltBoundingValue, 0, (256*sizeof(*pbi->FiltBoundingValue)) );
   for ( i = 0; i < FLimit; i++ ){
     BoundingValuePtr[-i-FLimit] = (-FLimit+i);
     BoundingValuePtr[-i] = -i;
@@ -107,8 +95,6 @@
 void SetupLoopFilter(PB_INSTANCE *pbi){
   ogg_int32_t FLimit;
 
-  /* nb: this was using the V2 values rather than V1
-     we think is was a mistake; the results were not used */
   FLimit = pbi->LoopFilterLimits[pbi->FrameQIndex];
   SetupBoundingValueArray_Generic(pbi, FLimit);
 }
@@ -137,11 +123,14 @@
   case 0:case 1:
     IDct1( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
     break;
-  case 2: case 3:case 4:case 5:case 6:case 7:case 8: case 9:case 10:
-    IDct10( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+  case 2: case 3:
+    dsp_IDct3(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
     break;
+  case 4:case 5:case 6:case 7:case 8: case 9:case 10:
+    dsp_IDct10(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+    break;
   default:
-    IDctSlow( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+    dsp_IDctSlow(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
   }
 
   /* Convert fragment number to a pixel offset in a reconstruction buffer. */
@@ -217,11 +206,14 @@
   case 0:case 1:
     IDct1( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
     break;
-  case 2: case 3:case 4:case 5:case 6:case 7:case 8: case 9:case 10:
-    IDct10( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+  case 2: case 3:
+    dsp_IDct3(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
     break;
+  case 4:case 5:case 6:case 7:case 8: case 9:case 10:
+    dsp_IDct10(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+    break;
   default:
-    IDctSlow( pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
+    dsp_IDctSlow(pbi->dsp, pbi->quantized_list, pbi->dequant_coeffs, pbi->ReconDataBuffer );
   }
 
   /* Convert fragment number to a pixel offset in a reconstruction buffer. */
@@ -283,7 +275,7 @@
       /* Reconstruct the pixel dats from the reference frame and change data
          (no half pixel in this case as the two references were the same. */
       dsp_recon_inter8x8 (pbi->dsp,
-		  &pbi->ThisFrameRecon[ReconPixelIndex],
+          &pbi->ThisFrameRecon[ReconPixelIndex],
                   LastFrameRecPtr, pbi->ReconDataBuffer,
                   ReconPixelsPerLine);
     }else{
@@ -673,9 +665,9 @@
   }
 }
 
-static void FilterHoriz(unsigned char * PixelPtr,
+static void FilterHoriz__c(unsigned char * PixelPtr,
                         ogg_int32_t LineLength,
-                        ogg_int32_t *BoundingValuePtr){
+                        ogg_int16_t *BoundingValuePtr){
   ogg_int32_t j;
   ogg_int32_t FiltVal;
 
@@ -695,18 +687,16 @@
   }
 }
 
-static void FilterVert(unsigned char * PixelPtr,
+static void FilterVert__c(unsigned char * PixelPtr,
                 ogg_int32_t LineLength,
-                ogg_int32_t *BoundingValuePtr){
+                ogg_int16_t *BoundingValuePtr){
   ogg_int32_t j;
   ogg_int32_t FiltVal;
-
+  PixelPtr -= 2*LineLength;
   /* the math was correct, but negative array indicies are forbidden
      by ANSI/C99 and will break optimization on several modern
      compilers */
 
-  PixelPtr -= 2*LineLength;
-
   for ( j = 0; j < 8; j++ ) {
     FiltVal = ( (ogg_int32_t)PixelPtr[0] ) -
       ( (ogg_int32_t)PixelPtr[LineLength] * 3 ) +
@@ -725,7 +715,7 @@
 void LoopFilter(PB_INSTANCE *pbi){
   ogg_int32_t i;
 
-  ogg_int32_t * BoundingValuePtr=pbi->FiltBoundingValue+256;
+  ogg_int16_t * BoundingValuePtr=pbi->FiltBoundingValue+127;
   int FragsAcross=pbi->HFragments;
   int FromFragment,ToFragment;
   int FragsDown = pbi->VFragments;
@@ -790,14 +780,14 @@
       /* Filter right hand border only if the block to the right is
          not coded */
       if ( !pbi->display_fragments[ i + 1 ] ){
-        FilterHoriz(pbi->LastFrameRecon+
+        dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
                     pbi->recon_pixel_index_table[i]+6,
                     LineLength,BoundingValuePtr);
       }
 
       /* Bottom done if next row set */
       if( !pbi->display_fragments[ i + LineFragments] ){
-        FilterVert(pbi->LastFrameRecon+
+        dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
                    pbi->recon_pixel_index_table[i+LineFragments],
                    LineLength, BoundingValuePtr);
       }
@@ -809,21 +799,21 @@
     for ( n = 1 ; n < FragsAcross - 1 ; n++, i++) {
       if( pbi->display_fragments[i]){
         /* Filter Left edge always */
-        FilterHoriz(pbi->LastFrameRecon+
+        dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
                     pbi->recon_pixel_index_table[i]-2,
                     LineLength, BoundingValuePtr);
 
         /* Filter right hand border only if the block to the right is
            not coded */
         if ( !pbi->display_fragments[ i + 1 ] ){
-          FilterHoriz(pbi->LastFrameRecon+
+          dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
                       pbi->recon_pixel_index_table[i]+6,
                       LineLength, BoundingValuePtr);
         }
 
         /* Bottom done if next row set */
         if( !pbi->display_fragments[ i + LineFragments] ){
-          FilterVert(pbi->LastFrameRecon+
+          dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
                      pbi->recon_pixel_index_table[i + LineFragments],
                      LineLength, BoundingValuePtr);
         }
@@ -835,13 +825,13 @@
     /* Last Column */
     if( pbi->display_fragments[i]){
       /* Filter Left edge always */
-      FilterHoriz(pbi->LastFrameRecon+
+      dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
                   pbi->recon_pixel_index_table[i] - 2 ,
                   LineLength, BoundingValuePtr);
 
       /* Bottom done if next row set */
       if( !pbi->display_fragments[ i + LineFragments] ){
-        FilterVert(pbi->LastFrameRecon+
+        dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
                    pbi->recon_pixel_index_table[i + LineFragments],
                    LineLength, BoundingValuePtr);
       }
@@ -859,21 +849,21 @@
          all fragments are intra */
       if( pbi->display_fragments[i]){
         /* TopRow is always done */
-        FilterVert(pbi->LastFrameRecon+
+        dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
                    pbi->recon_pixel_index_table[i],
                    LineLength, BoundingValuePtr);
 
         /* Filter right hand border only if the block to the right is
            not coded */
         if ( !pbi->display_fragments[ i + 1 ] ){
-          FilterHoriz(pbi->LastFrameRecon+
+          dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
                       pbi->recon_pixel_index_table[i] + 6,
                       LineLength, BoundingValuePtr);
         }
 
         /* Bottom done if next row set */
         if( !pbi->display_fragments[ i + LineFragments] ){
-          FilterVert(pbi->LastFrameRecon+
+          dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
                      pbi->recon_pixel_index_table[i + LineFragments],
                      LineLength, BoundingValuePtr);
         }
@@ -885,26 +875,26 @@
       for ( n = 1 ; n < FragsAcross - 1 ; n++, i++){
         if( pbi->display_fragments[i]){
           /* Filter Left edge always */
-          FilterHoriz(pbi->LastFrameRecon+
+          dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
                       pbi->recon_pixel_index_table[i] - 2,
                       LineLength, BoundingValuePtr);
 
           /* TopRow is always done */
-          FilterVert(pbi->LastFrameRecon+
+          dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
                      pbi->recon_pixel_index_table[i],
                      LineLength, BoundingValuePtr);
 
           /* Filter right hand border only if the block to the right
              is not coded */
           if ( !pbi->display_fragments[ i + 1 ] ){
-            FilterHoriz(pbi->LastFrameRecon+
+            dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
                         pbi->recon_pixel_index_table[i] + 6,
                         LineLength, BoundingValuePtr);
           }
 
           /* Bottom done if next row set */
           if( !pbi->display_fragments[ i + LineFragments] ){
-            FilterVert(pbi->LastFrameRecon+
+            dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
                        pbi->recon_pixel_index_table[i + LineFragments],
                        LineLength, BoundingValuePtr);
           }
@@ -915,18 +905,18 @@
       /* Last Column */
       if( pbi->display_fragments[i]){
         /* Filter Left edge always*/
-        FilterHoriz(pbi->LastFrameRecon+
+        dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
                     pbi->recon_pixel_index_table[i] - 2,
                     LineLength, BoundingValuePtr);
 
         /* TopRow is always done */
-        FilterVert(pbi->LastFrameRecon+
+        dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
                    pbi->recon_pixel_index_table[i],
                    LineLength, BoundingValuePtr);
 
         /* Bottom done if next row set */
         if( !pbi->display_fragments[ i + LineFragments] ){
-          FilterVert(pbi->LastFrameRecon+
+          dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
                      pbi->recon_pixel_index_table[i + LineFragments],
                      LineLength, BoundingValuePtr);
         }
@@ -944,14 +934,14 @@
     if( pbi->display_fragments[i]){
 
       /* TopRow is always done */
-      FilterVert(pbi->LastFrameRecon+
+      dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
                  pbi->recon_pixel_index_table[i],
                  LineLength, BoundingValuePtr);
 
       /* Filter right hand border only if the block to the right is
          not coded */
       if ( !pbi->display_fragments[ i + 1 ] ){
-        FilterHoriz(pbi->LastFrameRecon+
+        dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
                     pbi->recon_pixel_index_table[i] + 6,
                     LineLength, BoundingValuePtr);
       }
@@ -963,19 +953,19 @@
     for ( n = 1 ; n < FragsAcross - 1 ; n++, i++){
       if( pbi->display_fragments[i]){
         /* Filter Left edge always */
-        FilterHoriz(pbi->LastFrameRecon+
+        dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
                     pbi->recon_pixel_index_table[i] - 2,
                     LineLength, BoundingValuePtr);
 
         /* TopRow is always done */
-        FilterVert(pbi->LastFrameRecon+
+        dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
                    pbi->recon_pixel_index_table[i],
                    LineLength, BoundingValuePtr);
 
         /* Filter right hand border only if the block to the right is
            not coded */
         if ( !pbi->display_fragments[ i + 1 ] ){
-          FilterHoriz(pbi->LastFrameRecon+
+          dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
                       pbi->recon_pixel_index_table[i] + 6,
                       LineLength, BoundingValuePtr);
         }
@@ -986,12 +976,12 @@
     /* Last Column */
     if( pbi->display_fragments[i]){
       /* Filter Left edge always */
-      FilterHoriz(pbi->LastFrameRecon+
+      dsp_FilterHoriz(pbi->dsp,pbi->LastFrameRecon+
                   pbi->recon_pixel_index_table[i] - 2,
                   LineLength, BoundingValuePtr);
 
       /* TopRow is always done */
-      FilterVert(pbi->LastFrameRecon+
+      dsp_FilterVert(pbi->dsp,pbi->LastFrameRecon+
                  pbi->recon_pixel_index_table[i],
                  LineLength, BoundingValuePtr);
 
@@ -1222,3 +1212,14 @@
     UpdateUMVBorder(pbi, pbi->GoldenFrame);
   }
 }
+
+void dsp_dct_decode_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
+{ /* Install the portable C loop-filter routines as the defaults. */
+  funcs->FilterHoriz = FilterHoriz__c;
+  funcs->FilterVert = FilterVert__c;
+#ifdef USE_ASM
+  if ((cpu_flags & OC_CPU_X86_MMX) != 0) {
+    dsp_mmx_dct_decode_init(funcs); /* NOTE(review): presumably installs MMX filters */
+  }
+#endif
+}

Modified: experimental/j/theora-mashup/lib/enc/dsp.c
===================================================================
--- experimental/j/theora-mashup/lib/enc/dsp.c	2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/enc/dsp.c	2007-04-09 18:08:48 UTC (rev 12840)
@@ -16,8 +16,6 @@
  ********************************************************************/
 
 #include <stdlib.h>
-#include "cpu.h"
-#include "dsp.h"
 #include "codec_internal.h"
 
 #define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2)
@@ -404,7 +402,7 @@
 {
   ogg_uint32_t cpuflags;
 
-  cpuflags = oc_cpu_flags_get();
+  cpuflags = oc_cpu_flags_get ();
   dsp_init (funcs);
 
   dsp_recon_init (funcs, cpuflags);

Modified: experimental/j/theora-mashup/lib/enc/dsp.h
===================================================================
--- experimental/j/theora-mashup/lib/enc/dsp.h	2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/enc/dsp.h	2007-04-09 18:08:48 UTC (rev 12840)
@@ -18,7 +18,8 @@
 #ifndef DSP_H
 #define DSP_H
 
-#include <theora/theora.h>
+#include "theora/theora.h"
+#include "cpu.h"
 
 typedef struct
 {
@@ -77,10 +78,27 @@
   ogg_uint32_t (*inter8x8_err_xy2)(unsigned char *SrcData, ogg_uint32_t SrcStride,
 		                 unsigned char *RefDataPtr1,
 			         unsigned char *RefDataPtr2, ogg_uint32_t RefStride);
+			         
+  void (*FilterHoriz) (unsigned char * PixelPtr,
+                ogg_int32_t LineLength, ogg_int16_t *BoundingValuePtr);
+
+  void (*FilterVert) (unsigned char * PixelPtr,
+                 ogg_int32_t LineLength, ogg_int16_t *BoundingValuePtr);
+
+   void (*IDctSlow) (ogg_int16_t *InputData, 
+                  ogg_int16_t *QuantMatrix, ogg_int16_t *OutputData);
+
+    void (*IDct3) (ogg_int16_t *InputData, 
+                   ogg_int16_t *QuantMatrix, ogg_int16_t *OutputData);
+                   
+    void (*IDct10) (ogg_int16_t *InputData, 
+                  ogg_int16_t *QuantMatrix, ogg_int16_t *OutputData);
 } DspFunctions;
 
 extern void dsp_dct_init(DspFunctions *funcs, ogg_uint32_t cpu_flags);
 extern void dsp_recon_init (DspFunctions *funcs, ogg_uint32_t cpu_flags);
+extern void dsp_dct_decode_init(DspFunctions *funcs, ogg_uint32_t cpu_flags);
+extern void dsp_idct_init(DspFunctions *funcs, ogg_uint32_t cpu_flags);
 
 void dsp_init(DspFunctions *funcs);
 void dsp_static_init(DspFunctions *funcs);
@@ -89,6 +107,8 @@
 extern void dsp_mmxext_init(DspFunctions *funcs);
 extern void dsp_mmx_fdct_init(DspFunctions *funcs);
 extern void dsp_mmx_recon_init(DspFunctions *funcs);
+extern void dsp_mmx_dct_decode_init(DspFunctions *funcs);
+extern void dsp_mmx_idct_init(DspFunctions *funcs);
 #endif
 
 #define dsp_save_fpu(funcs) (funcs.save_fpu ())
@@ -132,5 +152,19 @@
 #define dsp_inter8x8_err_xy2(funcs,ptr1,str1,ptr2,ptr3,str2) \
 	(funcs.inter8x8_err_xy2 (ptr1,str1,ptr2,ptr3,str2))
 
+#define dsp_FilterHoriz(funcs, ptr1, ptr2, ptr3) \
+  (funcs.FilterHoriz(ptr1, ptr2, ptr3))
 
+#define dsp_FilterVert(funcs, ptr1, ptr2, ptr3) \
+  (funcs.FilterVert(ptr1, ptr2, ptr3))
+
+#define dsp_IDctSlow(funcs, ptr1, ptr2, ptr3) \
+    (funcs.IDctSlow(ptr1, ptr2, ptr3))
+
+#define dsp_IDct3(funcs, ptr1, ptr2, ptr3) \
+    (funcs.IDct3(ptr1, ptr2, ptr3)) /* use the IDct3 slot, not IDctSlow */
+
+#define dsp_IDct10(funcs, ptr1, ptr2, ptr3) \
+   (funcs.IDct10(ptr1, ptr2, ptr3)) /* use the IDct10 slot, not IDctSlow */
+
 #endif /* DSP_H */

Modified: experimental/j/theora-mashup/lib/enc/encoder_idct.c
===================================================================
--- experimental/j/theora-mashup/lib/enc/encoder_idct.c	2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/enc/encoder_idct.c	2007-04-09 18:08:48 UTC (rev 12840)
@@ -17,10 +17,10 @@
 
 #include <string.h>
 #include "codec_internal.h"
+
 #include "quant_lookup.h"
 
 #define IdctAdjustBeforeShift 8
-
 /* cos(n*pi/16) or sin(8-n)*pi/16) */
 #define xC1S7 64277
 #define xC2S6 60547
@@ -31,6 +31,7 @@
 #define xC7S1 12785
 
 /* compute the 16 bit signed 1D inverse DCT - spec version */
+/*
 static void idct_short__c ( ogg_int16_t * InputData, ogg_int16_t * OutputData ) {
   ogg_int32_t t[8], r;
   ogg_int16_t *y = InputData;
@@ -108,6 +109,7 @@
   x[7] = r;
 
 }
+*/
 
 static void dequant_slow( ogg_int16_t * dequant_coeffs,
                    ogg_int16_t * quantized_list,
@@ -119,7 +121,7 @@
 
 
 
-void IDctSlow(  Q_LIST_ENTRY * InputData,
+void IDctSlow__c(  Q_LIST_ENTRY * InputData,
                 ogg_int16_t *QuantMatrix,
                 ogg_int16_t * OutputData ) {
   ogg_int32_t IntermediateData[64];
@@ -348,7 +350,7 @@
 
 }
 
-void IDct10( Q_LIST_ENTRY * InputData,
+void IDct10__c( Q_LIST_ENTRY * InputData,
              ogg_int16_t *QuantMatrix,
              ogg_int16_t * OutputData ){
   ogg_int32_t IntermediateData[64];
@@ -553,3 +555,15 @@
     OutputData[loop]=OutD;
 
 }
+
+void dsp_idct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
+{ /* Portable C defaults; the IDct3 slot reuses IDct10__c. */
+  funcs->IDct3 = IDct10__c;
+  funcs->IDct10 = IDct10__c;
+  funcs->IDctSlow = IDctSlow__c;
+#ifdef USE_ASM
+  if ((cpu_flags & OC_CPU_X86_MMX) != 0) {
+    dsp_mmx_idct_init(funcs); /* NOTE(review): presumably installs MMX IDCTs */
+  }
+#endif
+}

Modified: experimental/j/theora-mashup/lib/enc/encoder_quant.c
===================================================================
--- experimental/j/theora-mashup/lib/enc/encoder_quant.c	2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/enc/encoder_quant.c	2007-04-09 18:08:48 UTC (rev 12840)
@@ -551,7 +551,7 @@
 void select_UV_quantiser ( PB_INSTANCE *pbi ){
   pbi->fquant_coeffs = pbi->fp_quant_UV_coeffs;
   pbi->fquant_round = pbi->fp_quant_UV_round;
-  pbi->fquant_ZbSize = pbi->fp_quant_UV_round;
+  pbi->fquant_ZbSize = pbi->fp_ZeroBinSize_UV;
 }
 
 void select_InterUV_quantiser ( PB_INSTANCE *pbi ){

Modified: experimental/j/theora-mashup/lib/enc/mcomp.c
===================================================================
--- experimental/j/theora-mashup/lib/enc/mcomp.c	2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/enc/mcomp.c	2007-04-09 18:08:48 UTC (rev 12840)
@@ -17,7 +17,6 @@
 
 #include <stdlib.h>
 #include <stdio.h>
-#include "dsp.h"
 #include "codec_internal.h"
 
 /* Initialises motion compentsation. */

Modified: experimental/j/theora-mashup/lib/enc/reconstruct.c
===================================================================
--- experimental/j/theora-mashup/lib/enc/reconstruct.c	2007-04-09 17:35:23 UTC (rev 12839)
+++ experimental/j/theora-mashup/lib/enc/reconstruct.c	2007-04-09 18:08:48 UTC (rev 12840)
@@ -16,8 +16,6 @@
  ********************************************************************/
 
 #include "codec_internal.h"
-#include "dsp.h"
-#include "cpu.h"
 
 static void copy8x8__c (unsigned char *src,
 	                unsigned char *dest,

Copied: experimental/j/theora-mashup/lib/enc/x86_32/dct_decode_mmx.c (from rev 12835, trunk/theora/lib/x86_32/dct_decode_mmx.c)
===================================================================
--- trunk/theora/lib/x86_32/dct_decode_mmx.c	2007-04-08 13:04:56 UTC (rev 12835)
+++ experimental/j/theora-mashup/lib/enc/x86_32/dct_decode_mmx.c	2007-04-09 18:08:48 UTC (rev 12840)
@@ -0,0 +1,184 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: dct_decode_mmx.c 12440 2007-02-06 16:36:26Z j $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+
+#include "codec_internal.h"
+
+#if defined(USE_ASM)
+
+static const __attribute__((aligned(8),used)) ogg_int64_t V3= 0x0003000300030003LL; /* the value 3 in each of four 16-bit lanes */
+static const __attribute__((aligned(8),used)) ogg_int64_t V804= 0x0804080408040804LL; /* 0x0804 per 16-bit lane: +4 rounds the later >>3, +0x800 becomes +256 after it */
+/* MANGLE(x): spelling of the C symbol x inside the asm strings below; __APPLE__ builds prepend an underscore. */
+#if defined(__APPLE__)
+#define MANGLE(x) "_"#x
+#else
+#define MANGLE(x) #x
+#endif
+
+static void FilterHoriz__mmx(unsigned char * PixelPtr,
+                        ogg_int32_t LineLength,
+                        ogg_int16_t *BoundingValuePtr){
+  /* Rewrites bytes 1 and 2 of each of 8 consecutive rows starting at PixelPtr, from the 4 bytes read at the start of each row; done as two 4-row passes. */
+#define OC_LOOP_H_4x4                                                   \
+    __asm__ __volatile__(                                               \
+    "lea (%1,%1,2),%%esi\n"     /* esi = ystride*3 */                   \
+    "movd (%0), %%mm0\n"        /* 0 0 0 0 3 2 1 0 */                   \
+    "movd (%0,%1),%%mm1\n"      /* 0 0 0 0 7 6 5 4 */                   \
+    "movd (%0,%1,2),%%mm2\n"    /* 0 0 0 0 b a 9 8 */                   \
+    "movd (%0,%%esi),%%mm3\n"   /* 0 0 0 0 f e d c */                   \
+    "punpcklbw %%mm1,%%mm0\n"   /* mm0 = 7 3 6 2 5 1 4 0 */             \
+    "punpcklbw %%mm3,%%mm2\n"   /* mm2 = f b e a d 9 c 8 */             \
+    "movq %%mm0,%%mm1\n"        /* mm1 = 7 3 6 2 5 1 4 0 */             \
+    "punpcklwd %%mm2,%%mm1\n"   /* mm1 = d 9 5 1 c 8 4 0 */             \
+    "punpckhwd %%mm2,%%mm0\n"   /* mm0 = f b 7 3 e a 6 2 */             \
+    "pxor %%mm7,%%mm7\n"                                                \
+    "movq %%mm1,%%mm5\n"        /* mm5 = d 9 5 1 c 8 4 0 */             \
+    "punpckhbw %%mm7,%%mm5\n"   /* mm5 = 0 d 0 9 0 5 0 1 = pix[1]*/     \
+    "punpcklbw %%mm7,%%mm1\n"   /* mm1 = 0 c 0 8 0 4 0 0 = pix[0]*/     \
+    "movq %%mm0,%%mm3\n"        /* mm3 = f b 7 3 e a 6 2 */             \
+    "punpckhbw %%mm7,%%mm3\n"   /* mm3 = 0 f 0 b 0 7 0 3 = pix[3]*/     \
+    "punpcklbw %%mm7,%%mm0\n"       /* mm0 = 0 e 0 a 0 6 0 2 = pix[2]*/ \
+    /* per row: f = (pix[0]-pix[3] + 3*(pix[2]-pix[1]) + 4) >> 3 */     \
+    "psubw %%mm3,%%mm1\n"       /* mm1 = pix[0]-pix[3] mm1 - mm3 */     \
+    "movq %%mm0,%%mm7\n"        /* mm7 = pix[2]*/                       \
+    "psubw %%mm5,%%mm0\n"       /* mm0 = pix[2]-pix[1] mm0 - mm5*/      \
+    "PMULLW "MANGLE(V3)",%%mm0\n" /* *3 */                              \
+    "paddw %%mm0,%%mm1\n"         /* mm1 has f[0] ... f[3], one per row */ \
+    "paddw "MANGLE(V804)",%%mm1\n"/* add 4 to round; the 0x800 part is +256 after the shift, matching the -256 table base */ \
+    "psraw $3,%%mm1\n"          /* >>3 */                               \
+    " pextrw $0,%%mm1,%%esi\n"  /* In MM1 we have 4 f coefs (16bits) */ \
+    " pextrw $1,%%mm1,%%edi\n"  /* now perform MM4 = *(_bv+ f) */       \
+    " pinsrw $0,(%2,%%esi,2),%%mm4\n"                                   \
+    " pextrw $2,%%mm1,%%esi\n"                                          \
+    " pinsrw $1,(%2,%%edi,2),%%mm4\n"                                   \
+    " pextrw $3,%%mm1,%%edi\n"                                          \
+    " pinsrw $2,(%2,%%esi,2),%%mm4\n"                                   \
+    " pinsrw $3,(%2,%%edi,2),%%mm4\n" /* new f vals loaded */           \
+    "pxor %%mm0,%%mm0\n"                                                \
+    " paddw %%mm4,%%mm5\n"      /*(pix[1]+f);*/                         \
+    " psubw %%mm4,%%mm7\n"      /* (pix[2]-f); */                       \
+    " packuswb %%mm0,%%mm5\n"   /* mm5 = x x x x newpix1 */             \
+    " packuswb %%mm0,%%mm7\n"   /* mm7 = x x x x newpix2 */             \
+    " punpcklbw %%mm7,%%mm5\n"  /* 2 1 2 1 2 1 2 1 */                   \
+    " movd %%mm5,%%eax\n"       /* eax = newpix21 */                    \
+    " movw %%ax,1(%0)\n"        /* row 0: store new pix[1],pix[2] */    \
+    " psrlq $32,%%mm5\n"        /* why is so big stall here ? */        \
+    " shrl $16,%%eax\n"                                                 \
+    " lea 1(%0,%1,2),%%edi\n"   /* edi = address of pix[1] in row 2 */  \
+    " movw %%ax,1(%0,%1,1)\n"   /* row 1 */                             \
+    " movd %%mm5,%%eax\n"       /* eax = newpix21 high part */          \
+    " lea (%1,%1,2),%%esi\n"    /* esi = ystride*3 again (reused above as a table index) */ \
+    " movw %%ax,(%%edi)\n"      /* row 2 */                             \
+    " shrl $16,%%eax\n"                                                 \
+    " movw %%ax,1(%0,%%esi)\n"  /* row 3 */                             \
+    :                                                                   \
+    : "r" (PixelPtr), "r" (LineLength), "r" (BoundingValuePtr-256)      \
+    : "esi", "edi" , "memory", "eax" /* NOTE(review): the MMX registers written above are not listed as clobbers */ \
+    );
+    /* NOTE(review): pextrw/pinsrw on MMX registers are, as far as I know, SSE/extended-MMX additions rather than baseline MMX - confirm the CPU check that selects this path covers them. */
+    OC_LOOP_H_4x4               /* rows 0..3 */
+    PixelPtr += LineLength*4;   /* step down four rows */
+    OC_LOOP_H_4x4               /* rows 4..7 */
+    __asm__ __volatile__("emms\n"); /* leave MMX state */
+}
+
+static void FilterVert__mmx(unsigned char * PixelPtr,
+                ogg_int32_t LineLength,
+                ogg_int16_t *BoundingValuePtr){ /* Rewrites 8 bytes in each of the rows PixelPtr-LineLength and PixelPtr, using the four rows PixelPtr-2*LineLength .. PixelPtr+LineLength. */
+    __asm__ __volatile__(
+    "pxor %%mm0,%%mm0\n"        /* mm0 = 0 */
+    "movq (%0),%%mm7\n"         /* mm7 = pix[0..7] */
+    "lea (%1,%1,2),%%esi\n"     /* esi = ystride*3 */
+    "movq (%0,%%esi),%%mm4\n"   /* mm4 = pix[0..7+ystride*3] */
+    "movq %%mm7,%%mm6\n"        /* mm6 = pix[0..7] */
+    "punpcklbw %%mm0,%%mm6\n"   /* expand unsigned pix[0..3] to 16 bits */
+    "movq %%mm4,%%mm5\n"
+    "punpckhbw %%mm0,%%mm7\n"   /* expand unsigned pix[4..7] to 16 bits */
+    "punpcklbw %%mm0,%%mm4\n"   /* expand other arrays too */
+    "punpckhbw %%mm0,%%mm5\n"
+    "psubw %%mm4,%%mm6\n"       /* mm6 = mm6 - mm4 */
+    "psubw %%mm5,%%mm7\n"       /* mm7 = mm7 - mm5 */
+                /* mm7:mm6 = _p[0]-_p[ystride*3] */
+    "movq (%0,%1),%%mm4\n"      /* mm4 = pix[0..7+ystride] */
+    "movq %%mm4,%%mm5\n"
+    "movq (%0,%1,2),%%mm2\n"    /* mm2 = pix[0..7+ystride*2] */
+    "movq %%mm2,%%mm3\n"
+    "movq %%mm2,%%mm1\n"        /* mm1 = packed copy of row ystride*2, kept for the write-back below */
+    "punpckhbw %%mm0,%%mm5\n"
+    "punpcklbw %%mm0,%%mm4\n"
+    "punpckhbw %%mm0,%%mm3\n"
+    "punpcklbw %%mm0,%%mm2\n"
+    "psubw %%mm5,%%mm3\n"
+    "psubw %%mm4,%%mm2\n"
+                /* mm3:mm2 = (pix[ystride*2]-pix[ystride]); */
+    "PMULLW "MANGLE(V3)",%%mm3\n"    /* *3 */
+    "PMULLW "MANGLE(V3)",%%mm2\n"    /* *3 */
+    "paddw %%mm7,%%mm3\n"            /* highpart */
+    "paddw %%mm6,%%mm2\n"            /* lowpart of pix[0]-pix[ystride*3]+3*(pix[ystride*2]-pix[ystride]);  */
+    "paddw "MANGLE(V804)",%%mm3\n"   /* add 4 */ /* add 256 after shift */
+    "paddw "MANGLE(V804)",%%mm2\n"   /* add 4 */ /* add 256 after shift */
+    "psraw $3,%%mm3\n"               /* >>3 f coefs high */
+    "psraw $3,%%mm2\n"               /* >>3 f coefs low */
+    /* Table lookup, one 16-bit lane at a time: each lane now holds f+256, which indexes from the BoundingValuePtr-256 base passed as %2. */
+    " pextrw $0,%%mm2,%%esi\n"  /* In MM3:MM2 we have f coefs (16bits) */
+    " pextrw $1,%%mm2,%%edi\n"  /* now perform MM7:MM6 = *(_bv+ f) */
+    " pinsrw $0,(%2,%%esi,2),%%mm6\n"
+    " pinsrw $1,(%2,%%edi,2),%%mm6\n"
+    /* NOTE(review): pextrw/pinsrw on MMX registers are, as far as I know, SSE/extended-MMX additions rather than baseline MMX - confirm the CPU check that selects this path covers them. */
+    " pextrw $2,%%mm2,%%esi\n"
+    " pextrw $3,%%mm2,%%edi\n"
+    " pinsrw $2,(%2,%%esi,2),%%mm6\n"
+    " pinsrw $3,(%2,%%edi,2),%%mm6\n"
+
+    " pextrw $0,%%mm3,%%esi\n"
+    " pextrw $1,%%mm3,%%edi\n"
+    " pinsrw $0,(%2,%%esi,2),%%mm7\n"
+    " pinsrw $1,(%2,%%edi,2),%%mm7\n"
+
+    " pextrw $2,%%mm3,%%esi\n"
+    " pextrw $3,%%mm3,%%edi\n"
+    " pinsrw $2,(%2,%%esi,2),%%mm7\n"
+    " pinsrw $3,(%2,%%edi,2),%%mm7\n"   /* mm7:mm6 = looked-up values, i.e. f=*(_bv+(f+4>>3)) */
+
+    "paddw %%mm6,%%mm4\n"       /* (pix[ystride]+f); low four columns */
+    "paddw %%mm7,%%mm5\n"       /* (pix[ystride]+f); high four columns */
+    "movq %%mm1,%%mm2\n"
+    "punpcklbw %%mm0,%%mm1\n"
+    "punpckhbw %%mm0,%%mm2\n"   /* mm2:mm1 = row ystride*2 expanded to 16-bit lanes */
+    "psubw %%mm6,%%mm1\n"       /* (pix[ystride*2]-f); */
+    "psubw %%mm7,%%mm2\n"       /* (pix[ystride*2]-f); */
+    "packuswb %%mm2,%%mm1\n"    /* back to 8 bytes with unsigned saturation */
+    "packuswb %%mm5,%%mm4\n"    /* back to 8 bytes with unsigned saturation */
+    "movq %%mm1,(%0,%1,2)\n"    /* pix[ystride*2]= */
+    "movq %%mm4,(%0,%1)\n"      /* pix[ystride]= */
+    "emms\n"
+    :
+    : "r" (PixelPtr-2*LineLength), "r" (LineLength), "r" (BoundingValuePtr-256) /* %0 starts two rows above PixelPtr, so the "pix[...]" comments above are relative to that row */
+    : "esi", "edi" , "memory" /* NOTE(review): the MMX registers written above are not listed as clobbers */
+    );
+}
+
+/* Install the MMX implementations above into the DSP function table: overwrites funcs->FilterVert and funcs->FilterHoriz and emits a debug message. */
+void dsp_mmx_dct_decode_init(DspFunctions *funcs)
+{
+  TH_DEBUG("enabling accelerated x86_32 mmx dct decode functions.\n");
+  funcs->FilterVert = FilterVert__mmx;   /* defined above in this file */
+  funcs->FilterHoriz = FilterHoriz__mmx; /* defined above in this file */
+}
+
+#endif /* USE_ASM */

Copied: experimental/j/theora-mashup/lib/enc/x86_32/idct_mmx.c (from rev 12835, trunk/theora/lib/x86_32/idct_mmx.c)

Copied: experimental/j/theora-mashup/lib/enc/x86_64/dct_decode_mmx.c (from rev 12835, trunk/theora/lib/x86_64/dct_decode_mmx.c)

Copied: experimental/j/theora-mashup/lib/enc/x86_64/idct_mmx.c (from rev 12835, trunk/theora/lib/x86_64/idct_mmx.c)