[xiph-commits] r17378 - in experimental/derf/theora-ptalarbvorm/lib: . c64x x86

tterribe at svn.xiph.org tterribe at svn.xiph.org
Thu Sep 2 13:17:35 PDT 2010


Author: tterribe
Date: 2010-09-02 13:17:34 -0700 (Thu, 02 Sep 2010)
New Revision: 17378

Added:
   experimental/derf/theora-ptalarbvorm/lib/state.h
Modified:
   experimental/derf/theora-ptalarbvorm/lib/Makefile.am
   experimental/derf/theora-ptalarbvorm/lib/apiwrapper.h
   experimental/derf/theora-ptalarbvorm/lib/c64x/c64xdec.c
   experimental/derf/theora-ptalarbvorm/lib/c64x/c64xdec.h
   experimental/derf/theora-ptalarbvorm/lib/c64x/c64xint.h
   experimental/derf/theora-ptalarbvorm/lib/c64x/c64xstate.c
   experimental/derf/theora-ptalarbvorm/lib/decint.h
   experimental/derf/theora-ptalarbvorm/lib/decode.c
   experimental/derf/theora-ptalarbvorm/lib/encinfo.c
   experimental/derf/theora-ptalarbvorm/lib/encint.h
   experimental/derf/theora-ptalarbvorm/lib/encode.c
   experimental/derf/theora-ptalarbvorm/lib/internal.c
   experimental/derf/theora-ptalarbvorm/lib/internal.h
   experimental/derf/theora-ptalarbvorm/lib/state.c
   experimental/derf/theora-ptalarbvorm/lib/x86/sse2idct.c
   experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.c
   experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.h
   experimental/derf/theora-ptalarbvorm/lib/x86/x86int.h
   experimental/derf/theora-ptalarbvorm/lib/x86/x86state.c
Log:
Make the function pointer tables for accelerated functions completely optional.

This required splitting off a new state.h from internal.h (which we should have
 done a long time ago) to make the typedefs available at the proper times.
Most notably, this allows the pure C functions (for platforms with no
 acceleration) and the x86-64 functions that don't use anything later than SSE2
 (which is currently all of them) to avoid any function pointer overhead at all.
On x86-64, at least, this made no measurable performance difference whatsoever.


Modified: experimental/derf/theora-ptalarbvorm/lib/Makefile.am
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/Makefile.am	2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/Makefile.am	2010-09-02 20:17:34 UTC (rev 17378)
@@ -144,21 +144,22 @@
 	$(decoder_arch_sources)
 
 noinst_HEADERS = \
-	internal.h \
-	encint.h \
-	enquant.h \
-	huffenc.h \
-	mathops.h \
-	modedec.h \
 	apiwrapper.h \
 	bitpack.h \
 	dct.h \
 	decint.h \
 	dequant.h \
+	encint.h \
+	enquant.h \
 	huffdec.h \
+	huffenc.h \
 	huffman.h \
+	internal.h \
+	mathops.h \
+	modedec.h \
 	ocintrin.h \
 	quant.h \
+	state.h \
 	x86/cpu.h \
 	x86/mmxfrag.h \
 	x86/mmxloop.h \

Modified: experimental/derf/theora-ptalarbvorm/lib/apiwrapper.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/apiwrapper.h	2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/apiwrapper.h	2010-09-02 20:17:34 UTC (rev 17378)
@@ -21,7 +21,7 @@
 # include <theora/theora.h>
 # include "theora/theoradec.h"
 # include "theora/theoraenc.h"
-# include "internal.h"
+# include "state.h"
 
 typedef struct th_api_wrapper th_api_wrapper;
 typedef struct th_api_info    th_api_info;

Modified: experimental/derf/theora-ptalarbvorm/lib/c64x/c64xdec.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/c64x/c64xdec.c	2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/c64x/c64xdec.c	2010-09-02 20:17:34 UTC (rev 17378)
@@ -18,8 +18,10 @@
 
 #if defined(OC_C64X_ASM)
 
-void oc_dec_vtable_init_c64x(oc_dec_ctx *_dec){
+void oc_dec_accel_init_c64x(oc_dec_ctx *_dec){
+# if defined(OC_DEC_USE_VTABLE)
   _dec->opt_vtable.dc_unpredict_mcu_plane=oc_dec_dc_unpredict_mcu_plane_c64x;
+# endif
 }
 
 

Modified: experimental/derf/theora-ptalarbvorm/lib/c64x/c64xdec.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/c64x/c64xdec.h	2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/c64x/c64xdec.h	2010-09-02 20:17:34 UTC (rev 17378)
@@ -19,14 +19,13 @@
 # include "c64xint.h"
 
 # if defined(OC_C64X_ASM)
-#  if !defined(oc_dec_dc_unpredict_mcu_plane)
-#   define oc_dec_dc_unpredict_mcu_plane oc_dec_dc_unpredict_mcu_plane_c64x
-#  endif
+#  define oc_dec_accel_init oc_dec_accel_init_c64x
+#  define oc_dec_dc_unpredict_mcu_plane oc_dec_dc_unpredict_mcu_plane_c64x
 # endif
 
 # include "../decint.h"
 
-void oc_dec_vtable_init_c64x(oc_dec_ctx *_dec);
+void oc_dec_accel_init_c64x(oc_dec_ctx *_dec);
 
 void oc_dec_dc_unpredict_mcu_plane_c64x(oc_dec_ctx *_dec,
  oc_dec_pipeline_state *_pipe,int _pli);

Modified: experimental/derf/theora-ptalarbvorm/lib/c64x/c64xint.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/c64x/c64xint.h	2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/c64x/c64xint.h	2010-09-02 20:17:34 UTC (rev 17378)
@@ -17,45 +17,29 @@
 
 #if !defined(_c64x_c64xint_H)
 # define _c64x_c64xint_H (1)
+# include "../internal.h"
 
 # if defined(OC_C64X_ASM)
-#  if !defined(oc_frag_copy)
-#   define oc_frag_copy(_state,_dst,_src,_ystride) \
-     oc_frag_copy_c64x(_dst,_src,_ystride)
-#  endif
-#  if !defined(oc_frag_recon_intra)
-#   define oc_frag_recon_intra(_state,_dst,_dst_ystride,_residue) \
-     oc_frag_recon_intra_c64x(_dst,_dst_ystride,_residue)
-#  endif
-#  if !defined(oc_frag_recon_inter)
-#   define oc_frag_recon_inter(_state,_dst,_src,_ystride,_residue) \
-     oc_frag_recon_inter_c64x(_dst,_src,_ystride,_residue)
-#  endif
-#  if !defined(oc_frag_recon_inter2)
-#   define oc_frag_recon_inter2(_state,_dst,_src1,_src2,_ystride,_residue) \
-     oc_frag_recon_inter2_c64x(_dst,_src1,_src2,_ystride,_residue)
-#  endif
-#  if !defined(oc_idct8x8)
-#   define oc_idct8x8(_state,_y,_last_zzi) \
-     define oc_idct8x8_c64x(_y,_last_zzi)
-#  endif
-#  if !defined(oc_state_frag_recon)
-#   define oc_state_frag_recon oc_state_frag_recon_c64x
-#  endif
-#  if !defined(oc_state_frag_copy_list)
-#   define oc_state_frag_copy_list oc_state_frag_copy_list_c64x
-#  endif
-#  if !defined(oc_state_loop_filter_frag_rows)
-#   define oc_state_loop_filter_frag_rows oc_state_loop_filter_frag_rows_c64x
-#  endif
-#  if !defined(oc_restore_fpu)
-#   define oc_restore_fpu(_state) do{}while(0)
-#  endif
+#  define oc_state_accel_init oc_state_accel_init_c64x
+#  define oc_frag_copy(_state,_dst,_src,_ystride) \
+  oc_frag_copy_c64x(_dst,_src,_ystride)
+#  define oc_frag_recon_intra(_state,_dst,_dst_ystride,_residue) \
+  oc_frag_recon_intra_c64x(_dst,_dst_ystride,_residue)
+#  define oc_frag_recon_inter(_state,_dst,_src,_ystride,_residue) \
+  oc_frag_recon_inter_c64x(_dst,_src,_ystride,_residue)
+#  define oc_frag_recon_inter2(_state,_dst,_src1,_src2,_ystride,_residue) \
+  oc_frag_recon_inter2_c64x(_dst,_src1,_src2,_ystride,_residue)
+#  define oc_idct8x8(_state,_y,_last_zzi) \
+  oc_idct8x8_c64x(_y,_last_zzi)
+#  define oc_state_frag_recon oc_state_frag_recon_c64x
+#  define oc_state_frag_copy_list oc_state_frag_copy_list_c64x
+#  define oc_state_loop_filter_frag_rows oc_state_loop_filter_frag_rows_c64x
+#  define oc_restore_fpu(_state) do{}while(0)
 # endif
 
-# include "../internal.h"
+# include "../state.h"
 
-void oc_state_vtable_init_c64x(oc_theora_state *_state);
+void oc_state_accel_init_c64x(oc_theora_state *_state);
 
 void oc_frag_copy_c64x(unsigned char *_dst,
  const unsigned char *_src,int _ystride);

Modified: experimental/derf/theora-ptalarbvorm/lib/c64x/c64xstate.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/c64x/c64xstate.c	2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/c64x/c64xstate.c	2010-09-02 20:17:34 UTC (rev 17378)
@@ -19,7 +19,9 @@
 
 #if defined(OC_C64X_ASM)
 
-void oc_state_vtable_init_c64x(oc_theora_state *_state){
+void oc_state_accel_init_c64x(oc_theora_state *_state){
+  _state->cpu_flags=0;
+# if defined(OC_STATE_USE_VTABLE)
   _state->opt_vtable.frag_copy=oc_frag_copy_c64x;
   _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c64x;
   _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c64x;
@@ -30,6 +32,7 @@
   _state->opt_vtable.state_loop_filter_frag_rows=
    oc_state_loop_filter_frag_rows_c64x;
   _state->opt_vtable.restore_fpu=oc_restore_fpu_c;
+# endif
   _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG;
 }
 

Modified: experimental/derf/theora-ptalarbvorm/lib/decint.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/decint.h	2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/decint.h	2010-09-02 20:17:34 UTC (rev 17378)
@@ -19,17 +19,39 @@
 #if !defined(_decint_H)
 # define _decint_H (1)
 # include "theora/theoradec.h"
-# include "internal.h"
+# include "state.h"
 # include "bitpack.h"
+# include "huffdec.h"
+# include "dequant.h"
 
 typedef struct th_setup_info         oc_setup_info;
 typedef struct oc_dec_opt_vtable     oc_dec_opt_vtable;
 typedef struct oc_dec_pipeline_state oc_dec_pipeline_state;
 typedef struct th_dec_ctx            oc_dec_ctx;
 
-# include "huffdec.h"
-# include "dequant.h"
 
+
+/*Decoder-specific accelerated functions.*/
+# if defined(OC_C64X_ASM)
+#  include "c64x/c64xdec.h"
+# endif
+
+# if !defined(oc_dec_accel_init)
+#  define oc_dec_accel_init oc_dec_accel_init_c
+# endif
+# if defined(OC_DEC_USE_VTABLE)
+#  if !defined(oc_dec_dc_unpredict_mcu_plane)
+#   define oc_dec_dc_unpredict_mcu_plane(_dec,_pipe,_pli) \
+ ((*(_dec)->opt_vtable.dc_unpredict_mcu_plane)(_dec,_pipe,_pli))
+#  endif
+# else
+#  if !defined(oc_dec_dc_unpredict_mcu_plane)
+#   define oc_dec_dc_unpredict_mcu_plane oc_dec_dc_unpredict_mcu_plane_c
+#  endif
+# endif
+
+
+
 /*Constants for the packet-in state machine specific to the decoder.*/
 
 /*Next packet to read: Data packet.*/
@@ -117,8 +139,10 @@
   /*The striped decode callback function.*/
   th_stripe_callback   stripe_cb;
   oc_dec_pipeline_state pipe;
+# if defined(OC_DEC_USE_VTABLE)
   /*Table for decoder acceleration functions.*/
   oc_dec_opt_vtable    opt_vtable;
+# endif
 # if defined(HAVE_CAIRO)
   /*Output metrics for debugging.*/
   int                  telemetry;
@@ -136,19 +160,9 @@
 # endif
 };
 
-/*Decoder-specific accelerated functions.*/
-# if defined(OC_C64X_ASM)
-#  include "c64x/c64xdec.h"
-# endif
+/*Default pure-C implementations of decoder-specific accelerated functions.*/
+void oc_dec_accel_init_c(oc_dec_ctx *_dec);
 
-# if !defined(oc_dec_dc_unpredict_mcu_plane)
-#  define oc_dec_dc_unpredict_mcu_plane(_dec,_pipe,_pli) \
- ((*(_dec)->opt_vtable.dc_unpredict_mcu_plane)(_dec,_pipe,_pli))
-# endif
-
-/*Default pure-C implementations.*/
-void oc_dec_vtable_init_c(oc_dec_ctx *_dec);
-
 void oc_dec_dc_unpredict_mcu_plane_c(oc_dec_ctx *_dec,
  oc_dec_pipeline_state *_pipe,int _pli);
 

Modified: experimental/derf/theora-ptalarbvorm/lib/decode.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/decode.c	2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/decode.c	2010-09-02 20:17:34 UTC (rev 17378)
@@ -358,9 +358,11 @@
 
 
 
-void oc_dec_vtable_init_c(oc_dec_ctx *_dec){
+void oc_dec_accel_init_c(oc_dec_ctx *_dec){
+# if defined(OC_DEC_USE_VTABLE)
   _dec->opt_vtable.dc_unpredict_mcu_plane=
    oc_dec_dc_unpredict_mcu_plane_c;
+# endif
 }
 
 static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
@@ -407,11 +409,7 @@
   }
   memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
    sizeof(_dec->state.loop_filter_limits));
-#if defined(OC_C64X_ASM)
-  oc_dec_vtable_init_c64x(_dec);
-#else
-  oc_dec_vtable_init_c(_dec);
-#endif
+  oc_dec_accel_init(_dec);
   _dec->pp_level=OC_PP_LEVEL_DISABLED;
   _dec->dc_qis=NULL;
   _dec->variances=NULL;

Modified: experimental/derf/theora-ptalarbvorm/lib/encinfo.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/encinfo.c	2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/encinfo.c	2010-09-02 20:17:34 UTC (rev 17378)
@@ -1,6 +1,6 @@
 #include <stdlib.h>
 #include <string.h>
-#include "internal.h"
+#include "state.h"
 #include "enquant.h"
 #include "huffenc.h"
 

Modified: experimental/derf/theora-ptalarbvorm/lib/encint.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/encint.h	2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/encint.h	2010-09-02 20:17:34 UTC (rev 17378)
@@ -17,8 +17,7 @@
 #if !defined(_encint_H)
 # define _encint_H (1)
 # include "theora/theoraenc.h"
-# include "internal.h"
-# include "ocintrin.h"
+# include "state.h"
 # include "mathops.h"
 # include "enquant.h"
 # include "huffenc.h"
@@ -41,6 +40,155 @@
 
 
 
+/*Encoder-specific accelerated functions.*/
+# if defined(OC_X86_ASM)
+#  include "x86/x86enc.h"
+# endif
+
+# if !defined(oc_enc_accel_init)
+#  define oc_enc_accel_init oc_enc_accel_init_c
+# endif
+# if defined(OC_ENC_USE_VTABLE)
+#  if !defined(oc_enc_frag_sub)
+#   define oc_enc_frag_sub(_enc,_diff,_src,_ref,_ystride) \
+  ((*(_enc)->opt_vtable.frag_sub)(_diff,_src,_ref,_ystride))
+#  endif
+#  if !defined(oc_enc_frag_sub_128)
+#   define oc_enc_frag_sub_128(_enc,_diff,_src,_ystride) \
+  ((*(_enc)->opt_vtable.frag_sub_128)(_diff,_src,_ystride))
+#  endif
+#  if !defined(oc_enc_frag_sad)
+#   define oc_enc_frag_sad(_enc,_src,_ref,_ystride) \
+  ((*(_enc)->opt_vtable.frag_sad)(_src,_ref,_ystride))
+#  endif
+#  if !defined(oc_enc_frag_sad_thresh)
+#   define oc_enc_frag_sad_thresh(_enc,_src,_ref,_ystride,_thresh) \
+  ((*(_enc)->opt_vtable.frag_sad_thresh)(_src,_ref,_ystride,_thresh))
+#  endif
+#  if !defined(oc_enc_frag_sad2_thresh)
+#   define oc_enc_frag_sad2_thresh(_enc,_src,_ref1,_ref2,_ystride,_thresh) \
+  ((*(_enc)->opt_vtable.frag_sad2_thresh)(_src,_ref1,_ref2,_ystride,_thresh))
+#  endif
+#  if !defined(oc_enc_frag_satd)
+#   define oc_enc_frag_satd(_enc,_dc,_src,_ref,_ystride) \
+  ((*(_enc)->opt_vtable.frag_satd)(_dc,_src,_ref,_ystride))
+#  endif
+#  if !defined(oc_enc_frag_satd2)
+#   define oc_enc_frag_satd2(_enc,_dc,_src,_ref1,_ref2,_ystride) \
+  ((*(_enc)->opt_vtable.frag_satd2)(_dc,_src,_ref1,_ref2,_ystride))
+#  endif
+#  if !defined(oc_enc_frag_intra_satd)
+#   define oc_enc_frag_intra_satd(_enc,_dc,_src,_ystride) \
+  ((*(_enc)->opt_vtable.frag_intra_satd)(_dc,_src,_ystride))
+#  endif
+#  if !defined(oc_enc_frag_ssd)
+#   define oc_enc_frag_ssd(_enc,_src,_ref,_ystride) \
+  ((*(_enc)->opt_vtable.frag_ssd)(_src,_ref,_ystride))
+#  endif
+#  if !defined(oc_enc_frag_border_ssd)
+#   define oc_enc_frag_border_ssd(_enc,_src,_ref,_ystride,_mask) \
+  ((*(_enc)->opt_vtable.frag_border_ssd)(_src,_ref,_ystride,_mask))
+#  endif
+#  if !defined(oc_enc_frag_copy2)
+#   define oc_enc_frag_copy2(_enc,_dst,_src1,_src2,_ystride) \
+  ((*(_enc)->opt_vtable.frag_copy2)(_dst,_src1,_src2,_ystride))
+#  endif
+#  if !defined(oc_enc_enquant_table_init)
+#   define oc_enc_enquant_table_init(_enc,_enquant,_dequant) \
+  ((*(_enc)->opt_vtable.enquant_table_init)(_enquant,_dequant))
+#  endif
+#  if !defined(oc_enc_enquant_table_fixup)
+#   define oc_enc_enquant_table_fixup(_enc,_enquant,_nqis) \
+  ((*(_enc)->opt_vtable.enquant_table_fixup)(_enquant,_nqis))
+#  endif
+#  if !defined(oc_enc_quantize)
+#   define oc_enc_quantize(_enc,_qdct,_dct,_dequant,_enquant) \
+  ((*(_enc)->opt_vtable.quantize)(_qdct,_dct,_dequant,_enquant))
+#  endif
+#  if !defined(oc_enc_frag_recon_intra)
+#   define oc_enc_frag_recon_intra(_enc,_dst,_ystride,_residue) \
+  ((*(_enc)->opt_vtable.frag_recon_intra)(_dst,_ystride,_residue))
+#  endif
+#  if !defined(oc_enc_frag_recon_inter)
+#   define oc_enc_frag_recon_inter(_enc,_dst,_src,_ystride,_residue) \
+  ((*(_enc)->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue))
+#  endif
+#  if !defined(oc_enc_fdct8x8)
+#   define oc_enc_fdct8x8(_enc,_y,_x) \
+  ((*(_enc)->opt_vtable.fdct8x8)(_y,_x))
+#  endif
+# else
+#  if !defined(oc_enc_frag_sub)
+#   define oc_enc_frag_sub(_enc,_diff,_src,_ref,_ystride) \
+  oc_enc_frag_sub_c(_diff,_src,_ref,_ystride)
+#  endif
+#  if !defined(oc_enc_frag_sub_128)
+#   define oc_enc_frag_sub_128(_enc,_diff,_src,_ystride) \
+  oc_enc_frag_sub_128_c(_diff,_src,_ystride)
+#  endif
+#  if !defined(oc_enc_frag_sad)
+#   define oc_enc_frag_sad(_enc,_src,_ref,_ystride) \
+  oc_enc_frag_sad_c(_src,_ref,_ystride)
+#  endif
+#  if !defined(oc_enc_frag_sad_thresh)
+#   define oc_enc_frag_sad_thresh(_enc,_src,_ref,_ystride,_thresh) \
+  oc_enc_frag_sad_thresh_c(_src,_ref,_ystride,_thresh)
+#  endif
+#  if !defined(oc_enc_frag_sad2_thresh)
+#   define oc_enc_frag_sad2_thresh(_enc,_src,_ref1,_ref2,_ystride,_thresh) \
+  oc_enc_frag_sad2_thresh_c(_src,_ref1,_ref2,_ystride,_thresh)
+#  endif
+#  if !defined(oc_enc_frag_satd)
+#   define oc_enc_frag_satd(_enc,_dc,_src,_ref,_ystride) \
+  oc_enc_frag_satd_c(_dc,_src,_ref,_ystride)
+#  endif
+#  if !defined(oc_enc_frag_satd2)
+#   define oc_enc_frag_satd2(_enc,_dc,_src,_ref1,_ref2,_ystride) \
+  oc_enc_frag_satd2_c(_dc,_src,_ref1,_ref2,_ystride)
+#  endif
+#  if !defined(oc_enc_frag_intra_satd)
+#   define oc_enc_frag_intra_satd(_enc,_dc,_src,_ystride) \
+  oc_enc_frag_intra_satd_c(_dc,_src,_ystride)
+#  endif
+#  if !defined(oc_enc_frag_ssd)
+#   define oc_enc_frag_ssd(_enc,_src,_ref,_ystride) \
+  oc_enc_frag_ssd_c(_src,_ref,_ystride)
+#  endif
+#  if !defined(oc_enc_frag_border_ssd)
+#   define oc_enc_frag_border_ssd(_enc,_src,_ref,_ystride,_mask) \
+  oc_enc_frag_border_ssd_c(_src,_ref,_ystride,_mask)
+#  endif
+#  if !defined(oc_enc_frag_copy2)
+#   define oc_enc_frag_copy2(_enc,_dst,_src1,_src2,_ystride) \
+  oc_enc_frag_copy2_c(_dst,_src1,_src2,_ystride)
+#  endif
+#  if !defined(oc_enc_enquant_table_init)
+#   define oc_enc_enquant_table_init(_enc,_enquant,_dequant) \
+  oc_enc_enquant_table_init_c(_enquant,_dequant)
+#  endif
+#  if !defined(oc_enc_enquant_table_fixup)
+#   define oc_enc_enquant_table_fixup(_enc,_enquant,_nqis) \
+  oc_enc_enquant_table_fixup_c(_enquant,_nqis)
+#  endif
+#  if !defined(oc_enc_quantize)
+#   define oc_enc_quantize(_enc,_qdct,_dct,_dequant,_enquant) \
+  oc_enc_quantize_c(_qdct,_dct,_dequant,_enquant)
+#  endif
+#  if !defined(oc_enc_frag_recon_intra)
+#   define oc_enc_frag_recon_intra(_enc,_dst,_ystride,_residue) \
+  oc_frag_recon_intra_c(_dst,_ystride,_residue)
+#  endif
+#  if !defined(oc_enc_frag_recon_inter)
+#   define oc_enc_frag_recon_inter(_enc,_dst,_src,_ystride,_residue) \
+  oc_frag_recon_inter_c(_dst,_src,_ystride,_residue)
+#  endif
+#  if !defined(oc_enc_fdct8x8)
+#   define oc_enc_fdct8x8(_enc,_y,_x) oc_enc_fdct8x8_c(_y,_x)
+#  endif
+# endif
+
+
+
 /*Constants for the packet-out state machine specific to the encoder.*/
 
 /*Next packet to emit: Data packet, but none are ready yet.*/
@@ -171,7 +319,7 @@
 };
 
 
-void oc_enc_vtable_init(oc_enc_ctx *_enc);
+void oc_enc_accel_init(oc_enc_ctx *_enc);
 
 
 
@@ -483,8 +631,10 @@
   oc_mode_rd               mode_rd[3][3][2][OC_SAD_BINS];
   /*The buffer state used to drive rate control.*/
   oc_rc_state              rc;
+# if defined(OC_ENC_USE_VTABLE)
   /*Table for encoder acceleration functions.*/
   oc_enc_opt_vtable        opt_vtable;
+# endif
   /*Table for encoder data used by accelerated functions.*/
   oc_enc_opt_data          opt_data;
 };
@@ -546,79 +696,9 @@
 
 
 
-/*Encoder-specific accelerated functions.*/
-# if !defined(oc_enc_frag_sub)
-#  define oc_enc_frag_sub(_enc,_diff,_src,_ref,_ystride) \
-  ((*(_enc)->opt_vtable.frag_sub)(_diff,_src,_ref,_ystride))
-# endif
-#if !defined(oc_enc_frag_sub_128)
-#  define oc_enc_frag_sub_128(_enc,_diff,_src,_ystride) \
-  ((*(_enc)->opt_vtable.frag_sub_128)(_diff,_src,_ystride))
-# endif
-#if !defined(oc_enc_frag_sad)
-#  define oc_enc_frag_sad(_enc,_src,_ref,_ystride) \
-  ((*(_enc)->opt_vtable.frag_sad)(_src,_ref,_ystride))
-#endif
-#if !defined(oc_enc_frag_sad_thresh)
-#  define oc_enc_frag_sad_thresh(_enc,_src,_ref,_ystride,_thresh) \
-  ((*(_enc)->opt_vtable.frag_sad_thresh)(_src,_ref,_ystride,_thresh))
-#endif
-#if !defined(oc_enc_frag_sad2_thresh)
-#  define oc_enc_frag_sad2_thresh(_enc,_src,_ref1,_ref2,_ystride,_thresh) \
-  ((*(_enc)->opt_vtable.frag_sad2_thresh)(_src,_ref1,_ref2,_ystride,_thresh))
-#endif
-#if !defined(oc_enc_frag_satd)
-#  define oc_enc_frag_satd(_enc,_dc,_src,_ref,_ystride) \
-  ((*(_enc)->opt_vtable.frag_satd)(_dc,_src,_ref,_ystride))
-#endif
-#if !defined(oc_enc_frag_satd2)
-#  define oc_enc_frag_satd2(_enc,_dc,_src,_ref1,_ref2,_ystride) \
-  ((*(_enc)->opt_vtable.frag_satd2)(_dc,_src,_ref1,_ref2,_ystride))
-#endif
-#if !defined(oc_enc_frag_intra_satd)
-#  define oc_enc_frag_intra_satd(_enc,_dc,_src,_ystride) \
-  ((*(_enc)->opt_vtable.frag_intra_satd)(_dc,_src,_ystride))
-#endif
-#if !defined(oc_enc_frag_ssd)
-#  define oc_enc_frag_ssd(_enc,_src,_ref,_ystride) \
-  ((*(_enc)->opt_vtable.frag_ssd)(_src,_ref,_ystride))
-#endif
-#if !defined(oc_enc_frag_border_ssd)
-#  define oc_enc_frag_border_ssd(_enc,_src,_ref,_ystride,_mask) \
-  ((*(_enc)->opt_vtable.frag_border_ssd)(_src,_ref,_ystride,_mask))
-#endif
-#if !defined(oc_enc_frag_copy2)
-#  define oc_enc_frag_copy2(_enc,_dst,_src1,_src2,_ystride) \
-  ((*(_enc)->opt_vtable.frag_copy2)(_dst,_src1,_src2,_ystride))
-#endif
-#if !defined(oc_enc_enquant_table_init)
-#  define oc_enc_enquant_table_init(_enc,_enquant,_dequant) \
-  ((*(_enc)->opt_vtable.enquant_table_init)(_enquant,_dequant))
-#endif
-#if !defined(oc_enc_enquant_table_fixup)
-#  define oc_enc_enquant_table_fixup(_enc,_enquant,_nqis) \
-  ((*(_enc)->opt_vtable.enquant_table_fixup)(_enquant,_nqis))
-#endif
-#if !defined(oc_enc_quantize)
-#  define oc_enc_quantize(_enc,_qdct,_dct,_dequant,_enquant) \
-  ((*(_enc)->opt_vtable.quantize)(_qdct,_dct,_dequant,_enquant))
-#endif
-#if !defined(oc_enc_frag_recon_intra)
-#  define oc_enc_frag_recon_intra(_enc,_dst,_ystride,_residue) \
-  ((*(_enc)->opt_vtable.frag_recon_intra)(_dst,_ystride,_residue))
-#endif
-#if !defined(oc_enc_frag_recon_inter)
-#  define oc_enc_frag_recon_inter(_enc,_dst,_src,_ystride,_residue) \
-  ((*(_enc)->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue))
-#endif
-#if !defined(oc_enc_fdct8x8)
-#  define oc_enc_fdct8x8(_enc,_y,_x) \
-  ((*(_enc)->opt_vtable.fdct8x8)(_y,_x))
-#endif
+/*Default pure-C implementations of encoder-specific accelerated functions.*/
+void oc_enc_accel_init_c(oc_enc_ctx *_enc);
 
-/*Default pure-C implementations.*/
-void oc_enc_vtable_init_c(oc_enc_ctx *_enc);
-
 void oc_enc_frag_sub_c(ogg_int16_t _diff[64],
  const unsigned char *_src,const unsigned char *_ref,int _ystride);
 void oc_enc_frag_sub_128_c(ogg_int16_t _diff[64],

Modified: experimental/derf/theora-ptalarbvorm/lib/encode.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/encode.c	2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/encode.c	2010-09-02 20:17:34 UTC (rev 17378)
@@ -18,9 +18,6 @@
 #include <string.h>
 #include "encint.h"
 #include "dequant.h"
-#if defined(OC_X86_ASM)
-# include "x86/x86enc.h"
-#endif
 
 
 
@@ -934,9 +931,10 @@
 }
 
 
-void oc_enc_vtable_init_c(oc_enc_ctx *_enc){
+void oc_enc_accel_init_c(oc_enc_ctx *_enc){
   /*The implementations prefixed with oc_enc_ are encoder-specific.
     The rest we re-use from the decoder.*/
+# if defined(OC_ENC_USE_VTABLE)
   _enc->opt_vtable.frag_sub=oc_enc_frag_sub_c;
   _enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_c;
   _enc->opt_vtable.frag_sad=oc_enc_frag_sad_c;
@@ -948,14 +946,15 @@
   _enc->opt_vtable.frag_ssd=oc_enc_frag_ssd_c;
   _enc->opt_vtable.frag_border_ssd=oc_enc_frag_border_ssd_c;
   _enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_c;
-  _enc->opt_data.enquant_table_size=64*sizeof(oc_iquant);
-  _enc->opt_data.enquant_table_alignment=16;
   _enc->opt_vtable.enquant_table_init=oc_enc_enquant_table_init_c;
   _enc->opt_vtable.enquant_table_fixup=oc_enc_enquant_table_fixup_c;
   _enc->opt_vtable.quantize=oc_enc_quantize_c;
   _enc->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c;
   _enc->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c;
   _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_c;
+# endif
+  _enc->opt_data.enquant_table_size=64*sizeof(oc_iquant);
+  _enc->opt_data.enquant_table_alignment=16;
 }
 
 /*Initialize the macro block neighbor lists for MC analysis.
@@ -1153,6 +1152,7 @@
   /*Initialize the shared encoder/decoder state.*/
   ret=oc_state_init(&_enc->state,&info,6);
   if(ret<0)return ret;
+  oc_enc_accel_init(_enc);
   _enc->mb_info=_ogg_calloc(_enc->state.nmbs,sizeof(*_enc->mb_info));
   _enc->frag_dc=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_dc));
   _enc->coded_mbis=
@@ -1181,11 +1181,6 @@
   _enc->frag_satd=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_satd));
   _enc->frag_ssd=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_ssd));
 #endif
-#if defined(OC_X86_ASM)
-  oc_enc_vtable_init_x86(_enc);
-#else
-  oc_enc_vtable_init_c(_enc);
-#endif
   _enc->enquant_table_data=(unsigned char *)_ogg_malloc(
    (64+3)*3*2*_enc->opt_data.enquant_table_size
    +_enc->opt_data.enquant_table_alignment-1);

Modified: experimental/derf/theora-ptalarbvorm/lib/internal.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/internal.c	2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/internal.c	2010-09-02 20:17:34 UTC (rev 17378)
@@ -97,80 +97,6 @@
 
 
 
-/*The function used to fill in the chroma plane motion vectors for a macro
-   block when 4 different motion vectors are specified in the luma plane.
-  This version is for use with chroma decimated in the X and Y directions
-   (4:2:0).
-  _cbmvs: The chroma block-level motion vectors to fill in.
-  _lbmvs: The luma block-level motion vectors.*/
-static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
-  int dx;
-  int dy;
-  dx=_lbmvs[0][0]+_lbmvs[1][0]+_lbmvs[2][0]+_lbmvs[3][0];
-  dy=_lbmvs[0][1]+_lbmvs[1][1]+_lbmvs[2][1]+_lbmvs[3][1];
-  _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,2,2);
-  _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,2,2);
-}
-
-/*The function used to fill in the chroma plane motion vectors for a macro
-   block when 4 different motion vectors are specified in the luma plane.
-  This version is for use with chroma decimated in the Y direction.
-  _cbmvs: The chroma block-level motion vectors to fill in.
-  _lbmvs: The luma block-level motion vectors.*/
-static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
-  int dx;
-  int dy;
-  dx=_lbmvs[0][0]+_lbmvs[2][0];
-  dy=_lbmvs[0][1]+_lbmvs[2][1];
-  _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
-  _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
-  dx=_lbmvs[1][0]+_lbmvs[3][0];
-  dy=_lbmvs[1][1]+_lbmvs[3][1];
-  _cbmvs[1][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
-  _cbmvs[1][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
-}
-
-/*The function used to fill in the chroma plane motion vectors for a macro
-   block when 4 different motion vectors are specified in the luma plane.
-  This version is for use with chroma decimated in the X direction (4:2:2).
-  _cbmvs: The chroma block-level motion vectors to fill in.
-  _lbmvs: The luma block-level motion vectors.*/
-static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
-  int dx;
-  int dy;
-  dx=_lbmvs[0][0]+_lbmvs[1][0];
-  dy=_lbmvs[0][1]+_lbmvs[1][1];
-  _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
-  _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
-  dx=_lbmvs[2][0]+_lbmvs[3][0];
-  dy=_lbmvs[2][1]+_lbmvs[3][1];
-  _cbmvs[2][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
-  _cbmvs[2][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
-}
-
-/*The function used to fill in the chroma plane motion vectors for a macro
-   block when 4 different motion vectors are specified in the luma plane.
-  This version is for use with no chroma decimation (4:4:4).
-  _cbmvs: The chroma block-level motion vectors to fill in.
-  _lmbmv: The luma macro-block level motion vector to fill in for use in
-           prediction.
-  _lbmvs: The luma block-level motion vectors.*/
-static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
-  memcpy(_cbmvs,_lbmvs,4*sizeof(_lbmvs[0]));
-}
-
-/*A table of functions used to fill in the chroma plane motion vectors for a
-   macro block when 4 different motion vectors are specified in the luma
-   plane.*/
-const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={
-  (oc_set_chroma_mvs_func)oc_set_chroma_mvs00,
-  (oc_set_chroma_mvs_func)oc_set_chroma_mvs01,
-  (oc_set_chroma_mvs_func)oc_set_chroma_mvs10,
-  (oc_set_chroma_mvs_func)oc_set_chroma_mvs11
-};
-
-
-
 void *oc_aligned_malloc(size_t _sz,size_t _align){
   unsigned char *p;
   if(_align>UCHAR_MAX||(_align&_align-1)||_sz>~(size_t)0-_align)return NULL;

Modified: experimental/derf/theora-ptalarbvorm/lib/internal.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/internal.h	2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/internal.h	2010-09-02 20:17:34 UTC (rev 17378)
@@ -23,7 +23,10 @@
 # endif
 # include "theora/codec.h"
 # include "theora/theora.h"
+# include "ocintrin.h"
 
+#define OC_DUMP_IMAGES (1)
+
 # if defined(_MSC_VER)
 /*Disable missing EMMS warnings.*/
 #  pragma warning(disable:4799)
@@ -37,10 +40,6 @@
 #  endif
 # endif
 
-# include "ocintrin.h"
-# include "huffman.h"
-# include "quant.h"
-
 /*Some assembly constructs require aligned operands.*/
 # if defined(OC_X86_ASM)
 #  if defined(__GNUC__)
@@ -60,17 +59,6 @@
 
 
 
-typedef struct oc_sb_flags              oc_sb_flags;
-typedef struct oc_border_info           oc_border_info;
-typedef struct oc_fragment              oc_fragment;
-typedef struct oc_fragment_plane        oc_fragment_plane;
-typedef struct oc_base_opt_vtable       oc_base_opt_vtable;
-typedef struct oc_base_opt_data         oc_base_opt_data;
-typedef struct oc_state_dispatch_vtable oc_state_dispatch_vtable;
-typedef struct oc_theora_state          oc_theora_state;
-
-
-
 /*This library's version.*/
 # define OC_VENDOR_STRING "Xiph.Org libtheora 1.1+ 20100314 (Ptalarbvorm)"
 
@@ -83,321 +71,8 @@
  ((_info)->version_minor>(_min)||(_info)->version_minor==(_min)&& \
  (_info)->version_subminor>=(_sub)))
 
-/*A keyframe.*/
-# define OC_INTRA_FRAME (0)
-/*A predicted frame.*/
-# define OC_INTER_FRAME (1)
-/*A frame of unknown type (frame type decision has not yet been made).*/
-# define OC_UNKWN_FRAME (-1)
 
-/*The amount of padding to add to the reconstructed frame buffers on all
-   sides.
-  This is used to allow unrestricted motion vectors without special casing.
-  This must be a multiple of 2.*/
-# define OC_UMV_PADDING (16)
 
-/*Frame classification indices.*/
-/*The previous golden frame.*/
-# define OC_FRAME_GOLD      (0)
-/*The previous frame.*/
-# define OC_FRAME_PREV      (1)
-/*The current frame.*/
-# define OC_FRAME_SELF      (2)
-
-/*The input or output buffer.*/
-# define OC_FRAME_IO        (3)
-/*Uncompressed prev golden frame.*/
-# define OC_FRAME_GOLD_ORIG (4)
-/*Uncompressed previous frame. */
-# define OC_FRAME_PREV_ORIG (5)
-
-/*Macroblock modes.*/
-/*Macro block is invalid: It is never coded.*/
-# define OC_MODE_INVALID        (-1)
-/*Encoded difference from the same macro block in the previous frame.*/
-# define OC_MODE_INTER_NOMV     (0)
-/*Encoded with no motion compensated prediction.*/
-# define OC_MODE_INTRA          (1)
-/*Encoded difference from the previous frame offset by the given motion
-   vector.*/
-# define OC_MODE_INTER_MV       (2)
-/*Encoded difference from the previous frame offset by the last coded motion
-   vector.*/
-# define OC_MODE_INTER_MV_LAST  (3)
-/*Encoded difference from the previous frame offset by the second to last
-   coded motion vector.*/
-# define OC_MODE_INTER_MV_LAST2 (4)
-/*Encoded difference from the same macro block in the previous golden
-   frame.*/
-# define OC_MODE_GOLDEN_NOMV    (5)
-/*Encoded difference from the previous golden frame offset by the given motion
-   vector.*/
-# define OC_MODE_GOLDEN_MV      (6)
-/*Encoded difference from the previous frame offset by the individual motion
-   vectors given for each block.*/
-# define OC_MODE_INTER_MV_FOUR  (7)
-/*The number of (coded) modes.*/
-# define OC_NMODES              (8)
-
-/*Determines the reference frame used for a given MB mode.*/
-# define OC_FRAME_FOR_MODE(_x) \
- OC_UNIBBLE_TABLE32(OC_FRAME_PREV,OC_FRAME_SELF,OC_FRAME_PREV,OC_FRAME_PREV, \
-  OC_FRAME_PREV,OC_FRAME_GOLD,OC_FRAME_GOLD,OC_FRAME_PREV,(_x))
-
-/*Constants for the packet state machine common between encoder and decoder.*/
-
-/*Next packet to emit/read: Codec info header.*/
-# define OC_PACKET_INFO_HDR    (-3)
-/*Next packet to emit/read: Comment header.*/
-# define OC_PACKET_COMMENT_HDR (-2)
-/*Next packet to emit/read: Codec setup header.*/
-# define OC_PACKET_SETUP_HDR   (-1)
-/*No more packets to emit/read.*/
-# define OC_PACKET_DONE        (INT_MAX)
-
-
-
-/*Super blocks are 32x32 segments of pixels in a single color plane indexed
-   in image order.
-  Internally, super blocks are broken up into four quadrants, each of which
-   contains a 2x2 pattern of blocks, each of which is an 8x8 block of pixels.
-  Quadrants, and the blocks within them, are indexed in a special order called
-   a "Hilbert curve" within the super block.
-
-  In order to differentiate between the Hilbert-curve indexing strategy and
-   the regular image order indexing strategy, blocks indexed in image order
-   are called "fragments".
-  Fragments are indexed in image order, left to right, then bottom to top,
-   from Y' plane to Cb plane to Cr plane.
-
-  The co-located fragments in all image planes corresponding to the location
-   of a single quadrant of a luma plane super block form a macro block.
-  Thus there is only a single set of macro blocks for all planes, each of which
-   contains between 6 and 12 fragments, depending on the pixel format.
-  Therefore macro block information is kept in a separate set of arrays from
-   super blocks to avoid unused space in the other planes.
-  The lists are indexed in super block order.
-  That is, the macro block corresponding to the macro block mbi in (luma plane)
-   super block sbi is at index (sbi<<2|mbi).
-  Thus the number of macro blocks in each dimension is always twice the number
-   of super blocks, even when only an odd number fall inside the coded frame.
-  These "extra" macro blocks are just an artifact of our internal data layout,
-   and not part of the coded stream; they are flagged with a negative MB mode.*/
-
-
-
-/*A single quadrant of the map from a super block to fragment numbers.*/
-typedef ptrdiff_t       oc_sb_map_quad[4];
-/*A map from a super block to fragment numbers.*/
-typedef oc_sb_map_quad  oc_sb_map[4];
-/*A single plane of the map from a macro block to fragment numbers.*/
-typedef ptrdiff_t       oc_mb_map_plane[4];
-/*A map from a macro block to fragment numbers.*/
-typedef oc_mb_map_plane oc_mb_map[3];
-/*A motion vector.*/
-typedef signed char     oc_mv[2];
-
-
-
-/*Super block information.*/
-struct oc_sb_flags{
-  unsigned char coded_fully:1;
-  unsigned char coded_partially:1;
-  unsigned char quad_valid:4;
-};
-
-
-
-/*Information about a fragment which intersects the border of the displayable
-   region.
-  This marks which pixels belong to the displayable region.*/
-struct oc_border_info{
-  /*A bit mask marking which pixels are in the displayable region.
-    Pixel (x,y) corresponds to bit (y<<3|x).*/
-  ogg_int64_t mask;
-  /*The number of pixels in the displayable region.
-    This is always positive, and always less than 64.*/
-  int         npixels;
-};
-
-
-
-/*Fragment information.*/
-struct oc_fragment{
-  /*A flag indicating whether or not this fragment is coded.*/
-  unsigned   coded:1;
-  /*A flag indicating that this entire fragment lies outside the displayable
-     region of the frame.
-    Note the contrast with an invalid macro block, which is outside the coded
-     frame, not just the displayable one.
-    There are no fragments outside the coded frame by construction.*/
-  unsigned   invalid:1;
-  /*The index of the quality index used for this fragment's AC coefficients.*/
-  unsigned   qii:6;
-  /*The mode of the macroblock this fragment belongs to.*/
-  unsigned   mb_mode:3;
-  /*The index of the associated border information for fragments which lie
-     partially outside the displayable region.
-    For fragments completely inside or outside this region, this is -1.
-    Note that the C standard requires an explicit signed keyword for bitfield
-     types, since some compilers may treat them as unsigned without it.*/
-  signed int borderi:5;
-  /*The prediction-corrected DC component.
-    Note that the C standard requires an explicit signed keyword for bitfield
-     types, since some compilers may treat them as unsigned without it.*/
-  signed int dc:16;
-};
-
-
-
-/*A description of each fragment plane.*/
-struct oc_fragment_plane{
-  /*The number of fragments in the horizontal direction.*/
-  int       nhfrags;
-  /*The number of fragments in the vertical direction.*/
-  int       nvfrags;
-  /*The offset of the first fragment in the plane.*/
-  ptrdiff_t froffset;
-  /*The total number of fragments in the plane.*/
-  ptrdiff_t nfrags;
-  /*The number of super blocks in the horizontal direction.*/
-  unsigned  nhsbs;
-  /*The number of super blocks in the vertical direction.*/
-  unsigned  nvsbs;
-  /*The offset of the first super block in the plane.*/
-  unsigned  sboffset;
-  /*The total number of super blocks in the plane.*/
-  unsigned  nsbs;
-};
-
-
-
-/*The shared (encoder and decoder) functions that have accelerated variants.*/
-struct oc_base_opt_vtable{
-  void (*frag_copy)(unsigned char *_dst,
-   const unsigned char *_src,int _ystride);
-  void (*frag_recon_intra)(unsigned char *_dst,int _ystride,
-   const ogg_int16_t _residue[64]);
-  void (*frag_recon_inter)(unsigned char *_dst,
-   const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
-  void (*frag_recon_inter2)(unsigned char *_dst,const unsigned char *_src1,
-   const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]);
-  void (*idct8x8)(ogg_int16_t _y[64],int _last_zzi);
-  void (*state_frag_recon)(const oc_theora_state *_state,ptrdiff_t _fragi,
-   int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
-  void (*state_frag_copy_list)(const oc_theora_state *_state,
-   const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
-   int _dst_frame,int _src_frame,int _pli);
-  void (*state_loop_filter_frag_rows)(const oc_theora_state *_state,
-   int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
-  void (*restore_fpu)(void);
-};
-
-/*The shared (encoder and decoder) tables that vary according to which variants
-   of the above functions are used.*/
-struct oc_base_opt_data{
-  const unsigned char *dct_fzig_zag;
-};
-
-
-/*State information common to both the encoder and decoder.*/
-struct oc_theora_state{
-  /*The stream information.*/
-  th_info             info;
-  /*Table for shared accelerated functions.*/
-  oc_base_opt_vtable  opt_vtable;
-  /*Table for shared data used by accelerated functions.*/
-  oc_base_opt_data    opt_data;
-  /*CPU flags to detect the presence of extended instruction sets.*/
-  ogg_uint32_t        cpu_flags;
-  /*The fragment plane descriptions.*/
-  oc_fragment_plane   fplanes[3];
-  /*The list of fragments, indexed in image order.*/
-  oc_fragment        *frags;
-  /*The the offset into the reference frame buffer to the upper-left pixel of
-     each fragment.*/
-  ptrdiff_t          *frag_buf_offs;
-  /*The motion vector for each fragment.*/
-  oc_mv              *frag_mvs;
-  /*The total number of fragments in a single frame.*/
-  ptrdiff_t           nfrags;
-  /*The list of super block maps, indexed in image order.*/
-  oc_sb_map          *sb_maps;
-  /*The list of super block flags, indexed in image order.*/
-  oc_sb_flags        *sb_flags;
-  /*The total number of super blocks in a single frame.*/
-  unsigned            nsbs;
-  /*The fragments from each color plane that belong to each macro block.
-    Fragments are stored in image order (left to right then top to bottom).
-    When chroma components are decimated, the extra fragments have an index of
-     -1.*/
-  oc_mb_map          *mb_maps;
-  /*The list of macro block modes.
-    A negative number indicates the macro block lies entirely outside the
-     coded frame.*/
-  signed char        *mb_modes;
-  /*The number of macro blocks in the X direction.*/
-  unsigned            nhmbs;
-  /*The number of macro blocks in the Y direction.*/
-  unsigned            nvmbs;
-  /*The total number of macro blocks.*/
-  size_t              nmbs;
-  /*The list of coded fragments, in coded order.
-    Uncoded fragments are stored in reverse order from the end of the list.*/
-  ptrdiff_t          *coded_fragis;
-  /*The number of coded fragments in each plane.*/
-  ptrdiff_t           ncoded_fragis[3];
-  /*The total number of coded fragments.*/
-  ptrdiff_t           ntotal_coded_fragis;
-  /*The index of the buffers being used for each OC_FRAME_* reference frame.*/
-  int                 ref_frame_idx[6];
-  /*The actual buffers used for the reference frames.*/
-  th_ycbcr_buffer     ref_frame_bufs[6];
-  /*The storage for the reference frame buffers.*/
-  unsigned char      *ref_frame_data[6];
-  /*The strides for each plane in the reference frames.*/
-  int                 ref_ystride[3];
-  /*The number of unique border patterns.*/
-  int                 nborders;
-  /*The unique border patterns for all border fragments.
-    The borderi field of fragments which straddle the border indexes this
-     list.*/
-  oc_border_info      borders[16];
-  /*The frame number of the last keyframe.*/
-  ogg_int64_t         keyframe_num;
-  /*The frame number of the current frame.*/
-  ogg_int64_t         curframe_num;
-  /*The granpos of the current frame.*/
-  ogg_int64_t         granpos;
-  /*The type of the current frame.*/
-  signed char         frame_type;
-  /*The bias to add to the frame count when computing granule positions.*/
-  unsigned char       granpos_bias;
-  /*The number of quality indices used in the current frame.*/
-  unsigned char       nqis;
-  /*The quality indices of the current frame.*/
-  unsigned char       qis[3];
-  /*The dequantization tables, stored in zig-zag order, and indexed by
-     qi, pli, qti, and zzi.*/
-  ogg_uint16_t       *dequant_tables[64][3][2];
-  OC_ALIGN16(oc_quant_table      dequant_table_data[64][3][2]);
-  /*Loop filter strength parameters.*/
-  unsigned char       loop_filter_limits[64];
-};
-
-
-
-/*The function type used to fill in the chroma plane motion vectors for a
-   macro block when 4 different motion vectors are specified in the luma
-   plane.
-  _cbmvs: The chroma block-level motion vectors to fill in.
-  _lmbmv: The luma macro-block level motion vector to fill in for use in
-           prediction.
-  _lbmvs: The luma block-level motion vectors.*/
-typedef void (*oc_set_chroma_mvs_func)(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]);
-
-
-
 /*A map from the index in the zig zag scan to the coefficient number in a
    block.*/
 extern const unsigned char OC_FZIG_ZAG[128];
@@ -413,10 +88,6 @@
 /*The number of indices in the oc_mb_map array that can be valid for each of
    the various chroma decimation types.*/
 extern const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS];
-/*A table of functions used to fill in the Cb,Cr plane motion vectors for a
-   macro block when 4 different motion vectors are specified in the luma
-   plane.*/
-extern const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS];
 
 
 
@@ -430,111 +101,4 @@
 void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst,
  const th_ycbcr_buffer _src);
 
-int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs);
-void oc_state_clear(oc_theora_state *_state);
-void oc_state_vtable_init_c(oc_theora_state *_state);
-void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
- int _y0,int _yend);
-void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli);
-void oc_state_borders_fill(oc_theora_state *_state,int _refi);
-void oc_state_fill_buffer_ptrs(oc_theora_state *_state,int _buf_idx,
- th_ycbcr_buffer _img);
-int oc_state_mbi_for_pos(oc_theora_state *_state,int _mbx,int _mby);
-int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
- int _pli,int _dx,int _dy);
-
-int oc_state_loop_filter_init(oc_theora_state *_state,int *_bv);
-void oc_state_loop_filter(oc_theora_state *_state,int _frame);
-# if defined(OC_DUMP_IMAGES)
-int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
- const char *_suf);
-# endif
-
-/*Shared accelerated functions.*/
-# if !defined(oc_frag_copy)
-#  define oc_frag_copy(_state,_dst,_src,_ystride) \
-  ((*(_state)->opt_vtable.frag_copy)(_dst,_src,_ystride))
-# endif
-# if !defined(oc_frag_recon_intra)
-#  define oc_frag_recon_intra(_state,_dst,_dst_ystride,_residue) \
-  ((*(_state)->opt_vtable.frag_recon_intra)(_dst,_dst_ystride,_residue))
-# endif
-# if !defined(oc_frag_recon_inter)
-#  define oc_frag_recon_inter(_state,_dst,_src,_ystride,_residue) \
-  ((*(_state)->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue))
-# endif
-# if !defined(oc_frag_recon_inter2)
-#  define oc_frag_recon_inter2(_state,_dst,_src1,_src2,_ystride,_residue) \
-  ((*(_state)->opt_vtable.frag_recon_inter2)(_dst, \
-   _src1,_src2,_ystride,_residue))
-# endif
-# if !defined(oc_idct8x8)
-#  define oc_idct8x8(_state,_y,_last_zzi) \
-  ((*(_state)->opt_vtable.idct8x8)(_y,_last_zzi))
-# endif
-# if !defined(oc_state_frag_recon)
-#  define oc_state_frag_recon(_state,_fragi, \
- _pli,_dct_coeffs,_last_zzi,_dc_quant) \
-  ((*(_state)->opt_vtable.state_frag_recon)(_state,_fragi, \
-   _pli,_dct_coeffs,_last_zzi,_dc_quant))
-# endif
-# if !defined(oc_state_frag_copy_list)
-#  define oc_state_frag_copy_list(_state,_fragis,_nfragis, \
- _dst_frame,_src_frame,_pli) \
-  ((*(_state)->opt_vtable.state_frag_copy_list)(_state,_fragis,_nfragis, \
-   _dst_frame,_src_frame,_pli))
-# endif
-# if !defined(oc_state_loop_filter_frag_rows)
-#  define oc_state_loop_filter_frag_rows(_state, \
- _bv,_refi,_pli,_fragy0,_fragy_end) \
-  ((*(_state)->opt_vtable.state_loop_filter_frag_rows)(_state, \
-   _bv,_refi,_pli,_fragy0,_fragy_end))
-# endif
-# if !defined(oc_restore_fpu)
-#  define oc_restore_fpu(_state) \
-  ((*(_state)->opt_vtable.restore_fpu)())
-# endif
-
-/*Default pure-C implementations.*/
-void oc_frag_copy_c(unsigned char *_dst,
- const unsigned char *_src,int _src_ystride);
-void oc_frag_recon_intra_c(unsigned char *_dst,int _dst_ystride,
- const ogg_int16_t _residue[64]);
-void oc_frag_recon_inter_c(unsigned char *_dst,
- const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
-void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1,
- const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]);
-void oc_idct8x8_c(ogg_int16_t _y[64],int _last_zzi);
-void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
- int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
-void oc_state_frag_copy_list_c(const oc_theora_state *_state,
- const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
- int _dst_frame,int _src_frame,int _pli);
-void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,
- int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
-void oc_restore_fpu_c(void);
-
-/*We need a way to call a few encoder functions without introducing a link-time
-   dependency into the decoder, while still allowing the old alpha API which
-   does not distinguish between encoder and decoder objects to be used.
-  We do this by placing a function table at the start of the encoder object
-   which can dispatch into the encoder library.
-  We do a similar thing for the decoder in case we ever decide to split off a
-   common base library.*/
-typedef void (*oc_state_clear_func)(theora_state *_th);
-typedef int (*oc_state_control_func)(theora_state *th,int _req,
- void *_buf,size_t _buf_sz);
-typedef ogg_int64_t (*oc_state_granule_frame_func)(theora_state *_th,
- ogg_int64_t _granulepos);
-typedef double (*oc_state_granule_time_func)(theora_state *_th,
- ogg_int64_t _granulepos);
-
-
-struct oc_state_dispatch_vtable{
-  oc_state_clear_func         clear;
-  oc_state_control_func       control;
-  oc_state_granule_frame_func granule_frame;
-  oc_state_granule_time_func  granule_time;
-};
-
 #endif

Modified: experimental/derf/theora-ptalarbvorm/lib/state.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/state.c	2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/state.c	2010-09-02 20:17:34 UTC (rev 17378)
@@ -17,22 +17,86 @@
 
 #include <stdlib.h>
 #include <string.h>
-#include "internal.h"
-#if defined(OC_X86_ASM)
-#if defined(_MSC_VER)
-# include "x86_vc/x86int.h"
-#else
-# include "x86/x86int.h"
-#endif
-#endif
-#if defined(OC_C64X_ASM)
-# include "c64x/c64xint.h"
-#endif
+#include "state.h"
 #if defined(OC_DUMP_IMAGES)
 # include <stdio.h>
 # include "png.h"
 #endif
 
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with chroma decimated in the X and Y directions
+   (4:2:0).
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  int dx;
+  int dy;
+  dx=_lbmvs[0][0]+_lbmvs[1][0]+_lbmvs[2][0]+_lbmvs[3][0];
+  dy=_lbmvs[0][1]+_lbmvs[1][1]+_lbmvs[2][1]+_lbmvs[3][1];
+  _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,2,2);
+  _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,2,2);
+}
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with chroma decimated in the Y direction.
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  int dx;
+  int dy;
+  dx=_lbmvs[0][0]+_lbmvs[2][0];
+  dy=_lbmvs[0][1]+_lbmvs[2][1];
+  _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+  _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+  dx=_lbmvs[1][0]+_lbmvs[3][0];
+  dy=_lbmvs[1][1]+_lbmvs[3][1];
+  _cbmvs[1][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+  _cbmvs[1][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+}
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with chroma decimated in the X direction (4:2:2).
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  int dx;
+  int dy;
+  dx=_lbmvs[0][0]+_lbmvs[1][0];
+  dy=_lbmvs[0][1]+_lbmvs[1][1];
+  _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+  _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+  dx=_lbmvs[2][0]+_lbmvs[3][0];
+  dy=_lbmvs[2][1]+_lbmvs[3][1];
+  _cbmvs[2][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+  _cbmvs[2][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+}
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with no chroma decimation (4:4:4).
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lbmvs: The luma block-level motion vectors.
+          All four are copied to _cbmvs unchanged, since no averaging is
+           needed without decimation.*/
+static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  memcpy(_cbmvs,_lbmvs,4*sizeof(_lbmvs[0]));
+}
+
+/*A table of functions used to fill in the chroma plane motion vectors for a
+   macro block when 4 different motion vectors are specified in the luma
+   plane.*/
+const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs00,
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs01,
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs10,
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs11
+};
+
+
+
 /*Returns the fragment index of the top-left block in a macro block.
   This can be used to test whether or not the whole macro block is valid.
   _sb_map: The super block map.
@@ -595,7 +659,9 @@
 }
 
 
-void oc_state_vtable_init_c(oc_theora_state *_state){
+void oc_state_accel_init_c(oc_theora_state *_state){
+  _state->cpu_flags=0;
+#if defined(OC_STATE_USE_VTABLE)
   _state->opt_vtable.frag_copy=oc_frag_copy_c;
   _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c;
   _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c;
@@ -606,21 +672,11 @@
   _state->opt_vtable.state_loop_filter_frag_rows=
    oc_state_loop_filter_frag_rows_c;
   _state->opt_vtable.restore_fpu=oc_restore_fpu_c;
+#endif
   _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG;
 }
 
-/*Initialize the accelerated function pointers.*/
-void oc_state_vtable_init(oc_theora_state *_state){
-#if defined(OC_X86_ASM)
-  oc_state_vtable_init_x86(_state);
-#elif defined(OC_C64X_ASM)
-  oc_state_vtable_init_c64x(_state);
-#else
-  oc_state_vtable_init_c(_state);
-#endif
-}
 
-
 int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){
   int ret;
   /*First validate the parameters.*/
@@ -655,7 +711,7 @@
      system.*/
   _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
   _state->frame_type=OC_UNKWN_FRAME;
-  oc_state_vtable_init(_state);
+  oc_state_accel_init(_state);
   ret=oc_state_frarray_init(_state);
   if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs);
   if(ret<0){

Added: experimental/derf/theora-ptalarbvorm/lib/state.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/state.h	                        (rev 0)
+++ experimental/derf/theora-ptalarbvorm/lib/state.h	2010-09-02 20:17:34 UTC (rev 17378)
@@ -0,0 +1,515 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: internal.h 17337 2010-07-19 16:08:54Z tterribe $
+
+ ********************************************************************/
+#if !defined(_state_H)
+# define _state_H (1)
+# include "internal.h"
+# include "huffman.h"
+# include "quant.h"
+
+
+
+/*A single quadrant of the map from a super block to fragment numbers.*/
+typedef ptrdiff_t       oc_sb_map_quad[4];
+/*A map from a super block to fragment numbers.*/
+typedef oc_sb_map_quad  oc_sb_map[4];
+/*A single plane of the map from a macro block to fragment numbers.*/
+typedef ptrdiff_t       oc_mb_map_plane[4];
+/*A map from a macro block to fragment numbers.*/
+typedef oc_mb_map_plane oc_mb_map[3];
+/*A motion vector.*/
+typedef signed char     oc_mv[2];
+
+typedef struct oc_sb_flags              oc_sb_flags;
+typedef struct oc_border_info           oc_border_info;
+typedef struct oc_fragment              oc_fragment;
+typedef struct oc_fragment_plane        oc_fragment_plane;
+typedef struct oc_base_opt_vtable       oc_base_opt_vtable;
+typedef struct oc_base_opt_data         oc_base_opt_data;
+typedef struct oc_state_dispatch_vtable oc_state_dispatch_vtable;
+typedef struct oc_theora_state          oc_theora_state;
+
+
+
+/*Shared accelerated functions.*/
+# if defined(OC_X86_ASM)
+#  if defined(_MSC_VER)
+#   include "x86_vc/x86int.h"
+#  else
+#   include "x86/x86int.h"
+#  endif
+# endif
+# if defined(OC_C64X_ASM)
+#  include "c64x/c64xint.h"
+# endif
+
+# if !defined(oc_state_accel_init)
+#  define oc_state_accel_init oc_state_accel_init_c
+# endif
+# if defined(OC_STATE_USE_VTABLE)
+#  if !defined(oc_frag_copy)
+#   define oc_frag_copy(_state,_dst,_src,_ystride) \
+  ((*(_state)->opt_vtable.frag_copy)(_dst,_src,_ystride))
+#  endif
+#  if !defined(oc_frag_recon_intra)
+#   define oc_frag_recon_intra(_state,_dst,_dst_ystride,_residue) \
+  ((*(_state)->opt_vtable.frag_recon_intra)(_dst,_dst_ystride,_residue))
+#  endif
+#  if !defined(oc_frag_recon_inter)
+#   define oc_frag_recon_inter(_state,_dst,_src,_ystride,_residue) \
+  ((*(_state)->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue))
+#  endif
+#  if !defined(oc_frag_recon_inter2)
+#   define oc_frag_recon_inter2(_state,_dst,_src1,_src2,_ystride,_residue) \
+  ((*(_state)->opt_vtable.frag_recon_inter2)(_dst, \
+   _src1,_src2,_ystride,_residue))
+#  endif
+# if !defined(oc_idct8x8)
+#   define oc_idct8x8(_state,_y,_last_zzi) \
+  ((*(_state)->opt_vtable.idct8x8)(_y,_last_zzi))
+#  endif
+#  if !defined(oc_state_frag_recon)
+#   define oc_state_frag_recon(_state,_fragi, \
+ _pli,_dct_coeffs,_last_zzi,_dc_quant) \
+  ((*(_state)->opt_vtable.state_frag_recon)(_state,_fragi, \
+   _pli,_dct_coeffs,_last_zzi,_dc_quant))
+#  endif
+#  if !defined(oc_state_frag_copy_list)
+#   define oc_state_frag_copy_list(_state,_fragis,_nfragis, \
+ _dst_frame,_src_frame,_pli) \
+ ((*(_state)->opt_vtable.state_frag_copy_list)(_state,_fragis,_nfragis, \
+   _dst_frame,_src_frame,_pli))
+#  endif
+#  if !defined(oc_state_loop_filter_frag_rows)
+#   define oc_state_loop_filter_frag_rows(_state, \
+ _bv,_refi,_pli,_fragy0,_fragy_end) \
+  ((*(_state)->opt_vtable.state_loop_filter_frag_rows)(_state, \
+   _bv,_refi,_pli,_fragy0,_fragy_end))
+#  endif
+#  if !defined(oc_restore_fpu)
+#   define oc_restore_fpu(_state) \
+  ((*(_state)->opt_vtable.restore_fpu)())
+#  endif
+# else
+#  if !defined(oc_frag_copy)
+#   define oc_frag_copy(_state,_dst,_src,_ystride) \
+  oc_frag_copy_c(_dst,_src,_ystride)
+#  endif
+#  if !defined(oc_frag_recon_intra)
+#   define oc_frag_recon_intra(_state,_dst,_dst_ystride,_residue) \
+  oc_frag_recon_intra_c(_dst,_dst_ystride,_residue)
+#  endif
+#  if !defined(oc_frag_recon_inter)
+#   define oc_frag_recon_inter(_state,_dst,_src,_ystride,_residue) \
+  oc_frag_recon_inter_c(_dst,_src,_ystride,_residue)
+#  endif
+#  if !defined(oc_frag_recon_inter2)
+#   define oc_frag_recon_inter2(_state,_dst,_src1,_src2,_ystride,_residue) \
+  oc_frag_recon_inter2_c(_dst,_src1,_src2,_ystride,_residue)
+#  endif
+#  if !defined(oc_idct8x8)
+#   define oc_idct8x8(_state,_y,_last_zzi) oc_idct8x8_c(_y,_last_zzi)
+#  endif
+#  if !defined(oc_state_frag_recon)
+#   define oc_state_frag_recon oc_state_frag_recon_c
+#  endif
+#  if !defined(oc_state_frag_copy_list)
+#   define oc_state_frag_copy_list oc_state_frag_copy_list_c
+#  endif
+#  if !defined(oc_state_loop_filter_frag_rows)
+#   define oc_state_loop_filter_frag_rows oc_state_loop_filter_frag_rows_c
+#  endif
+#  if !defined(oc_restore_fpu)
+#   define oc_restore_fpu(_state) do{}while(0)
+#  endif
+# endif
+
+
+
+/*A keyframe.*/
+# define OC_INTRA_FRAME (0)
+/*A predicted frame.*/
+# define OC_INTER_FRAME (1)
+/*A frame of unknown type (frame type decision has not yet been made).*/
+# define OC_UNKWN_FRAME (-1)
+
+/*The amount of padding to add to the reconstructed frame buffers on all
+   sides.
+  This is used to allow unrestricted motion vectors without special casing.
+  This must be a multiple of 2.*/
+# define OC_UMV_PADDING (16)
+
+/*Frame classification indices.*/
+/*The previous golden frame.*/
+# define OC_FRAME_GOLD      (0)
+/*The previous frame.*/
+# define OC_FRAME_PREV      (1)
+/*The current frame.*/
+# define OC_FRAME_SELF      (2)
+
+/*The input or output buffer.*/
+# define OC_FRAME_IO        (3)
+/*Uncompressed prev golden frame.*/
+# define OC_FRAME_GOLD_ORIG (4)
+/*Uncompressed previous frame. */
+# define OC_FRAME_PREV_ORIG (5)
+
+/*Macroblock modes.*/
+/*Macro block is invalid: It is never coded.*/
+# define OC_MODE_INVALID        (-1)
+/*Encoded difference from the same macro block in the previous frame.*/
+# define OC_MODE_INTER_NOMV     (0)
+/*Encoded with no motion compensated prediction.*/
+# define OC_MODE_INTRA          (1)
+/*Encoded difference from the previous frame offset by the given motion
+   vector.*/
+# define OC_MODE_INTER_MV       (2)
+/*Encoded difference from the previous frame offset by the last coded motion
+   vector.*/
+# define OC_MODE_INTER_MV_LAST  (3)
+/*Encoded difference from the previous frame offset by the second to last
+   coded motion vector.*/
+# define OC_MODE_INTER_MV_LAST2 (4)
+/*Encoded difference from the same macro block in the previous golden
+   frame.*/
+# define OC_MODE_GOLDEN_NOMV    (5)
+/*Encoded difference from the previous golden frame offset by the given motion
+   vector.*/
+# define OC_MODE_GOLDEN_MV      (6)
+/*Encoded difference from the previous frame offset by the individual motion
+   vectors given for each block.*/
+# define OC_MODE_INTER_MV_FOUR  (7)
+/*The number of (coded) modes.*/
+# define OC_NMODES              (8)
+
+/*Determines the reference frame used for a given MB mode.*/
+# define OC_FRAME_FOR_MODE(_x) \
+ OC_UNIBBLE_TABLE32(OC_FRAME_PREV,OC_FRAME_SELF,OC_FRAME_PREV,OC_FRAME_PREV, \
+  OC_FRAME_PREV,OC_FRAME_GOLD,OC_FRAME_GOLD,OC_FRAME_PREV,(_x))
+
+/*Constants for the packet state machine common between encoder and decoder.*/
+
+/*Next packet to emit/read: Codec info header.*/
+# define OC_PACKET_INFO_HDR    (-3)
+/*Next packet to emit/read: Comment header.*/
+# define OC_PACKET_COMMENT_HDR (-2)
+/*Next packet to emit/read: Codec setup header.*/
+# define OC_PACKET_SETUP_HDR   (-1)
+/*No more packets to emit/read.*/
+# define OC_PACKET_DONE        (INT_MAX)
+
+
+
+/*Super blocks are 32x32 segments of pixels in a single color plane indexed
+   in image order.
+  Internally, super blocks are broken up into four quadrants, each of which
+   contains a 2x2 pattern of blocks, each of which is an 8x8 block of pixels.
+  Quadrants, and the blocks within them, are indexed in a special order called
+   a "Hilbert curve" within the super block.
+
+  In order to differentiate between the Hilbert-curve indexing strategy and
+   the regular image order indexing strategy, blocks indexed in image order
+   are called "fragments".
+  Fragments are indexed in image order, left to right, then bottom to top,
+   from Y' plane to Cb plane to Cr plane.
+
+  The co-located fragments in all image planes corresponding to the location
+   of a single quadrant of a luma plane super block form a macro block.
+  Thus there is only a single set of macro blocks for all planes, each of which
+   contains between 6 and 12 fragments, depending on the pixel format.
+  Therefore macro block information is kept in a separate set of arrays from
+   super blocks to avoid unused space in the other planes.
+  The lists are indexed in super block order.
+  That is, the macro block corresponding to quadrant mbi of (luma plane) super
+   block sbi is at index (sbi<<2|mbi).
+  Thus the number of macro blocks in each dimension is always twice the number
+   of super blocks, even when only an odd number fall inside the coded frame.
+  These "extra" macro blocks are just an artifact of our internal data layout,
+   and not part of the coded stream; they are flagged with a negative MB mode.*/
+
+
+
+/*Super block information.*/
+struct oc_sb_flags{
+  unsigned char coded_fully:1;
+  unsigned char coded_partially:1;
+  unsigned char quad_valid:4;
+};
+
+
+
+/*Information about a fragment which intersects the border of the displayable
+   region.
+  This marks which pixels belong to the displayable region.*/
+struct oc_border_info{
+  /*A bit mask marking which pixels are in the displayable region.
+    Pixel (x,y) corresponds to bit (y<<3|x).*/
+  ogg_int64_t mask;
+  /*The number of pixels in the displayable region.
+    This is always positive, and always less than 64.*/
+  int         npixels;
+};
+
+
+
+/*Fragment information.*/
+struct oc_fragment{
+  /*A flag indicating whether or not this fragment is coded.*/
+  unsigned   coded:1;
+  /*A flag indicating that this entire fragment lies outside the displayable
+     region of the frame.
+    Note the contrast with an invalid macro block, which is outside the coded
+     frame, not just the displayable one.
+    There are no fragments outside the coded frame by construction.*/
+  unsigned   invalid:1;
+  /*The index of the quality index used for this fragment's AC coefficients.*/
+  unsigned   qii:6;
+  /*The mode of the macroblock this fragment belongs to.*/
+  unsigned   mb_mode:3;
+  /*The index of the associated border information for fragments which lie
+     partially outside the displayable region.
+    For fragments completely inside or outside this region, this is -1.
+    Note that the signedness of a plain int bitfield is implementation-defined
+     in C, so the explicit signed keyword is required to keep this signed.*/
+  signed int borderi:5;
+  /*The prediction-corrected DC component.
+    Note that the signedness of a plain int bitfield is implementation-defined
+     in C, so the explicit signed keyword is required to keep this signed.*/
+  signed int dc:16;
+};
+
+
+
+/*A description of each fragment plane.*/
+struct oc_fragment_plane{
+  /*The number of fragments in the horizontal direction.*/
+  int       nhfrags;
+  /*The number of fragments in the vertical direction.*/
+  int       nvfrags;
+  /*The offset of the first fragment in the plane.*/
+  ptrdiff_t froffset;
+  /*The total number of fragments in the plane.*/
+  ptrdiff_t nfrags;
+  /*The number of super blocks in the horizontal direction.*/
+  unsigned  nhsbs;
+  /*The number of super blocks in the vertical direction.*/
+  unsigned  nvsbs;
+  /*The offset of the first super block in the plane.*/
+  unsigned  sboffset;
+  /*The total number of super blocks in the plane.*/
+  unsigned  nsbs;
+};
+
+
+
+/*The shared (encoder and decoder) functions that have accelerated variants.*/
+struct oc_base_opt_vtable{
+  void (*frag_copy)(unsigned char *_dst,
+   const unsigned char *_src,int _ystride);
+  void (*frag_recon_intra)(unsigned char *_dst,int _ystride,
+   const ogg_int16_t _residue[64]);
+  void (*frag_recon_inter)(unsigned char *_dst,
+   const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
+  void (*frag_recon_inter2)(unsigned char *_dst,const unsigned char *_src1,
+   const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]);
+  void (*idct8x8)(ogg_int16_t _y[64],int _last_zzi);
+  void (*state_frag_recon)(const oc_theora_state *_state,ptrdiff_t _fragi,
+   int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
+  void (*state_frag_copy_list)(const oc_theora_state *_state,
+   const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+   int _dst_frame,int _src_frame,int _pli);
+  void (*state_loop_filter_frag_rows)(const oc_theora_state *_state,
+   int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
+  void (*restore_fpu)(void);
+};
+
+/*The shared (encoder and decoder) tables that vary according to which variants
+   of the above functions are used.*/
+struct oc_base_opt_data{
+  const unsigned char *dct_fzig_zag;
+};
+
+
+/*State information common to both the encoder and decoder.*/
+struct oc_theora_state{
+  /*The stream information.*/
+  th_info             info;
+# if defined(OC_STATE_USE_VTABLE)
+  /*Table for shared accelerated functions.*/
+  oc_base_opt_vtable  opt_vtable;
+# endif
+  /*Table for shared data used by accelerated functions.*/
+  oc_base_opt_data    opt_data;
+  /*CPU flags to detect the presence of extended instruction sets.*/
+  ogg_uint32_t        cpu_flags;
+  /*The fragment plane descriptions.*/
+  oc_fragment_plane   fplanes[3];
+  /*The list of fragments, indexed in image order.*/
+  oc_fragment        *frags;
+  /*The offset into the reference frame buffer to the upper-left pixel of
+     each fragment.*/
+  ptrdiff_t          *frag_buf_offs;
+  /*The motion vector for each fragment.*/
+  oc_mv              *frag_mvs;
+  /*The total number of fragments in a single frame.*/
+  ptrdiff_t           nfrags;
+  /*The list of super block maps, indexed in image order.*/
+  oc_sb_map          *sb_maps;
+  /*The list of super block flags, indexed in image order.*/
+  oc_sb_flags        *sb_flags;
+  /*The total number of super blocks in a single frame.*/
+  unsigned            nsbs;
+  /*The fragments from each color plane that belong to each macro block.
+    Fragments are stored in image order (left to right then top to bottom).
+    When chroma components are decimated, the extra fragments have an index of
+     -1.*/
+  oc_mb_map          *mb_maps;
+  /*The list of macro block modes.
+    A negative number indicates the macro block lies entirely outside the
+     coded frame.*/
+  signed char        *mb_modes;
+  /*The number of macro blocks in the X direction.*/
+  unsigned            nhmbs;
+  /*The number of macro blocks in the Y direction.*/
+  unsigned            nvmbs;
+  /*The total number of macro blocks.*/
+  size_t              nmbs;
+  /*The list of coded fragments, in coded order.
+    Uncoded fragments are stored in reverse order from the end of the list.*/
+  ptrdiff_t          *coded_fragis;
+  /*The number of coded fragments in each plane.*/
+  ptrdiff_t           ncoded_fragis[3];
+  /*The total number of coded fragments.*/
+  ptrdiff_t           ntotal_coded_fragis;
+  /*The index of the buffers being used for each OC_FRAME_* reference frame.*/
+  int                 ref_frame_idx[6];
+  /*The actual buffers used for the reference frames.*/
+  th_ycbcr_buffer     ref_frame_bufs[6];
+  /*The storage for the reference frame buffers.*/
+  unsigned char      *ref_frame_data[6];
+  /*The strides for each plane in the reference frames.*/
+  int                 ref_ystride[3];
+  /*The number of unique border patterns.*/
+  int                 nborders;
+  /*The unique border patterns for all border fragments.
+    The borderi field of fragments which straddle the border indexes this
+     list.*/
+  oc_border_info      borders[16];
+  /*The frame number of the last keyframe.*/
+  ogg_int64_t         keyframe_num;
+  /*The frame number of the current frame.*/
+  ogg_int64_t         curframe_num;
+  /*The granpos of the current frame.*/
+  ogg_int64_t         granpos;
+  /*The type of the current frame.*/
+  signed char         frame_type;
+  /*The bias to add to the frame count when computing granule positions.*/
+  unsigned char       granpos_bias;
+  /*The number of quality indices used in the current frame.*/
+  unsigned char       nqis;
+  /*The quality indices of the current frame.*/
+  unsigned char       qis[3];
+  /*The dequantization tables, stored in zig-zag order, and indexed by
+     qi, pli, qti, and zzi.*/
+  ogg_uint16_t       *dequant_tables[64][3][2];
+  OC_ALIGN16(oc_quant_table      dequant_table_data[64][3][2]);
+  /*Loop filter strength parameters.*/
+  unsigned char       loop_filter_limits[64];
+};
+
+
+
+/*The function type used to fill in the chroma plane motion vectors for a
+   macro block when 4 different motion vectors are specified in the luma
+   plane.
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lbmvs: The luma block-level motion vectors.
+  There is no luma macro-block level motion vector (_lmbmv) argument; only
+   the two arrays above are passed.*/
+typedef void (*oc_set_chroma_mvs_func)(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]);
+
+
+
+/*A table of functions used to fill in the Cb,Cr plane motion vectors for a
+   macro block when 4 different motion vectors are specified in the luma
+   plane.*/
+extern const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS];
+
+
+
+int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs);
+void oc_state_clear(oc_theora_state *_state);
+void oc_state_accel_init_c(oc_theora_state *_state);
+void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
+ int _y0,int _yend);
+void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli);
+void oc_state_borders_fill(oc_theora_state *_state,int _refi);
+void oc_state_fill_buffer_ptrs(oc_theora_state *_state,int _buf_idx,
+ th_ycbcr_buffer _img);
+int oc_state_mbi_for_pos(oc_theora_state *_state,int _mbx,int _mby);
+int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
+ int _pli,int _dx,int _dy);
+
+int oc_state_loop_filter_init(oc_theora_state *_state,int *_bv);
+void oc_state_loop_filter(oc_theora_state *_state,int _frame);
+# if defined(OC_DUMP_IMAGES)
+int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
+ const char *_suf);
+# endif
+
+/*Default pure-C implementations of shared accelerated functions.*/
+void oc_frag_copy_c(unsigned char *_dst,
+ const unsigned char *_src,int _src_ystride);
+void oc_frag_recon_intra_c(unsigned char *_dst,int _dst_ystride,
+ const ogg_int16_t _residue[64]);
+void oc_frag_recon_inter_c(unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
+void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1,
+ const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]);
+void oc_idct8x8_c(ogg_int16_t _y[64],int _last_zzi);
+void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
+void oc_state_frag_copy_list_c(const oc_theora_state *_state,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+ int _dst_frame,int _src_frame,int _pli);
+void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,
+ int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
+void oc_restore_fpu_c(void);
+
+/*We need a way to call a few encoder functions without introducing a link-time
+   dependency into the decoder, while still allowing the old alpha API which
+   does not distinguish between encoder and decoder objects to be used.
+  We do this by placing a function table at the start of the encoder object
+   which can dispatch into the encoder library.
+  We do a similar thing for the decoder in case we ever decide to split off a
+   common base library.*/
+typedef void (*oc_state_clear_func)(theora_state *_th);
+typedef int (*oc_state_control_func)(theora_state *th,int _req,
+ void *_buf,size_t _buf_sz);
+typedef ogg_int64_t (*oc_state_granule_frame_func)(theora_state *_th,
+ ogg_int64_t _granulepos);
+typedef double (*oc_state_granule_time_func)(theora_state *_th,
+ ogg_int64_t _granulepos);
+
+
+struct oc_state_dispatch_vtable{
+  oc_state_clear_func         clear;
+  oc_state_control_func       control;
+  oc_state_granule_frame_func granule_frame;
+  oc_state_granule_time_func  granule_time;
+};
+
+#endif

Modified: experimental/derf/theora-ptalarbvorm/lib/x86/sse2idct.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86/sse2idct.c	2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/x86/sse2idct.c	2010-09-02 20:17:34 UTC (rev 17378)
@@ -16,7 +16,7 @@
  ********************************************************************/
 
 /*SSE2 acceleration of Theora's iDCT.*/
-#include "x86int.h"
+#include "x86enc.h"
 #include "sse2trans.h"
 #include "../dct.h"
 

Modified: experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.c	2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.c	2010-09-02 20:17:34 UTC (rev 17378)
@@ -18,10 +18,11 @@
 
 #if defined(OC_X86_ASM)
 
-void oc_enc_vtable_init_x86(oc_enc_ctx *_enc){
+void oc_enc_accel_init_x86(oc_enc_ctx *_enc){
   ogg_uint32_t cpu_flags;
   cpu_flags=_enc->state.cpu_flags;
-  oc_enc_vtable_init_c(_enc);
+  oc_enc_accel_init_c(_enc);
+# if defined(OC_ENC_USE_VTABLE)
   if(cpu_flags&OC_CPU_X86_MMX){
     _enc->opt_vtable.frag_sub=oc_enc_frag_sub_mmx;
     _enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_mmx;
@@ -39,19 +40,22 @@
     _enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_mmxext;
   }
   if(cpu_flags&OC_CPU_X86_SSE2){
-# if defined(OC_X86_64_ASM)
+#  if defined(OC_X86_64_ASM)
     _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_x86_64sse2;
-# endif
+#  endif
     _enc->opt_vtable.frag_ssd=oc_enc_frag_ssd_sse2;
     _enc->opt_vtable.frag_border_ssd=oc_enc_frag_border_ssd_sse2;
     _enc->opt_vtable.frag_satd=oc_enc_frag_satd_sse2;
     _enc->opt_vtable.frag_satd2=oc_enc_frag_satd2_sse2;
     _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_sse2;
-    _enc->opt_data.enquant_table_size=128*sizeof(ogg_uint16_t);
-    _enc->opt_data.enquant_table_alignment=16;
     _enc->opt_vtable.enquant_table_init=oc_enc_enquant_table_init_x86;
     _enc->opt_vtable.enquant_table_fixup=oc_enc_enquant_table_fixup_x86;
     _enc->opt_vtable.quantize=oc_enc_quantize_sse2;
+# endif
+    _enc->opt_data.enquant_table_size=128*sizeof(ogg_uint16_t);
+    _enc->opt_data.enquant_table_alignment=16;
+# if defined(OC_ENC_USE_VTABLE)
   }
+# endif
 }
 #endif

Modified: experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.h	2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.h	2010-09-02 20:17:34 UTC (rev 17378)
@@ -17,19 +17,62 @@
 
 #if !defined(_x86_x86enc_H)
 # define _x86_x86enc_H (1)
-# include "../encint.h"
 # include "x86int.h"
 
-void oc_enc_vtable_init_x86(oc_enc_ctx *_enc);
+# if defined(OC_X86_ASM)
+#  define oc_enc_accel_init oc_enc_accel_init_x86
+#  if defined(OC_X86_64_ASM)
+/*x86-64 guarantees SIMD support up through at least SSE2.
+  If the best routine we have available only needs SSE2 (which at the moment
+   covers all of them), then we can avoid runtime detection and the indirect
+   call.*/
+#   define oc_enc_frag_sub(_enc,_diff,_x,_y,_stride) \
+  oc_enc_frag_sub_mmx(_diff,_x,_y,_stride)
+#   define oc_enc_frag_sub_128(_enc,_diff,_x,_stride) \
+  oc_enc_frag_sub_128_mmx(_diff,_x,_stride)
+#   define oc_enc_frag_sad(_enc,_src,_ref,_ystride) \
+  oc_enc_frag_sad_mmxext(_src,_ref,_ystride)
+#   define oc_enc_frag_sad_thresh(_enc,_src,_ref,_ystride,_thresh) \
+  oc_enc_frag_sad_thresh_mmxext(_src,_ref,_ystride,_thresh)
+#   define oc_enc_frag_sad2_thresh(_enc,_src,_ref1,_ref2,_ystride,_thresh) \
+  oc_enc_frag_sad2_thresh_mmxext(_src,_ref1,_ref2,_ystride,_thresh)
+#   define oc_enc_frag_satd(_enc,_dc,_src,_ref,_ystride) \
+  oc_enc_frag_satd_sse2(_dc,_src,_ref,_ystride)
+#   define oc_enc_frag_satd2(_enc,_dc,_src,_ref1,_ref2,_ystride) \
+  oc_enc_frag_satd2_sse2(_dc,_src,_ref1,_ref2,_ystride)
+#   define oc_enc_frag_intra_satd(_enc,_dc,_src,_ystride) \
+  oc_enc_frag_intra_satd_sse2(_dc,_src,_ystride)
+#   define oc_enc_frag_ssd(_enc,_src,_ref,_ystride) \
+  oc_enc_frag_ssd_sse2(_src,_ref,_ystride)
+#   define oc_enc_frag_border_ssd(_enc,_src,_ref,_ystride,_mask) \
+  oc_enc_frag_border_ssd_sse2(_src,_ref,_ystride,_mask)
+#   define oc_enc_frag_copy2(_enc,_dst,_src1,_src2,_ystride) \
+  oc_int_frag_copy2_mmxext(_dst,_ystride,_src1,_src2,_ystride)
+#   define oc_enc_enquant_table_init(_enc,_enquant,_dequant) \
+  oc_enc_enquant_table_init_x86(_enquant,_dequant)
+#   define oc_enc_enquant_table_fixup(_enc,_enquant,_nqis) \
+  oc_enc_enquant_table_fixup_x86(_enquant,_nqis)
+#   define oc_enc_quantize(_enc,_qdct,_dct,_dequant,_enquant) \
+  oc_enc_quantize_sse2(_qdct,_dct,_dequant,_enquant)
+#   define oc_enc_frag_recon_intra(_enc,_dst,_ystride,_residue) \
+  oc_frag_recon_intra_mmx(_dst,_ystride,_residue)
+#   define oc_enc_frag_recon_inter(_enc,_dst,_src,_ystride,_residue) \
+  oc_frag_recon_inter_mmx(_dst,_src,_ystride,_residue)
+#   define oc_enc_fdct8x8(_enc,_y,_x) \
+  oc_enc_fdct8x8_x86_64sse2(_y,_x)
+#  else
+#   define OC_ENC_USE_VTABLE (1)
+#  endif
+# endif
 
+# include "../encint.h"
+
+void oc_enc_accel_init_x86(oc_enc_ctx *_enc);
+
 void oc_enc_frag_sub_mmx(ogg_int16_t _diff[64],
  const unsigned char *_x,const unsigned char *_y,int _stride);
 void oc_enc_frag_sub_128_mmx(ogg_int16_t _diff[64],
  const unsigned char *_x,int _stride);
-unsigned oc_enc_frag_ssd_sse2(const unsigned char *_src,
- const unsigned char *_ref,int _ystride);
-unsigned oc_enc_frag_border_ssd_sse2(const unsigned char *_src,
- const unsigned char *_ref,int _ystride,ogg_int64_t _mask);
 unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src,
  const unsigned char *_ref,int _ystride);
 unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src,
@@ -45,19 +88,23 @@
  const unsigned char *_ref1,const unsigned char *_ref2,int _ystride);
 unsigned oc_enc_frag_satd2_sse2(unsigned *_dc,const unsigned char *_src,
  const unsigned char *_ref1,const unsigned char *_ref2,int _ystride);
+unsigned oc_enc_frag_intra_satd_mmxext(unsigned *_dc,
+ const unsigned char *_src,int _ystride);
 unsigned oc_enc_frag_intra_satd_sse2(unsigned *_dc,
  const unsigned char *_src,int _ystride);
-unsigned oc_enc_frag_intra_satd_mmxext(unsigned *_dc,
- const unsigned char *_src,int _ystride);
+unsigned oc_enc_frag_ssd_sse2(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_border_ssd_sse2(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,ogg_int64_t _mask);
+void oc_int_frag_copy2_mmxext(unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src1,const unsigned char *_src2,int _src_ystride);
+void oc_enc_frag_copy2_mmxext(unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride);
 void oc_enc_enquant_table_init_x86(void *_enquant,
  const ogg_uint16_t _dequant[64]);
 void oc_enc_enquant_table_fixup_x86(void *_enquant[3][3][2],int _nqis);
 int oc_enc_quantize_sse2(ogg_int16_t _qdct[64],const ogg_int16_t _dct[64],
  const ogg_uint16_t _dequant[64],const void *_enquant);
-void oc_int_frag_copy2_mmxext(unsigned char *_dst,int _dst_ystride,
- const unsigned char *_src1,const unsigned char *_src2,int _src_ystride);
-void oc_enc_frag_copy2_mmxext(unsigned char *_dst,
- const unsigned char *_src1,const unsigned char *_src2,int _ystride);
 void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
 
 # if defined(OC_X86_64_ASM)

Modified: experimental/derf/theora-ptalarbvorm/lib/x86/x86int.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86/x86int.h	2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/x86/x86int.h	2010-09-02 20:17:34 UTC (rev 17378)
@@ -18,6 +18,35 @@
 #if !defined(_x86_x86int_H)
 # define _x86_x86int_H (1)
 # include "../internal.h"
+
+# if defined(OC_X86_ASM)
+#  define oc_state_accel_init oc_state_accel_init_x86
+#  if defined(OC_X86_64_ASM)
+/*x86-64 guarantees SIMD support up through at least SSE2.
+  If the best routine we have available only needs SSE2 (which at the moment
+   covers all of them), then we can avoid runtime detection and the indirect
+   call.*/
+#   define oc_frag_copy(_state,_dst,_src,_ystride) \
+  oc_frag_copy_mmx(_dst,_src,_ystride)
+#   define oc_frag_recon_intra(_state,_dst,_ystride,_residue) \
+  oc_frag_recon_intra_mmx(_dst,_ystride,_residue)
+#   define oc_frag_recon_inter(_state,_dst,_src,_ystride,_residue) \
+  oc_frag_recon_inter_mmx(_dst,_src,_ystride,_residue)
+#   define oc_frag_recon_inter2(_state,_dst,_src1,_src2,_ystride,_residue) \
+  oc_frag_recon_inter2_mmx(_dst,_src1,_src2,_ystride,_residue)
+#   define oc_idct8x8(_state,_y,_last_zzi) \
+  oc_idct8x8_sse2(_y,_last_zzi)
+#   define oc_state_frag_recon oc_state_frag_recon_mmx
+#   define oc_state_frag_copy_list oc_state_frag_copy_list_mmx
+#   define oc_state_loop_filter_frag_rows oc_state_loop_filter_frag_rows_mmxext
+#   define oc_restore_fpu(_state) \
+  oc_restore_fpu_mmx()
+#  else
+#   define OC_STATE_USE_VTABLE (1)
+#  endif
+# endif
+
+# include "../state.h"
 # include "cpu.h"
 
 /*Converts the expression in the argument to a string.*/
@@ -62,7 +91,7 @@
 
 extern const short __attribute__((aligned(16))) OC_IDCT_CONSTS[64];
 
-void oc_state_vtable_init_x86(oc_theora_state *_state);
+void oc_state_accel_init_x86(oc_theora_state *_state);
 
 void oc_frag_copy_mmx(unsigned char *_dst,
  const unsigned char *_src,int _ystride);

Modified: experimental/derf/theora-ptalarbvorm/lib/x86/x86state.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86/x86state.c	2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/x86/x86state.c	2010-09-02 20:17:34 UTC (rev 17378)
@@ -61,8 +61,10 @@
   64,64,64,64,64,64,64,64
 };
 
-void oc_state_vtable_init_x86(oc_theora_state *_state){
+void oc_state_accel_init_x86(oc_theora_state *_state){
+  oc_state_accel_init_c(_state);
   _state->cpu_flags=oc_cpu_flags_get();
+# if defined(OC_STATE_USE_VTABLE)
   if(_state->cpu_flags&OC_CPU_X86_MMX){
     _state->opt_vtable.frag_copy=oc_frag_copy_mmx;
     _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx;
@@ -76,14 +78,16 @@
     _state->opt_vtable.restore_fpu=oc_restore_fpu_mmx;
     _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_MMX;
   }
-  else oc_state_vtable_init_c(_state);
   if(_state->cpu_flags&OC_CPU_X86_MMXEXT){
     _state->opt_vtable.state_loop_filter_frag_rows=
      oc_state_loop_filter_frag_rows_mmxext;
   }
   if(_state->cpu_flags&OC_CPU_X86_SSE2){
     _state->opt_vtable.idct8x8=oc_idct8x8_sse2;
+# endif
     _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_SSE2;
+# if defined(OC_STATE_USE_VTABLE)
   }
+# endif
 }
 #endif



More information about the commits mailing list