[xiph-commits] r17378 - in experimental/derf/theora-ptalarbvorm/lib: . c64x x86
tterribe at svn.xiph.org
tterribe at svn.xiph.org
Thu Sep 2 13:17:35 PDT 2010
Author: tterribe
Date: 2010-09-02 13:17:34 -0700 (Thu, 02 Sep 2010)
New Revision: 17378
Added:
experimental/derf/theora-ptalarbvorm/lib/state.h
Modified:
experimental/derf/theora-ptalarbvorm/lib/Makefile.am
experimental/derf/theora-ptalarbvorm/lib/apiwrapper.h
experimental/derf/theora-ptalarbvorm/lib/c64x/c64xdec.c
experimental/derf/theora-ptalarbvorm/lib/c64x/c64xdec.h
experimental/derf/theora-ptalarbvorm/lib/c64x/c64xint.h
experimental/derf/theora-ptalarbvorm/lib/c64x/c64xstate.c
experimental/derf/theora-ptalarbvorm/lib/decint.h
experimental/derf/theora-ptalarbvorm/lib/decode.c
experimental/derf/theora-ptalarbvorm/lib/encinfo.c
experimental/derf/theora-ptalarbvorm/lib/encint.h
experimental/derf/theora-ptalarbvorm/lib/encode.c
experimental/derf/theora-ptalarbvorm/lib/internal.c
experimental/derf/theora-ptalarbvorm/lib/internal.h
experimental/derf/theora-ptalarbvorm/lib/state.c
experimental/derf/theora-ptalarbvorm/lib/x86/sse2idct.c
experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.c
experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.h
experimental/derf/theora-ptalarbvorm/lib/x86/x86int.h
experimental/derf/theora-ptalarbvorm/lib/x86/x86state.c
Log:
Make the function pointer tables for accelerated functions completely optional.
This required splitting off a new state.h from internal.h (which we should have
done a long time ago) to get the typedefs available at the proper times.
Most notably, this allows the pure C functions (for platforms with no
acceleration) and the x86-64 functions that don't use anything later than SSE2
(which is currently all of them) to avoid any function pointer overhead at all.
On x86-64, at least, this made no measurable performance difference whatsoever.
Modified: experimental/derf/theora-ptalarbvorm/lib/Makefile.am
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/Makefile.am 2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/Makefile.am 2010-09-02 20:17:34 UTC (rev 17378)
@@ -144,21 +144,22 @@
$(decoder_arch_sources)
noinst_HEADERS = \
- internal.h \
- encint.h \
- enquant.h \
- huffenc.h \
- mathops.h \
- modedec.h \
apiwrapper.h \
bitpack.h \
dct.h \
decint.h \
dequant.h \
+ encint.h \
+ enquant.h \
huffdec.h \
+ huffenc.h \
huffman.h \
+ internal.h \
+ mathops.h \
+ modedec.h \
ocintrin.h \
quant.h \
+ state.h \
x86/cpu.h \
x86/mmxfrag.h \
x86/mmxloop.h \
Modified: experimental/derf/theora-ptalarbvorm/lib/apiwrapper.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/apiwrapper.h 2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/apiwrapper.h 2010-09-02 20:17:34 UTC (rev 17378)
@@ -21,7 +21,7 @@
# include <theora/theora.h>
# include "theora/theoradec.h"
# include "theora/theoraenc.h"
-# include "internal.h"
+# include "state.h"
typedef struct th_api_wrapper th_api_wrapper;
typedef struct th_api_info th_api_info;
Modified: experimental/derf/theora-ptalarbvorm/lib/c64x/c64xdec.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/c64x/c64xdec.c 2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/c64x/c64xdec.c 2010-09-02 20:17:34 UTC (rev 17378)
@@ -18,8 +18,10 @@
#if defined(OC_C64X_ASM)
-void oc_dec_vtable_init_c64x(oc_dec_ctx *_dec){
+void oc_dec_accel_init_c64x(oc_dec_ctx *_dec){
+# if defined(OC_DEC_USE_VTABLE)
_dec->opt_vtable.dc_unpredict_mcu_plane=oc_dec_dc_unpredict_mcu_plane_c64x;
+# endif
}
Modified: experimental/derf/theora-ptalarbvorm/lib/c64x/c64xdec.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/c64x/c64xdec.h 2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/c64x/c64xdec.h 2010-09-02 20:17:34 UTC (rev 17378)
@@ -19,14 +19,13 @@
# include "c64xint.h"
# if defined(OC_C64X_ASM)
-# if !defined(oc_dec_dc_unpredict_mcu_plane)
-# define oc_dec_dc_unpredict_mcu_plane oc_dec_dc_unpredict_mcu_plane_c64x
-# endif
+# define oc_dec_accel_init oc_dec_accel_init_c64x
+# define oc_dec_dc_unpredict_mcu_plane oc_dec_dc_unpredict_mcu_plane_c64x
# endif
# include "../decint.h"
-void oc_dec_vtable_init_c64x(oc_dec_ctx *_dec);
+void oc_dec_accel_init_c64x(oc_dec_ctx *_dec);
void oc_dec_dc_unpredict_mcu_plane_c64x(oc_dec_ctx *_dec,
oc_dec_pipeline_state *_pipe,int _pli);
Modified: experimental/derf/theora-ptalarbvorm/lib/c64x/c64xint.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/c64x/c64xint.h 2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/c64x/c64xint.h 2010-09-02 20:17:34 UTC (rev 17378)
@@ -17,45 +17,29 @@
#if !defined(_c64x_c64xint_H)
# define _c64x_c64xint_H (1)
+# include "../internal.h"
# if defined(OC_C64X_ASM)
-# if !defined(oc_frag_copy)
-# define oc_frag_copy(_state,_dst,_src,_ystride) \
- oc_frag_copy_c64x(_dst,_src,_ystride)
-# endif
-# if !defined(oc_frag_recon_intra)
-# define oc_frag_recon_intra(_state,_dst,_dst_ystride,_residue) \
- oc_frag_recon_intra_c64x(_dst,_dst_ystride,_residue)
-# endif
-# if !defined(oc_frag_recon_inter)
-# define oc_frag_recon_inter(_state,_dst,_src,_ystride,_residue) \
- oc_frag_recon_inter_c64x(_dst,_src,_ystride,_residue)
-# endif
-# if !defined(oc_frag_recon_inter2)
-# define oc_frag_recon_inter2(_state,_dst,_src1,_src2,_ystride,_residue) \
- oc_frag_recon_inter2_c64x(_dst,_src1,_src2,_ystride,_residue)
-# endif
-# if !defined(oc_idct8x8)
-# define oc_idct8x8(_state,_y,_last_zzi) \
- define oc_idct8x8_c64x(_y,_last_zzi)
-# endif
-# if !defined(oc_state_frag_recon)
-# define oc_state_frag_recon oc_state_frag_recon_c64x
-# endif
-# if !defined(oc_state_frag_copy_list)
-# define oc_state_frag_copy_list oc_state_frag_copy_list_c64x
-# endif
-# if !defined(oc_state_loop_filter_frag_rows)
-# define oc_state_loop_filter_frag_rows oc_state_loop_filter_frag_rows_c64x
-# endif
-# if !defined(oc_restore_fpu)
-# define oc_restore_fpu(_state) do{}while(0)
-# endif
+# define oc_state_accel_init oc_state_accel_init_c64x
+# define oc_frag_copy(_state,_dst,_src,_ystride) \
+ oc_frag_copy_c64x(_dst,_src,_ystride)
+# define oc_frag_recon_intra(_state,_dst,_dst_ystride,_residue) \
+ oc_frag_recon_intra_c64x(_dst,_dst_ystride,_residue)
+# define oc_frag_recon_inter(_state,_dst,_src,_ystride,_residue) \
+ oc_frag_recon_inter_c64x(_dst,_src,_ystride,_residue)
+# define oc_frag_recon_inter2(_state,_dst,_src1,_src2,_ystride,_residue) \
+ oc_frag_recon_inter2_c64x(_dst,_src1,_src2,_ystride,_residue)
+# define oc_idct8x8(_state,_y,_last_zzi) \
+ oc_idct8x8_c64x(_y,_last_zzi)
+# define oc_state_frag_recon oc_state_frag_recon_c64x
+# define oc_state_frag_copy_list oc_state_frag_copy_list_c64x
+# define oc_state_loop_filter_frag_rows oc_state_loop_filter_frag_rows_c64x
+# define oc_restore_fpu(_state) do{}while(0)
# endif
-# include "../internal.h"
+# include "../state.h"
-void oc_state_vtable_init_c64x(oc_theora_state *_state);
+void oc_state_accel_init_c64x(oc_theora_state *_state);
void oc_frag_copy_c64x(unsigned char *_dst,
const unsigned char *_src,int _ystride);
Modified: experimental/derf/theora-ptalarbvorm/lib/c64x/c64xstate.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/c64x/c64xstate.c 2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/c64x/c64xstate.c 2010-09-02 20:17:34 UTC (rev 17378)
@@ -19,7 +19,9 @@
#if defined(OC_C64X_ASM)
-void oc_state_vtable_init_c64x(oc_theora_state *_state){
+void oc_state_accel_init_c64x(oc_theora_state *_state){
+ _state->cpu_flags=0;
+# if defined(OC_STATE_USE_VTABLE)
_state->opt_vtable.frag_copy=oc_frag_copy_c64x;
_state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c64x;
_state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c64x;
@@ -30,6 +32,7 @@
_state->opt_vtable.state_loop_filter_frag_rows=
oc_state_loop_filter_frag_rows_c64x;
_state->opt_vtable.restore_fpu=oc_restore_fpu_c;
+# endif
_state->opt_data.dct_fzig_zag=OC_FZIG_ZAG;
}
Modified: experimental/derf/theora-ptalarbvorm/lib/decint.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/decint.h 2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/decint.h 2010-09-02 20:17:34 UTC (rev 17378)
@@ -19,17 +19,39 @@
#if !defined(_decint_H)
# define _decint_H (1)
# include "theora/theoradec.h"
-# include "internal.h"
+# include "state.h"
# include "bitpack.h"
+# include "huffdec.h"
+# include "dequant.h"
typedef struct th_setup_info oc_setup_info;
typedef struct oc_dec_opt_vtable oc_dec_opt_vtable;
typedef struct oc_dec_pipeline_state oc_dec_pipeline_state;
typedef struct th_dec_ctx oc_dec_ctx;
-# include "huffdec.h"
-# include "dequant.h"
+
+/*Decoder-specific accelerated functions.*/
+# if defined(OC_C64X_ASM)
+# include "c64x/c64xdec.h"
+# endif
+
+# if !defined(oc_dec_accel_init)
+# define oc_dec_accel_init oc_dec_accel_init_c
+# endif
+# if defined(OC_DEC_USE_VTABLE)
+# if !defined(oc_dec_dc_unpredict_mcu_plane)
+# define oc_dec_dc_unpredict_mcu_plane(_dec,_pipe,_pli) \
+ ((*(_dec)->opt_vtable.dc_unpredict_mcu_plane)(_dec,_pipe,_pli))
+# endif
+# else
+# if !defined(oc_dec_dc_unpredict_mcu_plane)
+# define oc_dec_dc_unpredict_mcu_plane oc_dec_dc_unpredict_mcu_plane_c
+# endif
+# endif
+
+
+
/*Constants for the packet-in state machine specific to the decoder.*/
/*Next packet to read: Data packet.*/
@@ -117,8 +139,10 @@
/*The striped decode callback function.*/
th_stripe_callback stripe_cb;
oc_dec_pipeline_state pipe;
+# if defined(OC_DEC_USE_VTABLE)
/*Table for decoder acceleration functions.*/
oc_dec_opt_vtable opt_vtable;
+# endif
# if defined(HAVE_CAIRO)
/*Output metrics for debugging.*/
int telemetry;
@@ -136,19 +160,9 @@
# endif
};
-/*Decoder-specific accelerated functions.*/
-# if defined(OC_C64X_ASM)
-# include "c64x/c64xdec.h"
-# endif
+/*Default pure-C implementations of decoder-specific accelerated functions.*/
+void oc_dec_accel_init_c(oc_dec_ctx *_dec);
-# if !defined(oc_dec_dc_unpredict_mcu_plane)
-# define oc_dec_dc_unpredict_mcu_plane(_dec,_pipe,_pli) \
- ((*(_dec)->opt_vtable.dc_unpredict_mcu_plane)(_dec,_pipe,_pli))
-# endif
-
-/*Default pure-C implementations.*/
-void oc_dec_vtable_init_c(oc_dec_ctx *_dec);
-
void oc_dec_dc_unpredict_mcu_plane_c(oc_dec_ctx *_dec,
oc_dec_pipeline_state *_pipe,int _pli);
Modified: experimental/derf/theora-ptalarbvorm/lib/decode.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/decode.c 2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/decode.c 2010-09-02 20:17:34 UTC (rev 17378)
@@ -358,9 +358,11 @@
-void oc_dec_vtable_init_c(oc_dec_ctx *_dec){
+void oc_dec_accel_init_c(oc_dec_ctx *_dec){
+# if defined(OC_DEC_USE_VTABLE)
_dec->opt_vtable.dc_unpredict_mcu_plane=
oc_dec_dc_unpredict_mcu_plane_c;
+# endif
}
static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
@@ -407,11 +409,7 @@
}
memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
sizeof(_dec->state.loop_filter_limits));
-#if defined(OC_C64X_ASM)
- oc_dec_vtable_init_c64x(_dec);
-#else
- oc_dec_vtable_init_c(_dec);
-#endif
+ oc_dec_accel_init(_dec);
_dec->pp_level=OC_PP_LEVEL_DISABLED;
_dec->dc_qis=NULL;
_dec->variances=NULL;
Modified: experimental/derf/theora-ptalarbvorm/lib/encinfo.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/encinfo.c 2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/encinfo.c 2010-09-02 20:17:34 UTC (rev 17378)
@@ -1,6 +1,6 @@
#include <stdlib.h>
#include <string.h>
-#include "internal.h"
+#include "state.h"
#include "enquant.h"
#include "huffenc.h"
Modified: experimental/derf/theora-ptalarbvorm/lib/encint.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/encint.h 2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/encint.h 2010-09-02 20:17:34 UTC (rev 17378)
@@ -17,8 +17,7 @@
#if !defined(_encint_H)
# define _encint_H (1)
# include "theora/theoraenc.h"
-# include "internal.h"
-# include "ocintrin.h"
+# include "state.h"
# include "mathops.h"
# include "enquant.h"
# include "huffenc.h"
@@ -41,6 +40,155 @@
+/*Encoder-specific accelerated functions.*/
+# if defined(OC_X86_ASM)
+# include "x86/x86enc.h"
+# endif
+
+# if !defined(oc_enc_accel_init)
+# define oc_enc_accel_init oc_enc_accel_init_c
+# endif
+# if defined(OC_ENC_USE_VTABLE)
+# if !defined(oc_enc_frag_sub)
+# define oc_enc_frag_sub(_enc,_diff,_src,_ref,_ystride) \
+ ((*(_enc)->opt_vtable.frag_sub)(_diff,_src,_ref,_ystride))
+# endif
+# if !defined(oc_enc_frag_sub_128)
+# define oc_enc_frag_sub_128(_enc,_diff,_src,_ystride) \
+ ((*(_enc)->opt_vtable.frag_sub_128)(_diff,_src,_ystride))
+# endif
+# if !defined(oc_enc_frag_sad)
+# define oc_enc_frag_sad(_enc,_src,_ref,_ystride) \
+ ((*(_enc)->opt_vtable.frag_sad)(_src,_ref,_ystride))
+# endif
+# if !defined(oc_enc_frag_sad_thresh)
+# define oc_enc_frag_sad_thresh(_enc,_src,_ref,_ystride,_thresh) \
+ ((*(_enc)->opt_vtable.frag_sad_thresh)(_src,_ref,_ystride,_thresh))
+# endif
+# if !defined(oc_enc_frag_sad2_thresh)
+# define oc_enc_frag_sad2_thresh(_enc,_src,_ref1,_ref2,_ystride,_thresh) \
+ ((*(_enc)->opt_vtable.frag_sad2_thresh)(_src,_ref1,_ref2,_ystride,_thresh))
+# endif
+# if !defined(oc_enc_frag_satd)
+# define oc_enc_frag_satd(_enc,_dc,_src,_ref,_ystride) \
+ ((*(_enc)->opt_vtable.frag_satd)(_dc,_src,_ref,_ystride))
+# endif
+# if !defined(oc_enc_frag_satd2)
+# define oc_enc_frag_satd2(_enc,_dc,_src,_ref1,_ref2,_ystride) \
+ ((*(_enc)->opt_vtable.frag_satd2)(_dc,_src,_ref1,_ref2,_ystride))
+# endif
+# if !defined(oc_enc_frag_intra_satd)
+# define oc_enc_frag_intra_satd(_enc,_dc,_src,_ystride) \
+ ((*(_enc)->opt_vtable.frag_intra_satd)(_dc,_src,_ystride))
+# endif
+# if !defined(oc_enc_frag_ssd)
+# define oc_enc_frag_ssd(_enc,_src,_ref,_ystride) \
+ ((*(_enc)->opt_vtable.frag_ssd)(_src,_ref,_ystride))
+# endif
+# if !defined(oc_enc_frag_border_ssd)
+# define oc_enc_frag_border_ssd(_enc,_src,_ref,_ystride,_mask) \
+ ((*(_enc)->opt_vtable.frag_border_ssd)(_src,_ref,_ystride,_mask))
+# endif
+# if !defined(oc_enc_frag_copy2)
+# define oc_enc_frag_copy2(_enc,_dst,_src1,_src2,_ystride) \
+ ((*(_enc)->opt_vtable.frag_copy2)(_dst,_src1,_src2,_ystride))
+# endif
+# if !defined(oc_enc_enquant_table_init)
+# define oc_enc_enquant_table_init(_enc,_enquant,_dequant) \
+ ((*(_enc)->opt_vtable.enquant_table_init)(_enquant,_dequant))
+# endif
+# if !defined(oc_enc_enquant_table_fixup)
+# define oc_enc_enquant_table_fixup(_enc,_enquant,_nqis) \
+ ((*(_enc)->opt_vtable.enquant_table_fixup)(_enquant,_nqis))
+# endif
+# if !defined(oc_enc_quantize)
+# define oc_enc_quantize(_enc,_qdct,_dct,_dequant,_enquant) \
+ ((*(_enc)->opt_vtable.quantize)(_qdct,_dct,_dequant,_enquant))
+# endif
+# if !defined(oc_enc_frag_recon_intra)
+# define oc_enc_frag_recon_intra(_enc,_dst,_ystride,_residue) \
+ ((*(_enc)->opt_vtable.frag_recon_intra)(_dst,_ystride,_residue))
+# endif
+# if !defined(oc_enc_frag_recon_inter)
+# define oc_enc_frag_recon_inter(_enc,_dst,_src,_ystride,_residue) \
+ ((*(_enc)->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue))
+# endif
+# if !defined(oc_enc_fdct8x8)
+# define oc_enc_fdct8x8(_enc,_y,_x) \
+ ((*(_enc)->opt_vtable.fdct8x8)(_y,_x))
+# endif
+# else
+# if !defined(oc_enc_frag_sub)
+# define oc_enc_frag_sub(_enc,_diff,_src,_ref,_ystride) \
+ oc_enc_frag_sub_c(_diff,_src,_ref,_ystride)
+# endif
+# if !defined(oc_enc_frag_sub_128)
+# define oc_enc_frag_sub_128(_enc,_diff,_src,_ystride) \
+ oc_enc_frag_sub_128_c(_diff,_src,_ystride)
+# endif
+# if !defined(oc_enc_frag_sad)
+# define oc_enc_frag_sad(_enc,_src,_ref,_ystride) \
+ oc_enc_frag_sad_c(_src,_ref,_ystride)
+# endif
+# if !defined(oc_enc_frag_sad_thresh)
+# define oc_enc_frag_sad_thresh(_enc,_src,_ref,_ystride,_thresh) \
+ oc_enc_frag_sad_thresh_c(_src,_ref,_ystride,_thresh)
+# endif
+# if !defined(oc_enc_frag_sad2_thresh)
+# define oc_enc_frag_sad2_thresh(_enc,_src,_ref1,_ref2,_ystride,_thresh) \
+ oc_enc_frag_sad2_thresh_c(_src,_ref1,_ref2,_ystride,_thresh)
+# endif
+# if !defined(oc_enc_frag_satd)
+# define oc_enc_frag_satd(_enc,_dc,_src,_ref,_ystride) \
+ oc_enc_frag_satd_c(_dc,_src,_ref,_ystride)
+# endif
+# if !defined(oc_enc_frag_satd2)
+# define oc_enc_frag_satd2(_enc,_dc,_src,_ref1,_ref2,_ystride) \
+ oc_enc_frag_satd2_c(_dc,_src,_ref1,_ref2,_ystride)
+# endif
+# if !defined(oc_enc_frag_intra_satd)
+# define oc_enc_frag_intra_satd(_enc,_dc,_src,_ystride) \
+ oc_enc_frag_intra_satd_c(_dc,_src,_ystride)
+# endif
+# if !defined(oc_enc_frag_ssd)
+# define oc_enc_frag_ssd(_enc,_src,_ref,_ystride) \
+ oc_enc_frag_ssd_c(_src,_ref,_ystride)
+# endif
+# if !defined(oc_enc_frag_border_ssd)
+# define oc_enc_frag_border_ssd(_enc,_src,_ref,_ystride,_mask) \
+ oc_enc_frag_border_ssd_c(_src,_ref,_ystride,_mask)
+# endif
+# if !defined(oc_enc_frag_copy2)
+# define oc_enc_frag_copy2(_enc,_dst,_src1,_src2,_ystride) \
+ oc_enc_frag_copy2_c(_dst,_src1,_src2,_ystride)
+# endif
+# if !defined(oc_enc_enquant_table_init)
+# define oc_enc_enquant_table_init(_enc,_enquant,_dequant) \
+ oc_enc_enquant_table_init_c(_enquant,_dequant)
+# endif
+# if !defined(oc_enc_enquant_table_fixup)
+# define oc_enc_enquant_table_fixup(_enc,_enquant,_nqis) \
+ oc_enc_enquant_table_fixup_c(_enquant,_nqis)
+# endif
+# if !defined(oc_enc_quantize)
+# define oc_enc_quantize(_enc,_qdct,_dct,_dequant,_enquant) \
+ oc_enc_quantize_c(_qdct,_dct,_dequant,_enquant)
+# endif
+# if !defined(oc_enc_frag_recon_intra)
+# define oc_enc_frag_recon_intra(_enc,_dst,_ystride,_residue) \
+ oc_frag_recon_intra_c(_dst,_ystride,_residue)
+# endif
+# if !defined(oc_enc_frag_recon_inter)
+# define oc_enc_frag_recon_inter(_enc,_dst,_src,_ystride,_residue) \
+ oc_frag_recon_inter_c(_dst,_src,_ystride,_residue)
+# endif
+# if !defined(oc_enc_fdct8x8)
+# define oc_enc_fdct8x8(_enc,_y,_x) oc_enc_fdct8x8_c(_y,_x)
+# endif
+# endif
+
+
+
/*Constants for the packet-out state machine specific to the encoder.*/
/*Next packet to emit: Data packet, but none are ready yet.*/
@@ -171,7 +319,7 @@
};
-void oc_enc_vtable_init(oc_enc_ctx *_enc);
+void oc_enc_accel_init(oc_enc_ctx *_enc);
@@ -483,8 +631,10 @@
oc_mode_rd mode_rd[3][3][2][OC_SAD_BINS];
/*The buffer state used to drive rate control.*/
oc_rc_state rc;
+# if defined(OC_ENC_USE_VTABLE)
/*Table for encoder acceleration functions.*/
oc_enc_opt_vtable opt_vtable;
+# endif
/*Table for encoder data used by accelerated functions.*/
oc_enc_opt_data opt_data;
};
@@ -546,79 +696,9 @@
-/*Encoder-specific accelerated functions.*/
-# if !defined(oc_enc_frag_sub)
-# define oc_enc_frag_sub(_enc,_diff,_src,_ref,_ystride) \
- ((*(_enc)->opt_vtable.frag_sub)(_diff,_src,_ref,_ystride))
-# endif
-#if !defined(oc_enc_frag_sub_128)
-# define oc_enc_frag_sub_128(_enc,_diff,_src,_ystride) \
- ((*(_enc)->opt_vtable.frag_sub_128)(_diff,_src,_ystride))
-# endif
-#if !defined(oc_enc_frag_sad)
-# define oc_enc_frag_sad(_enc,_src,_ref,_ystride) \
- ((*(_enc)->opt_vtable.frag_sad)(_src,_ref,_ystride))
-#endif
-#if !defined(oc_enc_frag_sad_thresh)
-# define oc_enc_frag_sad_thresh(_enc,_src,_ref,_ystride,_thresh) \
- ((*(_enc)->opt_vtable.frag_sad_thresh)(_src,_ref,_ystride,_thresh))
-#endif
-#if !defined(oc_enc_frag_sad2_thresh)
-# define oc_enc_frag_sad2_thresh(_enc,_src,_ref1,_ref2,_ystride,_thresh) \
- ((*(_enc)->opt_vtable.frag_sad2_thresh)(_src,_ref1,_ref2,_ystride,_thresh))
-#endif
-#if !defined(oc_enc_frag_satd)
-# define oc_enc_frag_satd(_enc,_dc,_src,_ref,_ystride) \
- ((*(_enc)->opt_vtable.frag_satd)(_dc,_src,_ref,_ystride))
-#endif
-#if !defined(oc_enc_frag_satd2)
-# define oc_enc_frag_satd2(_enc,_dc,_src,_ref1,_ref2,_ystride) \
- ((*(_enc)->opt_vtable.frag_satd2)(_dc,_src,_ref1,_ref2,_ystride))
-#endif
-#if !defined(oc_enc_frag_intra_satd)
-# define oc_enc_frag_intra_satd(_enc,_dc,_src,_ystride) \
- ((*(_enc)->opt_vtable.frag_intra_satd)(_dc,_src,_ystride))
-#endif
-#if !defined(oc_enc_frag_ssd)
-# define oc_enc_frag_ssd(_enc,_src,_ref,_ystride) \
- ((*(_enc)->opt_vtable.frag_ssd)(_src,_ref,_ystride))
-#endif
-#if !defined(oc_enc_frag_border_ssd)
-# define oc_enc_frag_border_ssd(_enc,_src,_ref,_ystride,_mask) \
- ((*(_enc)->opt_vtable.frag_border_ssd)(_src,_ref,_ystride,_mask))
-#endif
-#if !defined(oc_enc_frag_copy2)
-# define oc_enc_frag_copy2(_enc,_dst,_src1,_src2,_ystride) \
- ((*(_enc)->opt_vtable.frag_copy2)(_dst,_src1,_src2,_ystride))
-#endif
-#if !defined(oc_enc_enquant_table_init)
-# define oc_enc_enquant_table_init(_enc,_enquant,_dequant) \
- ((*(_enc)->opt_vtable.enquant_table_init)(_enquant,_dequant))
-#endif
-#if !defined(oc_enc_enquant_table_fixup)
-# define oc_enc_enquant_table_fixup(_enc,_enquant,_nqis) \
- ((*(_enc)->opt_vtable.enquant_table_fixup)(_enquant,_nqis))
-#endif
-#if !defined(oc_enc_quantize)
-# define oc_enc_quantize(_enc,_qdct,_dct,_dequant,_enquant) \
- ((*(_enc)->opt_vtable.quantize)(_qdct,_dct,_dequant,_enquant))
-#endif
-#if !defined(oc_enc_frag_recon_intra)
-# define oc_enc_frag_recon_intra(_enc,_dst,_ystride,_residue) \
- ((*(_enc)->opt_vtable.frag_recon_intra)(_dst,_ystride,_residue))
-#endif
-#if !defined(oc_enc_frag_recon_inter)
-# define oc_enc_frag_recon_inter(_enc,_dst,_src,_ystride,_residue) \
- ((*(_enc)->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue))
-#endif
-#if !defined(oc_enc_fdct8x8)
-# define oc_enc_fdct8x8(_enc,_y,_x) \
- ((*(_enc)->opt_vtable.fdct8x8)(_y,_x))
-#endif
+/*Default pure-C implementations of encoder-specific accelerated functions.*/
+void oc_enc_accel_init_c(oc_enc_ctx *_enc);
-/*Default pure-C implementations.*/
-void oc_enc_vtable_init_c(oc_enc_ctx *_enc);
-
void oc_enc_frag_sub_c(ogg_int16_t _diff[64],
const unsigned char *_src,const unsigned char *_ref,int _ystride);
void oc_enc_frag_sub_128_c(ogg_int16_t _diff[64],
Modified: experimental/derf/theora-ptalarbvorm/lib/encode.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/encode.c 2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/encode.c 2010-09-02 20:17:34 UTC (rev 17378)
@@ -18,9 +18,6 @@
#include <string.h>
#include "encint.h"
#include "dequant.h"
-#if defined(OC_X86_ASM)
-# include "x86/x86enc.h"
-#endif
@@ -934,9 +931,10 @@
}
-void oc_enc_vtable_init_c(oc_enc_ctx *_enc){
+void oc_enc_accel_init_c(oc_enc_ctx *_enc){
/*The implementations prefixed with oc_enc_ are encoder-specific.
The rest we re-use from the decoder.*/
+# if defined(OC_ENC_USE_VTABLE)
_enc->opt_vtable.frag_sub=oc_enc_frag_sub_c;
_enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_c;
_enc->opt_vtable.frag_sad=oc_enc_frag_sad_c;
@@ -948,14 +946,15 @@
_enc->opt_vtable.frag_ssd=oc_enc_frag_ssd_c;
_enc->opt_vtable.frag_border_ssd=oc_enc_frag_border_ssd_c;
_enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_c;
- _enc->opt_data.enquant_table_size=64*sizeof(oc_iquant);
- _enc->opt_data.enquant_table_alignment=16;
_enc->opt_vtable.enquant_table_init=oc_enc_enquant_table_init_c;
_enc->opt_vtable.enquant_table_fixup=oc_enc_enquant_table_fixup_c;
_enc->opt_vtable.quantize=oc_enc_quantize_c;
_enc->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c;
_enc->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c;
_enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_c;
+# endif
+ _enc->opt_data.enquant_table_size=64*sizeof(oc_iquant);
+ _enc->opt_data.enquant_table_alignment=16;
}
/*Initialize the macro block neighbor lists for MC analysis.
@@ -1153,6 +1152,7 @@
/*Initialize the shared encoder/decoder state.*/
ret=oc_state_init(&_enc->state,&info,6);
if(ret<0)return ret;
+ oc_enc_accel_init(_enc);
_enc->mb_info=_ogg_calloc(_enc->state.nmbs,sizeof(*_enc->mb_info));
_enc->frag_dc=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_dc));
_enc->coded_mbis=
@@ -1181,11 +1181,6 @@
_enc->frag_satd=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_satd));
_enc->frag_ssd=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_ssd));
#endif
-#if defined(OC_X86_ASM)
- oc_enc_vtable_init_x86(_enc);
-#else
- oc_enc_vtable_init_c(_enc);
-#endif
_enc->enquant_table_data=(unsigned char *)_ogg_malloc(
(64+3)*3*2*_enc->opt_data.enquant_table_size
+_enc->opt_data.enquant_table_alignment-1);
Modified: experimental/derf/theora-ptalarbvorm/lib/internal.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/internal.c 2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/internal.c 2010-09-02 20:17:34 UTC (rev 17378)
@@ -97,80 +97,6 @@
-/*The function used to fill in the chroma plane motion vectors for a macro
- block when 4 different motion vectors are specified in the luma plane.
- This version is for use with chroma decimated in the X and Y directions
- (4:2:0).
- _cbmvs: The chroma block-level motion vectors to fill in.
- _lbmvs: The luma block-level motion vectors.*/
-static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
- int dx;
- int dy;
- dx=_lbmvs[0][0]+_lbmvs[1][0]+_lbmvs[2][0]+_lbmvs[3][0];
- dy=_lbmvs[0][1]+_lbmvs[1][1]+_lbmvs[2][1]+_lbmvs[3][1];
- _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,2,2);
- _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,2,2);
-}
-
-/*The function used to fill in the chroma plane motion vectors for a macro
- block when 4 different motion vectors are specified in the luma plane.
- This version is for use with chroma decimated in the Y direction.
- _cbmvs: The chroma block-level motion vectors to fill in.
- _lbmvs: The luma block-level motion vectors.*/
-static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
- int dx;
- int dy;
- dx=_lbmvs[0][0]+_lbmvs[2][0];
- dy=_lbmvs[0][1]+_lbmvs[2][1];
- _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
- _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
- dx=_lbmvs[1][0]+_lbmvs[3][0];
- dy=_lbmvs[1][1]+_lbmvs[3][1];
- _cbmvs[1][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
- _cbmvs[1][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
-}
-
-/*The function used to fill in the chroma plane motion vectors for a macro
- block when 4 different motion vectors are specified in the luma plane.
- This version is for use with chroma decimated in the X direction (4:2:2).
- _cbmvs: The chroma block-level motion vectors to fill in.
- _lbmvs: The luma block-level motion vectors.*/
-static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
- int dx;
- int dy;
- dx=_lbmvs[0][0]+_lbmvs[1][0];
- dy=_lbmvs[0][1]+_lbmvs[1][1];
- _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
- _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
- dx=_lbmvs[2][0]+_lbmvs[3][0];
- dy=_lbmvs[2][1]+_lbmvs[3][1];
- _cbmvs[2][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
- _cbmvs[2][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
-}
-
-/*The function used to fill in the chroma plane motion vectors for a macro
- block when 4 different motion vectors are specified in the luma plane.
- This version is for use with no chroma decimation (4:4:4).
- _cbmvs: The chroma block-level motion vectors to fill in.
- _lmbmv: The luma macro-block level motion vector to fill in for use in
- prediction.
- _lbmvs: The luma block-level motion vectors.*/
-static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
- memcpy(_cbmvs,_lbmvs,4*sizeof(_lbmvs[0]));
-}
-
-/*A table of functions used to fill in the chroma plane motion vectors for a
- macro block when 4 different motion vectors are specified in the luma
- plane.*/
-const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={
- (oc_set_chroma_mvs_func)oc_set_chroma_mvs00,
- (oc_set_chroma_mvs_func)oc_set_chroma_mvs01,
- (oc_set_chroma_mvs_func)oc_set_chroma_mvs10,
- (oc_set_chroma_mvs_func)oc_set_chroma_mvs11
-};
-
-
-
void *oc_aligned_malloc(size_t _sz,size_t _align){
unsigned char *p;
if(_align>UCHAR_MAX||(_align&_align-1)||_sz>~(size_t)0-_align)return NULL;
Modified: experimental/derf/theora-ptalarbvorm/lib/internal.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/internal.h 2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/internal.h 2010-09-02 20:17:34 UTC (rev 17378)
@@ -23,7 +23,10 @@
# endif
# include "theora/codec.h"
# include "theora/theora.h"
+# include "ocintrin.h"
+#define OC_DUMP_IMAGES (1)
+
# if defined(_MSC_VER)
/*Disable missing EMMS warnings.*/
# pragma warning(disable:4799)
@@ -37,10 +40,6 @@
# endif
# endif
-# include "ocintrin.h"
-# include "huffman.h"
-# include "quant.h"
-
/*Some assembly constructs require aligned operands.*/
# if defined(OC_X86_ASM)
# if defined(__GNUC__)
@@ -60,17 +59,6 @@
-typedef struct oc_sb_flags oc_sb_flags;
-typedef struct oc_border_info oc_border_info;
-typedef struct oc_fragment oc_fragment;
-typedef struct oc_fragment_plane oc_fragment_plane;
-typedef struct oc_base_opt_vtable oc_base_opt_vtable;
-typedef struct oc_base_opt_data oc_base_opt_data;
-typedef struct oc_state_dispatch_vtable oc_state_dispatch_vtable;
-typedef struct oc_theora_state oc_theora_state;
-
-
-
/*This library's version.*/
# define OC_VENDOR_STRING "Xiph.Org libtheora 1.1+ 20100314 (Ptalarbvorm)"
@@ -83,321 +71,8 @@
((_info)->version_minor>(_min)||(_info)->version_minor==(_min)&& \
(_info)->version_subminor>=(_sub)))
-/*A keyframe.*/
-# define OC_INTRA_FRAME (0)
-/*A predicted frame.*/
-# define OC_INTER_FRAME (1)
-/*A frame of unknown type (frame type decision has not yet been made).*/
-# define OC_UNKWN_FRAME (-1)
-/*The amount of padding to add to the reconstructed frame buffers on all
- sides.
- This is used to allow unrestricted motion vectors without special casing.
- This must be a multiple of 2.*/
-# define OC_UMV_PADDING (16)
-/*Frame classification indices.*/
-/*The previous golden frame.*/
-# define OC_FRAME_GOLD (0)
-/*The previous frame.*/
-# define OC_FRAME_PREV (1)
-/*The current frame.*/
-# define OC_FRAME_SELF (2)
-
-/*The input or output buffer.*/
-# define OC_FRAME_IO (3)
-/*Uncompressed prev golden frame.*/
-# define OC_FRAME_GOLD_ORIG (4)
-/*Uncompressed previous frame. */
-# define OC_FRAME_PREV_ORIG (5)
-
-/*Macroblock modes.*/
-/*Macro block is invalid: It is never coded.*/
-# define OC_MODE_INVALID (-1)
-/*Encoded difference from the same macro block in the previous frame.*/
-# define OC_MODE_INTER_NOMV (0)
-/*Encoded with no motion compensated prediction.*/
-# define OC_MODE_INTRA (1)
-/*Encoded difference from the previous frame offset by the given motion
- vector.*/
-# define OC_MODE_INTER_MV (2)
-/*Encoded difference from the previous frame offset by the last coded motion
- vector.*/
-# define OC_MODE_INTER_MV_LAST (3)
-/*Encoded difference from the previous frame offset by the second to last
- coded motion vector.*/
-# define OC_MODE_INTER_MV_LAST2 (4)
-/*Encoded difference from the same macro block in the previous golden
- frame.*/
-# define OC_MODE_GOLDEN_NOMV (5)
-/*Encoded difference from the previous golden frame offset by the given motion
- vector.*/
-# define OC_MODE_GOLDEN_MV (6)
-/*Encoded difference from the previous frame offset by the individual motion
- vectors given for each block.*/
-# define OC_MODE_INTER_MV_FOUR (7)
-/*The number of (coded) modes.*/
-# define OC_NMODES (8)
-
-/*Determines the reference frame used for a given MB mode.*/
-# define OC_FRAME_FOR_MODE(_x) \
- OC_UNIBBLE_TABLE32(OC_FRAME_PREV,OC_FRAME_SELF,OC_FRAME_PREV,OC_FRAME_PREV, \
- OC_FRAME_PREV,OC_FRAME_GOLD,OC_FRAME_GOLD,OC_FRAME_PREV,(_x))
-
-/*Constants for the packet state machine common between encoder and decoder.*/
-
-/*Next packet to emit/read: Codec info header.*/
-# define OC_PACKET_INFO_HDR (-3)
-/*Next packet to emit/read: Comment header.*/
-# define OC_PACKET_COMMENT_HDR (-2)
-/*Next packet to emit/read: Codec setup header.*/
-# define OC_PACKET_SETUP_HDR (-1)
-/*No more packets to emit/read.*/
-# define OC_PACKET_DONE (INT_MAX)
-
-
-
-/*Super blocks are 32x32 segments of pixels in a single color plane indexed
- in image order.
- Internally, super blocks are broken up into four quadrants, each of which
- contains a 2x2 pattern of blocks, each of which is an 8x8 block of pixels.
- Quadrants, and the blocks within them, are indexed in a special order called
- a "Hilbert curve" within the super block.
-
- In order to differentiate between the Hilbert-curve indexing strategy and
- the regular image order indexing strategy, blocks indexed in image order
- are called "fragments".
- Fragments are indexed in image order, left to right, then bottom to top,
- from Y' plane to Cb plane to Cr plane.
-
- The co-located fragments in all image planes corresponding to the location
- of a single quadrant of a luma plane super block form a macro block.
- Thus there is only a single set of macro blocks for all planes, each of which
- contains between 6 and 12 fragments, depending on the pixel format.
- Therefore macro block information is kept in a separate set of arrays from
- super blocks to avoid unused space in the other planes.
- The lists are indexed in super block order.
- That is, the macro block corresponding to the macro block mbi in (luma plane)
- super block sbi is at index (sbi<<2|mbi).
- Thus the number of macro blocks in each dimension is always twice the number
- of super blocks, even when only an odd number fall inside the coded frame.
- These "extra" macro blocks are just an artifact of our internal data layout,
- and not part of the coded stream; they are flagged with a negative MB mode.*/
-
-
-
-/*A single quadrant of the map from a super block to fragment numbers.*/
-typedef ptrdiff_t oc_sb_map_quad[4];
-/*A map from a super block to fragment numbers.*/
-typedef oc_sb_map_quad oc_sb_map[4];
-/*A single plane of the map from a macro block to fragment numbers.*/
-typedef ptrdiff_t oc_mb_map_plane[4];
-/*A map from a macro block to fragment numbers.*/
-typedef oc_mb_map_plane oc_mb_map[3];
-/*A motion vector.*/
-typedef signed char oc_mv[2];
-
-
-
-/*Super block information.*/
-struct oc_sb_flags{
- unsigned char coded_fully:1;
- unsigned char coded_partially:1;
- unsigned char quad_valid:4;
-};
-
-
-
-/*Information about a fragment which intersects the border of the displayable
- region.
- This marks which pixels belong to the displayable region.*/
-struct oc_border_info{
- /*A bit mask marking which pixels are in the displayable region.
- Pixel (x,y) corresponds to bit (y<<3|x).*/
- ogg_int64_t mask;
- /*The number of pixels in the displayable region.
- This is always positive, and always less than 64.*/
- int npixels;
-};
-
-
-
-/*Fragment information.*/
-struct oc_fragment{
- /*A flag indicating whether or not this fragment is coded.*/
- unsigned coded:1;
- /*A flag indicating that this entire fragment lies outside the displayable
- region of the frame.
- Note the contrast with an invalid macro block, which is outside the coded
- frame, not just the displayable one.
- There are no fragments outside the coded frame by construction.*/
- unsigned invalid:1;
- /*The index of the quality index used for this fragment's AC coefficients.*/
- unsigned qii:6;
- /*The mode of the macroblock this fragment belongs to.*/
- unsigned mb_mode:3;
- /*The index of the associated border information for fragments which lie
- partially outside the displayable region.
- For fragments completely inside or outside this region, this is -1.
- Note that the C standard requires an explicit signed keyword for bitfield
- types, since some compilers may treat them as unsigned without it.*/
- signed int borderi:5;
- /*The prediction-corrected DC component.
- Note that the C standard requires an explicit signed keyword for bitfield
- types, since some compilers may treat them as unsigned without it.*/
- signed int dc:16;
-};
-
-
-
-/*A description of each fragment plane.*/
-struct oc_fragment_plane{
- /*The number of fragments in the horizontal direction.*/
- int nhfrags;
- /*The number of fragments in the vertical direction.*/
- int nvfrags;
- /*The offset of the first fragment in the plane.*/
- ptrdiff_t froffset;
- /*The total number of fragments in the plane.*/
- ptrdiff_t nfrags;
- /*The number of super blocks in the horizontal direction.*/
- unsigned nhsbs;
- /*The number of super blocks in the vertical direction.*/
- unsigned nvsbs;
- /*The offset of the first super block in the plane.*/
- unsigned sboffset;
- /*The total number of super blocks in the plane.*/
- unsigned nsbs;
-};
-
-
-
-/*The shared (encoder and decoder) functions that have accelerated variants.*/
-struct oc_base_opt_vtable{
- void (*frag_copy)(unsigned char *_dst,
- const unsigned char *_src,int _ystride);
- void (*frag_recon_intra)(unsigned char *_dst,int _ystride,
- const ogg_int16_t _residue[64]);
- void (*frag_recon_inter)(unsigned char *_dst,
- const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
- void (*frag_recon_inter2)(unsigned char *_dst,const unsigned char *_src1,
- const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]);
- void (*idct8x8)(ogg_int16_t _y[64],int _last_zzi);
- void (*state_frag_recon)(const oc_theora_state *_state,ptrdiff_t _fragi,
- int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
- void (*state_frag_copy_list)(const oc_theora_state *_state,
- const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
- int _dst_frame,int _src_frame,int _pli);
- void (*state_loop_filter_frag_rows)(const oc_theora_state *_state,
- int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
- void (*restore_fpu)(void);
-};
-
-/*The shared (encoder and decoder) tables that vary according to which variants
- of the above functions are used.*/
-struct oc_base_opt_data{
- const unsigned char *dct_fzig_zag;
-};
-
-
-/*State information common to both the encoder and decoder.*/
-struct oc_theora_state{
- /*The stream information.*/
- th_info info;
- /*Table for shared accelerated functions.*/
- oc_base_opt_vtable opt_vtable;
- /*Table for shared data used by accelerated functions.*/
- oc_base_opt_data opt_data;
- /*CPU flags to detect the presence of extended instruction sets.*/
- ogg_uint32_t cpu_flags;
- /*The fragment plane descriptions.*/
- oc_fragment_plane fplanes[3];
- /*The list of fragments, indexed in image order.*/
- oc_fragment *frags;
- /*The the offset into the reference frame buffer to the upper-left pixel of
- each fragment.*/
- ptrdiff_t *frag_buf_offs;
- /*The motion vector for each fragment.*/
- oc_mv *frag_mvs;
- /*The total number of fragments in a single frame.*/
- ptrdiff_t nfrags;
- /*The list of super block maps, indexed in image order.*/
- oc_sb_map *sb_maps;
- /*The list of super block flags, indexed in image order.*/
- oc_sb_flags *sb_flags;
- /*The total number of super blocks in a single frame.*/
- unsigned nsbs;
- /*The fragments from each color plane that belong to each macro block.
- Fragments are stored in image order (left to right then top to bottom).
- When chroma components are decimated, the extra fragments have an index of
- -1.*/
- oc_mb_map *mb_maps;
- /*The list of macro block modes.
- A negative number indicates the macro block lies entirely outside the
- coded frame.*/
- signed char *mb_modes;
- /*The number of macro blocks in the X direction.*/
- unsigned nhmbs;
- /*The number of macro blocks in the Y direction.*/
- unsigned nvmbs;
- /*The total number of macro blocks.*/
- size_t nmbs;
- /*The list of coded fragments, in coded order.
- Uncoded fragments are stored in reverse order from the end of the list.*/
- ptrdiff_t *coded_fragis;
- /*The number of coded fragments in each plane.*/
- ptrdiff_t ncoded_fragis[3];
- /*The total number of coded fragments.*/
- ptrdiff_t ntotal_coded_fragis;
- /*The index of the buffers being used for each OC_FRAME_* reference frame.*/
- int ref_frame_idx[6];
- /*The actual buffers used for the reference frames.*/
- th_ycbcr_buffer ref_frame_bufs[6];
- /*The storage for the reference frame buffers.*/
- unsigned char *ref_frame_data[6];
- /*The strides for each plane in the reference frames.*/
- int ref_ystride[3];
- /*The number of unique border patterns.*/
- int nborders;
- /*The unique border patterns for all border fragments.
- The borderi field of fragments which straddle the border indexes this
- list.*/
- oc_border_info borders[16];
- /*The frame number of the last keyframe.*/
- ogg_int64_t keyframe_num;
- /*The frame number of the current frame.*/
- ogg_int64_t curframe_num;
- /*The granpos of the current frame.*/
- ogg_int64_t granpos;
- /*The type of the current frame.*/
- signed char frame_type;
- /*The bias to add to the frame count when computing granule positions.*/
- unsigned char granpos_bias;
- /*The number of quality indices used in the current frame.*/
- unsigned char nqis;
- /*The quality indices of the current frame.*/
- unsigned char qis[3];
- /*The dequantization tables, stored in zig-zag order, and indexed by
- qi, pli, qti, and zzi.*/
- ogg_uint16_t *dequant_tables[64][3][2];
- OC_ALIGN16(oc_quant_table dequant_table_data[64][3][2]);
- /*Loop filter strength parameters.*/
- unsigned char loop_filter_limits[64];
-};
-
-
-
-/*The function type used to fill in the chroma plane motion vectors for a
- macro block when 4 different motion vectors are specified in the luma
- plane.
- _cbmvs: The chroma block-level motion vectors to fill in.
- _lmbmv: The luma macro-block level motion vector to fill in for use in
- prediction.
- _lbmvs: The luma block-level motion vectors.*/
-typedef void (*oc_set_chroma_mvs_func)(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]);
-
-
-
/*A map from the index in the zig zag scan to the coefficient number in a
block.*/
extern const unsigned char OC_FZIG_ZAG[128];
@@ -413,10 +88,6 @@
/*The number of indices in the oc_mb_map array that can be valid for each of
the various chroma decimation types.*/
extern const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS];
-/*A table of functions used to fill in the Cb,Cr plane motion vectors for a
- macro block when 4 different motion vectors are specified in the luma
- plane.*/
-extern const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS];
@@ -430,111 +101,4 @@
void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst,
const th_ycbcr_buffer _src);
-int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs);
-void oc_state_clear(oc_theora_state *_state);
-void oc_state_vtable_init_c(oc_theora_state *_state);
-void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
- int _y0,int _yend);
-void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli);
-void oc_state_borders_fill(oc_theora_state *_state,int _refi);
-void oc_state_fill_buffer_ptrs(oc_theora_state *_state,int _buf_idx,
- th_ycbcr_buffer _img);
-int oc_state_mbi_for_pos(oc_theora_state *_state,int _mbx,int _mby);
-int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
- int _pli,int _dx,int _dy);
-
-int oc_state_loop_filter_init(oc_theora_state *_state,int *_bv);
-void oc_state_loop_filter(oc_theora_state *_state,int _frame);
-# if defined(OC_DUMP_IMAGES)
-int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
- const char *_suf);
-# endif
-
-/*Shared accelerated functions.*/
-# if !defined(oc_frag_copy)
-# define oc_frag_copy(_state,_dst,_src,_ystride) \
- ((*(_state)->opt_vtable.frag_copy)(_dst,_src,_ystride))
-# endif
-# if !defined(oc_frag_recon_intra)
-# define oc_frag_recon_intra(_state,_dst,_dst_ystride,_residue) \
- ((*(_state)->opt_vtable.frag_recon_intra)(_dst,_dst_ystride,_residue))
-# endif
-# if !defined(oc_frag_recon_inter)
-# define oc_frag_recon_inter(_state,_dst,_src,_ystride,_residue) \
- ((*(_state)->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue))
-# endif
-# if !defined(oc_frag_recon_inter2)
-# define oc_frag_recon_inter2(_state,_dst,_src1,_src2,_ystride,_residue) \
- ((*(_state)->opt_vtable.frag_recon_inter2)(_dst, \
- _src1,_src2,_ystride,_residue))
-# endif
-# if !defined(oc_idct8x8)
-# define oc_idct8x8(_state,_y,_last_zzi) \
- ((*(_state)->opt_vtable.idct8x8)(_y,_last_zzi))
-# endif
-# if !defined(oc_state_frag_recon)
-# define oc_state_frag_recon(_state,_fragi, \
- _pli,_dct_coeffs,_last_zzi,_dc_quant) \
- ((*(_state)->opt_vtable.state_frag_recon)(_state,_fragi, \
- _pli,_dct_coeffs,_last_zzi,_dc_quant))
-# endif
-# if !defined(oc_state_frag_copy_list)
-# define oc_state_frag_copy_list(_state,_fragis,_nfragis, \
- _dst_frame,_src_frame,_pli) \
- ((*(_state)->opt_vtable.state_frag_copy_list)(_state,_fragis,_nfragis, \
- _dst_frame,_src_frame,_pli))
-# endif
-# if !defined(oc_state_loop_filter_frag_rows)
-# define oc_state_loop_filter_frag_rows(_state, \
- _bv,_refi,_pli,_fragy0,_fragy_end) \
- ((*(_state)->opt_vtable.state_loop_filter_frag_rows)(_state, \
- _bv,_refi,_pli,_fragy0,_fragy_end))
-# endif
-# if !defined(oc_restore_fpu)
-# define oc_restore_fpu(_state) \
- ((*(_state)->opt_vtable.restore_fpu)())
-# endif
-
-/*Default pure-C implementations.*/
-void oc_frag_copy_c(unsigned char *_dst,
- const unsigned char *_src,int _src_ystride);
-void oc_frag_recon_intra_c(unsigned char *_dst,int _dst_ystride,
- const ogg_int16_t _residue[64]);
-void oc_frag_recon_inter_c(unsigned char *_dst,
- const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
-void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1,
- const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]);
-void oc_idct8x8_c(ogg_int16_t _y[64],int _last_zzi);
-void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
- int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
-void oc_state_frag_copy_list_c(const oc_theora_state *_state,
- const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
- int _dst_frame,int _src_frame,int _pli);
-void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,
- int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
-void oc_restore_fpu_c(void);
-
-/*We need a way to call a few encoder functions without introducing a link-time
- dependency into the decoder, while still allowing the old alpha API which
- does not distinguish between encoder and decoder objects to be used.
- We do this by placing a function table at the start of the encoder object
- which can dispatch into the encoder library.
- We do a similar thing for the decoder in case we ever decide to split off a
- common base library.*/
-typedef void (*oc_state_clear_func)(theora_state *_th);
-typedef int (*oc_state_control_func)(theora_state *th,int _req,
- void *_buf,size_t _buf_sz);
-typedef ogg_int64_t (*oc_state_granule_frame_func)(theora_state *_th,
- ogg_int64_t _granulepos);
-typedef double (*oc_state_granule_time_func)(theora_state *_th,
- ogg_int64_t _granulepos);
-
-
-struct oc_state_dispatch_vtable{
- oc_state_clear_func clear;
- oc_state_control_func control;
- oc_state_granule_frame_func granule_frame;
- oc_state_granule_time_func granule_time;
-};
-
#endif
Modified: experimental/derf/theora-ptalarbvorm/lib/state.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/state.c 2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/state.c 2010-09-02 20:17:34 UTC (rev 17378)
@@ -17,22 +17,86 @@
#include <stdlib.h>
#include <string.h>
-#include "internal.h"
-#if defined(OC_X86_ASM)
-#if defined(_MSC_VER)
-# include "x86_vc/x86int.h"
-#else
-# include "x86/x86int.h"
-#endif
-#endif
-#if defined(OC_C64X_ASM)
-# include "c64x/c64xint.h"
-#endif
+#include "state.h"
#if defined(OC_DUMP_IMAGES)
# include <stdio.h>
# include "png.h"
#endif
+/*The function used to fill in the chroma plane motion vectors for a macro
+ block when 4 different motion vectors are specified in the luma plane.
+ This version is for use with chroma decimated in the X and Y directions
+ (4:2:0): all four luma vectors are combined into a single chroma vector.
+ _cbmvs: The chroma block-level motion vectors; only entry 0 is filled in.
+ _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+ int dx;
+ int dy;
+ dx=_lbmvs[0][0]+_lbmvs[1][0]+_lbmvs[2][0]+_lbmvs[3][0];
+ dy=_lbmvs[0][1]+_lbmvs[1][1]+_lbmvs[2][1]+_lbmvs[3][1];
+ _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,2,2);
+ _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,2,2);
+}
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+ block when 4 different motion vectors are specified in the luma plane.
+ This version is for use with chroma decimated in the Y direction.
+ _cbmvs: The chroma block-level motion vectors; entries 0 and 1 are filled in.
+ _lbmvs: The luma block-level motion vectors (0,2 and 1,3 are combined).*/
+static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+ int dx;
+ int dy;
+ dx=_lbmvs[0][0]+_lbmvs[2][0];
+ dy=_lbmvs[0][1]+_lbmvs[2][1];
+ _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+ _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+ dx=_lbmvs[1][0]+_lbmvs[3][0];
+ dy=_lbmvs[1][1]+_lbmvs[3][1];
+ _cbmvs[1][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+ _cbmvs[1][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+}
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+ block when 4 different motion vectors are specified in the luma plane.
+ This version is for use with chroma decimated in the X direction (4:2:2).
+ _cbmvs: The chroma block-level motion vectors; entries 0 and 2 are filled in.
+ _lbmvs: The luma block-level motion vectors (0,1 and 2,3 are combined).*/
+static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+ int dx;
+ int dy;
+ dx=_lbmvs[0][0]+_lbmvs[1][0];
+ dy=_lbmvs[0][1]+_lbmvs[1][1];
+ _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+ _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+ dx=_lbmvs[2][0]+_lbmvs[3][0];
+ dy=_lbmvs[2][1]+_lbmvs[3][1];
+ _cbmvs[2][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+ _cbmvs[2][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+}
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+ block when 4 different motion vectors are specified in the luma plane.
+ This version is for use with no chroma decimation (4:4:4).
+ With no decimation, all four luma block-level motion vectors are copied to
+ the chroma block-level motion vectors unchanged.
+ _cbmvs: The chroma block-level motion vectors to fill in.
+ _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+ memcpy(_cbmvs,_lbmvs,4*sizeof(_lbmvs[0]));
+}
+
+/*A table of functions, one per pixel format (TH_PF_NFORMATS entries), used to
+ fill in the chroma plane motion vectors for a macro block when 4 different
+ motion vectors are specified in the luma plane.*/
+const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={
+ (oc_set_chroma_mvs_func)oc_set_chroma_mvs00,
+ (oc_set_chroma_mvs_func)oc_set_chroma_mvs01,
+ (oc_set_chroma_mvs_func)oc_set_chroma_mvs10,
+ (oc_set_chroma_mvs_func)oc_set_chroma_mvs11
+};
+
+
+
/*Returns the fragment index of the top-left block in a macro block.
This can be used to test whether or not the whole macro block is valid.
_sb_map: The super block map.
@@ -595,7 +659,9 @@
}
-void oc_state_vtable_init_c(oc_theora_state *_state){
+void oc_state_accel_init_c(oc_theora_state *_state){
+ _state->cpu_flags=0;
+#if defined(OC_STATE_USE_VTABLE)
_state->opt_vtable.frag_copy=oc_frag_copy_c;
_state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c;
_state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c;
@@ -606,21 +672,11 @@
_state->opt_vtable.state_loop_filter_frag_rows=
oc_state_loop_filter_frag_rows_c;
_state->opt_vtable.restore_fpu=oc_restore_fpu_c;
+#endif
_state->opt_data.dct_fzig_zag=OC_FZIG_ZAG;
}
-/*Initialize the accelerated function pointers.*/
-void oc_state_vtable_init(oc_theora_state *_state){
-#if defined(OC_X86_ASM)
- oc_state_vtable_init_x86(_state);
-#elif defined(OC_C64X_ASM)
- oc_state_vtable_init_c64x(_state);
-#else
- oc_state_vtable_init_c(_state);
-#endif
-}
-
int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){
int ret;
/*First validate the parameters.*/
@@ -655,7 +711,7 @@
system.*/
_state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
_state->frame_type=OC_UNKWN_FRAME;
- oc_state_vtable_init(_state);
+ oc_state_accel_init(_state);
ret=oc_state_frarray_init(_state);
if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs);
if(ret<0){
Added: experimental/derf/theora-ptalarbvorm/lib/state.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/state.h (rev 0)
+++ experimental/derf/theora-ptalarbvorm/lib/state.h 2010-09-02 20:17:34 UTC (rev 17378)
@@ -0,0 +1,515 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function:
+ last mod: $Id: internal.h 17337 2010-07-19 16:08:54Z tterribe $
+
+ ********************************************************************/
+#if !defined(_state_H)
+# define _state_H (1)
+# include "internal.h"
+# include "huffman.h"
+# include "quant.h"
+
+
+
+/*A single quadrant of the map from a super block to fragment numbers.*/
+typedef ptrdiff_t oc_sb_map_quad[4];
+/*A map from a super block to fragment numbers.*/
+typedef oc_sb_map_quad oc_sb_map[4];
+/*A single plane of the map from a macro block to fragment numbers.*/
+typedef ptrdiff_t oc_mb_map_plane[4];
+/*A map from a macro block to fragment numbers.*/
+typedef oc_mb_map_plane oc_mb_map[3];
+/*A motion vector.*/
+typedef signed char oc_mv[2];
+
+typedef struct oc_sb_flags oc_sb_flags;
+typedef struct oc_border_info oc_border_info;
+typedef struct oc_fragment oc_fragment;
+typedef struct oc_fragment_plane oc_fragment_plane;
+typedef struct oc_base_opt_vtable oc_base_opt_vtable;
+typedef struct oc_base_opt_data oc_base_opt_data;
+typedef struct oc_state_dispatch_vtable oc_state_dispatch_vtable;
+typedef struct oc_theora_state oc_theora_state;
+
+
+
+/*Shared accelerated functions.*/
+# if defined(OC_X86_ASM)
+# if defined(_MSC_VER)
+# include "x86_vc/x86int.h"
+# else
+# include "x86/x86int.h"
+# endif
+# endif
+# if defined(OC_C64X_ASM)
+# include "c64x/c64xint.h"
+# endif
+
+# if !defined(oc_state_accel_init)
+# define oc_state_accel_init oc_state_accel_init_c
+# endif
+# if defined(OC_STATE_USE_VTABLE)
+# if !defined(oc_frag_copy)
+# define oc_frag_copy(_state,_dst,_src,_ystride) \
+ ((*(_state)->opt_vtable.frag_copy)(_dst,_src,_ystride))
+# endif
+# if !defined(oc_frag_recon_intra)
+# define oc_frag_recon_intra(_state,_dst,_dst_ystride,_residue) \
+ ((*(_state)->opt_vtable.frag_recon_intra)(_dst,_dst_ystride,_residue))
+# endif
+# if !defined(oc_frag_recon_inter)
+# define oc_frag_recon_inter(_state,_dst,_src,_ystride,_residue) \
+ ((*(_state)->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue))
+# endif
+# if !defined(oc_frag_recon_inter2)
+# define oc_frag_recon_inter2(_state,_dst,_src1,_src2,_ystride,_residue) \
+ ((*(_state)->opt_vtable.frag_recon_inter2)(_dst, \
+ _src1,_src2,_ystride,_residue))
+# endif
+# if !defined(oc_idct8x8)
+# define oc_idct8x8(_state,_y,_last_zzi) \
+ ((*(_state)->opt_vtable.idct8x8)(_y,_last_zzi))
+# endif
+# if !defined(oc_state_frag_recon)
+# define oc_state_frag_recon(_state,_fragi, \
+ _pli,_dct_coeffs,_last_zzi,_dc_quant) \
+ ((*(_state)->opt_vtable.state_frag_recon)(_state,_fragi, \
+ _pli,_dct_coeffs,_last_zzi,_dc_quant))
+# endif
+# if !defined(oc_state_frag_copy_list)
+# define oc_state_frag_copy_list(_state,_fragis,_nfragis, \
+ _dst_frame,_src_frame,_pli) \
+ ((*(_state)->opt_vtable.state_frag_copy_list)(_state,_fragis,_nfragis, \
+ _dst_frame,_src_frame,_pli))
+# endif
+# if !defined(oc_state_loop_filter_frag_rows)
+# define oc_state_loop_filter_frag_rows(_state, \
+ _bv,_refi,_pli,_fragy0,_fragy_end) \
+ ((*(_state)->opt_vtable.state_loop_filter_frag_rows)(_state, \
+ _bv,_refi,_pli,_fragy0,_fragy_end))
+# endif
+# if !defined(oc_restore_fpu)
+# define oc_restore_fpu(_state) \
+ ((*(_state)->opt_vtable.restore_fpu)())
+# endif
+# else
+# if !defined(oc_frag_copy)
+# define oc_frag_copy(_state,_dst,_src,_ystride) \
+ oc_frag_copy_c(_dst,_src,_ystride)
+# endif
+# if !defined(oc_frag_recon_intra)
+# define oc_frag_recon_intra(_state,_dst,_dst_ystride,_residue) \
+ oc_frag_recon_intra_c(_dst,_dst_ystride,_residue)
+# endif
+# if !defined(oc_frag_recon_inter)
+# define oc_frag_recon_inter(_state,_dst,_src,_ystride,_residue) \
+ oc_frag_recon_inter_c(_dst,_src,_ystride,_residue)
+# endif
+# if !defined(oc_frag_recon_inter2)
+# define oc_frag_recon_inter2(_state,_dst,_src1,_src2,_ystride,_residue) \
+ oc_frag_recon_inter2_c(_dst,_src1,_src2,_ystride,_residue)
+# endif
+# if !defined(oc_idct8x8)
+# define oc_idct8x8(_state,_y,_last_zzi) oc_idct8x8_c(_y,_last_zzi)
+# endif
+# if !defined(oc_state_frag_recon)
+# define oc_state_frag_recon oc_state_frag_recon_c
+# endif
+# if !defined(oc_state_frag_copy_list)
+# define oc_state_frag_copy_list oc_state_frag_copy_list_c
+# endif
+# if !defined(oc_state_loop_filter_frag_rows)
+# define oc_state_loop_filter_frag_rows oc_state_loop_filter_frag_rows_c
+# endif
+# if !defined(oc_restore_fpu)
+# define oc_restore_fpu(_state) do{}while(0)
+# endif
+# endif
+
+
+
+/*A keyframe.*/
+# define OC_INTRA_FRAME (0)
+/*A predicted frame.*/
+# define OC_INTER_FRAME (1)
+/*A frame of unknown type (frame type decision has not yet been made).*/
+# define OC_UNKWN_FRAME (-1)
+
+/*The amount of padding to add to the reconstructed frame buffers on all
+ sides.
+ This is used to allow unrestricted motion vectors without special casing.
+ This must be a multiple of 2.*/
+# define OC_UMV_PADDING (16)
+
+/*Frame classification indices.*/
+/*The previous golden frame.*/
+# define OC_FRAME_GOLD (0)
+/*The previous frame.*/
+# define OC_FRAME_PREV (1)
+/*The current frame.*/
+# define OC_FRAME_SELF (2)
+
+/*The input or output buffer.*/
+# define OC_FRAME_IO (3)
+/*Uncompressed prev golden frame.*/
+# define OC_FRAME_GOLD_ORIG (4)
+/*Uncompressed previous frame. */
+# define OC_FRAME_PREV_ORIG (5)
+
+/*Macroblock modes.*/
+/*Macro block is invalid: It is never coded.*/
+# define OC_MODE_INVALID (-1)
+/*Encoded difference from the same macro block in the previous frame.*/
+# define OC_MODE_INTER_NOMV (0)
+/*Encoded with no motion compensated prediction.*/
+# define OC_MODE_INTRA (1)
+/*Encoded difference from the previous frame offset by the given motion
+ vector.*/
+# define OC_MODE_INTER_MV (2)
+/*Encoded difference from the previous frame offset by the last coded motion
+ vector.*/
+# define OC_MODE_INTER_MV_LAST (3)
+/*Encoded difference from the previous frame offset by the second to last
+ coded motion vector.*/
+# define OC_MODE_INTER_MV_LAST2 (4)
+/*Encoded difference from the same macro block in the previous golden
+ frame.*/
+# define OC_MODE_GOLDEN_NOMV (5)
+/*Encoded difference from the previous golden frame offset by the given motion
+ vector.*/
+# define OC_MODE_GOLDEN_MV (6)
+/*Encoded difference from the previous frame offset by the individual motion
+ vectors given for each block.*/
+# define OC_MODE_INTER_MV_FOUR (7)
+/*The number of (coded) modes.*/
+# define OC_NMODES (8)
+
+/*Determines the reference frame used for a given MB mode.*/
+# define OC_FRAME_FOR_MODE(_x) \
+ OC_UNIBBLE_TABLE32(OC_FRAME_PREV,OC_FRAME_SELF,OC_FRAME_PREV,OC_FRAME_PREV, \
+ OC_FRAME_PREV,OC_FRAME_GOLD,OC_FRAME_GOLD,OC_FRAME_PREV,(_x))
+
+/*Constants for the packet state machine common between encoder and decoder.*/
+
+/*Next packet to emit/read: Codec info header.*/
+# define OC_PACKET_INFO_HDR (-3)
+/*Next packet to emit/read: Comment header.*/
+# define OC_PACKET_COMMENT_HDR (-2)
+/*Next packet to emit/read: Codec setup header.*/
+# define OC_PACKET_SETUP_HDR (-1)
+/*No more packets to emit/read.*/
+# define OC_PACKET_DONE (INT_MAX)
+
+
+
+/*Super blocks are 32x32 segments of pixels in a single color plane indexed
+ in image order.
+ Internally, super blocks are broken up into four quadrants, each of which
+ contains a 2x2 pattern of blocks, each of which is an 8x8 block of pixels.
+ Quadrants, and the blocks within them, are indexed in a special order called
+ a "Hilbert curve" within the super block.
+
+ In order to differentiate between the Hilbert-curve indexing strategy and
+ the regular image order indexing strategy, blocks indexed in image order
+ are called "fragments".
+ Fragments are indexed in image order, left to right, then bottom to top,
+ from Y' plane to Cb plane to Cr plane.
+
+ The co-located fragments in all image planes corresponding to the location
+ of a single quadrant of a luma plane super block form a macro block.
+ Thus there is only a single set of macro blocks for all planes, each of which
+ contains between 6 and 12 fragments, depending on the pixel format.
+ Therefore macro block information is kept in a separate set of arrays from
+ super blocks to avoid unused space in the other planes.
+ The lists are indexed in super block order.
+ That is, the macro block corresponding to the macro block mbi in (luma plane)
+ super block sbi is at index (sbi<<2|mbi).
+ Thus the number of macro blocks in each dimension is always twice the number
+ of super blocks, even when only an odd number fall inside the coded frame.
+ These "extra" macro blocks are just an artifact of our internal data layout,
+ and not part of the coded stream; they are flagged with a negative MB mode.*/
+
+
+
+/*Super block information.*/
+struct oc_sb_flags{
+ unsigned char coded_fully:1;
+ unsigned char coded_partially:1;
+ unsigned char quad_valid:4;
+};
+
+
+
+/*Information about a fragment which intersects the border of the displayable
+ region.
+ This marks which pixels belong to the displayable region.*/
+struct oc_border_info{
+ /*A bit mask marking which pixels are in the displayable region.
+ Pixel (x,y) corresponds to bit (y<<3|x).*/
+ ogg_int64_t mask;
+ /*The number of pixels in the displayable region.
+ This is always positive, and always less than 64.*/
+ int npixels;
+};
+
+
+
+/*Fragment information.*/
+struct oc_fragment{
+ /*A flag indicating whether or not this fragment is coded.*/
+ unsigned coded:1;
+ /*A flag indicating that this entire fragment lies outside the displayable
+ region of the frame.
+ Note the contrast with an invalid macro block, which is outside the coded
+ frame, not just the displayable one.
+ There are no fragments outside the coded frame by construction.*/
+ unsigned invalid:1;
+ /*The index of the quality index used for this fragment's AC coefficients.*/
+ unsigned qii:6;
+ /*The mode of the macroblock this fragment belongs to.*/
+ unsigned mb_mode:3;
+ /*The index of the associated border information for fragments which lie
+ partially outside the displayable region.
+ For fragments completely inside or outside this region, this is -1.
+ Note that the C standard requires an explicit signed keyword for bitfield
+ types, since some compilers may treat them as unsigned without it.*/
+ signed int borderi:5;
+ /*The prediction-corrected DC component.
+ Note that the C standard requires an explicit signed keyword for bitfield
+ types, since some compilers may treat them as unsigned without it.*/
+ signed int dc:16;
+};
+
+
+
+/*A description of each fragment plane.*/
+struct oc_fragment_plane{
+ /*The number of fragments in the horizontal direction.*/
+ int nhfrags;
+ /*The number of fragments in the vertical direction.*/
+ int nvfrags;
+ /*The offset of the first fragment in the plane.*/
+ ptrdiff_t froffset;
+ /*The total number of fragments in the plane.*/
+ ptrdiff_t nfrags;
+ /*The number of super blocks in the horizontal direction.*/
+ unsigned nhsbs;
+ /*The number of super blocks in the vertical direction.*/
+ unsigned nvsbs;
+ /*The offset of the first super block in the plane.*/
+ unsigned sboffset;
+ /*The total number of super blocks in the plane.*/
+ unsigned nsbs;
+};
+
+
+
+/*The shared (encoder and decoder) functions that have accelerated variants.*/
+struct oc_base_opt_vtable{
+ void (*frag_copy)(unsigned char *_dst,
+ const unsigned char *_src,int _ystride);
+ void (*frag_recon_intra)(unsigned char *_dst,int _ystride,
+ const ogg_int16_t _residue[64]);
+ void (*frag_recon_inter)(unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
+ void (*frag_recon_inter2)(unsigned char *_dst,const unsigned char *_src1,
+ const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]);
+ void (*idct8x8)(ogg_int16_t _y[64],int _last_zzi);
+ void (*state_frag_recon)(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
+ void (*state_frag_copy_list)(const oc_theora_state *_state,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+ int _dst_frame,int _src_frame,int _pli);
+ void (*state_loop_filter_frag_rows)(const oc_theora_state *_state,
+ int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
+ void (*restore_fpu)(void);
+};
+
+/*The shared (encoder and decoder) tables that vary according to which variants
+ of the above functions are used.*/
+struct oc_base_opt_data{
+ const unsigned char *dct_fzig_zag;
+};
+
+
+/*State information common to both the encoder and decoder.*/
+struct oc_theora_state{
+ /*The stream information.*/
+ th_info info;
+# if defined(OC_STATE_USE_VTABLE)
+ /*Table for shared accelerated functions.*/
+ oc_base_opt_vtable opt_vtable;
+# endif
+ /*Table for shared data used by accelerated functions.*/
+ oc_base_opt_data opt_data;
+ /*CPU flags to detect the presence of extended instruction sets.*/
+ ogg_uint32_t cpu_flags;
+ /*The fragment plane descriptions.*/
+ oc_fragment_plane fplanes[3];
+ /*The list of fragments, indexed in image order.*/
+ oc_fragment *frags;
+ /*The offset into the reference frame buffer to the upper-left pixel of
+ each fragment.*/
+ ptrdiff_t *frag_buf_offs;
+ /*The motion vector for each fragment.*/
+ oc_mv *frag_mvs;
+ /*The total number of fragments in a single frame.*/
+ ptrdiff_t nfrags;
+ /*The list of super block maps, indexed in image order.*/
+ oc_sb_map *sb_maps;
+ /*The list of super block flags, indexed in image order.*/
+ oc_sb_flags *sb_flags;
+ /*The total number of super blocks in a single frame.*/
+ unsigned nsbs;
+ /*The fragments from each color plane that belong to each macro block.
+ Fragments are stored in image order (left to right then top to bottom).
+ When chroma components are decimated, the extra fragments have an index of
+ -1.*/
+ oc_mb_map *mb_maps;
+ /*The list of macro block modes.
+ A negative number indicates the macro block lies entirely outside the
+ coded frame.*/
+ signed char *mb_modes;
+ /*The number of macro blocks in the X direction.*/
+ unsigned nhmbs;
+ /*The number of macro blocks in the Y direction.*/
+ unsigned nvmbs;
+ /*The total number of macro blocks.*/
+ size_t nmbs;
+ /*The list of coded fragments, in coded order.
+ Uncoded fragments are stored in reverse order from the end of the list.*/
+ ptrdiff_t *coded_fragis;
+ /*The number of coded fragments in each plane.*/
+ ptrdiff_t ncoded_fragis[3];
+ /*The total number of coded fragments.*/
+ ptrdiff_t ntotal_coded_fragis;
+ /*The index of the buffers being used for each OC_FRAME_* reference frame.*/
+ int ref_frame_idx[6];
+ /*The actual buffers used for the reference frames.*/
+ th_ycbcr_buffer ref_frame_bufs[6];
+ /*The storage for the reference frame buffers.*/
+ unsigned char *ref_frame_data[6];
+ /*The strides for each plane in the reference frames.*/
+ int ref_ystride[3];
+ /*The number of unique border patterns.*/
+ int nborders;
+ /*The unique border patterns for all border fragments.
+ The borderi field of fragments which straddle the border indexes this
+ list.*/
+ oc_border_info borders[16];
+ /*The frame number of the last keyframe.*/
+ ogg_int64_t keyframe_num;
+ /*The frame number of the current frame.*/
+ ogg_int64_t curframe_num;
+ /*The granpos of the current frame.*/
+ ogg_int64_t granpos;
+ /*The type of the current frame.*/
+ signed char frame_type;
+ /*The bias to add to the frame count when computing granule positions.*/
+ unsigned char granpos_bias;
+ /*The number of quality indices used in the current frame.*/
+ unsigned char nqis;
+ /*The quality indices of the current frame.*/
+ unsigned char qis[3];
+ /*The dequantization tables, stored in zig-zag order, and indexed by
+ qi, pli, qti, and zzi.*/
+ ogg_uint16_t *dequant_tables[64][3][2];
+ OC_ALIGN16(oc_quant_table dequant_table_data[64][3][2]);
+ /*Loop filter strength parameters.*/
+ unsigned char loop_filter_limits[64];
+};
+
+
+
+/*The function type used to fill in the chroma plane motion vectors for a
+ macro block when 4 different motion vectors are specified in the luma
+ plane.
+ _cbmvs: The chroma block-level motion vectors to fill in.
+ _lmbmv: The luma macro-block level motion vector for use in prediction
+ (documented here, but absent from the prototype below).
+ _lbmvs: The luma block-level motion vectors.*/
+typedef void (*oc_set_chroma_mvs_func)(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]);
+
+
+
+/*A table of functions used to fill in the Cb,Cr plane motion vectors for a
+ macro block when 4 different motion vectors are specified in the luma
+ plane.*/
+extern const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS];
+
+
+
+int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs);
+void oc_state_clear(oc_theora_state *_state);
+void oc_state_accel_init_c(oc_theora_state *_state);
+void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
+ int _y0,int _yend);
+void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli);
+void oc_state_borders_fill(oc_theora_state *_state,int _refi);
+void oc_state_fill_buffer_ptrs(oc_theora_state *_state,int _buf_idx,
+ th_ycbcr_buffer _img);
+int oc_state_mbi_for_pos(oc_theora_state *_state,int _mbx,int _mby);
+int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
+ int _pli,int _dx,int _dy);
+
+int oc_state_loop_filter_init(oc_theora_state *_state,int *_bv);
+void oc_state_loop_filter(oc_theora_state *_state,int _frame);
+# if defined(OC_DUMP_IMAGES)
+int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
+ const char *_suf);
+# endif
+
+/*Default pure-C implementations of shared accelerated functions.*/
+void oc_frag_copy_c(unsigned char *_dst,
+ const unsigned char *_src,int _src_ystride);
+void oc_frag_recon_intra_c(unsigned char *_dst,int _dst_ystride,
+ const ogg_int16_t _residue[64]);
+void oc_frag_recon_inter_c(unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
+void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1,
+ const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]);
+void oc_idct8x8_c(ogg_int16_t _y[64],int _last_zzi);
+void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
+void oc_state_frag_copy_list_c(const oc_theora_state *_state,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
+ int _dst_frame,int _src_frame,int _pli);
+void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,
+ int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
+void oc_restore_fpu_c(void);
+
+/*We need a way to call a few encoder functions without introducing a link-time
+ dependency into the decoder, while still allowing the old alpha API which
+ does not distinguish between encoder and decoder objects to be used.
+ We do this by placing a function table at the start of the encoder object
+ which can dispatch into the encoder library.
+ We do a similar thing for the decoder in case we ever decide to split off a
+ common base library.*/
+typedef void (*oc_state_clear_func)(theora_state *_th);
+typedef int (*oc_state_control_func)(theora_state *th,int _req,
+ void *_buf,size_t _buf_sz);
+typedef ogg_int64_t (*oc_state_granule_frame_func)(theora_state *_th,
+ ogg_int64_t _granulepos);
+typedef double (*oc_state_granule_time_func)(theora_state *_th,
+ ogg_int64_t _granulepos);
+
+
+struct oc_state_dispatch_vtable{
+ oc_state_clear_func clear;
+ oc_state_control_func control;
+ oc_state_granule_frame_func granule_frame;
+ oc_state_granule_time_func granule_time;
+};
+
+#endif
Modified: experimental/derf/theora-ptalarbvorm/lib/x86/sse2idct.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86/sse2idct.c 2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/x86/sse2idct.c 2010-09-02 20:17:34 UTC (rev 17378)
@@ -16,7 +16,7 @@
********************************************************************/
/*SSE2 acceleration of Theora's iDCT.*/
-#include "x86int.h"
+#include "x86enc.h"
#include "sse2trans.h"
#include "../dct.h"
Modified: experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.c 2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.c 2010-09-02 20:17:34 UTC (rev 17378)
@@ -18,10 +18,11 @@
#if defined(OC_X86_ASM)
-void oc_enc_vtable_init_x86(oc_enc_ctx *_enc){
+void oc_enc_accel_init_x86(oc_enc_ctx *_enc){
ogg_uint32_t cpu_flags;
cpu_flags=_enc->state.cpu_flags;
- oc_enc_vtable_init_c(_enc);
+ oc_enc_accel_init_c(_enc);
+# if defined(OC_ENC_USE_VTABLE)
if(cpu_flags&OC_CPU_X86_MMX){
_enc->opt_vtable.frag_sub=oc_enc_frag_sub_mmx;
_enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_mmx;
@@ -39,19 +40,22 @@
_enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_mmxext;
}
if(cpu_flags&OC_CPU_X86_SSE2){
-# if defined(OC_X86_64_ASM)
+# if defined(OC_X86_64_ASM)
_enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_x86_64sse2;
-# endif
+# endif
_enc->opt_vtable.frag_ssd=oc_enc_frag_ssd_sse2;
_enc->opt_vtable.frag_border_ssd=oc_enc_frag_border_ssd_sse2;
_enc->opt_vtable.frag_satd=oc_enc_frag_satd_sse2;
_enc->opt_vtable.frag_satd2=oc_enc_frag_satd2_sse2;
_enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_sse2;
- _enc->opt_data.enquant_table_size=128*sizeof(ogg_uint16_t);
- _enc->opt_data.enquant_table_alignment=16;
_enc->opt_vtable.enquant_table_init=oc_enc_enquant_table_init_x86;
_enc->opt_vtable.enquant_table_fixup=oc_enc_enquant_table_fixup_x86;
_enc->opt_vtable.quantize=oc_enc_quantize_sse2;
+# endif
+ _enc->opt_data.enquant_table_size=128*sizeof(ogg_uint16_t);
+ _enc->opt_data.enquant_table_alignment=16;
+# if defined(OC_ENC_USE_VTABLE)
}
+# endif
}
#endif
Modified: experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.h 2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.h 2010-09-02 20:17:34 UTC (rev 17378)
@@ -17,19 +17,62 @@
#if !defined(_x86_x86enc_H)
# define _x86_x86enc_H (1)
-# include "../encint.h"
# include "x86int.h"
-void oc_enc_vtable_init_x86(oc_enc_ctx *_enc);
+# if defined(OC_X86_ASM)
+# define oc_enc_accel_init oc_enc_accel_init_x86
+# if defined(OC_X86_64_ASM)
+/*x86-64 guarantees SIMD support up through at least SSE2.
+ If the best routine we have available only needs SSE2 (which at the moment
+ covers all of them), then we can avoid runtime detection and the indirect
+ call.*/
+# define oc_enc_frag_sub(_enc,_diff,_x,_y,_stride) \
+ oc_enc_frag_sub_mmx(_diff,_x,_y,_stride)
+# define oc_enc_frag_sub_128(_enc,_diff,_x,_stride) \
+ oc_enc_frag_sub_128_mmx(_diff,_x,_stride)
+# define oc_enc_frag_sad(_enc,_src,_ref,_ystride) \
+ oc_enc_frag_sad_mmxext(_src,_ref,_ystride)
+# define oc_enc_frag_sad_thresh(_enc,_src,_ref,_ystride,_thresh) \
+ oc_enc_frag_sad_thresh_mmxext(_src,_ref,_ystride,_thresh)
+# define oc_enc_frag_sad2_thresh(_enc,_src,_ref1,_ref2,_ystride,_thresh) \
+ oc_enc_frag_sad2_thresh_mmxext(_src,_ref1,_ref2,_ystride,_thresh)
+# define oc_enc_frag_satd(_enc,_dc,_src,_ref,_ystride) \
+ oc_enc_frag_satd_sse2(_dc,_src,_ref,_ystride)
+# define oc_enc_frag_satd2(_enc,_dc,_src,_ref1,_ref2,_ystride) \
+ oc_enc_frag_satd2_sse2(_dc,_src,_ref1,_ref2,_ystride)
+# define oc_enc_frag_intra_satd(_enc,_dc,_src,_ystride) \
+ oc_enc_frag_intra_satd_sse2(_dc,_src,_ystride)
+# define oc_enc_frag_ssd(_enc,_src,_ref,_ystride) \
+ oc_enc_frag_ssd_sse2(_src,_ref,_ystride)
+# define oc_enc_frag_border_ssd(_enc,_src,_ref,_ystride,_mask) \
+ oc_enc_frag_border_ssd_sse2(_src,_ref,_ystride,_mask)
+# define oc_enc_frag_copy2(_enc,_dst,_src1,_src2,_ystride) \
+ oc_int_frag_copy2_mmxext(_dst,_ystride,_src1,_src2,_ystride)
+# define oc_enc_enquant_table_init(_enc,_enquant,_dequant) \
+ oc_enc_enquant_table_init_x86(_enquant,_dequant)
+# define oc_enc_enquant_table_fixup(_enc,_enquant,_nqis) \
+ oc_enc_enquant_table_fixup_x86(_enquant,_nqis)
+# define oc_enc_quantize(_enc,_qdct,_dct,_dequant,_enquant) \
+ oc_enc_quantize_sse2(_qdct,_dct,_dequant,_enquant)
+# define oc_enc_frag_recon_intra(_enc,_dst,_ystride,_residue) \
+ oc_frag_recon_intra_mmx(_dst,_ystride,_residue)
+# define oc_enc_frag_recon_inter(_enc,_dst,_src,_ystride,_residue) \
+ oc_frag_recon_inter_mmx(_dst,_src,_ystride,_residue)
+# define oc_enc_fdct8x8(_enc,_y,_x) \
+ oc_enc_fdct8x8_x86_64sse2(_y,_x)
+# else
+# define OC_ENC_USE_VTABLE (1)
+# endif
+# endif
+# include "../encint.h"
+
+void oc_enc_accel_init_x86(oc_enc_ctx *_enc);
+
void oc_enc_frag_sub_mmx(ogg_int16_t _diff[64],
const unsigned char *_x,const unsigned char *_y,int _stride);
void oc_enc_frag_sub_128_mmx(ogg_int16_t _diff[64],
const unsigned char *_x,int _stride);
-unsigned oc_enc_frag_ssd_sse2(const unsigned char *_src,
- const unsigned char *_ref,int _ystride);
-unsigned oc_enc_frag_border_ssd_sse2(const unsigned char *_src,
- const unsigned char *_ref,int _ystride,ogg_int64_t _mask);
unsigned oc_enc_frag_sad_mmxext(const unsigned char *_src,
const unsigned char *_ref,int _ystride);
unsigned oc_enc_frag_sad_thresh_mmxext(const unsigned char *_src,
@@ -45,19 +88,23 @@
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride);
unsigned oc_enc_frag_satd2_sse2(unsigned *_dc,const unsigned char *_src,
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride);
+unsigned oc_enc_frag_intra_satd_mmxext(unsigned *_dc,
+ const unsigned char *_src,int _ystride);
unsigned oc_enc_frag_intra_satd_sse2(unsigned *_dc,
const unsigned char *_src,int _ystride);
-unsigned oc_enc_frag_intra_satd_mmxext(unsigned *_dc,
- const unsigned char *_src,int _ystride);
+unsigned oc_enc_frag_ssd_sse2(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_border_ssd_sse2(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,ogg_int64_t _mask);
+void oc_int_frag_copy2_mmxext(unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src1,const unsigned char *_src2,int _src_ystride);
+void oc_enc_frag_copy2_mmxext(unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride);
void oc_enc_enquant_table_init_x86(void *_enquant,
const ogg_uint16_t _dequant[64]);
void oc_enc_enquant_table_fixup_x86(void *_enquant[3][3][2],int _nqis);
int oc_enc_quantize_sse2(ogg_int16_t _qdct[64],const ogg_int16_t _dct[64],
const ogg_uint16_t _dequant[64],const void *_enquant);
-void oc_int_frag_copy2_mmxext(unsigned char *_dst,int _dst_ystride,
- const unsigned char *_src1,const unsigned char *_src2,int _src_ystride);
-void oc_enc_frag_copy2_mmxext(unsigned char *_dst,
- const unsigned char *_src1,const unsigned char *_src2,int _ystride);
void oc_enc_fdct8x8_mmx(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
# if defined(OC_X86_64_ASM)
Modified: experimental/derf/theora-ptalarbvorm/lib/x86/x86int.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86/x86int.h 2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/x86/x86int.h 2010-09-02 20:17:34 UTC (rev 17378)
@@ -18,6 +18,35 @@
#if !defined(_x86_x86int_H)
# define _x86_x86int_H (1)
# include "../internal.h"
+
+# if defined(OC_X86_ASM)
+# define oc_state_accel_init oc_state_accel_init_x86
+# if defined(OC_X86_64_ASM)
+/*x86-64 guarantees SIMD support up through at least SSE2.
+ If the best routine we have available only needs SSE2 (which at the moment
+ covers all of them), then we can avoid runtime detection and the indirect
+ call.*/
+# define oc_frag_copy(_state,_dst,_src,_ystride) \
+ oc_frag_copy_mmx(_dst,_src,_ystride)
+# define oc_frag_recon_intra(_state,_dst,_ystride,_residue) \
+ oc_frag_recon_intra_mmx(_dst,_ystride,_residue)
+# define oc_frag_recon_inter(_state,_dst,_src,_ystride,_residue) \
+ oc_frag_recon_inter_mmx(_dst,_src,_ystride,_residue)
+# define oc_frag_recon_inter2(_state,_dst,_src1,_src2,_ystride,_residue) \
+ oc_frag_recon_inter2_mmx(_dst,_src1,_src2,_ystride,_residue)
+# define oc_idct8x8(_state,_y,_last_zzi) \
+ oc_idct8x8_sse2(_y,_last_zzi)
+# define oc_state_frag_recon oc_state_frag_recon_mmx
+# define oc_state_frag_copy_list oc_state_frag_copy_list_mmx
+# define oc_state_loop_filter_frag_rows oc_state_loop_filter_frag_rows_mmxext
+# define oc_restore_fpu(_state) \
+ oc_restore_fpu_mmx()
+# else
+# define OC_STATE_USE_VTABLE (1)
+# endif
+# endif
+
+# include "../state.h"
# include "cpu.h"
/*Converts the expression in the argument to a string.*/
@@ -62,7 +91,7 @@
extern const short __attribute__((aligned(16))) OC_IDCT_CONSTS[64];
-void oc_state_vtable_init_x86(oc_theora_state *_state);
+void oc_state_accel_init_x86(oc_theora_state *_state);
void oc_frag_copy_mmx(unsigned char *_dst,
const unsigned char *_src,int _ystride);
Modified: experimental/derf/theora-ptalarbvorm/lib/x86/x86state.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86/x86state.c 2010-09-01 22:23:12 UTC (rev 17377)
+++ experimental/derf/theora-ptalarbvorm/lib/x86/x86state.c 2010-09-02 20:17:34 UTC (rev 17378)
@@ -61,8 +61,10 @@
64,64,64,64,64,64,64,64
};
-void oc_state_vtable_init_x86(oc_theora_state *_state){
+void oc_state_accel_init_x86(oc_theora_state *_state){
+ oc_state_accel_init_c(_state);
_state->cpu_flags=oc_cpu_flags_get();
+# if defined(OC_STATE_USE_VTABLE)
if(_state->cpu_flags&OC_CPU_X86_MMX){
_state->opt_vtable.frag_copy=oc_frag_copy_mmx;
_state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx;
@@ -76,14 +78,16 @@
_state->opt_vtable.restore_fpu=oc_restore_fpu_mmx;
_state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_MMX;
}
- else oc_state_vtable_init_c(_state);
if(_state->cpu_flags&OC_CPU_X86_MMXEXT){
_state->opt_vtable.state_loop_filter_frag_rows=
oc_state_loop_filter_frag_rows_mmxext;
}
if(_state->cpu_flags&OC_CPU_X86_SSE2){
_state->opt_vtable.idct8x8=oc_idct8x8_sse2;
+# endif
_state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_SSE2;
+# if defined(OC_STATE_USE_VTABLE)
}
+# endif
}
#endif
More information about the commits
mailing list