[xiph-commits] r17173 - in branches/theorarm-merge-branch: . lib lib/x86
robin at svn.xiph.org
robin at svn.xiph.org
Sat Apr 24 07:22:55 PDT 2010
Author: robin
Date: 2010-04-24 07:22:55 -0700 (Sat, 24 Apr 2010)
New Revision: 17173
Modified:
branches/theorarm-merge-branch/configure.ac
branches/theorarm-merge-branch/lib/Makefile.am
branches/theorarm-merge-branch/lib/analyze.c
branches/theorarm-merge-branch/lib/bitpack.h
branches/theorarm-merge-branch/lib/decode.c
branches/theorarm-merge-branch/lib/encint.h
branches/theorarm-merge-branch/lib/encode.c
branches/theorarm-merge-branch/lib/huffdec.c
branches/theorarm-merge-branch/lib/internal.h
branches/theorarm-merge-branch/lib/mcenc.c
branches/theorarm-merge-branch/lib/state.c
branches/theorarm-merge-branch/lib/x86/mmxstate.c
branches/theorarm-merge-branch/lib/x86/x86int.h
Log:
[Reapply of revision 17171 that went in without a commit message, and was
therefore subsequently backed out in 17172.]
Tweak theorarm-merge-branch so that it still builds on Linux using
./autogen.sh. This has meant pushing several of the oc_mv changes through
the encoder side too - this needs to be discussed with the library
maintainers, as I fear they may not like it.
I can't see any other way to do it without losing speed on ARMs, but maybe
smarter people than me can spot something.
The next step is to put the bitreading changes in, protected by OC_LIBOGG2.
Modified: branches/theorarm-merge-branch/configure.ac
===================================================================
--- branches/theorarm-merge-branch/configure.ac 2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/configure.ac 2010-04-24 14:22:55 UTC (rev 17173)
@@ -58,6 +58,7 @@
AC_PROG_CPP
CFLAGS="$cflags_save"
+AM_PROG_AS
AM_PROG_CC_C_O
AC_LIBTOOL_WIN32_DLL
AM_PROG_LIBTOOL
@@ -191,6 +192,7 @@
cpu_x86_64=no
cpu_x86_32=no
+cpu_arm=no
AC_ARG_ENABLE(asm,
AS_HELP_STRING([--disable-asm], [Disable assembly optimizations]),
[ ac_enable_asm=$enableval ], [ ac_enable_asm=yes] )
@@ -212,12 +214,17 @@
AC_DEFINE([OC_X86_ASM], [], [make use of x86 asm optimization])
AC_DEFINE([OC_X86_64_ASM], [], [make use of x86_64 asm optimization])
;;
+ arm)
+ cpu_arm=yes
+ cpu_optimization="arm"
+ AC_DEFINE([OC_ARM_ASM], [], [make use of ARM asm optimization])
esac
else
cpu_optimization="disabled"
fi
AM_CONDITIONAL([CPU_x86_64], [test x$cpu_x86_64 = xyes])
AM_CONDITIONAL([CPU_x86_32], [test x$cpu_x86_32 = xyes])
+AM_CONDITIONAL([CPU_ARM], [test x$cpu_arm = xyes])
# Test whenever ld supports -version-script
AC_PROG_LD
Modified: branches/theorarm-merge-branch/lib/Makefile.am
===================================================================
--- branches/theorarm-merge-branch/lib/Makefile.am 2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/Makefile.am 2010-04-24 14:22:55 UTC (rev 17173)
@@ -96,11 +96,13 @@
x86/mmxstate.c \
x86/x86state.c
+if CPU_ARM
decoder_arm_sources = \
arm/ARMfrag.s \
arm/ARMidct.s \
arm/ARMpp.s \
arm/ARMstate.c
+endif
if CPU_x86_64
decoder_arch_sources = $(decoder_x86_sources)
Modified: branches/theorarm-merge-branch/lib/analyze.c
===================================================================
--- branches/theorarm-merge-branch/lib/analyze.c 2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/analyze.c 2010-04-24 14:22:55 UTC (rev 17173)
@@ -545,7 +545,7 @@
/*Temporary encoder state for the analysis pipeline.*/
struct oc_enc_pipeline_state{
- int bounding_values[256];
+ signed char bounding_values[256];
oc_fr_state fr[3];
oc_qii_state qs[3];
/*Condensed dequantization tables.*/
@@ -805,7 +805,7 @@
const oc_mv *frag_mvs;
frag_mvs=(const oc_mv *)_enc->state.frag_mvs;
nmv_offs=oc_state_get_mv_offsets(&_enc->state,mv_offs,_pli,
- frag_mvs[_fragi][0],frag_mvs[_fragi][1]);
+ frag_mvs[_fragi].v[0],frag_mvs[_fragi].v[1]);
if(nmv_offs>1){
oc_enc_frag_copy2(_enc,dst,
ref+mv_offs[0],ref+mv_offs[1],ystride);
@@ -2139,7 +2139,7 @@
unsigned _mbi,int _mb_mode,const oc_fr_state *_fr,const oc_qii_state *_qs,
const unsigned _skip_ssd[12],const unsigned _rd_scale[4]){
static const oc_mv OC_MV_ZERO;
- oc_cost_inter(_enc,_modec,_mbi,_mb_mode,OC_MV_ZERO,
+ oc_cost_inter(_enc,_modec,_mbi,_mb_mode,&OC_MV_ZERO.v[0],
_fr,_qs,_skip_ssd,_rd_scale);
}
@@ -2165,8 +2165,8 @@
unsigned _mbi,oc_mv _mv[4],const oc_fr_state *_fr,const oc_qii_state *_qs,
const unsigned _skip_ssd[12],const unsigned _rd_scale[5]){
unsigned frag_satd[12];
- oc_mv lbmvs[4];
- oc_mv cbmvs[4];
+ oc_mv4 lbmvs;
+ oc_mv4 cbmvs;
const unsigned char *src;
const unsigned char *ref;
int ystride;
@@ -2198,12 +2198,12 @@
_modec->rate=_modec->ssd=0;
for(bi=0;bi<4;bi++){
fragi=mb_map[0][bi];
- dx=_mv[bi][0];
- dy=_mv[bi][1];
+ dx=_mv[bi].v[0];
+ dy=_mv[bi].v[1];
/*Save the block MVs as the current ones while we're here; we'll replace
them if we don't ultimately choose 4MV mode.*/
- frag_mvs[fragi][0]=(signed char)dx;
- frag_mvs[fragi][1]=(signed char)dy;
+ frag_mvs[fragi].v[0]=(signed char)dx;
+ frag_mvs[fragi].v[1]=(signed char)dy;
frag_offs=frag_buf_offs[fragi];
if(oc_state_get_mv_offsets(&_enc->state,mv_offs,0,dx,dy)>1){
satd=oc_enc_frag_satd2(_enc,&dc,src+frag_offs,
@@ -2223,16 +2223,16 @@
nqis=_enc->state.nqis;
for(bi=0;bi<4;bi++){
if(_modec->qii[OC_MB_PHASE[_mbi&3][bi]]>=nqis){
- memset(lbmvs+bi,0,sizeof(*lbmvs));
+ memset(&lbmvs.v[bi],0,sizeof(lbmvs.v[0]));
}
else{
- memcpy(lbmvs+bi,_mv+bi,sizeof(*lbmvs));
- bits0+=OC_MV_BITS[0][_mv[bi][0]+31]+OC_MV_BITS[0][_mv[bi][1]+31];
+ memcpy(&lbmvs.v[bi],_mv+bi,sizeof(lbmvs.v[0]));
+ bits0+=OC_MV_BITS[0][_mv[bi].v[0]+31]+OC_MV_BITS[0][_mv[bi].v[1]+31];
bits1+=12;
}
}
- (*OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt])(cbmvs,
- (const oc_mv *)lbmvs);
+ (*OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt])(&cbmvs,
+ (const oc_mv4 *)&lbmvs);
map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
/*Note: This assumes ref_ystride[1]==ref_ystride[2].*/
@@ -2242,8 +2242,8 @@
pli=mapi>>2;
bi=mapi&3;
fragi=mb_map[pli][bi];
- dx=cbmvs[bi][0];
- dy=cbmvs[bi][1];
+ dx=cbmvs.v[bi].v[0];
+ dy=cbmvs.v[bi].v[1];
frag_offs=frag_buf_offs[fragi];
/*TODO: We could save half these calls by re-using the results for the Cb
and Cr planes; is it worth it?*/
@@ -2314,7 +2314,7 @@
luma_avg=OC_CLAMPI(90<<8,_enc->luma_avg,160<<8);
mcu_rd_scale=_enc->mcu_rd_scale;
mcu_rd_iscale=_enc->mcu_rd_iscale;
- last_mv[0]=last_mv[1]=prior_mv[0]=prior_mv[1]=0;
+ last_mv.v[0]=last_mv.v[1]=prior_mv.v[0]=prior_mv.v[1]=0;
/*Choose MVs and MB modes and quantize and code luma.
Must be done in Hilbert order.*/
map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
@@ -2405,18 +2405,18 @@
oc_cost_intra(_enc,modes+OC_MODE_INTRA,mbi,
pipe.fr+0,pipe.qs+0,intra_satd,skip_ssd,rd_scale);
mb_mv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_INTER_MV,mbi,
- OC_MODE_INTER_MV,embs[mbi].unref_mv[OC_FRAME_PREV],
+ OC_MODE_INTER_MV,embs[mbi].unref_mv[OC_FRAME_PREV].v,
pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
oc_cost_inter(_enc,modes+OC_MODE_INTER_MV_LAST,mbi,
- OC_MODE_INTER_MV_LAST,last_mv,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
+ OC_MODE_INTER_MV_LAST,last_mv.v,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
oc_cost_inter(_enc,modes+OC_MODE_INTER_MV_LAST2,mbi,
- OC_MODE_INTER_MV_LAST2,prior_mv,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
+ OC_MODE_INTER_MV_LAST2,prior_mv.v,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
oc_cost_inter4mv(_enc,modes+OC_MODE_INTER_MV_FOUR,mbi,
embs[mbi].block_mv,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
oc_cost_inter_nomv(_enc,modes+OC_MODE_GOLDEN_NOMV,mbi,
OC_MODE_GOLDEN_NOMV,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
mb_gmv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_GOLDEN_MV,mbi,
- OC_MODE_GOLDEN_MV,embs[mbi].unref_mv[OC_FRAME_GOLD],
+ OC_MODE_GOLDEN_MV,embs[mbi].unref_mv[OC_FRAME_GOLD].v,
pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
/*The explicit MV modes (2,6,7) have not yet gone through halfpel
refinement.
@@ -2441,7 +2441,7 @@
embs[mbi].refined|=0x40;
}
mb_gmv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_GOLDEN_MV,mbi,
- OC_MODE_GOLDEN_MV,embs[mbi].analysis_mv[0][OC_FRAME_GOLD],
+ OC_MODE_GOLDEN_MV,embs[mbi].analysis_mv[0][OC_FRAME_GOLD].v,
pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
}
if(!(embs[mbi].refined&0x04)){
@@ -2449,7 +2449,7 @@
embs[mbi].refined|=0x04;
}
mb_mv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_INTER_MV,mbi,
- OC_MODE_INTER_MV,embs[mbi].analysis_mv[0][OC_FRAME_PREV],
+ OC_MODE_INTER_MV,embs[mbi].analysis_mv[0][OC_FRAME_PREV].v,
pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
/*Finally, pick the mode with the cheapest estimated R-D cost.*/
mb_mode=OC_MODE_INTER_NOMV;
@@ -2496,26 +2496,26 @@
if(mb_mode!=OC_MODE_INTER_MV_FOUR){
switch(mb_mode){
case OC_MODE_INTER_MV:{
- dx=embs[mbi].analysis_mv[0][OC_FRAME_PREV][0];
- dy=embs[mbi].analysis_mv[0][OC_FRAME_PREV][1];
+ dx=embs[mbi].analysis_mv[0][OC_FRAME_PREV].v[0];
+ dy=embs[mbi].analysis_mv[0][OC_FRAME_PREV].v[1];
}break;
case OC_MODE_INTER_MV_LAST:{
- dx=last_mv[0];
- dy=last_mv[1];
+ dx=last_mv.v[0];
+ dy=last_mv.v[1];
}break;
case OC_MODE_INTER_MV_LAST2:{
- dx=prior_mv[0];
- dy=prior_mv[1];
+ dx=prior_mv.v[0];
+ dy=prior_mv.v[1];
}break;
case OC_MODE_GOLDEN_MV:{
- dx=embs[mbi].analysis_mv[0][OC_FRAME_GOLD][0];
- dy=embs[mbi].analysis_mv[0][OC_FRAME_GOLD][1];
+ dx=embs[mbi].analysis_mv[0][OC_FRAME_GOLD].v[0];
+ dy=embs[mbi].analysis_mv[0][OC_FRAME_GOLD].v[1];
}break;
}
for(bi=0;bi<4;bi++){
fragi=mb_maps[mbi][0][bi];
- frag_mvs[fragi][0]=(signed char)dx;
- frag_mvs[fragi][1]=(signed char)dy;
+ frag_mvs[fragi].v[0]=(signed char)dx;
+ frag_mvs[fragi].v[1]=(signed char)dy;
}
}
for(bi=0;bi<4;bi++){
@@ -2529,16 +2529,16 @@
mb_mode=mb_modes[mbi];
switch(mb_mode){
case OC_MODE_INTER_MV:{
- memcpy(prior_mv,last_mv,sizeof(prior_mv));
+ memcpy(prior_mv.v,last_mv.v,sizeof(prior_mv));
/*If we're backing out from 4MV, find the MV we're actually
using.*/
if(orig_mb_mode==OC_MODE_INTER_MV_FOUR){
for(bi=0;;bi++){
fragi=mb_maps[mbi][0][bi];
if(frags[fragi].coded){
- memcpy(last_mv,frag_mvs[fragi],sizeof(last_mv));
- dx=frag_mvs[fragi][0];
- dy=frag_mvs[fragi][1];
+ memcpy(last_mv.v,frag_mvs[fragi].v,sizeof(last_mv));
+ dx=frag_mvs[fragi].v[0];
+ dy=frag_mvs[fragi].v[1];
break;
}
}
@@ -2546,39 +2546,39 @@
}
/*Otherwise we used the original analysis MV.*/
else{
- memcpy(last_mv,
- embs[mbi].analysis_mv[0][OC_FRAME_PREV],sizeof(last_mv));
+ memcpy(last_mv.v,
+ embs[mbi].analysis_mv[0][OC_FRAME_PREV].v,sizeof(last_mv));
}
_enc->mv_bits[0]+=mb_mv_bits_0;
_enc->mv_bits[1]+=12;
}break;
case OC_MODE_INTER_MV_LAST2:{
oc_mv tmp_mv;
- memcpy(tmp_mv,prior_mv,sizeof(tmp_mv));
- memcpy(prior_mv,last_mv,sizeof(prior_mv));
- memcpy(last_mv,tmp_mv,sizeof(last_mv));
+ memcpy(tmp_mv.v,prior_mv.v,sizeof(tmp_mv));
+ memcpy(prior_mv.v,last_mv.v,sizeof(prior_mv));
+ memcpy(last_mv.v,tmp_mv.v,sizeof(last_mv));
}break;
case OC_MODE_GOLDEN_MV:{
_enc->mv_bits[0]+=mb_gmv_bits_0;
_enc->mv_bits[1]+=12;
}break;
case OC_MODE_INTER_MV_FOUR:{
- oc_mv lbmvs[4];
- oc_mv cbmvs[4];
- memcpy(prior_mv,last_mv,sizeof(prior_mv));
+ oc_mv4 lbmvs;
+ oc_mv4 cbmvs;
+ memcpy(prior_mv.v,last_mv.v,sizeof(prior_mv));
for(bi=0;bi<4;bi++){
fragi=mb_maps[mbi][0][bi];
if(frags[fragi].coded){
- memcpy(last_mv,frag_mvs[fragi],sizeof(last_mv));
- memcpy(lbmvs[bi],frag_mvs[fragi],sizeof(lbmvs[bi]));
- _enc->mv_bits[0]+=OC_MV_BITS[0][frag_mvs[fragi][0]+31]
- +OC_MV_BITS[0][frag_mvs[fragi][1]+31];
+ memcpy(last_mv.v,frag_mvs[fragi].v,sizeof(last_mv));
+ memcpy(lbmvs.v[bi].v,frag_mvs[fragi].v,sizeof(lbmvs.v[bi]));
+ _enc->mv_bits[0]+=OC_MV_BITS[0][frag_mvs[fragi].v[0]+31]
+ +OC_MV_BITS[0][frag_mvs[fragi].v[1]+31];
_enc->mv_bits[1]+=12;
}
/*Replace the block MVs for not-coded blocks with (0,0).*/
- else memset(lbmvs[bi],0,sizeof(lbmvs[bi]));
+ else memset(lbmvs.v[bi].v,0,sizeof(lbmvs.v[bi]));
}
- (*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs);
+ (*set_chroma_mvs)(&cbmvs,&lbmvs);
for(mapii=4;mapii<nmap_idxs;mapii++){
mapi=map_idxs[mapii];
pli=mapi>>2;
@@ -2586,7 +2586,7 @@
fragi=mb_maps[mbi][pli][bi];
frags[fragi].mb_mode=mb_mode;
frags[fragi].qii=modes[OC_MODE_INTER_MV_FOUR].qii[mapii];
- memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(frag_mvs[fragi]));
+ memcpy(frag_mvs[fragi].v,&cbmvs.v[bi],sizeof(frag_mvs[fragi]));
}
}break;
}
@@ -2613,8 +2613,8 @@
values won't have been chosen with the right MV, but it's
probaby not worth re-estimating them.*/
frags[fragi].qii=modes[mb_mode].qii[mapii];
- frag_mvs[fragi][0]=(signed char)dx;
- frag_mvs[fragi][1]=(signed char)dy;
+ frag_mvs[fragi].v[0]=(signed char)dx;
+ frag_mvs[fragi].v[1]=(signed char)dy;
}
}
/*Save masking scale factors for chroma blocks.*/
Modified: branches/theorarm-merge-branch/lib/bitpack.h
===================================================================
--- branches/theorarm-merge-branch/lib/bitpack.h 2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/bitpack.h 2010-04-24 14:22:55 UTC (rev 17173)
@@ -51,10 +51,10 @@
/* returns -1 for read beyond EOF, or the number of whole bytes available */
long oc_pack_bytes_left(oc_pack_buf *_b);
-/*These two functions are implemented locally in huffdec.c*/
+/*These two functions are implemented in huffdec.c*/
/*Read in bits without advancing the bitptr.
Here we assume 0<=_bits&&_bits<=32.*/
-/*static int oc_pack_look(oc_pack_buf *_b,int _bits);*/
-/*static void oc_pack_adv(oc_pack_buf *_b,int _bits);*/
+long oc_pack_look(oc_pack_buf *_b,int _bits);
+void oc_pack_adv(oc_pack_buf *_b,int _bits);
#endif
Modified: branches/theorarm-merge-branch/lib/decode.c
===================================================================
--- branches/theorarm-merge-branch/lib/decode.c 2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/decode.c 2010-04-24 14:22:55 UTC (rev 17173)
@@ -568,7 +568,6 @@
static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
oc_sb_flags *sb_flags;
unsigned nsbs;
- unsigned sbi;
unsigned npartial;
unsigned run_count;
int flag;
@@ -781,7 +780,6 @@
unsigned char scheme0_alphabet[8];
oc_mode_unpack_func mode_unpack;
size_t nmbs;
- size_t mbi;
int mode_scheme;
mode_scheme=(int)oc_pack_read(&_dec->opb,3);
if(mode_scheme==0){
@@ -977,7 +975,6 @@
static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
oc_fragment *frags;
const ptrdiff_t *coded_fragis;
- const ptrdiff_t *coded_fragis_end;
ptrdiff_t ncoded_fragis;
ptrdiff_t fragi;
ncoded_fragis=_dec->state.ntotal_coded_fragis;
@@ -1041,7 +1038,7 @@
flag=!flag;
run_count=oc_sb_run_unpack(&_dec->opb);
full_run=run_count>=4129;
- for(;coded_fragis<coded_fragis_end;*coded_fragis++){
+ for(;coded_fragis<coded_fragis_end;coded_fragis++){
fragi=*coded_fragis;
if(frags[fragi].qii==0)continue;
if(run_count--<=0)break;
@@ -1165,7 +1162,6 @@
ptrdiff_t run_counts[64];
ptrdiff_t eob_count;
size_t ntoks_left;
- size_t ntoks;
int rli;
_dec->eob_runs[pli][_zzi]=_eobs;
_dec->ti0[pli][_zzi]=ti;
@@ -2201,7 +2197,11 @@
int pli;
int notstart;
int notdone;
+#ifdef OC_LIBOGG2
oc_pack_readinit(&_dec->opb,_op->packet);
+#else
+ oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
+#endif
#if defined(HAVE_CAIRO)
_dec->telemetry_frame_bytes=_op->bytes;
#endif
Modified: branches/theorarm-merge-branch/lib/encint.h
===================================================================
--- branches/theorarm-merge-branch/lib/encint.h 2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/encint.h 2010-04-24 14:22:55 UTC (rev 17173)
@@ -26,8 +26,6 @@
-typedef oc_mv oc_mv2[2];
-
typedef struct oc_enc_opt_vtable oc_enc_opt_vtable;
typedef struct oc_mb_enc_info oc_mb_enc_info;
typedef struct oc_mode_scheme_chooser oc_mode_scheme_chooser;
@@ -105,6 +103,7 @@
void oc_enc_vtable_init(oc_enc_ctx *_enc);
+typedef oc_mv oc_mv2_[2];
/*Encoder-specific macroblock information.*/
struct oc_mb_enc_info{
@@ -124,7 +123,7 @@
can be used to estimate constant velocity and constant acceleration
predictors.
Uninitialized MVs are (0,0).*/
- oc_mv2 analysis_mv[3];
+ oc_mv2_ analysis_mv[3];
/*Current unrefined analysis MVs.*/
oc_mv unref_mv[2];
/*Unrefined block MVs.*/
Modified: branches/theorarm-merge-branch/lib/encode.c
===================================================================
--- branches/theorarm-merge-branch/lib/encode.c 2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/encode.c 2010-04-24 14:22:55 UTC (rev 17173)
@@ -663,7 +663,7 @@
fragi=mb_maps[mbi][0][bi];
if(frags[fragi].coded){
oc_enc_mv_pack(_enc,mv_scheme,
- frag_mvs[fragi][0],frag_mvs[fragi][1]);
+ frag_mvs[fragi].v[0],frag_mvs[fragi].v[1]);
/*Only code a single MV for this macro block.*/
break;
}
@@ -674,7 +674,7 @@
fragi=mb_maps[mbi][0][bi];
if(frags[fragi].coded){
oc_enc_mv_pack(_enc,mv_scheme,
- frag_mvs[fragi][0],frag_mvs[fragi][1]);
+ frag_mvs[fragi].v[0],frag_mvs[fragi].v[1]);
/*Keep coding all the MVs for this macro block.*/
}
}
Modified: branches/theorarm-merge-branch/lib/huffdec.c
===================================================================
--- branches/theorarm-merge-branch/lib/huffdec.c 2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/huffdec.c 2010-04-24 14:22:55 UTC (rev 17173)
@@ -130,7 +130,7 @@
/*Read in bits without advancing the bit pointer.
Here we assume 0<=_bits&&_bits<=32.*/
-static long oc_pack_look(oc_pack_buf *_b,int _bits){
+long oc_pack_look(oc_pack_buf *_b,int _bits){
oc_pb_window window;
int available;
long result;
@@ -143,7 +143,7 @@
}
/*Advance the bit pointer.*/
-static void oc_pack_adv(oc_pack_buf *_b,int _bits){
+void oc_pack_adv(oc_pack_buf *_b,int _bits){
/*We ignore the special cases for _bits==0 and _bits==32 here, since they are
never used actually used.
OC_HUFF_SLUSH (defined below) would have to be at least 27 to actually read
Modified: branches/theorarm-merge-branch/lib/internal.h
===================================================================
--- branches/theorarm-merge-branch/lib/internal.h 2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/internal.h 2010-04-24 14:22:55 UTC (rev 17173)
@@ -341,7 +341,7 @@
const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
int _dst_frame,int _src_frame,int _pli);
void (*state_loop_filter_frag_rows)(const oc_theora_state *_state,
- int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
+ signed char *_bv,int _refi,int _pli,int _fragy0,int _fragy_end);
void (*restore_fpu)(void);
};
@@ -519,7 +519,7 @@
void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,
signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
void oc_restore_fpu(const oc_theora_state *_state);
-void oc_idct8x8(ogg_int16_t _y[64],int _last_zzi);
+void oc_idct8x8_arm(ogg_int16_t _y[64],int _last_zzi);
/*Default pure-C implementations.*/
void oc_frag_copy_c(unsigned char *_dst,
@@ -537,7 +537,7 @@
const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
int _dst_frame,int _src_frame,int _pli);
void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,
- int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
+ signed char *_bv,int _refi,int _pli,int _fragy0,int _fragy_end);
void oc_restore_fpu_c(void);
/*We need a way to call a few encoder functions without introducing a link-time
Modified: branches/theorarm-merge-branch/lib/mcenc.c
===================================================================
--- branches/theorarm-merge-branch/lib/mcenc.c 2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/mcenc.c 2010-04-24 14:22:55 UTC (rev 17173)
@@ -102,8 +102,8 @@
/*Fill in the first part of set A: the vectors from adjacent blocks.*/
for(i=0;i<embs[_mbi].ncneighbors;i++){
nmbi=embs[_mbi].cneighbors[i];
- _mcenc->candidates[ncandidates][0]=embs[nmbi].analysis_mv[0][_frame][0];
- _mcenc->candidates[ncandidates][1]=embs[nmbi].analysis_mv[0][_frame][1];
+ _mcenc->candidates[ncandidates][0]=embs[nmbi].analysis_mv[0][_frame].v[0];
+ _mcenc->candidates[ncandidates][1]=embs[nmbi].analysis_mv[0][_frame].v[1];
ncandidates++;
}
}
@@ -113,9 +113,9 @@
_mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,_accum[1],31);
ncandidates++;
_mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
- embs[_mbi].analysis_mv[1][_frame][0]+_accum[0],31);
+ embs[_mbi].analysis_mv[1][_frame].v[0]+_accum[0],31);
_mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
- embs[_mbi].analysis_mv[1][_frame][1]+_accum[1],31);
+ embs[_mbi].analysis_mv[1][_frame].v[1]+_accum[1],31);
ncandidates++;
_mcenc->candidates[ncandidates][0]=0;
_mcenc->candidates[ncandidates][1]=0;
@@ -137,11 +137,11 @@
nmbi=_mbi;
for(i=0;;i++){
_mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
- 2*embs[_mbi].analysis_mv[1][_frame][0]
- -embs[_mbi].analysis_mv[2][_frame][0]+_accum[0],31);
+ 2*embs[_mbi].analysis_mv[1][_frame].v[0]
+ -embs[_mbi].analysis_mv[2][_frame].v[0]+_accum[0],31);
_mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
- 2*embs[_mbi].analysis_mv[1][_frame][1]
- -embs[_mbi].analysis_mv[2][_frame][1]+_accum[1],31);
+ 2*embs[_mbi].analysis_mv[1][_frame].v[1]
+ -embs[_mbi].analysis_mv[2][_frame].v[1]+_accum[1],31);
ncandidates++;
if(i>=embs[_mbi].npneighbors)break;
nmbi=embs[_mbi].pneighbors[i];
@@ -483,57 +483,57 @@
candy=best_vec[1];
embs[_mbi].satd[_frame]=oc_mcenc_ysatd_check_mbcandidate_fullpel(_enc,
frag_buf_offs,fragis,candx,candy,src,ref,ystride);
- embs[_mbi].analysis_mv[0][_frame][0]=(signed char)(candx<<1);
- embs[_mbi].analysis_mv[0][_frame][1]=(signed char)(candy<<1);
+ embs[_mbi].analysis_mv[0][_frame].v[0]=(signed char)(candx<<1);
+ embs[_mbi].analysis_mv[0][_frame].v[1]=(signed char)(candy<<1);
if(_frame==OC_FRAME_PREV){
for(bi=0;bi<4;bi++){
candx=best_block_vec[bi][0];
candy=best_block_vec[bi][1];
embs[_mbi].block_satd[bi]=oc_mcenc_ysatd_check_bcandidate_fullpel(_enc,
frag_buf_offs[fragis[bi]],candx,candy,src,ref,ystride);
- embs[_mbi].block_mv[bi][0]=(signed char)(candx<<1);
- embs[_mbi].block_mv[bi][1]=(signed char)(candy<<1);
+ embs[_mbi].block_mv[bi].v[0]=(signed char)(candx<<1);
+ embs[_mbi].block_mv[bi].v[1]=(signed char)(candy<<1);
}
}
}
void oc_mcenc_search(oc_enc_ctx *_enc,int _mbi){
- oc_mv2 *mvs;
+ oc_mv2_ *mvs;
int accum_p[2];
int accum_g[2];
- mvs=_enc->mb_info[_mbi].analysis_mv;
+ mvs=&_enc->mb_info[_mbi].analysis_mv[0];
if(_enc->prevframe_dropped){
- accum_p[0]=mvs[0][OC_FRAME_PREV][0];
- accum_p[1]=mvs[0][OC_FRAME_PREV][1];
+ accum_p[0]=mvs[0][OC_FRAME_PREV].v[0];
+ accum_p[1]=mvs[0][OC_FRAME_PREV].v[1];
}
else accum_p[1]=accum_p[0]=0;
- accum_g[0]=mvs[2][OC_FRAME_GOLD][0];
- accum_g[1]=mvs[2][OC_FRAME_GOLD][1];
- mvs[0][OC_FRAME_PREV][0]-=mvs[2][OC_FRAME_PREV][0];
- mvs[0][OC_FRAME_PREV][1]-=mvs[2][OC_FRAME_PREV][1];
+ accum_g[0]=mvs[2][OC_FRAME_GOLD].v[0];
+ accum_g[1]=mvs[2][OC_FRAME_GOLD].v[1];
+ mvs[0][OC_FRAME_PREV].v[0]-=mvs[2][OC_FRAME_PREV].v[0];
+ mvs[0][OC_FRAME_PREV].v[1]-=mvs[2][OC_FRAME_PREV].v[1];
/*Move the motion vector predictors back a frame.*/
memmove(mvs+1,mvs,2*sizeof(*mvs));
/*Search the last frame.*/
oc_mcenc_search_frame(_enc,accum_p,_mbi,OC_FRAME_PREV);
- mvs[2][OC_FRAME_PREV][0]=accum_p[0];
- mvs[2][OC_FRAME_PREV][1]=accum_p[1];
+ mvs[2][OC_FRAME_PREV].v[0]=accum_p[0];
+ mvs[2][OC_FRAME_PREV].v[1]=accum_p[1];
/*GOLDEN MVs are different from PREV MVs in that they're each absolute
offsets from some frame in the past rather than relative offsets from the
frame before.
For predictor calculation to make sense, we need them to be in the same
form as PREV MVs.*/
- mvs[1][OC_FRAME_GOLD][0]-=mvs[2][OC_FRAME_GOLD][0];
- mvs[1][OC_FRAME_GOLD][1]-=mvs[2][OC_FRAME_GOLD][1];
- mvs[2][OC_FRAME_GOLD][0]-=accum_g[0];
- mvs[2][OC_FRAME_GOLD][1]-=accum_g[1];
+ mvs[1][OC_FRAME_GOLD].v[0]-=mvs[2][OC_FRAME_GOLD].v[0];
+ mvs[1][OC_FRAME_GOLD].v[1]-=mvs[2][OC_FRAME_GOLD].v[1];
+ mvs[2][OC_FRAME_GOLD].v[0]-=accum_g[0];
+ mvs[2][OC_FRAME_GOLD].v[1]-=accum_g[1];
/*Search the golden frame.*/
oc_mcenc_search_frame(_enc,accum_g,_mbi,OC_FRAME_GOLD);
/*Put GOLDEN MVs back into absolute offset form.
The newest MV is already an absolute offset.*/
- mvs[2][OC_FRAME_GOLD][0]+=accum_g[0];
- mvs[2][OC_FRAME_GOLD][1]+=accum_g[1];
- mvs[1][OC_FRAME_GOLD][0]+=mvs[2][OC_FRAME_GOLD][0];
- mvs[1][OC_FRAME_GOLD][1]+=mvs[2][OC_FRAME_GOLD][1];
+ mvs[2][OC_FRAME_GOLD].v[0]+=accum_g[0];
+ mvs[2][OC_FRAME_GOLD].v[1]+=accum_g[1];
+ mvs[1][OC_FRAME_GOLD].v[0]+=mvs[2][OC_FRAME_GOLD].v[0];
+ mvs[1][OC_FRAME_GOLD].v[1]+=mvs[2][OC_FRAME_GOLD].v[1];
}
#if 0
@@ -650,12 +650,12 @@
oc_mb_enc_info *embs;
int vec[2];
embs=_enc->mb_info;
- vec[0]=OC_DIV2(embs[_mbi].analysis_mv[0][_frame][0]);
- vec[1]=OC_DIV2(embs[_mbi].analysis_mv[0][_frame][1]);
+ vec[0]=OC_DIV2(embs[_mbi].analysis_mv[0][_frame].v[0]);
+ vec[1]=OC_DIV2(embs[_mbi].analysis_mv[0][_frame].v[1]);
embs[_mbi].satd[_frame]=oc_mcenc_ysatd_halfpel_mbrefine(_enc,
_mbi,vec,embs[_mbi].satd[_frame],_frame);
- embs[_mbi].analysis_mv[0][_frame][0]=(signed char)vec[0];
- embs[_mbi].analysis_mv[0][_frame][1]=(signed char)vec[1];
+ embs[_mbi].analysis_mv[0][_frame].v[0]=(signed char)vec[0];
+ embs[_mbi].analysis_mv[0][_frame].v[1]=(signed char)vec[1];
}
#if 0
@@ -766,11 +766,11 @@
ptrdiff_t frag_offs;
int vec[2];
frag_offs=frag_buf_offs[fragis[bi]];
- vec[0]=OC_DIV2(embs[_mbi].block_mv[bi][0]);
- vec[1]=OC_DIV2(embs[_mbi].block_mv[bi][1]);
+ vec[0]=OC_DIV2(embs[_mbi].block_mv[bi].v[0]);
+ vec[1]=OC_DIV2(embs[_mbi].block_mv[bi].v[1]);
embs[_mbi].block_satd[bi]=oc_mcenc_ysatd_halfpel_brefine(_enc,vec,
src+frag_offs,ref+frag_offs,ystride,offset_y,embs[_mbi].block_satd[bi]);
- embs[_mbi].ref_mv[bi][0]=(signed char)vec[0];
- embs[_mbi].ref_mv[bi][1]=(signed char)vec[1];
+ embs[_mbi].ref_mv[bi].v[0]=(signed char)vec[0];
+ embs[_mbi].ref_mv[bi].v[1]=(signed char)vec[1];
}
}
Modified: branches/theorarm-merge-branch/lib/state.c
===================================================================
--- branches/theorarm-merge-branch/lib/state.c 2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/state.c 2010-04-24 14:22:55 UTC (rev 17173)
@@ -910,7 +910,7 @@
_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]
+frag_buf_off;
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
- _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){
+ _state->frag_mvs[_fragi].v[0],_state->frag_mvs[_fragi].v[1])>1){
oc_frag_recon_inter2(_state,
dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,_dct_coeffs);
}
@@ -952,7 +952,7 @@
}
}
-static void loop_filter_h(unsigned char *_pix,int _ystride,int *_bv){
+static void loop_filter_h(unsigned char *_pix,int _ystride,signed char *_bv){
int y;
_pix-=2;
for(y=0;y<8;y++){
@@ -968,7 +968,7 @@
}
}
-static void loop_filter_v(unsigned char *_pix,int _ystride,int *_bv){
+static void loop_filter_v(unsigned char *_pix,int _ystride,signed char *_bv){
int x;
_pix-=_ystride*2;
for(x=0;x<8;x++){
@@ -986,7 +986,7 @@
/*Initialize the bounding values array used by the loop filter.
_bv: Storage for the array.
Return: 0 on success, or a non-zero value if no filtering need be applied.*/
-int oc_state_loop_filter_init(oc_theora_state *_state,int _bv[256]){
+int oc_state_loop_filter_init(oc_theora_state *_state,signed char _bv[256]){
int flimit;
int i;
flimit=_state->loop_filter_limits[_state->qis[0]];
@@ -1009,13 +1009,13 @@
_pli: The color plane to filter.
_fragy0: The Y coordinate of the first fragment row to filter.
_fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
-void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,int _bv[256],
+void oc_state_loop_filter_frag_rows(const oc_theora_state *_state,signed char *_bv,
int _refi,int _pli,int _fragy0,int _fragy_end){
_state->opt_vtable.state_loop_filter_frag_rows(_state,_bv,_refi,_pli,
_fragy0,_fragy_end);
}
-void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,int *_bv,
+void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,signed char *_bv,
int _refi,int _pli,int _fragy0,int _fragy_end){
const oc_fragment_plane *fplane;
const oc_fragment *frags;
Modified: branches/theorarm-merge-branch/lib/x86/mmxstate.c
===================================================================
--- branches/theorarm-merge-branch/lib/x86/mmxstate.c 2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/x86/mmxstate.c 2010-04-24 14:22:55 UTC (rev 17173)
@@ -86,7 +86,7 @@
_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]
+frag_buf_off;
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
- _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){
+ _state->frag_mvs[_fragi].v[0],_state->frag_mvs[_fragi].v[1])>1){
oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
_dct_coeffs);
}
@@ -133,7 +133,7 @@
_fragy0: The Y coordinate of the first fragment row to filter.
_fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
- int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){
+ signed char *_bv,int _refi,int _pli,int _fragy0,int _fragy_end){
OC_ALIGN8(unsigned char ll[8]);
const oc_fragment_plane *fplane;
const oc_fragment *frags;
Modified: branches/theorarm-merge-branch/lib/x86/x86int.h
===================================================================
--- branches/theorarm-merge-branch/lib/x86/x86int.h 2010-04-24 14:19:47 UTC (rev 17172)
+++ branches/theorarm-merge-branch/lib/x86/x86int.h 2010-04-24 14:22:55 UTC (rev 17173)
@@ -36,7 +36,7 @@
const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
int _dst_frame,int _src_frame,int _pli);
void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
- int _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
+ signed char *_bv,int _refi,int _pli,int _fragy0,int _fragy_end);
void oc_restore_fpu_mmx(void);
#endif
More information about the commits
mailing list