[xiph-commits] r16211 - in branches/theora-thusnelda: . lib/dec
tterribe at svn.xiph.org
tterribe at svn.xiph.org
Mon Jul 6 18:43:19 PDT 2009
Author: tterribe
Date: 2009-07-06 18:43:19 -0700 (Mon, 06 Jul 2009)
New Revision: 16211
Modified:
branches/theora-thusnelda/AUTHORS
branches/theora-thusnelda/lib/dec/decint.h
branches/theora-thusnelda/lib/dec/decode.c
Log:
Remove some indirections in token decode.
Patch by Simon Hosie.
This gives a 2.2% speed-up on x86-32, and a 1.2% speed-up on x86-64.
Modified: branches/theora-thusnelda/AUTHORS
===================================================================
--- branches/theora-thusnelda/AUTHORS 2009-07-06 20:36:28 UTC (rev 16210)
+++ branches/theora-thusnelda/AUTHORS 2009-07-07 01:43:19 UTC (rev 16211)
@@ -21,6 +21,7 @@
Conrad Parker
Cristian Adam
Sebastian Pippin
+Simon Hosie
- Bug fixes, enhancements, build systems.
Mauricio Piacentini
Modified: branches/theora-thusnelda/lib/dec/decint.h
===================================================================
--- branches/theora-thusnelda/lib/dec/decint.h 2009-07-06 20:36:28 UTC (rev 16210)
+++ branches/theora-thusnelda/lib/dec/decint.h 2009-07-07 01:43:19 UTC (rev 16211)
@@ -56,21 +56,22 @@
oggpack_buffer opb;
/*Huffman decode trees.*/
oc_huff_node *huff_tables[TH_NHUFFMAN_TABLES];
- /*The index of one past the last token in each plane for each coefficient.
- The final entries are the total number of tokens for each coefficient.*/
+ /*The index of the first token in each plane for each coefficient.*/
ptrdiff_t ti0[3][64];
- /*The index of one past the last extra bits entry in each plane for each
- coefficient.
- The final entries are the total number of extra bits entries for each
+ /*The index of the first extra bits entry in each plane for each
coefficient.*/
ptrdiff_t ebi0[3][64];
/*The number of outstanding EOB runs at the start of each coefficient in each
plane.*/
ptrdiff_t eob_runs[3][64];
/*The DCT token lists.*/
- unsigned char **dct_tokens;
+ unsigned char *dct_tokens;
/*The extra bits associated with DCT tokens.*/
- ogg_uint16_t **extra_bits;
+ ogg_uint16_t *extra_bits;
+ /*The number of dct tokens unpacked so far.*/
+ int dct_tokens_count;
+ /*The number of extra bits entries unpacked so far.*/
+ int extra_bits_count;
/*The out-of-loop post-processing level.*/
int pp_level;
/*The DC scale used for out-of-loop deblocking.*/
Modified: branches/theora-thusnelda/lib/dec/decode.c
===================================================================
--- branches/theora-thusnelda/lib/dec/decode.c 2009-07-06 20:36:28 UTC (rev 16210)
+++ branches/theora-thusnelda/lib/dec/decode.c 2009-07-07 01:43:19 UTC (rev 16211)
@@ -186,10 +186,10 @@
}
_dec->pp_sharp_mod[qi]=-(qsum>>11);
}
- _dec->dct_tokens=(unsigned char **)oc_calloc_2d(64,
- _dec->state.nfrags,sizeof(_dec->dct_tokens[0][0]));
- _dec->extra_bits=(ogg_uint16_t **)oc_calloc_2d(64,
- _dec->state.nfrags,sizeof(_dec->extra_bits[0][0]));
+ _dec->dct_tokens=(unsigned char *)_ogg_malloc(64*
+ _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
+ _dec->extra_bits=(ogg_uint16_t *)_ogg_malloc(64*
+ _dec->state.nfrags*sizeof(_dec->extra_bits[0]));
memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
sizeof(_dec->state.loop_filter_limits));
_dec->pp_level=OC_PP_LEVEL_DISABLED;
@@ -865,8 +865,8 @@
ptrdiff_t ti;
ptrdiff_t ebi;
int pli;
- dct_tokens=_dec->dct_tokens[0];
- extra_bits=_dec->extra_bits[0];
+ dct_tokens=_dec->dct_tokens;
+ extra_bits=_dec->extra_bits;
frags=_dec->state.frags;
coded_fragis=_dec->state.coded_fragis;
ncoded_fragis=fragii=eobs=ti=ebi=0;
@@ -878,6 +878,8 @@
ncoded_fragis+=_dec->state.ncoded_fragis[pli];
memset(run_counts,0,sizeof(run_counts));
_dec->eob_runs[pli][0]=eobs;
+ _dec->ti0[pli][0]=ti;
+ _dec->ebi0[pli][0]=ebi;
/*Continue any previous EOB run, if there was one.*/
eobi=eobs;
if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
@@ -914,8 +916,6 @@
frags[coded_fragis[fragii++]].dc=oc_dct_token_dec1val(token,eb);
}
}
- _dec->ti0[pli][0]=ti;
- _dec->ebi0[pli][0]=ebi;
/*Add the total EOB count to the longest run length.*/
run_counts[63]+=eob_count;
/*And convert the run_counts array to a moment table.*/
@@ -924,6 +924,8 @@
accounted for by runs started in this coefficient.*/
for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
}
+ _dec->dct_tokens_count=ti;
+ _dec->extra_bits_count=ebi;
return eobs;
}
@@ -944,9 +946,10 @@
ptrdiff_t ti;
ptrdiff_t ebi;
int pli;
- dct_tokens=_dec->dct_tokens[_zzi];
- extra_bits=_dec->extra_bits[_zzi];
- ti=ebi=0;
+ dct_tokens=_dec->dct_tokens;
+ extra_bits=_dec->extra_bits;
+ ti=_dec->dct_tokens_count;
+ ebi=_dec->extra_bits_count;
for(pli=0;pli<3;pli++){
ptrdiff_t run_counts[64];
ptrdiff_t ntoks_left;
@@ -954,6 +957,8 @@
ptrdiff_t ntoks;
int rli;
_dec->eob_runs[pli][_zzi]=_eobs;
+ _dec->ti0[pli][_zzi]=ti;
+ _dec->ebi0[pli][_zzi]=ebi;
ntoks_left=_ntoks_left[pli][_zzi];
memset(run_counts,0,sizeof(run_counts));
eob_count=0;
@@ -984,8 +989,6 @@
_eobs=0;
}
}
- _dec->ti0[pli][_zzi]=ti;
- _dec->ebi0[pli][_zzi]=ebi;
/*Add the portion of the last EOB run actually used by this coefficient.*/
eob_count+=ntoks_left-ntoks;
/*And remove it from the remaining EOB count.*/
@@ -998,6 +1001,8 @@
accounted for by runs started in this coefficient.*/
for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
}
+ _dec->dct_tokens_count=ti;
+ _dec->extra_bits_count=ebi;
return _eobs;
}
@@ -1332,12 +1337,8 @@
_pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
/*Initialize the token and extra bits indices for each plane and
coefficient.*/
- memset(_pipe->ti[0],0,sizeof(_pipe->ti[0]));
- memset(_pipe->ebi[0],0,sizeof(_pipe->ebi[0]));
- for(pli=1;pli<3;pli++){
- memcpy(_pipe->ti[pli],_dec->ti0[pli-1],sizeof(_pipe->ti[0]));
- memcpy(_pipe->ebi[pli],_dec->ebi0[pli-1],sizeof(_pipe->ebi[0]));
- }
+ memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
+ memcpy(_pipe->ebi,_dec->ebi0,sizeof(_pipe->ebi));
/*Also copy over the initial the EOB run counts.*/
memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
/*Set up per-plane pointers to the coded and uncoded fragments lists.*/
@@ -1430,6 +1431,8 @@
counts.*/
static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
oc_dec_pipeline_state *_pipe,int _pli){
+ unsigned char *dct_tokens;
+ ogg_uint16_t *extra_bits;
ogg_uint16_t dc_quant[2];
const oc_fragment *frags;
const ptrdiff_t *coded_fragis;
@@ -1439,6 +1442,8 @@
ptrdiff_t *ebi;
ptrdiff_t *eob_runs;
int qti;
+ dct_tokens=_dec->dct_tokens;
+ extra_bits=_dec->extra_bits;
frags=_dec->state.frags;
coded_fragis=_pipe->coded_fragis[_pli];
ncoded_fragis=_pipe->ncoded_fragis[_pli];
@@ -1465,9 +1470,9 @@
}
else{
int ebflag;
- token=_dec->dct_tokens[zzi][ti[zzi]++];
+ token=dct_tokens[ti[zzi]++];
ebflag=OC_DCT_TOKEN_EXTRA_BITS[token]!=0;
- eb=_dec->extra_bits[zzi][ebi[zzi]]&-ebflag;
+ eb=extra_bits[ebi[zzi]]&-ebflag;
ebi[zzi]+=ebflag;
if(token<OC_NDCT_EOB_TOKEN_MAX){
eob_runs[zzi]=-oc_dct_token_skip(token,eb);
More information about the commits
mailing list