[xiph-commits] r16256 - branches/theora-gumboot/lib/dec

gumboot at svn.xiph.org gumboot at svn.xiph.org
Fri Jul 10 17:22:29 PDT 2009


Author: gumboot
Date: 2009-07-10 17:22:29 -0700 (Fri, 10 Jul 2009)
New Revision: 16256

Modified:
   branches/theora-gumboot/lib/dec/decint.h
   branches/theora-gumboot/lib/dec/decode.c
   branches/theora-gumboot/lib/dec/huffdec.c
Log:
experimental DCT token renumbering and huffman table extension... just like it said on the tin.


Modified: branches/theora-gumboot/lib/dec/decint.h
===================================================================
--- branches/theora-gumboot/lib/dec/decint.h	2009-07-10 23:40:41 UTC (rev 16255)
+++ branches/theora-gumboot/lib/dec/decint.h	2009-07-11 00:22:29 UTC (rev 16256)
@@ -58,20 +58,15 @@
   oc_huff_node        *huff_tables[TH_NHUFFMAN_TABLES];
   /*The index of the first token in each plane for each coefficient.*/
   ptrdiff_t            ti0[3][64];
-  /*The index of the first extra bits entry in each plane for each
-     coefficient.*/
-  ptrdiff_t            ebi0[3][64];
   /*The number of outstanding EOB runs at the start of each coefficient in each
      plane.*/
   ptrdiff_t            eob_runs[3][64];
   /*The DCT token lists.*/
   unsigned char       *dct_tokens;
   /*The extra bits associated with DCT tokens.*/
-  ogg_uint16_t        *extra_bits;
+  unsigned char       *extra_bits;
   /*The number of dct tokens unpacked so far.*/
   int                  dct_tokens_count;
-  /*The number of extra bits entries unpacked so far.*/
-  int                  extra_bits_count;
   /*The out-of-loop post-processing level.*/
   int                  pp_level;
   /*The DC scale used for out-of-loop deblocking.*/

Modified: branches/theora-gumboot/lib/dec/decode.c
===================================================================
--- branches/theora-gumboot/lib/dec/decode.c	2009-07-10 23:40:41 UTC (rev 16255)
+++ branches/theora-gumboot/lib/dec/decode.c	2009-07-11 00:22:29 UTC (rev 16256)
@@ -93,6 +93,136 @@
 };
 
 
+/*Extra-bit counts for renumbered DCT tokens 0-15; tokens >=16 carry none.*/
+static const unsigned char OC_DCT_NEW_TOKEN_EXTRA_BITS[16]={
+  0,6,3,3,
+  8,8,8,8,
+  3,3,4,4,
+  5,5,4,12
+};
+
+/*The original DCT tokens are extended and reordered during the construction of
+   the Huffman tables.  This revised ordering reveals essential information in
+   the token value itself; specifically, whether or not there are extra bits to
+   read and the parameter to which those extra bits are applied.  The token is
+   used to fetch a code word from the following table.  The extra bits are
+   added into the code word at the bit position inferred from the token value
+   and then optionally negated, according to the 'flip' bit, giving the final
+   code word from which all the required parameters are derived.*/
+#define OC_DCT_TOKEN_NEEDS_MORE(token) ((token)<(sizeof(OC_DCT_NEW_TOKEN_EXTRA_BITS)/sizeof(*OC_DCT_NEW_TOKEN_EXTRA_BITS)))
+#define OC_DCT_TOKEN_EB_POS(token) ((token)>=14?0:(token)>=4?OC_DCT_CW_MAG_SHIFT:OC_DCT_CW_RLEN_SHIFT)
+#define OC_DCT_TOKEN_FAT_EOB      15 /*Token whose extra bits span two bytes.*/
+#define OC_DCT_CW_MAG_SHIFT       21 /*Bit position of the magnitude field.*/
+#define OC_DCT_CW_FLIP_BIT        20 /*When set, the bits above it are inverted.*/
+#define OC_DCT_CW_RLEN_SHIFT      12 /*Bit position of the zero-run length.*/
+#define OC_DCT_CW_FINISH          0  /*Code word read as an unbounded EOB run.*/
+#define OC_DCT_CW_PACK(eobs,rlen,mag,flip) (0\
+    |(eobs) \
+    |((rlen)<<OC_DCT_CW_RLEN_SHIFT)\
+    |((flip)<<OC_DCT_CW_FLIP_BIT)\
+    |(((mag)-(flip))*(1<<OC_DCT_CW_MAG_SHIFT))) /*Multiply: may be negative.*/
+static const int OC_DCT_CODE_WORD[96]={
+ 0,                            /*Tokens 0-3: extra bits join the run length.*/
+ OC_DCT_CW_PACK(0, 0,  0, 1),
+ OC_DCT_CW_PACK(0, 10, +1, 0),
+ OC_DCT_CW_PACK(0, 10, -1, 0),
+ OC_DCT_CW_PACK(0, 0,  69, 0), /*Tokens 4-13: extra bits join the magnitude.*/
+ OC_DCT_CW_PACK(0, 0, 325, 0),
+ OC_DCT_CW_PACK(0, 0,  69, 1),
+ OC_DCT_CW_PACK(0, 0, 325, 1),
+ OC_DCT_CW_PACK(0, 0, 13, 0),
+ OC_DCT_CW_PACK(0, 0, 13, 1),
+ OC_DCT_CW_PACK(0, 0, 21, 0),
+ OC_DCT_CW_PACK(0, 0, 21, 1),
+ OC_DCT_CW_PACK(0, 0, 37, 0),
+ OC_DCT_CW_PACK(0, 0, 37, 1),
+ OC_DCT_CW_PACK(16, 0, 0, 0),  /*Tokens 14-15: extra bits join the EOB count.*/
+ OC_DCT_CW_FINISH,
+ OC_DCT_CW_FINISH,             /*Tokens 16 and up take no extra bits.*/
+ OC_DCT_CW_PACK(1, 0, 0, 0),
+ OC_DCT_CW_PACK(2, 0, 0, 0),
+ OC_DCT_CW_PACK(3, 0, 0, 0),
+ OC_DCT_CW_PACK(4, 0, 0, 0),
+ OC_DCT_CW_PACK(5, 0, 0, 0),
+ OC_DCT_CW_PACK(6, 0, 0, 0),
+ OC_DCT_CW_PACK(7, 0, 0, 0),
+ OC_DCT_CW_PACK(8, 0, 0, 0),
+ OC_DCT_CW_PACK(9, 0, 0, 0),
+ OC_DCT_CW_PACK(10, 0, 0, 0),
+ OC_DCT_CW_PACK(11, 0, 0, 0),
+ OC_DCT_CW_PACK(12, 0, 0, 0),
+ OC_DCT_CW_PACK(13, 0, 0, 0),
+ OC_DCT_CW_PACK(14, 0, 0, 0),
+ OC_DCT_CW_PACK(15, 0, 0, 0),
+ OC_DCT_CW_PACK(0, 0,  0, 1),
+ OC_DCT_CW_PACK(0, 1,  0, 0),
+ OC_DCT_CW_PACK(0, 2,  0, 0),
+ OC_DCT_CW_PACK(0, 3,  0, 0),
+ OC_DCT_CW_PACK(0, 4,  0, 0),
+ OC_DCT_CW_PACK(0, 5,  0, 0),
+ OC_DCT_CW_PACK(0, 6,  0, 0),
+ OC_DCT_CW_PACK(0, 7,  0, 0),
+ OC_DCT_CW_PACK(0, 0, +1, 0),
+ OC_DCT_CW_PACK(0, 0, -1, 0),
+ OC_DCT_CW_PACK(0, 0, +2, 0),
+ OC_DCT_CW_PACK(0, 0, -2, 0),
+ OC_DCT_CW_PACK(0, 0, +3, 0),
+ OC_DCT_CW_PACK(0, 0, -3, 0),
+ OC_DCT_CW_PACK(0, 0, +4, 0),
+ OC_DCT_CW_PACK(0, 0, -4, 0),
+ OC_DCT_CW_PACK(0, 0, +5, 0),
+ OC_DCT_CW_PACK(0, 0, -5, 0),
+ OC_DCT_CW_PACK(0, 0, +6, 0),
+ OC_DCT_CW_PACK(0, 0, -6, 0),
+ OC_DCT_CW_PACK(0, 0, +7, 0),
+ OC_DCT_CW_PACK(0, 0, +8, 0),
+ OC_DCT_CW_PACK(0, 0, -7, 0),
+ OC_DCT_CW_PACK(0, 0, -8, 0),
+ OC_DCT_CW_PACK(0, 0, +9, 0),
+ OC_DCT_CW_PACK(0, 0,+10, 0),
+ OC_DCT_CW_PACK(0, 0,+11, 0),
+ OC_DCT_CW_PACK(0, 0,+12, 0),
+ OC_DCT_CW_PACK(0, 0, -9, 0),
+ OC_DCT_CW_PACK(0, 0,-10, 0),
+ OC_DCT_CW_PACK(0, 0,-11, 0),
+ OC_DCT_CW_PACK(0, 0,-12, 0),
+ OC_DCT_CW_PACK(0, 6, +1, 0),
+ OC_DCT_CW_PACK(0, 7, +1, 0),
+ OC_DCT_CW_PACK(0, 8, +1, 0),
+ OC_DCT_CW_PACK(0, 9, +1, 0),
+ OC_DCT_CW_PACK(0, 6, -1, 0),
+ OC_DCT_CW_PACK(0, 7, -1, 0),
+ OC_DCT_CW_PACK(0, 8, -1, 0),
+ OC_DCT_CW_PACK(0, 9, -1, 0),
+ OC_DCT_CW_PACK(0, 2, +2, 0),
+ OC_DCT_CW_PACK(0, 3, +2, 0),
+ OC_DCT_CW_PACK(0, 2, +3, 0),
+ OC_DCT_CW_PACK(0, 3, +3, 0),
+ OC_DCT_CW_PACK(0, 2, -2, 0),
+ OC_DCT_CW_PACK(0, 3, -2, 0),
+ OC_DCT_CW_PACK(0, 2, -3, 0),
+ OC_DCT_CW_PACK(0, 3, -3, 0),
+ OC_DCT_CW_PACK(0, 1, +2, 0),
+ OC_DCT_CW_PACK(0, 1, +3, 0),
+ OC_DCT_CW_PACK(0, 1, -2, 0),
+ OC_DCT_CW_PACK(0, 1, -3, 0),
+ OC_DCT_CW_PACK(0, 1, +1, 0),
+ OC_DCT_CW_PACK(0, 1, -1, 0),
+ OC_DCT_CW_PACK(0, 2, +1, 0),
+ OC_DCT_CW_PACK(0, 2, -1, 0),
+ OC_DCT_CW_PACK(0, 3, +1, 0),
+ OC_DCT_CW_PACK(0, 3, -1, 0),
+ OC_DCT_CW_PACK(0, 4, +1, 0),
+ OC_DCT_CW_PACK(0, 4, -1, 0),
+ OC_DCT_CW_PACK(0, 5, +1, 0),
+ OC_DCT_CW_PACK(0, 5, -1, 0),
+ OC_DCT_CW_FINISH,             /*Tokens 94-95: not produced by OC_DCT_TOKEN_MAP.*/
+ OC_DCT_CW_FINISH,
+ /*The table holds exactly 96 entries; a 97th would exceed the array bound.*/
+};
+#undef OC_DCT_CW_PACK
+
+
 static int oc_sb_run_unpack(oggpack_buffer *_opb){
   long bits;
   int ret;
@@ -186,10 +316,8 @@
     }
     _dec->pp_sharp_mod[qi]=-(qsum>>11);
   }
-  _dec->dct_tokens=(unsigned char *)_ogg_malloc(64*
+  _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
    _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
-  _dec->extra_bits=(ogg_uint16_t *)_ogg_malloc(64*
-   _dec->state.nfrags*sizeof(_dec->extra_bits[0]));
   memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
    sizeof(_dec->state.loop_filter_limits));
   _dec->pp_level=OC_PP_LEVEL_DISABLED;
@@ -214,7 +342,6 @@
   _ogg_free(_dec->pp_frame_data);
   _ogg_free(_dec->variances);
   _ogg_free(_dec->dc_qis);
-  oc_free_2d(_dec->extra_bits);
   oc_free_2d(_dec->dct_tokens);
   oc_huff_trees_clear(_dec->huff_tables);
   oc_state_clear(&_dec->state);
@@ -744,107 +871,6 @@
 
 
 
-/*Returns the decoded value of the first coefficient produced by the given
-   token.
-  It CANNOT be called for any of the EOB tokens.
-  _token:      The token value to skip.
-  _extra_bits: The extra bits attached to this token.
-  Return: The decoded coefficient value.*/
-typedef int (*oc_token_dec1val_func)(int _token,int _extra_bits);
-
-/*We want to avoid accessing arrays of constants in these functions, because
-   we take the address of them, which means that when compiling with -fPIC,
-   an expensive prolog is added to set up the PIC register in any functions
-   which access a global symbol (even if it has file scope or smaller).
-  Thus a lot of what would be tables are packed into 32-bit constants.*/
-
-/*Handles zero run tokens.*/
-static int oc_token_dec1val_zrl(void){
-  return 0;
-}
-
-/*Handles 1, -1, 2 and -2 tokens.*/
-static int oc_token_dec1val_const(int _token){
-  return OC_BYTE_TABLE32(1,-1,2,-2,_token-OC_NDCT_ZRL_TOKEN_MAX);
-}
-
-/*Handles DCT value tokens category 2.*/
-static int oc_token_dec1val_cat2(int _token,int _extra_bits){
-  int mask;
-  mask=-_extra_bits;
-  return _token-OC_DCT_VAL_CAT2+3+mask^mask;
-}
-
-/*Handles DCT value tokens categories 3 through 6.*/
-static int oc_token_dec1val_cat3_6(int _token,int _extra_bits){
-  int cati;
-  int mask;
-  int val_cat_offs;
-  int val_cat_shift;
-  cati=_token-OC_DCT_VAL_CAT3;
-  val_cat_shift=cati+1;
-  mask=-(_extra_bits>>val_cat_shift);
-  _extra_bits&=(1<<val_cat_shift)-1;
-  val_cat_offs=OC_BYTE_TABLE32(7,9,13,21,cati);
-  return val_cat_offs+_extra_bits+mask^mask;
-}
-
-/*Handles DCT value tokens categories 7 through 8.*/
-static int oc_token_dec1val_cat7_8(int _token,int _extra_bits){
-  int cati;
-  int mask;
-  int val_cat_offs;
-  int val_cat_shift;
-  cati=_token-OC_DCT_VAL_CAT7;
-  val_cat_shift=5+(cati<<2);
-  mask=-(_extra_bits>>val_cat_shift);
-  _extra_bits&=(1<<val_cat_shift)-1;
-  val_cat_offs=37+(cati<<5);
-  return val_cat_offs+_extra_bits+mask^mask;
-}
-
-/*A jump table for computing the first coefficient value the given token value
-   represents.*/
-static const oc_token_dec1val_func OC_TOKEN_DEC1VAL_TABLE[TH_NDCT_TOKENS-
- OC_NDCT_EOB_TOKEN_MAX]={
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_const,
-  (oc_token_dec1val_func)oc_token_dec1val_const,
-  (oc_token_dec1val_func)oc_token_dec1val_const,
-  (oc_token_dec1val_func)oc_token_dec1val_const,
-  oc_token_dec1val_cat2,
-  oc_token_dec1val_cat2,
-  oc_token_dec1val_cat2,
-  oc_token_dec1val_cat2,
-  oc_token_dec1val_cat3_6,
-  oc_token_dec1val_cat3_6,
-  oc_token_dec1val_cat3_6,
-  oc_token_dec1val_cat3_6,
-  oc_token_dec1val_cat7_8,
-  oc_token_dec1val_cat7_8,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl,
-  (oc_token_dec1val_func)oc_token_dec1val_zrl
-};
-
-/*Returns the decoded value of the first coefficient produced by the given
-   token.
-  It CANNOT be called for any of the EOB tokens.
-  _token:      The token value to skip.
-  _extra_bits: The extra bits attached to this token.
-  Return: The decoded coefficient value.*/
-static int oc_dct_token_dec1val(int _token,int _extra_bits){
-  return (*OC_TOKEN_DEC1VAL_TABLE[_token-OC_NDCT_EOB_TOKEN_MAX])(_token,
-   _extra_bits);
-}
-
 /*Unpacks the DC coefficient tokens.
   Unlike when unpacking the AC coefficient tokens, we actually need to decode
    the DC coefficient values now so that we can do DC prediction.
@@ -856,20 +882,17 @@
 static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[2],
  ptrdiff_t _ntoks_left[3][64]){
   unsigned char   *dct_tokens;
-  ogg_uint16_t    *extra_bits;
   oc_fragment     *frags;
   const ptrdiff_t *coded_fragis;
   ptrdiff_t        ncoded_fragis;
   ptrdiff_t        fragii;
   ptrdiff_t        eobs;
   ptrdiff_t        ti;
-  ptrdiff_t        ebi;
   int              pli;
   dct_tokens=_dec->dct_tokens;
-  extra_bits=_dec->extra_bits;
   frags=_dec->state.frags;
   coded_fragis=_dec->state.coded_fragis;
-  ncoded_fragis=fragii=eobs=ti=ebi=0;
+  ncoded_fragis=fragii=eobs=ti=0;
   for(pli=0;pli<3;pli++){
     ptrdiff_t run_counts[64];
     ptrdiff_t eob_count;
@@ -879,7 +902,6 @@
     memset(run_counts,0,sizeof(run_counts));
     _dec->eob_runs[pli][0]=eobs;
     _dec->ti0[pli][0]=ti;
-    _dec->ebi0[pli][0]=ebi;
     /*Continue any previous EOB run, if there was one.*/
     eobi=eobs;
     if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
@@ -888,32 +910,39 @@
     while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
     while(fragii<ncoded_fragis){
       int token;
-      int neb;
+      int cw;
       int eb;
       int skip;
       token=oc_huff_token_decode(&_dec->opb,
        _dec->huff_tables[_huff_idxs[pli+1>>1]]);
       dct_tokens[ti++]=(unsigned char)token;
-      neb=OC_DCT_TOKEN_EXTRA_BITS[token];
-      if(neb){
+      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
+        int neb=OC_DCT_NEW_TOKEN_EXTRA_BITS[token];
         long val;
         theorapackB_read(&_dec->opb,neb,&val);
         eb=(int)val;
-        extra_bits[ebi++]=(ogg_uint16_t)eb;
+        dct_tokens[ti++]=(unsigned char)eb;
+        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
       }
       else eb=0;
-      skip=oc_dct_token_skip(token,eb);
-      if(skip<0){
-        eobs=eobi=-skip;
-        if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
+      cw=OC_DCT_CODE_WORD[token];
+      cw+=eb<<OC_DCT_TOKEN_EB_POS(token);
+      eobs=cw&0xfff;
+      if(cw==OC_DCT_CW_FINISH)eobs=(~(size_t)0>>1);
+      if(eobs){
+        eobi=OC_MINI(eobs,ncoded_fragis-fragii); 
         eob_count+=eobi;
         eobs-=eobi;
         while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
       }
       else{
-        run_counts[skip-1]++;
-        eobs=0;
-        frags[coded_fragis[fragii++]].dc=oc_dct_token_dec1val(token,eb);
+        int coeff;
+        skip=(cw>>OC_DCT_CW_RLEN_SHIFT)&127;
+        cw^=-(cw&(1<<OC_DCT_CW_FLIP_BIT));
+        coeff=(cw>>OC_DCT_CW_MAG_SHIFT);
+        if(skip)coeff=0;
+        run_counts[skip]++;
+        frags[coded_fragis[fragii++]].dc=coeff;
       }
     }
     /*Add the total EOB count to the longest run length.*/
@@ -925,7 +954,6 @@
     for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
   }
   _dec->dct_tokens_count=ti;
-  _dec->extra_bits_count=ebi;
   return eobs;
 }
 
@@ -942,14 +970,10 @@
 static int oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,int _huff_idxs[2],
  ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
   unsigned char *dct_tokens;
-  ogg_uint16_t  *extra_bits;
   ptrdiff_t      ti;
-  ptrdiff_t      ebi;
   int            pli;
   dct_tokens=_dec->dct_tokens;
-  extra_bits=_dec->extra_bits;
   ti=_dec->dct_tokens_count;
-  ebi=_dec->extra_bits_count;
   for(pli=0;pli<3;pli++){
     ptrdiff_t run_counts[64];
     ptrdiff_t ntoks_left;
@@ -958,14 +982,13 @@
     int       rli;
     _dec->eob_runs[pli][_zzi]=_eobs;
     _dec->ti0[pli][_zzi]=ti;
-    _dec->ebi0[pli][_zzi]=ebi;
     ntoks_left=_ntoks_left[pli][_zzi];
     memset(run_counts,0,sizeof(run_counts));
     eob_count=0;
     ntoks=0;
     while(ntoks+_eobs<ntoks_left){
       int token;
-      int neb;
+      int cw;
       int eb;
       int skip;
       ntoks+=_eobs;
@@ -973,20 +996,23 @@
       token=oc_huff_token_decode(&_dec->opb,
        _dec->huff_tables[_huff_idxs[pli+1>>1]]);
       dct_tokens[ti++]=(unsigned char)token;
-      neb=OC_DCT_TOKEN_EXTRA_BITS[token];
-      if(neb){
+      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
+        int neb=OC_DCT_NEW_TOKEN_EXTRA_BITS[token];
         long val;
         theorapackB_read(&_dec->opb,neb,&val);
         eb=(int)val;
-        extra_bits[ebi++]=(ogg_uint16_t)eb;
+        dct_tokens[ti++]=(unsigned char)eb;
+        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
       }
       else eb=0;
-      skip=oc_dct_token_skip(token,eb);
-      if(skip<0)_eobs=-skip;
-      else{
-        run_counts[skip-1]++;
+      cw=OC_DCT_CODE_WORD[token];
+      cw+=eb<<OC_DCT_TOKEN_EB_POS(token);
+      skip=((cw)>>OC_DCT_CW_RLEN_SHIFT)&127;
+      _eobs=cw&0xfff;
+      if(cw==OC_DCT_CW_FINISH)_eobs=(~(size_t)0>>1);
+      if(_eobs==0){
+        run_counts[skip]++;
         ntoks++;
-        _eobs=0;
       }
     }
     /*Add the portion of the last EOB run actually used by this coefficient.*/
@@ -1002,7 +1028,6 @@
     for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
   }
   _dec->dct_tokens_count=ti;
-  _dec->extra_bits_count=ebi;
   return _eobs;
 }
 
@@ -1066,141 +1091,6 @@
 }
 
 
-
-/*Expands a single token into the given coefficient list.
-  This fills in the zeros for zero runs as well as coefficient values, and
-   updates the index of the current coefficient.
-  It CANNOT be called for any of the EOB tokens.
-  _token:      The token value to expand.
-  _extra_bits: The extra bits associated with the token.
-  _dct_coeffs: The current list of coefficients, in zig-zag order.
-  _zzi:        The zig-zag index of the next coefficient to write to.
-  Return: The updated index of the next coefficient to write to.*/
-typedef int (*oc_token_expand_func)(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int _zzi);
-
-/*Expands a zero run token.*/
-static int oc_token_expand_zrl(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int _zzi){
-  do _dct_coeffs[_zzi++]=0;
-  while(_extra_bits-->0);
-  return _zzi;
-}
-
-/*Expands a constant, single-value token.*/
-static int oc_token_expand_const(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int _zzi){
-  _dct_coeffs[_zzi++]=(ogg_int16_t)oc_token_dec1val_const(_token);
-  return _zzi;
-}
-
-/*Expands category 2 single-valued tokens.*/
-static int oc_token_expand_cat2(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int _zzi){
-  _dct_coeffs[_zzi++]=(ogg_int16_t)oc_token_dec1val_cat2(_token,_extra_bits);
-  return _zzi;
-}
-
-/*Expands category 3 through 6 single-valued tokens.*/
-static int oc_token_expand_cat3_6(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int _zzi){
-  _dct_coeffs[_zzi++]=(ogg_int16_t)oc_token_dec1val_cat3_6(_token,_extra_bits);
-  return _zzi;
-}
-
-/*Expands category 7 through 8 single-valued tokens.*/
-static int oc_token_expand_cat7_8(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int _zzi){
-  _dct_coeffs[_zzi++]=(ogg_int16_t)oc_token_dec1val_cat7_8(_token,_extra_bits);
-  return _zzi;
-}
-
-/*Expands a category 1a zero run/value combo token.*/
-static int oc_token_expand_run_cat1a(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int _zzi){
-  int rl;
-  /*LOOP VECTORIZES.*/
-  for(rl=_token-OC_DCT_RUN_CAT1A+1;rl-->0;)_dct_coeffs[_zzi++]=0;
-  _dct_coeffs[_zzi++]=(ogg_int16_t)(1-(_extra_bits<<1));
-  return _zzi;
-}
-
-/*Expands all other zero run/value combo tokens.*/
-static int oc_token_expand_run(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int _zzi){
-  int nzeros_mask;
-  int nzeros_adjust;
-  int sign_shift;
-  int value_shift;
-  int value_mask;
-  int value_adjust;
-  int mask;
-  int rl;
-  _token-=OC_DCT_RUN_CAT1B;
-  nzeros_mask=OC_BYTE_TABLE32(3,7,0,1,_token);
-  nzeros_adjust=OC_BYTE_TABLE32(6,10,1,2,_token);
-  rl=(_extra_bits&nzeros_mask)+nzeros_adjust;
-  /*LOOP VECTORIZES.*/
-  while(rl-->0)_dct_coeffs[_zzi++]=0;
-  sign_shift=OC_BYTE_TABLE32(2,3,1,2,_token);
-  mask=-(_extra_bits>>sign_shift);
-  value_shift=_token+1>>2;
-  value_mask=_token>>1;
-  value_adjust=value_mask+1;
-  _dct_coeffs[_zzi++]=
-   (ogg_int16_t)(value_adjust+(_extra_bits>>value_shift&value_mask)+mask^mask);
-  return _zzi;
-}
-
-/*A jump table for expanding token values into coefficient values.
-  This reduces all the conditional branches, etc., needed to parse these token
-   values down to one indirect jump.*/
-static const oc_token_expand_func OC_TOKEN_EXPAND_TABLE[TH_NDCT_TOKENS-
- OC_NDCT_EOB_TOKEN_MAX]={
-  oc_token_expand_zrl,
-  oc_token_expand_zrl,
-  oc_token_expand_const,
-  oc_token_expand_const,
-  oc_token_expand_const,
-  oc_token_expand_const,
-  oc_token_expand_cat2,
-  oc_token_expand_cat2,
-  oc_token_expand_cat2,
-  oc_token_expand_cat2,
-  oc_token_expand_cat3_6,
-  oc_token_expand_cat3_6,
-  oc_token_expand_cat3_6,
-  oc_token_expand_cat3_6,
-  oc_token_expand_cat7_8,
-  oc_token_expand_cat7_8,
-  oc_token_expand_run_cat1a,
-  oc_token_expand_run_cat1a,
-  oc_token_expand_run_cat1a,
-  oc_token_expand_run_cat1a,
-  oc_token_expand_run_cat1a,
-  oc_token_expand_run,
-  oc_token_expand_run,
-  oc_token_expand_run,
-  oc_token_expand_run
-};
-
-/*Expands a single token into the given coefficient list.
-  This fills in the zeros for zero runs as well as coefficient values, and
-   updates the index of the current coefficient.
-  It CANNOT be called for any of the EOB tokens.
-  _token:      The token value to expand.
-  _extra_bits: The extra bits associated with the token.
-  _dct_coeffs: The current list of coefficients, in zig-zag order.
-  _zzi:        The zig-zag index of the next coefficient to write to.
-  Return: The updated index of the next coefficient to write to.*/
-static int oc_dct_token_expand(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[128],int _zzi){
-  return (*OC_TOKEN_EXPAND_TABLE[_token-OC_NDCT_EOB_TOKEN_MAX])(_token,
-   _extra_bits,_dct_coeffs,_zzi);
-}
-
-
-
 static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
   /*pp_level 0: disabled; free any memory used and return*/
   if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
@@ -1307,7 +1197,6 @@
 typedef struct{
   int                 bounding_values[256];
   ptrdiff_t           ti[3][64];
-  ptrdiff_t           ebi[3][64];
   ptrdiff_t           eob_runs[3][64];
   const ptrdiff_t    *coded_fragis[3];
   const ptrdiff_t    *uncoded_fragis[3];
@@ -1338,7 +1227,6 @@
   /*Initialize the token and extra bits indices for each plane and
      coefficient.*/
   memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
-  memcpy(_pipe->ebi,_dec->ebi0,sizeof(_pipe->ebi));
   /*Also copy over the initial the EOB run counts.*/
   memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
   /*Set up per-plane pointers to the coded and uncoded fragments lists.*/
@@ -1432,23 +1320,19 @@
 static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
  oc_dec_pipeline_state *_pipe,int _pli){
   unsigned char     *dct_tokens;
-  ogg_uint16_t      *extra_bits;
   ogg_uint16_t       dc_quant[2];
   const oc_fragment *frags;
   const ptrdiff_t   *coded_fragis;
   ptrdiff_t          ncoded_fragis;
   ptrdiff_t          fragii;
   ptrdiff_t         *ti;
-  ptrdiff_t         *ebi;
   ptrdiff_t         *eob_runs;
   int                qti;
   dct_tokens=_dec->dct_tokens;
-  extra_bits=_dec->extra_bits;
   frags=_dec->state.frags;
   coded_fragis=_pipe->coded_fragis[_pli];
   ncoded_fragis=_pipe->ncoded_fragis[_pli];
   ti=_pipe->ti[_pli];
-  ebi=_pipe->ebi[_pli];
   eob_runs=_pipe->eob_runs[_pli];
   for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0];
   for(fragii=0;fragii<ncoded_fragis;fragii++){
@@ -1469,15 +1353,33 @@
         break;
       }
       else{
-        int ebflag;
-        token=dct_tokens[ti[zzi]++];
-        ebflag=OC_DCT_TOKEN_EXTRA_BITS[token]!=0;
-        eb=extra_bits[ebi[zzi]]&-ebflag;
-        ebi[zzi]+=ebflag;
-        if(token<OC_NDCT_EOB_TOKEN_MAX){
-          eob_runs[zzi]=-oc_dct_token_skip(token,eb);
+        int ebmask;
+        int cw;
+        int rlen;
+        int coeff;
+        int lti;
+        ptrdiff_t eob;
+        lti=ti[zzi];
+        token=dct_tokens[lti++];
+        eb=dct_tokens[lti];
+        cw=OC_DCT_CODE_WORD[token];
+        ebmask=-OC_DCT_TOKEN_NEEDS_MORE(token);
+        lti-=ebmask;
+        eb&=ebmask;
+        cw+=eb<<OC_DCT_TOKEN_EB_POS(token);
+        eob=cw&0xfff;
+        if(token==OC_DCT_TOKEN_FAT_EOB){
+          eob+=dct_tokens[lti++]<<8;
+          if(eob==0)eob=(~(size_t)0>>1);
         }
-        else zzi=oc_dct_token_expand(token,eb,dct_coeffs,zzi);
+        rlen=(cw>>OC_DCT_CW_RLEN_SHIFT)&127;
+        cw^=-(cw&(1<<OC_DCT_CW_FLIP_BIT));
+        coeff=(cw>>OC_DCT_CW_MAG_SHIFT);
+        eob_runs[zzi]=eob;
+        ti[zzi]=lti;
+        while(--rlen>=0)dct_coeffs[zzi++]=0;
+        dct_coeffs[zzi]=coeff;
+        zzi+=(eob==0);
       }
     }
     /*TODO: zzi should be exactly 64 here.

Modified: branches/theora-gumboot/lib/dec/huffdec.c
===================================================================
--- branches/theora-gumboot/lib/dec/huffdec.c	2009-07-10 23:40:41 UTC (rev 16255)
+++ branches/theora-gumboot/lib/dec/huffdec.c	2009-07-11 00:22:29 UTC (rev 16256)
@@ -25,6 +25,52 @@
 #define _ogg_offsetof(_type,_field)\
  ((size_t)((char *)&((_type *)0)->_field-(char *)0))
 
+static const unsigned char OC_DCT_TOKEN_MAP_ENTRIES[TH_NDCT_TOKENS]={
+  1,1,1,4,
+  8,1,1,8,
+  1,1,1,1,
+  1,2,2,2,
+  2,4,8,2,
+  2,2,4,2,
+  2,2,2,2,
+  8,2,4,8
+};/*Per original token: how many renumbered tokens it expands into (1-8).*/
+
+static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS][8]={
+  /*  0 */{17},
+  /*  1 */{18},
+  /*  2 */{19},
+  /*  3 */{20,21,22,23},
+  /*  4 */{24,25,26,27,28,29,30,31},
+  /*  5 */{14},
+  /*  6 */{15},
+  /*  7 */{32,33,34,35,36,37,38,39},
+  /*  8 */{1},
+  /*  9 */{40},
+  /* 10 */{41},
+  /* 11 */{42},
+  /* 12 */{43},
+  /* 13 */{44,45},
+  /* 14 */{46,47},
+  /* 15 */{48,49},
+  /* 16 */{50,51},
+  /* 17 */{52,53,54,55},
+  /* 18 */{56,57,58,59,60,61,62,63},
+  /* 19 */{8,9},
+  /* 20 */{10,11},
+  /* 21 */{12,13},
+  /* 22 */{4,5,6,7},
+  /* 23 */{84,85},
+  /* 24 */{86,87},
+  /* 25 */{88,89},
+  /* 26 */{90,91},
+  /* 27 */{92,93},
+  /* 28 */{64,65,66,67,68,69,70,71},
+  /* 29 */{2,3}, 
+  /* 30 */{80,81,82,83},
+  /* 31 */{72,73,74,75,76,77,78,79},
+};/*Row i lists the new tokens for old token i; see OC_DCT_TOKEN_MAP_ENTRIES.*/
+
 /*These two functions are really part of the bitpack.c module, but
   they are only used here. Declaring local static versions so they
   can be inlined saves considerable function call overhead.*/
@@ -162,9 +208,36 @@
   /*Read a leaf node:*/
   else{
     if(theorapackB_read(_opb,OC_NDCT_TOKEN_BITS,&bits)<0)return TH_EBADHEADER;
+    int tokens=OC_DCT_TOKEN_MAP_ENTRIES[bits];
+    if(tokens>1){
+      int i;
+      if(_nbinodes<tokens+tokens-1)return TH_EBADHEADER;
+      binode->nbits=1;
+      binode->depth=1;
+      binode->nodes[0]=_binodes+nused;
+      binode->nodes[1]=_binodes+nused+1;
+      for(i=2;i<tokens;i<<=1){
+        int j;
+        binode=_binodes+nused;
+        nused+=i;
+        for(j=0;j<i;j++){
+          binode[j].nbits=1;
+          binode[j].depth=1;
+          binode[j].nodes[0]=_binodes+nused+2*j;
+          binode[j].nodes[1]=_binodes+nused+2*j+1;
+        }
+      }
+      for(i=0;i<tokens;i++){
+        binode=_binodes+nused++;
+        binode->nbits=0;
+        binode->depth=1;
+        binode->token=OC_DCT_TOKEN_MAP[bits][i];
+      }
+      return nused;
+    }
     binode->nbits=0;
     binode->depth=1;
-    binode->token=(unsigned char)bits;
+    binode->token=OC_DCT_TOKEN_MAP[bits][0];
   }
   return nused;
 }
@@ -287,10 +360,10 @@
  oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){
   int i;
   for(i=0;i<TH_NHUFFMAN_TABLES;i++){
-    oc_huff_node nodes[63];
+    oc_huff_node nodes[63+13*2+4*6+5*14];
     int          ret;
     /*Unpack the full tree into a temporary buffer.*/
-    ret=oc_huff_tree_unpack(_opb,nodes,63);
+    ret=oc_huff_tree_unpack(_opb,nodes,sizeof(nodes)/sizeof(*nodes));
     if(ret<0)return ret;
     _nodes[i]=oc_huff_tree_collapse(nodes);
   }



More information about the commits mailing list