[xiph-commits] r17204 - experimental/derf/theora-ptalarbvorm/lib

tterribe at svn.xiph.org tterribe at svn.xiph.org
Wed May 12 23:25:13 PDT 2010


Author: tterribe
Date: 2010-05-12 23:25:13 -0700 (Wed, 12 May 2010)
New Revision: 17204

Modified:
   experimental/derf/theora-ptalarbvorm/lib/bitpack.c
   experimental/derf/theora-ptalarbvorm/lib/bitpack.h
   experimental/derf/theora-ptalarbvorm/lib/decint.h
   experimental/derf/theora-ptalarbvorm/lib/decode.c
   experimental/derf/theora-ptalarbvorm/lib/huffdec.c
   experimental/derf/theora-ptalarbvorm/lib/huffdec.h
Log:
Major restructuring of the Huffman decoder.
This uses a much more compact representation of the tree structure, which packs
 everything into 16-bit integers, and stores leaves directly in their parents'
 child pointers, instead of in their own node, reducing cache footprint and
 eliminating one memory reference per decoded token.
The actual decode function now manually inlines the bitstream access functions,
 as the repeated stores to and from the pack buffer struct were not being
 optimized out by the compiler.
Together this gives a 6% speed-up on x86-32 and a 2% speed-up on x86-64 for
 very high-bitrate files (split roughly evenly between the two changes).


Modified: experimental/derf/theora-ptalarbvorm/lib/bitpack.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/bitpack.c	2010-05-13 00:13:51 UTC (rev 17203)
+++ experimental/derf/theora-ptalarbvorm/lib/bitpack.c	2010-05-13 06:25:13 UTC (rev 17204)
@@ -32,10 +32,10 @@
   const unsigned char *stop;
   oc_pb_window         window;
   int                  available;
+  stop=_b->stop;
+  ptr=_b->ptr;
   window=_b->window;
   available=_b->bits;
-  ptr=_b->ptr;
-  stop=_b->stop;
   while(available<=OC_PB_WINDOW_SIZE-8&&ptr<stop){
     available+=8;
     window|=(oc_pb_window)*ptr++<<OC_PB_WINDOW_SIZE-available;
@@ -82,8 +82,8 @@
   available-=_bits;
   window<<=1;
   window<<=_bits-1;
+  _b->window=window;
   _b->bits=available;
-  _b->window=window;
   return result;
 }
 
@@ -100,8 +100,8 @@
   result=window>>OC_PB_WINDOW_SIZE-1;
   available--;
   window<<=1;
+  _b->window=window;
   _b->bits=available;
-  _b->window=window;
   return result;
 }
 

Modified: experimental/derf/theora-ptalarbvorm/lib/bitpack.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/bitpack.h	2010-05-13 00:13:51 UTC (rev 17203)
+++ experimental/derf/theora-ptalarbvorm/lib/bitpack.h	2010-05-13 06:25:13 UTC (rev 17204)
@@ -35,9 +35,9 @@
 
 
 struct oc_pack_buf{
+  const unsigned char *stop;
+  const unsigned char *ptr;
   oc_pb_window         window;
-  const unsigned char *ptr;
-  const unsigned char *stop;
   int                  bits;
   int                  eof;
 };

Modified: experimental/derf/theora-ptalarbvorm/lib/decint.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/decint.h	2010-05-13 00:13:51 UTC (rev 17203)
+++ experimental/derf/theora-ptalarbvorm/lib/decint.h	2010-05-13 06:25:13 UTC (rev 17204)
@@ -37,7 +37,7 @@
 
 struct th_setup_info{
   /*The Huffman codes.*/
-  oc_huff_node      *huff_tables[TH_NHUFFMAN_TABLES];
+  ogg_int16_t   *huff_tables[TH_NHUFFMAN_TABLES];
   /*The quantization parameters.*/
   th_quant_info  qinfo;
 };
@@ -55,7 +55,7 @@
   /*Buffer in which to assemble packets.*/
   oc_pack_buf          opb;
   /*Huffman decode trees.*/
-  oc_huff_node        *huff_tables[TH_NHUFFMAN_TABLES];
+  ogg_int16_t         *huff_tables[TH_NHUFFMAN_TABLES];
   /*The index of the first token in each plane for each coefficient.*/
   ptrdiff_t            ti0[3][64];
   /*The number of outstanding EOB runs at the start of each coefficient in each

Modified: experimental/derf/theora-ptalarbvorm/lib/decode.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/decode.c	2010-05-13 00:13:51 UTC (rev 17203)
+++ experimental/derf/theora-ptalarbvorm/lib/decode.c	2010-05-13 06:25:13 UTC (rev 17204)
@@ -371,7 +371,7 @@
   ret=oc_state_init(&_dec->state,_info,3);
   if(ret<0)return ret;
   ret=oc_huff_trees_copy(_dec->huff_tables,
-   (const oc_huff_node *const *)_setup->huff_tables);
+   (const ogg_int16_t *const *)_setup->huff_tables);
   if(ret<0){
     oc_state_clear(&_dec->state);
     return ret;

Modified: experimental/derf/theora-ptalarbvorm/lib/huffdec.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/huffdec.c	2010-05-13 00:13:51 UTC (rev 17203)
+++ experimental/derf/theora-ptalarbvorm/lib/huffdec.c	2010-05-13 06:25:13 UTC (rev 17204)
@@ -22,10 +22,60 @@
 #include "decint.h"
 
 
-/*The ANSI offsetof macro is broken on some platforms (e.g., older DECs).*/
-#define _ogg_offsetof(_type,_field)\
- ((size_t)((char *)&((_type *)0)->_field-(char *)0))
 
+/*Instead of storing every branching in the tree, subtrees can be collapsed
+   into one node, with a table of size 1<<nbits pointing directly to its
+   descedents nbits levels down.
+  This allows more than one bit to be read at a time, and avoids following all
+   the intermediate branches with next to no increased code complexity once
+   the collapsed tree has been built.
+  We do _not_ require that a subtree be complete to be collapsed, but instead
+   store duplicate pointers in the table, and record the actual depth of the
+   node below its parent.
+  This tells us the number of bits to advance the stream after reaching it.
+
+  This turns out to be equivalent to the method described in \cite{Hash95},
+   without the requirement that codewords be sorted by length.
+  If the codewords were sorted by length (so-called ``canonical-codes''), they
+   could be decoded much faster via either Lindell and Moffat's approach or
+   Hashemian's Condensed Huffman Code approach, the latter of which has an
+   extremely small memory footprint.
+  We can't use Choueka et al.'s finite state machine approach, which is
+   extremely fast, because we can't allow multiple symbols to be output at a
+   time; the codebook can and does change between symbols.
+  It also has very large memory requirements, which impairs cache coherency.
+
+  We store the tree packed in an array of 16-bit integers (words).
+  Each node of the tree consists of a single word, followed consecutiably by
+   zero or more indexes of its children.
+  Let n be the value of this first word.
+
+  If n is negative or zero, then this is a leaf node.
+  The token in the leaf node is (-n&255).
+  The number of bits that should have been read to reach this child, starting
+   from the current node, is (-n>>8).
+
+  If n is positive, then it is the number of bits that need to be read to
+   traverse the node.
+  1<<n entries follow in the array, each an index to a child node.
+  As an optimization to save space, leaf nodes are stored directly, instead of
+   storing an index, since they only require a single word.
+  If a leaf node would have been before reading n bits, then it is duplicated
+   the necessary number of times in this table.
+
+  @ARTICLE{Hash95,
+    author="Reza Hashemian",
+    title="Memory Efficient and High-Speed Search {Huffman} Coding",
+    journal="{IEEE} Transactions on Communications",
+    volume=43,
+    number=10,
+    pages="2576--2581",
+    month=Oct,
+    year=1995
+  }*/
+
+
+
 /*The number of internal tokens associated with each of the spec tokens.*/
 static const unsigned char OC_DCT_TOKEN_MAP_ENTRIES[TH_NDCT_TOKENS]={
   1,1,1,4,8,1,1,8,1,1,1,1,1,2,2,2,2,4,8,2,2,2,4,2,2,2,2,2,8,2,4,8
@@ -99,61 +149,7 @@
   40
 };
 
-/*These three functions are really part of the bitpack.c module, but
-   they are only used here.
-  Declaring local static versions so they can be inlined saves considerable
-   function call overhead.*/
 
-static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){
-  const unsigned char *ptr;
-  const unsigned char *stop;
-  oc_pb_window         window;
-  int                  available;
-  window=_b->window;
-  available=_b->bits;
-  ptr=_b->ptr;
-  stop=_b->stop;
-  /*This version of _refill() doesn't bother setting eof because we won't
-     check for it after we've started decoding DCT tokens.*/
-  if(ptr>=stop)available=OC_LOTS_OF_BITS;
-  while(available<=OC_PB_WINDOW_SIZE-8){
-    available+=8;
-    window|=(oc_pb_window)*ptr++<<OC_PB_WINDOW_SIZE-available;
-    if(ptr>=stop)available=OC_LOTS_OF_BITS;
-  }
-  _b->ptr=ptr;
-  if(_bits>available)window|=*ptr>>(available&7);
-  _b->bits=available;
-  return window;
-}
-
-
-/*Read in bits without advancing the bit pointer.
-  Here we assume 0<=_bits&&_bits<=32.*/
-static long oc_pack_look(oc_pack_buf *_b,int _bits){
-  oc_pb_window window;
-  int          available;
-  long         result;
-  window=_b->window;
-  available=_b->bits;
-  if(_bits==0)return 0;
-  if(_bits>available)_b->window=window=oc_pack_refill(_b,_bits);
-  result=window>>OC_PB_WINDOW_SIZE-_bits;
-  return result;
-}
-
-/*Advance the bit pointer.*/
-static void oc_pack_adv(oc_pack_buf *_b,int _bits){
-  /*We ignore the special cases for _bits==0 and _bits==32 here, since they are
-     never used actually used.
-    OC_HUFF_SLUSH (defined below) would have to be at least 27 to actually read
-     32 bits in a single go, and would require a 32 GB lookup table (assuming
-     8 byte pointers, since 4 byte pointers couldn't fit such a table).*/
-  _b->window<<=_bits;
-  _b->bits-=_bits;
-}
-
-
 /*The log_2 of the size of a lookup table is allowed to grow to relative to
    the number of unique nodes it contains.
   E.g., if OC_HUFF_SLUSH is 2, then at most 75% of the space in the tree is
@@ -173,43 +169,41 @@
 #define OC_HUFF_SLUSH (1)
 
 
-/*Determines the size in bytes of a Huffman tree node that represents a
+/*Determines the size in words of a Huffman tree node that represents a
    subtree of depth _nbits.
   _nbits: The depth of the subtree.
-          If this is 0, the node is a leaf node.
+          If this is <=0, the node is a leaf node.
           Otherwise 1<<_nbits pointers are allocated for children.
-  Return: The number of bytes required to store the node.*/
+  Return: The number of words required to store the node.*/
 static size_t oc_huff_node_size(int _nbits){
-  size_t size;
-  size=_ogg_offsetof(oc_huff_node,nodes);
-  if(_nbits>0)size+=sizeof(oc_huff_node *)*(1<<_nbits);
-  return size;
+  return _nbits>0?1+(1<<_nbits):1;
 }
 
-static oc_huff_node *oc_huff_node_init(char **_storage,size_t _size,int _nbits){
-  oc_huff_node *ret;
-  ret=(oc_huff_node *)*_storage;
-  ret->nbits=(unsigned char)_nbits;
-  (*_storage)+=_size;
-  return ret;
-}
 
-
-/*Determines the size in bytes of a Huffman tree.
-  _nbits: The depth of the subtree.
-          If this is 0, the node is a leaf node.
-          Otherwise storage for 1<<_nbits pointers are added for children.
-  Return: The number of bytes required to store the tree.*/
-static size_t oc_huff_tree_size(const oc_huff_node *_node){
+/*Determines the size in words of a Huffman subtree.
+  _tree: The complete Huffman tree.
+  _node: The index of the root of the desired subtree.
+  Return: The number of words required to store the tree.*/
+static size_t oc_huff_tree_size(const ogg_int16_t *_tree,int _node){
   size_t size;
-  size=oc_huff_node_size(_node->nbits);
-  if(_node->nbits){
+  int    n;
+  n=_tree[_node];
+  size=oc_huff_node_size(n);
+  if(n>0){
     int nchildren;
     int i;
-    nchildren=1<<_node->nbits;
-    for(i=0;i<nchildren;i+=1<<_node->nbits-_node->nodes[i]->depth){
-      size+=oc_huff_tree_size(_node->nodes[i]);
+    nchildren=1<<n;
+    i=0;
+    do{
+      int child;
+      child=_tree[_node+i+1];
+      if(child<=0)i+=1<<n-(-child>>8);
+      else{
+        size+=oc_huff_tree_size(_tree,child);
+        i++;
+      }
     }
+    while(i<nchildren);
   }
   return size;
 }
@@ -217,208 +211,279 @@
 
 /*Unpacks a sub-tree from the given buffer.
   _opb:      The buffer to unpack from.
-  _binodes:  The nodes to store the sub-tree in.
-  _nbinodes: The number of nodes available for the sub-tree.
-  Return: 0 on success, or a negative value on error.*/
+  _bitree:   The tree to store the sub-tree in.
+  _nbinodes: The number of nodes available for the tree.
+  _binode:   The index of the tree to store it in.
+             This is updated to point to the first available slot past the
+              sub-tree on return.
+             If an error occurs, this is set larger than _nbinodes.
+  Return: The node index of the sub-tree, or leaf node value if this was a leaf
+   node and this wasn't the root of the tree.*/
 static int oc_huff_tree_unpack(oc_pack_buf *_opb,
- oc_huff_node *_binodes,int _nbinodes){
-  oc_huff_node *binode;
-  long          bits;
-  int           nused;
-  if(_nbinodes<1)return TH_EBADHEADER;
-  binode=_binodes;
-  nused=0;
+ ogg_int16_t *_bitree,int _nbinodes,int *_binode){
+  long bits;
+  int  binode;
+  binode=*_binode;
+  if(binode>_nbinodes)return 0;
   bits=oc_pack_read1(_opb);
-  if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
+  if(oc_pack_bytes_left(_opb)<0){
+    *_binode=_nbinodes+1;
+    return 0;
+  }
   /*Read an internal node:*/
   if(!bits){
-    int ret;
-    nused++;
-    binode->nbits=1;
-    binode->depth=1;
-    binode->nodes[0]=_binodes+nused;
-    ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused);
-    if(ret>=0){
-      nused+=ret;
-      binode->nodes[1]=_binodes+nused;
-      ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused);
-    }
-    if(ret<0)return ret;
-    nused+=ret;
+    *_binode=binode+3;
+    if(binode+3>_nbinodes)return 0;
+    _bitree[binode+0]=1;
+    _bitree[binode+1]=
+     (ogg_int16_t)oc_huff_tree_unpack(_opb,_bitree,_nbinodes,_binode);
+    _bitree[binode+2]=
+     (ogg_int16_t)oc_huff_tree_unpack(_opb,_bitree,_nbinodes,_binode);
   }
   /*Read a leaf node:*/
   else{
     int ntokens;
     int token;
-    int i;
     bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS);
-    if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
-    /*Find out how many internal tokens we translate this external token into.*/
+    if(oc_pack_bytes_left(_opb)<0){
+      *_binode=_nbinodes+1;
+      return 0;
+    }
+    /*Find out how many internal tokens we translate this external token into.
+      This must be a power of two.*/
     ntokens=OC_DCT_TOKEN_MAP_ENTRIES[bits];
-    if(_nbinodes<2*ntokens-1)return TH_EBADHEADER;
-    /*Fill in a complete binary tree pointing to the internal tokens.*/
-    for(i=1;i<ntokens;i<<=1){
+    token=OC_DCT_TOKEN_MAP[bits];
+    if(ntokens<=1){
+      /*If we only have the one leaf, return it directly.*/
+      if(binode>0)return -(1<<8|token);
+      /*Unless we are the root of the tree; then we have to store it as a
+         node explicitly.*/
+      *_binode=binode+1;
+      if(binode+1>_nbinodes)return 0;
+      _bitree[binode]=-token;
+    }
+    else{
+      int i;
       int j;
-      binode=_binodes+nused;
-      nused+=i;
-      for(j=0;j<i;j++){
-        binode[j].nbits=1;
-        binode[j].depth=1;
-        binode[j].nodes[0]=_binodes+nused+2*j;
-        binode[j].nodes[1]=_binodes+nused+2*j+1;
+      /*Fill in a complete binary tree pointing to the internal nodes.*/
+      *_binode=binode+3*(ntokens-1);
+      if(binode+3*(ntokens-1)>_nbinodes)return 0;
+      for(i=1;i<ntokens>>1;i<<=1){
+        int child;
+        child=binode+3*(2*i-1);
+        for(j=0;j<i;j++){
+          _bitree[binode+3*(i-1+j)+0]=1;
+          _bitree[binode+3*(i-1+j)+1]=(ogg_int16_t)child;
+          child+=3;
+          _bitree[binode+3*(i-1+j)+2]=(ogg_int16_t)child;
+          child+=3;
+        }
       }
+      /*And now the last row pointing to the leaf nodes.*/
+      for(i=j=0;j<ntokens>>1;j++){
+        _bitree[binode+3*((ntokens>>1)-1+j)+0]=1;
+        _bitree[binode+3*((ntokens>>1)-1+j)+1]=(ogg_int16_t)-(1<<8|token+i++);
+        _bitree[binode+3*((ntokens>>1)-1+j)+2]=(ogg_int16_t)-(1<<8|token+i++);
+      }
     }
-    /*And now the leaf nodes with those tokens.*/
-    token=OC_DCT_TOKEN_MAP[bits];
-    for(i=0;i<ntokens;i++){
-      binode=_binodes+nused++;
-      binode->nbits=0;
-      binode->depth=1;
-      binode->token=token+i;
-    }
   }
-  return nused;
+  return binode;
 }
 
 /*Finds the depth of shortest branch of the given sub-tree.
   The tree must be binary.
+  _bitree: A packed binary Huffman tree.
   _binode: The root of the given sub-tree.
-           _binode->nbits must be 0 or 1.
+           _bitree[_binode] must be <=1.
   Return: The smallest depth of a leaf node in this sub-tree.
           0 indicates this sub-tree is a leaf node.*/
-static int oc_huff_tree_mindepth(oc_huff_node *_binode){
+static int oc_huff_tree_mindepth(const ogg_int16_t *_bitree,int _binode){
+  int child;
   int depth0;
   int depth1;
-  if(_binode->nbits==0)return 0;
-  depth0=oc_huff_tree_mindepth(_binode->nodes[0]);
-  depth1=oc_huff_tree_mindepth(_binode->nodes[1]);
+  if(_bitree[_binode]<=0)return 0;
+  child=_bitree[_binode+1];
+  depth0=child<=0?0:oc_huff_tree_mindepth(_bitree,child);
+  child=_bitree[_binode+2];
+  depth1=child<=0?0:oc_huff_tree_mindepth(_bitree,child);
   return OC_MINI(depth0,depth1)+1;
 }
 
 /*Finds the number of internal nodes at a given depth, plus the number of
    leaves at that depth or shallower.
   The tree must be binary.
+  _bitree: A packed binary Huffman tree.
   _binode: The root of the given sub-tree.
-           _binode->nbits must be 0 or 1.
-  Return: The number of entries that would be contained in a jump table of the
-           given depth.*/
-static int oc_huff_tree_occupancy(oc_huff_node *_binode,int _depth){
-  if(_binode->nbits==0||_depth<=0)return 1;
+           _bitree[_binode] must be <=1.
+  Return: The number of unique entries that would be contained in a jump table
+           of the given depth.*/
+static int oc_huff_tree_occupancy(const ogg_int16_t *_bitree,int _binode,
+ int _depth){
+  if(_depth<=0||_bitree[_binode]<=0)return 1;
   else{
-    return oc_huff_tree_occupancy(_binode->nodes[0],_depth-1)+
-     oc_huff_tree_occupancy(_binode->nodes[1],_depth-1);
+    int child1;
+    int child2;
+    child1=_bitree[_binode+1];
+    child2=_bitree[_binode+2];
+    return (child1<=0?1:oc_huff_tree_occupancy(_bitree,child1,_depth-1))+
+     (child2<=0?1:oc_huff_tree_occupancy(_bitree,child2,_depth-1));
   }
 }
 
-/*Makes a copy of the given Huffman tree.
-  _node: The Huffman tree to copy.
-  Return: The copy of the Huffman tree.*/
-static oc_huff_node *oc_huff_tree_copy(const oc_huff_node *_node,
- char **_storage){
-  oc_huff_node *ret;
-  ret=oc_huff_node_init(_storage,oc_huff_node_size(_node->nbits),_node->nbits);
-  ret->depth=_node->depth;
-  if(_node->nbits){
+/*Makes a copy of the given Huffman sub-tree.
+  _dst: The destination tree.
+  _src: The destination tree.
+  _node: The index Huffman sub-tree to copy.
+  Return: The index of the first available slot after the sub-tree copy.*/
+static int oc_huff_tree_copy(ogg_int16_t *_dst,
+ const ogg_int16_t *_src,int _node){
+  int n;
+  n=_src[_node];
+  _dst[_node++]=(ogg_int16_t)n;
+  if(n<=0)return _node;
+  else{
     int nchildren;
+    int ret;
     int i;
-    int inext;
-    nchildren=1<<_node->nbits;
-    for(i=0;i<nchildren;){
-      ret->nodes[i]=oc_huff_tree_copy(_node->nodes[i],_storage);
-      inext=i+(1<<_node->nbits-ret->nodes[i]->depth);
-      while(++i<inext)ret->nodes[i]=ret->nodes[i-1];
+    nchildren=1<<n;
+    ret=_node+nchildren;
+    for(i=0;i<nchildren;i++){
+      int child;
+      child=_src[_node+i];
+      _dst[_node+i]=(ogg_int16_t)child;
+      if(child>0){
+        /*Note: This assumes that storage for children is in order, and
+           consecutive.*/
+        ret=oc_huff_tree_copy(_dst,_src,child);
+      }
     }
+    return ret;
   }
-  else ret->token=_node->token;
-  return ret;
 }
 
-static size_t oc_huff_tree_collapse_size(oc_huff_node *_binode,int _depth){
+static size_t oc_huff_tree_collapse_size(ogg_int16_t *_bitree,int _binode,
+ int _depth){
   size_t size;
   int    mindepth;
   int    depth;
   int    loccupancy;
   int    occupancy;
-  if(_binode->nbits!=0&&_depth>0){
-    return oc_huff_tree_collapse_size(_binode->nodes[0],_depth-1)+
-     oc_huff_tree_collapse_size(_binode->nodes[1],_depth-1);
+  int    child1;
+  int    child2;
+  int    n;
+  n=_bitree[_binode];
+  if(n>0&&_depth>0){
+    child1=_bitree[_binode+1];
+    child2=_bitree[_binode+2];
+    return (child1<=0?0:oc_huff_tree_collapse_size(_bitree,child1,_depth-1))+
+     (child2<=0?0:oc_huff_tree_collapse_size(_bitree,child2,_depth-1));
   }
-  depth=mindepth=oc_huff_tree_mindepth(_binode);
+  depth=mindepth=oc_huff_tree_mindepth(_bitree,_binode);
   occupancy=1<<mindepth;
   do{
     loccupancy=occupancy;
-    occupancy=oc_huff_tree_occupancy(_binode,++depth);
+    occupancy=oc_huff_tree_occupancy(_bitree,_binode,++depth);
   }
   while(occupancy>loccupancy&&occupancy>=1<<OC_MAXI(depth-OC_HUFF_SLUSH,0));
   depth--;
   size=oc_huff_node_size(depth);
   if(depth>0){
-    size+=oc_huff_tree_collapse_size(_binode->nodes[0],depth-1);
-    size+=oc_huff_tree_collapse_size(_binode->nodes[1],depth-1);
+    child1=_bitree[_binode+1];
+    child2=_bitree[_binode+2];
+    size+=child1<=0?0:oc_huff_tree_collapse_size(_bitree,child1,depth-1);
+    size+=child2<=0?0:oc_huff_tree_collapse_size(_bitree,child2,depth-1);
   }
   return size;
 }
 
-static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode,
- char **_storage);
+static int oc_huff_tree_collapse(ogg_int16_t *_tree,int *_node,
+ const ogg_int16_t *_bitree,int _binode);
 
 /*Fills the given nodes table with all the children in the sub-tree at the
    given depth.
   The nodes in the sub-tree with a depth less than that stored in the table
    are freed.
   The sub-tree must be binary and complete up until the given depth.
-  _nodes:  The nodes table to fill.
-  _binode: The root of the sub-tree to fill it with.
-           _binode->nbits must be 0 or 1.
+  _tree:   The storage for the collapsed Huffman tree.
+  _node:   The index of the next available storage in the tree.
+  _offs:   The index of the nodes to fill.
+  _bitree: The binary tree to collapse.
+  _binode: The index of the root of the sub-tree to fill it with.
+           _bitree[_binode] must be <=1 .
   _level:  The current level in the table.
            0 indicates that the current node should be stored, regardless of
             whether it is a leaf node or an internal node.
   _depth:  The depth of the nodes to fill the table with, relative to their
             parent.*/
-static void oc_huff_node_fill(oc_huff_node **_nodes,
- oc_huff_node *_binode,int _level,int _depth,char **_storage){
-  if(_level<=0||_binode->nbits==0){
-    int i;
-    _binode->depth=(unsigned char)(_depth-_level);
-    _nodes[0]=oc_huff_tree_collapse(_binode,_storage);
-    for(i=1;i<1<<_level;i++)_nodes[i]=_nodes[0];
+static void oc_huff_node_fill(ogg_int16_t *_tree,int *_node,int _offs,
+ const ogg_int16_t *_bitree,int _binode,int _level,int _depth){
+  int leaf;
+  int n;
+  int i;
+  n=_bitree[_binode];
+  if(n<=0){
+    leaf=-((_depth-_level<<8)|(-n&255));
+    for(i=0;i<1<<_level;i++)_tree[_offs+i]=(ogg_int16_t)leaf;
   }
+  else if(_level<=0){
+    _tree[_offs]=oc_huff_tree_collapse(_tree,_node,_bitree,_binode);
+  }
   else{
+    int child1;
+    int child2;
     _level--;
-    oc_huff_node_fill(_nodes,_binode->nodes[0],_level,_depth,_storage);
-    _nodes+=1<<_level;
-    oc_huff_node_fill(_nodes,_binode->nodes[1],_level,_depth,_storage);
+    child1=_bitree[_binode+1];
+    if(child1<=0){
+      leaf=-((_depth-_level<<8)|(-child1&255));
+      for(i=0;i<1<<_level;i++)_tree[_offs+i]=(ogg_int16_t)leaf;
+    }
+    else oc_huff_node_fill(_tree,_node,_offs,_bitree,child1,_level,_depth);
+    _offs+=1<<_level;
+    child2=_bitree[_binode+2];
+    if(child2<=0){
+      leaf=-((_depth-_level<<8)|(-child2&255));
+      for(i=0;i<1<<_level;i++)_tree[_offs+i]=(ogg_int16_t)leaf;
+    }
+    else oc_huff_node_fill(_tree,_node,_offs,_bitree,child2,_level,_depth);
   }
 }
 
 /*Finds the largest complete sub-tree rooted at the current node and collapses
    it into a single node.
   This procedure is then applied recursively to all the children of that node.
-  _binode: The root of the sub-tree to collapse.
-           _binode->nbits must be 0 or 1.
+  _tree:   The storage for the collapsed Huffman tree.
+  _node:   The index of the next available storage in the tree.
+  _offs:   The index of the nodes to fill.
+  _bitree: The binary tree to collapse.
+  _binode: The index of the root of the sub-tree to fill it with.
+           _bitree[_binode] must be <=1 .
   Return: The new root of the collapsed sub-tree.*/
-static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode,
- char **_storage){
-  oc_huff_node *root;
-  size_t        size;
-  int           mindepth;
-  int           depth;
-  int           loccupancy;
-  int           occupancy;
-  depth=mindepth=oc_huff_tree_mindepth(_binode);
+static int oc_huff_tree_collapse(ogg_int16_t *_tree,int *_node,
+ const ogg_int16_t *_bitree,int _binode){
+  int mindepth;
+  int depth;
+  int loccupancy;
+  int occupancy;
+  int node;
+  depth=mindepth=oc_huff_tree_mindepth(_bitree,_binode);
   occupancy=1<<mindepth;
   do{
     loccupancy=occupancy;
-    occupancy=oc_huff_tree_occupancy(_binode,++depth);
+    occupancy=oc_huff_tree_occupancy(_bitree,_binode,++depth);
   }
   while(occupancy>loccupancy&&occupancy>=1<<OC_MAXI(depth-OC_HUFF_SLUSH,0));
   depth--;
-  if(depth<=0)return oc_huff_tree_copy(_binode,_storage);
-  size=oc_huff_node_size(depth);
-  root=oc_huff_node_init(_storage,size,depth);
-  root->depth=_binode->depth;
-  oc_huff_node_fill(root->nodes,_binode,depth,depth,_storage);
-  return root;
+  node=*_node;
+  if(depth<=0){
+    _tree[node]=_bitree[_binode];
+    *_node=node+1;
+  }
+  else{
+    _tree[node]=depth;
+    *_node=node+1+(1<<depth);
+    oc_huff_node_fill(_tree,_node,node+1,_bitree,_binode,depth,depth);
+  }
+  return node;
 }
 
 /*Unpacks a set of Huffman trees, and reduces them to a collapsed
@@ -427,22 +492,25 @@
   _nodes: The table to fill with the Huffman trees.
   Return: 0 on success, or a negative value on error.*/
 int oc_huff_trees_unpack(oc_pack_buf *_opb,
- oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){
+ ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
   int i;
   for(i=0;i<TH_NHUFFMAN_TABLES;i++){
-    oc_huff_node  nodes[511];
-    char         *storage;
+    ogg_int16_t   nodes[255*3];
+    ogg_int16_t  *tree;
     size_t        size;
-    int           ret;
+    int           node;
     /*Unpack the full tree into a temporary buffer.*/
-    ret=oc_huff_tree_unpack(_opb,nodes,sizeof(nodes)/sizeof(*nodes));
-    if(ret<0)return ret;
+    node=0;
+    oc_huff_tree_unpack(_opb,nodes,sizeof(nodes)/sizeof(*nodes),&node);
+    if(node>sizeof(nodes)/sizeof(*nodes))return TH_EBADHEADER;
     /*Figure out how big the collapsed tree will be.*/
-    size=oc_huff_tree_collapse_size(nodes,0);
-    storage=(char *)_ogg_calloc(1,size);
-    if(storage==NULL)return TH_EFAULT;
+    size=oc_huff_tree_collapse_size(nodes,0,0);
+    tree=(ogg_int16_t *)_ogg_malloc(size*sizeof(*tree));
+    if(tree==NULL)return TH_EFAULT;
     /*And collapse it.*/
-    _nodes[i]=oc_huff_tree_collapse(nodes,&storage);
+    node=0;
+    oc_huff_tree_collapse(tree,&node,nodes,0);
+    _nodes[i]=tree;
   }
   return 0;
 }
@@ -450,40 +518,89 @@
 /*Makes a copy of the given set of Huffman trees.
   _dst: The array to store the copy in.
   _src: The array of trees to copy.*/
-int oc_huff_trees_copy(oc_huff_node *_dst[TH_NHUFFMAN_TABLES],
- const oc_huff_node *const _src[TH_NHUFFMAN_TABLES]){
+int oc_huff_trees_copy(ogg_int16_t *_dst[TH_NHUFFMAN_TABLES],
+ const ogg_int16_t *const _src[TH_NHUFFMAN_TABLES]){
   int i;
   for(i=0;i<TH_NHUFFMAN_TABLES;i++){
-    size_t  size;
-    char   *storage;
-    size=oc_huff_tree_size(_src[i]);
-    storage=(char *)_ogg_calloc(1,size);
-    if(storage==NULL){
+    size_t       size;
+    ogg_int16_t *tree;
+    size=oc_huff_tree_size(_src[i],0);
+    tree=(ogg_int16_t *)_ogg_malloc(size*sizeof(*tree));
+    if(tree==NULL){
       while(i-->0)_ogg_free(_dst[i]);
       return TH_EFAULT;
     }
-    _dst[i]=oc_huff_tree_copy(_src[i],&storage);
+    oc_huff_tree_copy(tree,_src[i],0);
+    _dst[i]=tree;
   }
   return 0;
 }
 
 /*Frees the memory used by a set of Huffman trees.
   _nodes: The array of trees to free.*/
-void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){
+void oc_huff_trees_clear(ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
   int i;
   for(i=0;i<TH_NHUFFMAN_TABLES;i++)_ogg_free(_nodes[i]);
 }
 
+
 /*Unpacks a single token using the given Huffman tree.
   _opb:  The buffer to unpack the token from.
   _node: The tree to unpack the token with.
   Return: The token value.*/
-int oc_huff_token_decode(oc_pack_buf *_opb,const oc_huff_node *_node){
-  long bits;
-  while(_node->nbits!=0){
-    bits=oc_pack_look(_opb,_node->nbits);
-    _node=_node->nodes[bits];
-    oc_pack_adv(_opb,_node->depth);
+int oc_huff_token_decode(oc_pack_buf *_opb,const ogg_int16_t *_tree){
+  const unsigned char *ptr;
+  const unsigned char *stop;
+  oc_pb_window         window;
+  int                  available;
+  long                 bits;
+  int                  node;
+  int                  n;
+  n=_tree[0];
+  ptr=_opb->ptr;
+  window=_opb->window;
+  stop=_opb->stop;
+  available=_opb->bits;
+  /*Handle the case of a codebook with a single value.
+    Branch prediction should make this esoteric case free.*/
+  if(n<=0)return -n&255;
+  if(n>available){
+    /*We don't bother setting eof because we won't check for it after we've
+       started decoding DCT tokens.*/
+    if(ptr>=stop)available=OC_LOTS_OF_BITS;
+    while(available<=OC_PB_WINDOW_SIZE-8){
+      available+=8;
+      window|=(oc_pb_window)*ptr++<<OC_PB_WINDOW_SIZE-available;
+      if(ptr>=stop)available=OC_LOTS_OF_BITS;
+    }
+    if(n>available)window|=*ptr>>(available&7);
   }
-  return _node->token;
+  bits=window>>OC_PB_WINDOW_SIZE-n;
+  node=_tree[1+bits];
+  while(node>0){
+    window<<=n;
+    available-=n;
+    n=_tree[node];
+    if(n>available){
+      /*We don't bother setting eof because we won't check for it after we've
+         started decoding DCT tokens.*/
+      if(ptr>=stop)available=OC_LOTS_OF_BITS;
+      while(available<=OC_PB_WINDOW_SIZE-8){
+        available+=8;
+        window|=(oc_pb_window)*ptr++<<OC_PB_WINDOW_SIZE-available;
+        if(ptr>=stop)available=OC_LOTS_OF_BITS;
+      }
+      if(n>available)window|=*ptr>>(available&7);
+    }
+    bits=window>>OC_PB_WINDOW_SIZE-n;
+    node=_tree[node+1+bits];
+  }
+  node=-node;
+  n=node>>8;
+  window<<=n;
+  available-=n;
+  _opb->ptr=ptr;
+  _opb->window=window;
+  _opb->bits=available;
+  return node&255;
 }

Modified: experimental/derf/theora-ptalarbvorm/lib/huffdec.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/huffdec.h	2010-05-13 00:13:51 UTC (rev 17203)
+++ experimental/derf/theora-ptalarbvorm/lib/huffdec.h	2010-05-13 06:25:13 UTC (rev 17204)
@@ -22,71 +22,11 @@
 
 
 
-typedef struct oc_huff_node oc_huff_node;
-
-/*A node in the Huffman tree.
-  Instead of storing every branching in the tree, subtrees can be collapsed
-   into one node, with a table of size 1<<nbits pointing directly to its
-   descedents nbits levels down.
-  This allows more than one bit to be read at a time, and avoids following all
-   the intermediate branches with next to no increased code complexity once
-   the collapsed tree has been built.
-  We do _not_ require that a subtree be complete to be collapsed, but instead
-   store duplicate pointers in the table, and record the actual depth of the
-   node below its parent.
-  This tells us the number of bits to advance the stream after reaching it.
-
-  This turns out to be equivalent to the method described in \cite{Hash95},
-   without the requirement that codewords be sorted by length.
-  If the codewords were sorted by length (so-called ``canonical-codes''), they
-   could be decoded much faster via either Lindell and Moffat's approach or
-   Hashemian's Condensed Huffman Code approach, the latter of which has an
-   extremely small memory footprint.
-  We can't use Choueka et al.'s finite state machine approach, which is
-   extremely fast, because we can't allow multiple symbols to be output at a
-   time; the codebook can and does change between symbols.
-  It also has very large memory requirements, which impairs cache coherency.
-
-  @ARTICLE{Hash95,
-    author="Reza Hashemian",
-    title="Memory Efficient and High-Speed Search {Huffman} Coding",
-    journal="{IEEE} Transactions on Communications",
-    volume=43,
-    number=10,
-    pages="2576--2581",
-    month=Oct,
-    year=1995
-  }*/
-struct oc_huff_node{
-  /*The number of bits of the code needed to descend through this node.
-    0 indicates a leaf node.
-    Otherwise there are 1<<nbits nodes in the nodes table, which can be
-     indexed by reading nbits bits from the stream.*/
-  unsigned char  nbits;
-  /*The value of a token stored in a leaf node.
-    The value in non-leaf nodes is undefined.*/
-  unsigned char  token;
-  /*The depth of the current node, relative to its parent in the collapsed
-     tree.
-    This can be less than its parent's nbits value, in which case there are
-     1<<nbits-depth copies of this node in the table, and the bitstream should
-     only be advanced depth bits after reaching this node.*/
-  unsigned char  depth;
-  /*The table of child nodes.
-    The ACTUAL size of this array is 1<<nbits, despite what the declaration
-     below claims.
-    The exception is that for leaf nodes the size is 0.*/
-  oc_huff_node  *nodes[2];
-};
-
-
-
 int oc_huff_trees_unpack(oc_pack_buf *_opb,
- oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]);
-int oc_huff_trees_copy(oc_huff_node *_dst[TH_NHUFFMAN_TABLES],
- const oc_huff_node *const _src[TH_NHUFFMAN_TABLES]);
-void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]);
-int oc_huff_token_decode(oc_pack_buf *_opb,const oc_huff_node *_node);
+ ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]);
+int oc_huff_trees_copy(ogg_int16_t *_dst[TH_NHUFFMAN_TABLES],
+ const ogg_int16_t *const _src[TH_NHUFFMAN_TABLES]);
+void oc_huff_trees_clear(ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]);
+int oc_huff_token_decode(oc_pack_buf *_opb,const ogg_int16_t *_node);
 
-
 #endif



More information about the commits mailing list