[xiph-commits] r17230 - experimental/derf/theora-ptalarbvorm/lib

tterribe at svn.xiph.org tterribe at svn.xiph.org
Tue May 18 08:59:52 PDT 2010


Author: tterribe
Date: 2010-05-18 08:59:52 -0700 (Tue, 18 May 2010)
New Revision: 17230

Modified:
   experimental/derf/theora-ptalarbvorm/lib/huffdec.c
Log:
Remove a bunch of the special-case handling for a Huffman tree with one node
 (e.g., that decodes a single, constant token value using 0 bits).
This simplifies the tree-unpacking code a lot, and removes a special case from
 the actual token decode.
This special case was basically free on x86 thanks to branch prediction, but
 eliminating it reduces the fast-path cycle count on c64x by almost 32%.


Modified: experimental/derf/theora-ptalarbvorm/lib/huffdec.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/huffdec.c	2010-05-17 22:38:07 UTC (rev 17229)
+++ experimental/derf/theora-ptalarbvorm/lib/huffdec.c	2010-05-18 15:59:52 UTC (rev 17230)
@@ -46,22 +46,28 @@
   It also has very large memory requirements, which impairs cache coherency.
 
   We store the tree packed in an array of 16-bit integers (words).
-  Each node of the tree consists of a single word, followed consecutiably by
-   zero or more indexes of its children.
-  Let n be the value of this first word.
+  These come in two flavors.
+  The first is a "bitree", or binary tree, where each node always has exactly
+   two children, stored in consecutive words.
+  If the child is negative or zero, then it is a leaf node.
+  These are stored directly in the child pointer to save space, since they only
+   require a single word.
+  The token value is -leaf.
+  If the child is positive, then it is the index of another internal node in
+   the table.
 
-  If n is negative or zero, then this is a leaf node.
-  The token in the leaf node is (-n&255).
-  The number of bits that should have been read to reach this child, starting
-   from the current node, is (-n>>8).
-
-  If n is positive, then it is the number of bits that need to be read to
-   traverse the node.
+  The second flavor is a general "tree", where each node consists of a single
+   word, followed consecutively by two or more indices of its children.
+  Let n be the value of this first word.
+  This is the number of bits that need to be read to traverse the node, and
+   must be positive.
   1<<n entries follow in the array, each an index to a child node.
-  As an optimization to save space, leaf nodes are stored directly, instead of
-   storing an index, since they only require a single word.
   If a leaf node would have been before reading n bits, then it is duplicated
    the necessary number of times in this table.
+  Leaf nodes pack both a token value and their actual depth in the tree.
+  The token in the leaf node is (-leaf&255).
+  The number of bits that need to be consumed to reach the leaf, starting from
+   the current node, is (-leaf>>8).
 
   @ARTICLE{Hash95,
     author="Reza Hashemian",
@@ -177,46 +183,7 @@
 #define OC_ROOT_HUFF_SLUSH (3)
 
 
-/*Determines the size in words of a Huffman tree node that represents a
-   subtree of depth _nbits.
-  _nbits: The depth of the subtree.
-          If this is <=0, the node is a leaf node.
-          Otherwise 1<<_nbits pointers are allocated for children.
-  Return: The number of words required to store the node.*/
-static size_t oc_huff_node_size(int _nbits){
-  return _nbits>0?1+(1<<_nbits):1;
-}
 
-
-/*Determines the size in words of a Huffman subtree.
-  _tree: The complete Huffman tree.
-  _node: The index of the root of the desired subtree.
-  Return: The number of words required to store the tree.*/
-static size_t oc_huff_tree_size(const ogg_int16_t *_tree,int _node){
-  size_t size;
-  int    n;
-  n=_tree[_node];
-  size=oc_huff_node_size(n);
-  if(n>0){
-    int nchildren;
-    int i;
-    nchildren=1<<n;
-    i=0;
-    do{
-      int child;
-      child=_tree[_node+i+1];
-      if(child<=0)i+=1<<n-(-child>>8);
-      else{
-        size+=oc_huff_tree_size(_tree,child);
-        i++;
-      }
-    }
-    while(i<nchildren);
-  }
-  return size;
-}
-
-
 /*Unpacks a sub-tree from the given buffer.
   _opb:      The buffer to unpack from.
   _bitree:   The tree to store the sub-tree in.
@@ -240,13 +207,12 @@
   }
   /*Read an internal node:*/
   if(!bits){
-    *_binode=binode+3;
-    if(binode+3>_nbinodes)return 0;
-    _bitree[binode+0]=1;
+    *_binode=binode+2;
+    if(binode+2>_nbinodes)return 0;
+    _bitree[binode+0]=
+     (ogg_int16_t)oc_huff_tree_unpack(_opb,_bitree,_nbinodes,_binode);
     _bitree[binode+1]=
      (ogg_int16_t)oc_huff_tree_unpack(_opb,_bitree,_nbinodes,_binode);
-    _bitree[binode+2]=
-     (ogg_int16_t)oc_huff_tree_unpack(_opb,_bitree,_nbinodes,_binode);
   }
   /*Read a leaf node:*/
   else{
@@ -263,79 +229,102 @@
     token=OC_DCT_TOKEN_MAP[bits];
     if(ntokens<=1){
       /*If we only have the one leaf, return it directly.*/
-      if(binode>0)return -(1<<8|token);
-      /*Unless we are the root of the tree; then we have to store it as a
-         node explicitly.*/
-      *_binode=binode+1;
-      if(binode+1>_nbinodes)return 0;
-      _bitree[binode]=-token;
+      return -token;
     }
     else{
+      int last_row_sz;
       int i;
       int j;
       /*Fill in a complete binary tree pointing to the internal nodes.*/
-      *_binode=binode+3*(ntokens-1);
-      if(binode+3*(ntokens-1)>_nbinodes)return 0;
-      for(i=1;i<ntokens>>1;i<<=1){
+      *_binode=binode+2*(ntokens-1);
+      if(binode+2*(ntokens-1)>_nbinodes)return 0;
+      last_row_sz=ntokens>>1;
+      for(i=1;i<last_row_sz;i<<=1){
         int child;
-        child=binode+3*(2*i-1);
+        child=binode+2*(2*i-1);
         for(j=0;j<i;j++){
-          _bitree[binode+3*(i-1+j)+0]=1;
-          _bitree[binode+3*(i-1+j)+1]=(ogg_int16_t)child;
-          child+=3;
-          _bitree[binode+3*(i-1+j)+2]=(ogg_int16_t)child;
-          child+=3;
+          _bitree[binode+2*(i-1+j)+0]=(ogg_int16_t)child;
+          child+=2;
+          _bitree[binode+2*(i-1+j)+1]=(ogg_int16_t)child;
+          child+=2;
         }
       }
       /*And now the last row pointing to the leaf nodes.*/
-      for(i=j=0;j<ntokens>>1;j++){
-        _bitree[binode+3*((ntokens>>1)-1+j)+0]=1;
-        _bitree[binode+3*((ntokens>>1)-1+j)+1]=(ogg_int16_t)-(1<<8|token+i++);
-        _bitree[binode+3*((ntokens>>1)-1+j)+2]=(ogg_int16_t)-(1<<8|token+i++);
+      for(i=j=0;j<last_row_sz;j++){
+        _bitree[binode+2*(last_row_sz-1+j)+0]=(ogg_int16_t)-(token+i++);
+        _bitree[binode+2*(last_row_sz-1+j)+1]=(ogg_int16_t)-(token+i++);
       }
     }
   }
-  return binode;
+  return binode>0?binode:1;
 }
 
 /*Finds the depth of shortest branch of the given sub-tree.
-  The tree must be binary.
   _bitree: A packed binary Huffman tree.
   _binode: The root of the given sub-tree.
-           _bitree[_binode] must be <=1.
   Return: The smallest depth of a leaf node in this sub-tree.
           0 indicates this sub-tree is a leaf node.*/
-static int oc_huff_tree_mindepth(const ogg_int16_t *_bitree,int _binode){
+static int oc_huff_bitree_mindepth(const ogg_int16_t *_bitree,int _binode){
   int child;
   int depth0;
   int depth1;
-  if(_bitree[_binode]<=0)return 0;
+  child=_bitree[_binode+0];
+  depth0=child<=0?0:oc_huff_bitree_mindepth(_bitree,child);
   child=_bitree[_binode+1];
-  depth0=child<=0?0:oc_huff_tree_mindepth(_bitree,child);
-  child=_bitree[_binode+2];
-  depth1=child<=0?0:oc_huff_tree_mindepth(_bitree,child);
+  depth1=child<=0?0:oc_huff_bitree_mindepth(_bitree,child);
   return OC_MINI(depth0,depth1)+1;
 }
 
 /*Finds the number of internal nodes at a given depth, plus the number of
    leaves at that depth or shallower.
-  The tree must be binary.
   _bitree: A packed binary Huffman tree.
   _binode: The root of the given sub-tree.
-           _bitree[_binode] must be <=1.
   Return: The number of unique entries that would be contained in a jump table
            of the given depth.*/
-static int oc_huff_tree_occupancy(const ogg_int16_t *_bitree,int _binode,
+static int oc_huff_bitree_occupancy(const ogg_int16_t *_bitree,int _binode,
  int _depth){
-  if(_depth<=0||_bitree[_binode]<=0)return 1;
-  else{
-    int child1;
-    int child2;
-    child1=_bitree[_binode+1];
-    child2=_bitree[_binode+2];
-    return (child1<=0?1:oc_huff_tree_occupancy(_bitree,child1,_depth-1))+
-     (child2<=0?1:oc_huff_tree_occupancy(_bitree,child2,_depth-1));
+  int child1;
+  int child2;
+  if(_depth<=0)return 1;
+  child1=_bitree[_binode+0];
+  child2=_bitree[_binode+1];
+  return (child1<=0?1:oc_huff_bitree_occupancy(_bitree,child1,_depth-1))+
+   (child2<=0?1:oc_huff_bitree_occupancy(_bitree,child2,_depth-1));
+}
+
+/*Determines the size in words of a Huffman tree node that represents a
+   subtree of depth _nbits.
+  _nbits: The depth of the subtree.
+          This must be greater than zero.
+  Return: The number of words required to store the node.*/
+static size_t oc_huff_node_size(int _nbits){
+  return 1+(1<<_nbits);
+}
+
+/*Determines the size in words of a Huffman subtree.
+  _tree: The complete Huffman tree.
+  _node: The index of the root of the desired subtree.
+  Return: The number of words required to store the tree.*/
+static size_t oc_huff_tree_size(const ogg_int16_t *_tree,int _node){
+  size_t size;
+  int    nchildren;
+  int    n;
+  int    i;
+  n=_tree[_node];
+  size=oc_huff_node_size(n);
+  nchildren=1<<n;
+  i=0;
+  do{
+    int child;
+    child=_tree[_node+i+1];
+    if(child<=0)i+=1<<n-(-child>>8);
+    else{
+      size+=oc_huff_tree_size(_tree,child);
+      i++;
+    }
   }
+  while(i<nchildren);
+  return size;
 }
 
 /*Makes a copy of the given Huffman sub-tree.
@@ -345,64 +334,62 @@
   Return: The index of the first available slot after the sub-tree copy.*/
 static int oc_huff_tree_copy(ogg_int16_t *_dst,
  const ogg_int16_t *_src,int _node){
+  int nchildren;
+  int ret;
+  int i;
   int n;
   n=_src[_node];
   _dst[_node++]=(ogg_int16_t)n;
-  if(n<=0)return _node;
-  else{
-    int nchildren;
-    int ret;
-    int i;
-    nchildren=1<<n;
-    ret=_node+nchildren;
-    for(i=0;i<nchildren;i++){
-      int child;
-      child=_src[_node+i];
-      _dst[_node+i]=(ogg_int16_t)child;
-      if(child>0){
-        /*Note: This assumes that storage for children is in order, and
-           consecutive.*/
-        ret=oc_huff_tree_copy(_dst,_src,child);
-      }
+  nchildren=1<<n;
+  ret=_node+nchildren;
+  for(i=0;i<nchildren;i++){
+    int child;
+    child=_src[_node+i];
+    _dst[_node+i]=(ogg_int16_t)child;
+    if(child>0){
+      /*Note: The assignment to ret here assumes that storage for children is
+         in order, and consecutive (e.g., child==ret before the call).*/
+      ret=oc_huff_tree_copy(_dst,_src,child);
     }
-    return ret;
   }
+  return ret;
 }
 
-static size_t oc_huff_tree_collapse_size(ogg_int16_t *_bitree,int _binode,
+/*Computes the number of words of storage that will be required by
+   oc_huff_tree_collapse() for the given sub-tree.
+  _bitree: The binary tree to collapse.
+  _binode: The index of the root of the sub-tree to fill it with.
+           _bitree[_binode] must ==1.
+  _depth:  The depth of the nodes that our parent in the collapsed sub-tree
+            will fill its table with.
+           Pass 0 for the root of the tree.
+  Return: The number of words required to store the collapsed tree.*/
+static size_t oc_huff_bitree_collapse_size(ogg_int16_t *_bitree,int _binode,
  int _depth){
   size_t size;
-  int    mindepth;
-  int    depth;
-  int    threshold;
-  int    loccupancy;
-  int    occupancy;
   int    child1;
   int    child2;
-  int    n;
-  n=_bitree[_binode];
-  if(n>0&&_depth>0){
-    child1=_bitree[_binode+1];
-    child2=_bitree[_binode+2];
-    return (child1<=0?0:oc_huff_tree_collapse_size(_bitree,child1,_depth-1))+
-     (child2<=0?0:oc_huff_tree_collapse_size(_bitree,child2,_depth-1));
+  if(_depth>0)size=0;
+  else{
+    int mindepth;
+    int slush;
+    int loccupancy;
+    int occupancy;
+    _depth=mindepth=oc_huff_bitree_mindepth(_bitree,_binode);
+    occupancy=1<<mindepth;
+    slush=_binode>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH;
+    do{
+      loccupancy=occupancy;
+      occupancy=oc_huff_bitree_occupancy(_bitree,_binode,++_depth);
+    }
+    while(occupancy>loccupancy&&occupancy>=1<<OC_MAXI(_depth-slush,0));
+    _depth--;
+    size=oc_huff_node_size(_depth);
   }
-  depth=mindepth=oc_huff_tree_mindepth(_bitree,_binode);
-  occupancy=1<<mindepth;
-  do{
-    loccupancy=occupancy;
-    occupancy=oc_huff_tree_occupancy(_bitree,_binode,++depth);
-    threshold=1<<OC_MAXI(depth-(_binode>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH),0);
-  }
-  while(occupancy>loccupancy&&occupancy>=threshold);
-  depth--;
-  size=oc_huff_node_size(depth);
-  if(depth>0){
-    child1=_bitree[_binode+1];
-    child2=_bitree[_binode+2];
-    size+=child1<=0?0:oc_huff_tree_collapse_size(_bitree,child1,depth-1);
-    size+=child2<=0?0:oc_huff_tree_collapse_size(_bitree,child2,depth-1);
-  }
+  child1=_bitree[_binode+0];
+  child2=_bitree[_binode+1];
+  if(child1>0)size+=oc_huff_bitree_collapse_size(_bitree,child1,_depth-1);
+  if(child2>0)size+=oc_huff_bitree_collapse_size(_bitree,child2,_depth-1);
   return size;
 }
 
@@ -427,31 +414,23 @@
             parent.*/
 static void oc_huff_node_fill(ogg_int16_t *_tree,int *_node,int _offs,
  const ogg_int16_t *_bitree,int _binode,int _level,int _depth){
-  int leaf;
-  int n;
-  int i;
-  n=_bitree[_binode];
-  if(n<=0){
-    leaf=-((_depth-_level<<8)|(-n&255));
-    for(i=0;i<1<<_level;i++)_tree[_offs+i]=(ogg_int16_t)leaf;
-  }
-  else if(_level<=0){
-    _tree[_offs]=oc_huff_tree_collapse(_tree,_node,_bitree,_binode);
-  }
+  if(_level<=0)_tree[_offs]=oc_huff_tree_collapse(_tree,_node,_bitree,_binode);
   else{
     int child1;
     int child2;
+    int leaf;
+    int i;
     _level--;
-    child1=_bitree[_binode+1];
+    child1=_bitree[_binode+0];
     if(child1<=0){
-      leaf=-((_depth-_level<<8)|(-child1&255));
+      leaf=-(_depth-_level<<8|-child1);
       for(i=0;i<1<<_level;i++)_tree[_offs+i]=(ogg_int16_t)leaf;
     }
     else oc_huff_node_fill(_tree,_node,_offs,_bitree,child1,_level,_depth);
     _offs+=1<<_level;
-    child2=_bitree[_binode+2];
+    child2=_bitree[_binode+1];
     if(child2<=0){
-      leaf=-((_depth-_level<<8)|(-child2&255));
+      leaf=-(_depth-_level<<8|-child2);
       for(i=0;i<1<<_level;i++)_tree[_offs+i]=(ogg_int16_t)leaf;
     }
     else oc_huff_node_fill(_tree,_node,_offs,_bitree,child2,_level,_depth);
@@ -474,27 +453,21 @@
   int depth;
   int loccupancy;
   int occupancy;
-  int threshold;
+  int slush;
   int node;
-  depth=mindepth=oc_huff_tree_mindepth(_bitree,_binode);
+  depth=mindepth=oc_huff_bitree_mindepth(_bitree,_binode);
   occupancy=1<<mindepth;
+  slush=_binode>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH;
   do{
     loccupancy=occupancy;
-    occupancy=oc_huff_tree_occupancy(_bitree,_binode,++depth);
-    threshold=1<<OC_MAXI(depth-(_binode>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH),0);
+    occupancy=oc_huff_bitree_occupancy(_bitree,_binode,++depth);
   }
-  while(occupancy>loccupancy&&occupancy>=threshold);
+  while(occupancy>loccupancy&&occupancy>=1<<OC_MAXI(depth-slush,0));
   depth--;
   node=*_node;
-  if(depth<=0){
-    _tree[node]=_bitree[_binode];
-    *_node=node+1;
-  }
-  else{
-    _tree[node]=depth;
-    *_node=node+1+(1<<depth);
-    oc_huff_node_fill(_tree,_node,node+1,_bitree,_binode,depth,depth);
-  }
+  _tree[node]=depth;
+  *_node=node+1+(1<<depth);
+  oc_huff_node_fill(_tree,_node,node+1,_bitree,_binode,depth,depth);
   return node;
 }
 
@@ -507,21 +480,33 @@
  ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
   int i;
   for(i=0;i<TH_NHUFFMAN_TABLES;i++){
-    ogg_int16_t   nodes[255*3];
+    ogg_int16_t   nodes[255*2];
     ogg_int16_t  *tree;
     size_t        size;
     int           node;
+    int           root;
     /*Unpack the full tree into a temporary buffer.*/
     node=0;
-    oc_huff_tree_unpack(_opb,nodes,sizeof(nodes)/sizeof(*nodes),&node);
+    root=oc_huff_tree_unpack(_opb,nodes,sizeof(nodes)/sizeof(*nodes),&node);
     if(node>sizeof(nodes)/sizeof(*nodes))return TH_EBADHEADER;
     /*Figure out how big the collapsed tree will be.*/
-    size=oc_huff_tree_collapse_size(nodes,0,0);
+    size=root<=0?3:oc_huff_bitree_collapse_size(nodes,0,0);
     tree=(ogg_int16_t *)_ogg_malloc(size*sizeof(*tree));
     if(tree==NULL)return TH_EFAULT;
-    /*And collapse it.*/
-    node=0;
-    oc_huff_tree_collapse(tree,&node,nodes,0);
+    /*It's legal to have a tree with just a single node, which requires no bits
+       to decode and always returns the same token.
+      However, no encoder actually does this (yet).
+      To avoid a special case in oc_huff_token_decode(), we construct a tree
+       that looks ahead one bit, and then advances the stream 0 bits.*/
+    if(root<=0){
+      tree[0]=1;
+      tree[1]=tree[2]=-(0<<8|-root);
+    }
+    /*Otherwise, collapse the tree.*/
+    else{
+      node=0;
+      oc_huff_tree_collapse(tree,&node,nodes,0);
+    }
     _nodes[i]=tree;
   }
   return 0;
@@ -571,34 +556,12 @@
   long                 bits;
   int                  node;
   int                  n;
-  n=_tree[0];
   ptr=_opb->ptr;
   window=_opb->window;
   stop=_opb->stop;
   available=_opb->bits;
-  /*Handle the case of a codebook with a single value.
-    Branch prediction should make this esoteric case free.*/
-  if(n<=0)return -n&255;
-  if(n>available){
-    for(;;){
-      /*We don't bother setting eof because we won't check for it after we've
-         started decoding DCT tokens.*/
-      if(ptr>=stop){
-        available=OC_LOTS_OF_BITS;
-        break;
-      }
-      if(available>OC_PB_WINDOW_SIZE-8)break;
-      available+=8;
-      window|=(oc_pb_window)*ptr++<<OC_PB_WINDOW_SIZE-available;
-    }
-    /*Note: We never request more than 24 bits, so there's no need to fill in
-       the last partial byte here.*/
-  }
-  bits=window>>OC_PB_WINDOW_SIZE-n;
-  node=_tree[1+bits];
-  while(node>0){
-    window<<=n;
-    available-=n;
+  node=0;
+  for(;;){
     n=_tree[node];
     if(n>available){
       for(;;){
@@ -617,6 +580,9 @@
     }
     bits=window>>OC_PB_WINDOW_SIZE-n;
     node=_tree[node+1+bits];
+    if(node<=0)break;
+    window<<=n;
+    available-=n;
   }
   node=-node;
   n=node>>8;



More information about the commits mailing list