[xiph-commits] r17230 - experimental/derf/theora-ptalarbvorm/lib
tterribe at svn.xiph.org
tterribe at svn.xiph.org
Tue May 18 08:59:52 PDT 2010
Author: tterribe
Date: 2010-05-18 08:59:52 -0700 (Tue, 18 May 2010)
New Revision: 17230
Modified:
experimental/derf/theora-ptalarbvorm/lib/huffdec.c
Log:
Remove a bunch of the special-case handling for a Huffman tree with one node
(e.g., that decodes a single, constant token value using 0 bits).
This simplifies the tree-unpacking code a lot, and removes a special case from
the actual token decode.
This special case was basically free on x86 thanks to branch prediction, but
eliminating it reduces the fast-path cycle count on c64x by almost 32%.
Modified: experimental/derf/theora-ptalarbvorm/lib/huffdec.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/huffdec.c 2010-05-17 22:38:07 UTC (rev 17229)
+++ experimental/derf/theora-ptalarbvorm/lib/huffdec.c 2010-05-18 15:59:52 UTC (rev 17230)
@@ -46,22 +46,28 @@
It also has very large memory requirements, which impairs cache coherency.
We store the tree packed in an array of 16-bit integers (words).
- Each node of the tree consists of a single word, followed consecutiably by
- zero or more indexes of its children.
- Let n be the value of this first word.
+ These come in two flavors.
+ The first is a "bitree", or binary tree, where each node always has exactly
+ two children, stored in consecutive words.
+ If the child is negative or zero, then it is a leaf node.
+ These are stored directly in the child pointer to save space, since they only
+ require a single word.
+ The token value is -leaf.
+ If the child is positive, then it is the index of another internal node in
+ the table.
- If n is negative or zero, then this is a leaf node.
- The token in the leaf node is (-n&255).
- The number of bits that should have been read to reach this child, starting
- from the current node, is (-n>>8).
-
- If n is positive, then it is the number of bits that need to be read to
- traverse the node.
+ The second flavor is a general "tree", where each node consists of a single
+ word, followed consecutively by two or more indices of its children.
+ Let n be the value of this first word.
+ This is the number of bits that need to be read to traverse the node, and
+ must be positive.
1<<n entries follow in the array, each an index to a child node.
- As an optimization to save space, leaf nodes are stored directly, instead of
- storing an index, since they only require a single word.
If a leaf node would have been before reading n bits, then it is duplicated
the necessary number of times in this table.
+ Leaf nodes pack both a token value and their actual depth in the tree.
+ The token in the leaf node is (-leaf&255).
+ The number of bits that need to be consumed to reach the leaf, starting from
+ the current node, is (-leaf>>8).
@ARTICLE{Hash95,
author="Reza Hashemian",
@@ -177,46 +183,7 @@
#define OC_ROOT_HUFF_SLUSH (3)
-/*Determines the size in words of a Huffman tree node that represents a
- subtree of depth _nbits.
- _nbits: The depth of the subtree.
- If this is <=0, the node is a leaf node.
- Otherwise 1<<_nbits pointers are allocated for children.
- Return: The number of words required to store the node.*/
-static size_t oc_huff_node_size(int _nbits){
- return _nbits>0?1+(1<<_nbits):1;
-}
-
-/*Determines the size in words of a Huffman subtree.
- _tree: The complete Huffman tree.
- _node: The index of the root of the desired subtree.
- Return: The number of words required to store the tree.*/
-static size_t oc_huff_tree_size(const ogg_int16_t *_tree,int _node){
- size_t size;
- int n;
- n=_tree[_node];
- size=oc_huff_node_size(n);
- if(n>0){
- int nchildren;
- int i;
- nchildren=1<<n;
- i=0;
- do{
- int child;
- child=_tree[_node+i+1];
- if(child<=0)i+=1<<n-(-child>>8);
- else{
- size+=oc_huff_tree_size(_tree,child);
- i++;
- }
- }
- while(i<nchildren);
- }
- return size;
-}
-
-
/*Unpacks a sub-tree from the given buffer.
_opb: The buffer to unpack from.
_bitree: The tree to store the sub-tree in.
@@ -240,13 +207,12 @@
}
/*Read an internal node:*/
if(!bits){
- *_binode=binode+3;
- if(binode+3>_nbinodes)return 0;
- _bitree[binode+0]=1;
+ *_binode=binode+2;
+ if(binode+2>_nbinodes)return 0;
+ _bitree[binode+0]=
+ (ogg_int16_t)oc_huff_tree_unpack(_opb,_bitree,_nbinodes,_binode);
_bitree[binode+1]=
(ogg_int16_t)oc_huff_tree_unpack(_opb,_bitree,_nbinodes,_binode);
- _bitree[binode+2]=
- (ogg_int16_t)oc_huff_tree_unpack(_opb,_bitree,_nbinodes,_binode);
}
/*Read a leaf node:*/
else{
@@ -263,79 +229,102 @@
token=OC_DCT_TOKEN_MAP[bits];
if(ntokens<=1){
/*If we only have the one leaf, return it directly.*/
- if(binode>0)return -(1<<8|token);
- /*Unless we are the root of the tree; then we have to store it as a
- node explicitly.*/
- *_binode=binode+1;
- if(binode+1>_nbinodes)return 0;
- _bitree[binode]=-token;
+ return -token;
}
else{
+ int last_row_sz;
int i;
int j;
/*Fill in a complete binary tree pointing to the internal nodes.*/
- *_binode=binode+3*(ntokens-1);
- if(binode+3*(ntokens-1)>_nbinodes)return 0;
- for(i=1;i<ntokens>>1;i<<=1){
+ *_binode=binode+2*(ntokens-1);
+ if(binode+2*(ntokens-1)>_nbinodes)return 0;
+ last_row_sz=ntokens>>1;
+ for(i=1;i<last_row_sz;i<<=1){
int child;
- child=binode+3*(2*i-1);
+ child=binode+2*(2*i-1);
for(j=0;j<i;j++){
- _bitree[binode+3*(i-1+j)+0]=1;
- _bitree[binode+3*(i-1+j)+1]=(ogg_int16_t)child;
- child+=3;
- _bitree[binode+3*(i-1+j)+2]=(ogg_int16_t)child;
- child+=3;
+ _bitree[binode+2*(i-1+j)+0]=(ogg_int16_t)child;
+ child+=2;
+ _bitree[binode+2*(i-1+j)+1]=(ogg_int16_t)child;
+ child+=2;
}
}
/*And now the last row pointing to the leaf nodes.*/
- for(i=j=0;j<ntokens>>1;j++){
- _bitree[binode+3*((ntokens>>1)-1+j)+0]=1;
- _bitree[binode+3*((ntokens>>1)-1+j)+1]=(ogg_int16_t)-(1<<8|token+i++);
- _bitree[binode+3*((ntokens>>1)-1+j)+2]=(ogg_int16_t)-(1<<8|token+i++);
+ for(i=j=0;j<last_row_sz;j++){
+ _bitree[binode+2*(last_row_sz-1+j)+0]=(ogg_int16_t)-(token+i++);
+ _bitree[binode+2*(last_row_sz-1+j)+1]=(ogg_int16_t)-(token+i++);
}
}
}
- return binode;
+ return binode>0?binode:1;
}
/*Finds the depth of shortest branch of the given sub-tree.
- The tree must be binary.
_bitree: A packed binary Huffman tree.
_binode: The root of the given sub-tree.
- _bitree[_binode] must be <=1.
Return: The smallest depth of a leaf node in this sub-tree.
0 indicates this sub-tree is a leaf node.*/
-static int oc_huff_tree_mindepth(const ogg_int16_t *_bitree,int _binode){
+static int oc_huff_bitree_mindepth(const ogg_int16_t *_bitree,int _binode){
int child;
int depth0;
int depth1;
- if(_bitree[_binode]<=0)return 0;
+ child=_bitree[_binode+0];
+ depth0=child<=0?0:oc_huff_bitree_mindepth(_bitree,child);
child=_bitree[_binode+1];
- depth0=child<=0?0:oc_huff_tree_mindepth(_bitree,child);
- child=_bitree[_binode+2];
- depth1=child<=0?0:oc_huff_tree_mindepth(_bitree,child);
+ depth1=child<=0?0:oc_huff_bitree_mindepth(_bitree,child);
return OC_MINI(depth0,depth1)+1;
}
/*Finds the number of internal nodes at a given depth, plus the number of
leaves at that depth or shallower.
- The tree must be binary.
_bitree: A packed binary Huffman tree.
_binode: The root of the given sub-tree.
- _bitree[_binode] must be <=1.
Return: The number of unique entries that would be contained in a jump table
of the given depth.*/
-static int oc_huff_tree_occupancy(const ogg_int16_t *_bitree,int _binode,
+static int oc_huff_bitree_occupancy(const ogg_int16_t *_bitree,int _binode,
int _depth){
- if(_depth<=0||_bitree[_binode]<=0)return 1;
- else{
- int child1;
- int child2;
- child1=_bitree[_binode+1];
- child2=_bitree[_binode+2];
- return (child1<=0?1:oc_huff_tree_occupancy(_bitree,child1,_depth-1))+
- (child2<=0?1:oc_huff_tree_occupancy(_bitree,child2,_depth-1));
+ int child1;
+ int child2;
+ if(_depth<=0)return 1;
+ child1=_bitree[_binode+0];
+ child2=_bitree[_binode+1];
+ return (child1<=0?1:oc_huff_bitree_occupancy(_bitree,child1,_depth-1))+
+ (child2<=0?1:oc_huff_bitree_occupancy(_bitree,child2,_depth-1));
+}
+
+/*Determines the size in words of a Huffman tree node that represents a
+ subtree of depth _nbits.
+ _nbits: The depth of the subtree.
+ This must be greater than zero.
+ Return: The number of words required to store the node.*/
+static size_t oc_huff_node_size(int _nbits){
+ return 1+(1<<_nbits);
+}
+
+/*Determines the size in words of a Huffman subtree.
+ _tree: The complete Huffman tree.
+ _node: The index of the root of the desired subtree.
+ Return: The number of words required to store the tree.*/
+static size_t oc_huff_tree_size(const ogg_int16_t *_tree,int _node){
+ size_t size;
+ int nchildren;
+ int n;
+ int i;
+ n=_tree[_node];
+ size=oc_huff_node_size(n);
+ nchildren=1<<n;
+ i=0;
+ do{
+ int child;
+ child=_tree[_node+i+1];
+ if(child<=0)i+=1<<n-(-child>>8);
+ else{
+ size+=oc_huff_tree_size(_tree,child);
+ i++;
+ }
}
+ while(i<nchildren);
+ return size;
}
/*Makes a copy of the given Huffman sub-tree.
@@ -345,64 +334,62 @@
Return: The index of the first available slot after the sub-tree copy.*/
static int oc_huff_tree_copy(ogg_int16_t *_dst,
const ogg_int16_t *_src,int _node){
+ int nchildren;
+ int ret;
+ int i;
int n;
n=_src[_node];
_dst[_node++]=(ogg_int16_t)n;
- if(n<=0)return _node;
- else{
- int nchildren;
- int ret;
- int i;
- nchildren=1<<n;
- ret=_node+nchildren;
- for(i=0;i<nchildren;i++){
- int child;
- child=_src[_node+i];
- _dst[_node+i]=(ogg_int16_t)child;
- if(child>0){
- /*Note: This assumes that storage for children is in order, and
- consecutive.*/
- ret=oc_huff_tree_copy(_dst,_src,child);
- }
+ nchildren=1<<n;
+ ret=_node+nchildren;
+ for(i=0;i<nchildren;i++){
+ int child;
+ child=_src[_node+i];
+ _dst[_node+i]=(ogg_int16_t)child;
+ if(child>0){
+ /*Note: The assignment to ret here assumes that storage for children is
+ in order, and consecutive (e.g., child==ret before the call).*/
+ ret=oc_huff_tree_copy(_dst,_src,child);
}
- return ret;
}
+ return ret;
}
-static size_t oc_huff_tree_collapse_size(ogg_int16_t *_bitree,int _binode,
+/*Computes the number of words of storage that will be required by
+ oc_huff_tree_collapse() for the given sub-tree.
+ _bitree: The binary tree to collapse.
+ _binode: The index of the root of the sub-tree to fill it with.
+ _bitree[_binode] must ==1.
+ _depth: The depth of the nodes that our parent in the collapsed sub-tree
+ will fill its table with.
+ Pass 0 for the root of the tree.
+ Return: The number of words required to store the collapsed tree.*/
+static size_t oc_huff_bitree_collapse_size(ogg_int16_t *_bitree,int _binode,
int _depth){
size_t size;
- int mindepth;
- int depth;
- int threshold;
- int loccupancy;
- int occupancy;
int child1;
int child2;
- int n;
- n=_bitree[_binode];
- if(n>0&&_depth>0){
- child1=_bitree[_binode+1];
- child2=_bitree[_binode+2];
- return (child1<=0?0:oc_huff_tree_collapse_size(_bitree,child1,_depth-1))+
- (child2<=0?0:oc_huff_tree_collapse_size(_bitree,child2,_depth-1));
+ if(_depth>0)size=0;
+ else{
+ int mindepth;
+ int slush;
+ int loccupancy;
+ int occupancy;
+ _depth=mindepth=oc_huff_bitree_mindepth(_bitree,_binode);
+ occupancy=1<<mindepth;
+ slush=_binode>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH;
+ do{
+ loccupancy=occupancy;
+ occupancy=oc_huff_bitree_occupancy(_bitree,_binode,++_depth);
+ }
+ while(occupancy>loccupancy&&occupancy>=1<<OC_MAXI(_depth-slush,0));
+ _depth--;
+ size=oc_huff_node_size(_depth);
}
- depth=mindepth=oc_huff_tree_mindepth(_bitree,_binode);
- occupancy=1<<mindepth;
- do{
- loccupancy=occupancy;
- occupancy=oc_huff_tree_occupancy(_bitree,_binode,++depth);
- threshold=1<<OC_MAXI(depth-(_binode>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH),0);
- }
- while(occupancy>loccupancy&&occupancy>=threshold);
- depth--;
- size=oc_huff_node_size(depth);
- if(depth>0){
- child1=_bitree[_binode+1];
- child2=_bitree[_binode+2];
- size+=child1<=0?0:oc_huff_tree_collapse_size(_bitree,child1,depth-1);
- size+=child2<=0?0:oc_huff_tree_collapse_size(_bitree,child2,depth-1);
- }
+ child1=_bitree[_binode+0];
+ child2=_bitree[_binode+1];
+ if(child1>0)size+=oc_huff_bitree_collapse_size(_bitree,child1,_depth-1);
+ if(child2>0)size+=oc_huff_bitree_collapse_size(_bitree,child2,_depth-1);
return size;
}
@@ -427,31 +414,23 @@
parent.*/
static void oc_huff_node_fill(ogg_int16_t *_tree,int *_node,int _offs,
const ogg_int16_t *_bitree,int _binode,int _level,int _depth){
- int leaf;
- int n;
- int i;
- n=_bitree[_binode];
- if(n<=0){
- leaf=-((_depth-_level<<8)|(-n&255));
- for(i=0;i<1<<_level;i++)_tree[_offs+i]=(ogg_int16_t)leaf;
- }
- else if(_level<=0){
- _tree[_offs]=oc_huff_tree_collapse(_tree,_node,_bitree,_binode);
- }
+ if(_level<=0)_tree[_offs]=oc_huff_tree_collapse(_tree,_node,_bitree,_binode);
else{
int child1;
int child2;
+ int leaf;
+ int i;
_level--;
- child1=_bitree[_binode+1];
+ child1=_bitree[_binode+0];
if(child1<=0){
- leaf=-((_depth-_level<<8)|(-child1&255));
+ leaf=-(_depth-_level<<8|-child1);
for(i=0;i<1<<_level;i++)_tree[_offs+i]=(ogg_int16_t)leaf;
}
else oc_huff_node_fill(_tree,_node,_offs,_bitree,child1,_level,_depth);
_offs+=1<<_level;
- child2=_bitree[_binode+2];
+ child2=_bitree[_binode+1];
if(child2<=0){
- leaf=-((_depth-_level<<8)|(-child2&255));
+ leaf=-(_depth-_level<<8|-child2);
for(i=0;i<1<<_level;i++)_tree[_offs+i]=(ogg_int16_t)leaf;
}
else oc_huff_node_fill(_tree,_node,_offs,_bitree,child2,_level,_depth);
@@ -474,27 +453,21 @@
int depth;
int loccupancy;
int occupancy;
- int threshold;
+ int slush;
int node;
- depth=mindepth=oc_huff_tree_mindepth(_bitree,_binode);
+ depth=mindepth=oc_huff_bitree_mindepth(_bitree,_binode);
occupancy=1<<mindepth;
+ slush=_binode>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH;
do{
loccupancy=occupancy;
- occupancy=oc_huff_tree_occupancy(_bitree,_binode,++depth);
- threshold=1<<OC_MAXI(depth-(_binode>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH),0);
+ occupancy=oc_huff_bitree_occupancy(_bitree,_binode,++depth);
}
- while(occupancy>loccupancy&&occupancy>=threshold);
+ while(occupancy>loccupancy&&occupancy>=1<<OC_MAXI(depth-slush,0));
depth--;
node=*_node;
- if(depth<=0){
- _tree[node]=_bitree[_binode];
- *_node=node+1;
- }
- else{
- _tree[node]=depth;
- *_node=node+1+(1<<depth);
- oc_huff_node_fill(_tree,_node,node+1,_bitree,_binode,depth,depth);
- }
+ _tree[node]=depth;
+ *_node=node+1+(1<<depth);
+ oc_huff_node_fill(_tree,_node,node+1,_bitree,_binode,depth,depth);
return node;
}
@@ -507,21 +480,33 @@
ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
int i;
for(i=0;i<TH_NHUFFMAN_TABLES;i++){
- ogg_int16_t nodes[255*3];
+ ogg_int16_t nodes[255*2];
ogg_int16_t *tree;
size_t size;
int node;
+ int root;
/*Unpack the full tree into a temporary buffer.*/
node=0;
- oc_huff_tree_unpack(_opb,nodes,sizeof(nodes)/sizeof(*nodes),&node);
+ root=oc_huff_tree_unpack(_opb,nodes,sizeof(nodes)/sizeof(*nodes),&node);
if(node>sizeof(nodes)/sizeof(*nodes))return TH_EBADHEADER;
/*Figure out how big the collapsed tree will be.*/
- size=oc_huff_tree_collapse_size(nodes,0,0);
+ size=root<=0?3:oc_huff_bitree_collapse_size(nodes,0,0);
tree=(ogg_int16_t *)_ogg_malloc(size*sizeof(*tree));
if(tree==NULL)return TH_EFAULT;
- /*And collapse it.*/
- node=0;
- oc_huff_tree_collapse(tree,&node,nodes,0);
+ /*It's legal to have a tree with just a single node, which requires no bits
+ to decode and always returns the same token.
+ However, no encoder actually does this (yet).
+ To avoid a special case in oc_huff_token_decode(), we construct a tree
+ that looks ahead one bit, and then advances the stream 0 bits.*/
+ if(root<=0){
+ tree[0]=1;
+ tree[1]=tree[2]=-(0<<8|-root);
+ }
+ /*Otherwise, collapse the tree.*/
+ else{
+ node=0;
+ oc_huff_tree_collapse(tree,&node,nodes,0);
+ }
_nodes[i]=tree;
}
return 0;
@@ -571,34 +556,12 @@
long bits;
int node;
int n;
- n=_tree[0];
ptr=_opb->ptr;
window=_opb->window;
stop=_opb->stop;
available=_opb->bits;
- /*Handle the case of a codebook with a single value.
- Branch prediction should make this esoteric case free.*/
- if(n<=0)return -n&255;
- if(n>available){
- for(;;){
- /*We don't bother setting eof because we won't check for it after we've
- started decoding DCT tokens.*/
- if(ptr>=stop){
- available=OC_LOTS_OF_BITS;
- break;
- }
- if(available>OC_PB_WINDOW_SIZE-8)break;
- available+=8;
- window|=(oc_pb_window)*ptr++<<OC_PB_WINDOW_SIZE-available;
- }
- /*Note: We never request more than 24 bits, so there's no need to fill in
- the last partial byte here.*/
- }
- bits=window>>OC_PB_WINDOW_SIZE-n;
- node=_tree[1+bits];
- while(node>0){
- window<<=n;
- available-=n;
+ node=0;
+ for(;;){
n=_tree[node];
if(n>available){
for(;;){
@@ -617,6 +580,9 @@
}
bits=window>>OC_PB_WINDOW_SIZE-n;
node=_tree[node+1+bits];
+ if(node<=0)break;
+ window<<=n;
+ available-=n;
}
node=-node;
n=node>>8;
More information about the commits
mailing list