[xiph-commits] r9326 - experimental/derf/theora-exp/lib
tterribe at motherfish-iii.xiph.org
tterribe at motherfish-iii.xiph.org
Mon May 30 08:32:53 PDT 2005
Author: tterribe
Date: 2005-05-30 08:32:45 -0700 (Mon, 30 May 2005)
New Revision: 9326
Modified:
experimental/derf/theora-exp/lib/decint.h
experimental/derf/theora-exp/lib/decode.c
experimental/derf/theora-exp/lib/huffdec.c
experimental/derf/theora-exp/lib/huffdec.h
Log:
Simple modification to Huffman decoding that lets us use fewer read operations.
The decoder now looks ahead by n bits, but can advance by less than n.
This gives us greater flexibility in building the decoder tree, since we no
longer need a complete binary tree of depth n to look ahead n bits.
The cost is some wasted space, but by duplicating no more than 50% of our
look-up table entries, we can nearly halve the number of reads required.
Simplistic benchmarks give a 3-4% speed-up in the total decode time.
Modified: experimental/derf/theora-exp/lib/decint.h
===================================================================
--- experimental/derf/theora-exp/lib/decint.h 2005-05-30 14:50:57 UTC (rev 9325)
+++ experimental/derf/theora-exp/lib/decint.h 2005-05-30 15:32:45 UTC (rev 9326)
@@ -109,5 +109,13 @@
*_ret is set to 0 on failure.
Return: 0 on success, or a negative value on failure.*/
extern int theora_read1(oggpack_buffer *_opb,long *_ret);
+/*Look ahead n bits, where n <= 31 for libogg1.
+ In the event that there are some bits remaining, but fewer than n, then the
+ remaining bits are returned, with the missing bits set to 0, and the
+ function succeeds.
+ The stream can be advanced afterwards with oggpackB_adv().
+ *_ret is set to 0 on failure.
+ Return: 0 on success, or a negative value on failure.*/
+extern int theora_look(oggpack_buffer *_opb,int _nbits,long *_ret);
#endif
Modified: experimental/derf/theora-exp/lib/decode.c
===================================================================
--- experimental/derf/theora-exp/lib/decode.c 2005-05-30 14:50:57 UTC (rev 9325)
+++ experimental/derf/theora-exp/lib/decode.c 2005-05-30 15:32:45 UTC (rev 9326)
@@ -64,7 +64,36 @@
return mask;
}
+/*Look ahead n bits, where n <= 31 for libogg1.
+ In the event that there are some bits remaining, but fewer than n, then the
+ remaining bits are returned, with the missing bits set to 0, and the
+ function succeeds.
+ The stream can be advanced afterwards with oggpackB_adv().
+ *_ret is set to 0 on failure.
+ Return: 0 on success, or a negative value on failure.*/
+int theora_look(oggpack_buffer *_opb,int _nbits,long *_ret){
+ int nbits;
+ *_ret=oggpackB_look(_opb,_nbits);
+ if(*_ret>=0)return 0;
+ /*libogg1 fails if we try to look past the end of the stream.
+ We might be looking ahead more bits than we actually need, however, and so
+ we must return the ones that are actually there.*/
+ /*There's no accessor for the storage field, which we need to figure out
+ how many bits _are_ left in the buffer (without resorting to trial and
+ error, which would be silly).*/
+ nbits=(_opb->storage<<3)-oggpackB_bits(_opb);
+ if(nbits>0){
+ /*If there are some bits left, return them.*/
+ *_ret=oggpackB_look(_opb,nbits)<<_nbits-nbits;
+ /*Success should be guaranteed.*/
+ return 0;
+ }
+ /*If there are no bits left, then we truly should fail.*/
+ *_ret=0;
+ return -1;
+}
+
/*The mode alphabets for the various mode coding schemes.
Scheme 0 uses a custom alphabet, which is not stored in this table.*/
static const int OC_MODE_ALPHABETS[7][OC_NMODES]={
Modified: experimental/derf/theora-exp/lib/huffdec.c
===================================================================
--- experimental/derf/theora-exp/lib/huffdec.c 2005-05-30 14:50:57 UTC (rev 9325)
+++ experimental/derf/theora-exp/lib/huffdec.c 2005-05-30 15:32:45 UTC (rev 9326)
@@ -8,6 +8,23 @@
#define _ogg_offsetof(_type,_field)\
((size_t)((char *)&((_type *)0)->_field-(char *)0))
+/*The log_2 of the size of a lookup table is allowed to grow to relative to
+ the number of unique nodes it contains.
+ E.g., if OC_HUFF_SLUSH is 2, then at most 75% of the space in the tree is
+ wasted (each node will have an amortized cost of at most 20 bytes when using
+ 4-byte pointers).
+ Larger numbers can decode tokens with fewer read operations, while smaller
+ numbers may save more space (requiring as little as 8 bytes amortized per
+ node, though there will be more nodes).
+ With a sample file:
+ 32233473 read calls are required when no tree collapsing is done (100.0%).
+ 19269269 read calls are required when OC_HUFF_SLUSH is 0 (59.8%).
+ 11144969 read calls are required when OC_HUFF_SLUSH is 1 (34.6%).
+ 10538563 read calls are required when OC_HUFF_SLUSH is 2 (32.7%).
+ 10192578 read calls are required when OC_HUFF_SLUSH is 3 (31.6%).
+ Since a value of 1 gets us the vast majority of the speed-up with only a
+ small amount of wasted memory, this is what we use.*/
+#define OC_HUFF_SLUSH (1)
/*Allocates a Huffman tree node that represents a subtree of depth _nbits.
@@ -40,8 +57,12 @@
if(_node->nbits){
int nchildren;
int i;
+ int inext;
nchildren=1<<_node->nbits;
- for(i=0;i<nchildren;i++)oc_huff_tree_free(_node->nodes[i]);
+ for(i=0;i<nchildren;i=inext){
+ inext=i+(1<<_node->nbits-_node->nodes[i]->depth);
+ oc_huff_tree_free(_node->nodes[i]);
+ }
}
oc_huff_node_free(_node);
}
@@ -63,6 +84,7 @@
if(!bits){
int ret;
binode=oc_huff_node_alloc(1);
+ binode->depth=_depth>1;
ret=oc_huff_tree_unpack(_opb,binode->nodes,_depth);
if(ret>=0)ret=oc_huff_tree_unpack(_opb,binode->nodes+1,_depth);
if(ret<0){
@@ -74,6 +96,7 @@
else{
if(theora_read(_opb,OC_NDCT_TOKEN_BITS,&bits)<0)return OC_BADHEADER;
binode=oc_huff_node_alloc(0);
+ binode->depth=_depth>1;
binode->token=(unsigned char)bits;
}
*_binode=binode;
@@ -89,12 +112,33 @@
static int oc_huff_tree_mindepth(oc_huff_node *_binode){
int depth0;
int depth1;
+ int cdepth;
if(_binode->nbits==0)return 0;
depth0=oc_huff_tree_mindepth(_binode->nodes[0]);
depth1=oc_huff_tree_mindepth(_binode->nodes[1]);
return OC_MINI(depth0,depth1)+1;
}
+/*Finds the number of internal nodes at a given depth, plus the number of
+ leaves at that depth or shallower.
+ The tree must be binary.
+ _binode: The root of the given sub-tree.
+ _binode->nbits must be 0 or 1.
+ Return: The number of entries that would be contained in a jump table of the
+ given depth.*/
+static int oc_huff_tree_occupancy(oc_huff_node *_binode,int _depth){
+ int depth0;
+ int depth1;
+ int cdepth;
+ if(_binode->nbits==0||_depth<=0)return 1;
+ else{
+ return oc_huff_tree_occupancy(_binode->nodes[0],_depth-1)+
+ oc_huff_tree_occupancy(_binode->nodes[1],_depth-1);
+ }
+}
+
+static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode);
+
/*Fills the given nodes table with all the children in the sub-tree at the
given depth.
The nodes in the sub-tree with a depth less than that stored in the table
@@ -103,14 +147,23 @@
_nodes: The nodes table to fill.
_binode: The root of the sub-tree to fill it with.
_binode->nbits must be 0 or 1.
- _depth: The depth of the nodes to fill the table with.
- 0 indicates the current node should be stored in the table.*/
+ _level: The current level in the table.
+ 0 indicates that the current node should be stored, regardless of
+ whether it is a leaf node or an internal node.
+ _depth: The depth of the nodes to fill the table with, relative to their
+ parent.*/
static void oc_huff_node_fill(oc_huff_node **_nodes,
- oc_huff_node *_binode,int _depth){
- if(_depth--<=0)_nodes[0]=_binode;
+ oc_huff_node *_binode,int _level,int _depth){
+ if(_level<=0||_binode->nbits==0){
+ int i;
+ _binode->depth=_depth-_level;
+ _nodes[0]=oc_huff_tree_collapse(_binode);
+ for(i=1;i<1<<_level;i++)_nodes[i]=_nodes[0];
+ }
else{
- oc_huff_node_fill(_nodes,_binode->nodes[0],_depth);
- oc_huff_node_fill(_nodes+(1<<_depth),_binode->nodes[1],_depth);
+ _level--;
+ oc_huff_node_fill(_nodes,_binode->nodes[0],_level,_depth);
+ oc_huff_node_fill(_nodes+(1<<_level),_binode->nodes[1],_level,_depth);
oc_huff_node_free(_binode);
}
}
@@ -125,15 +178,23 @@
oc_huff_node *root;
int nchildren;
int mindepth;
+ int depth;
+ int loccupancy;
+ int occupancy;
int i;
- mindepth=oc_huff_tree_mindepth(_binode);
- if(mindepth<=1)return _binode;
- root=oc_huff_node_alloc(mindepth);
- oc_huff_node_fill(root->nodes,_binode,mindepth);
- nchildren=1<<mindepth;
- for(i=0;i<nchildren;i++){
- root->nodes[i]=oc_huff_tree_collapse(root->nodes[i]);
+ int inext;
+ depth=mindepth=oc_huff_tree_mindepth(_binode);
+ occupancy=1<<mindepth;
+ do{
+ loccupancy=occupancy;
+ occupancy=oc_huff_tree_occupancy(_binode,++depth);
}
+ while(occupancy>loccupancy&&occupancy>=1<<OC_MAXI(depth-OC_HUFF_SLUSH,0));
+ depth--;
+ if(depth<=1)return _binode;
+ root=oc_huff_node_alloc(depth);
+ root->depth=_binode->depth;
+ oc_huff_node_fill(root->nodes,_binode,depth,depth);
return root;
}
@@ -143,17 +204,22 @@
static oc_huff_node *oc_huff_tree_copy(const oc_huff_node *_node){
oc_huff_node *ret;
ret=oc_huff_node_alloc(_node->nbits);
+ ret->depth=_node->depth;
if(_node->nbits){
int nchildren;
int i;
+ int inext;
nchildren=1<<_node->nbits;
- for(i=0;i<nchildren;i++)ret->nodes[i]=oc_huff_tree_copy(_node->nodes[i]);
+ for(i=0;i<nchildren;){
+ ret->nodes[i]=oc_huff_tree_copy(_node->nodes[i]);
+ inext=i+(1<<_node->nbits-ret->nodes[i]->depth);
+ while(++i<inext)ret->nodes[i]=ret->nodes[i-1];
+ }
}
else ret->token=_node->token;
return ret;
}
-
/*Unpacks a set of Huffman trees, and reduces them to a collapsed
representation.
_opb: The buffer to unpack the trees from.
@@ -194,8 +260,9 @@
int oc_huff_token_decode(oggpack_buffer *_opb,const oc_huff_node *_node){
long bits;
while(_node->nbits!=0){
- theora_read(_opb,_node->nbits,&bits);
+ theora_look(_opb,_node->nbits,&bits);
_node=_node->nodes[bits];
+ oggpackB_adv(_opb,_node->depth);
}
return _node->token;
}
Modified: experimental/derf/theora-exp/lib/huffdec.h
===================================================================
--- experimental/derf/theora-exp/lib/huffdec.h 2005-05-30 14:50:57 UTC (rev 9325)
+++ experimental/derf/theora-exp/lib/huffdec.h 2005-05-30 15:32:45 UTC (rev 9326)
@@ -7,11 +7,16 @@
typedef struct oc_huff_node oc_huff_node;
/*A node in the Huffman tree.
- Instead of storing every branching in the tree, sub-trees which are complete
- are collapsed into one node.
+ Instead of storing every branching in the tree, subtrees can be collapsed
+ into one node, with a table of size 1<<nbits pointing directly to its
+ descedents nbits levels down.
This allows more than one bit to be read at a time, and avoids following all
the intermediate branches with next to no increased code complexity once
- the collapsed tree has been built.*/
+ the collapsed tree has been built.
+ We do _not_ require that a subtree be complete to be collapsed, but instead
+ store duplicate pointers in the table, and record the actual depth of the
+ node below its parent.
+ This tells us the number of bits to advance the stream after reaching it.*/
struct oc_huff_node{
/*The number of bits of the code needed to descend through this node.
0 indicates a leaf node.
@@ -21,6 +26,12 @@
/*The value of a token stored in a leaf node.
The value in non-leaf nodes is undefined.*/
unsigned char token;
+ /*The depth of the current node, relative to its parent in the collapsed
+ tree.
+ This can be less than its parent's nbits value, in which case there are
+ 1<<nbits-depth copies of this node in the table, and the bitstream should
+ only be advanced depth bits after reaching this node.*/
+ unsigned char depth;
/*The table of child nodes.
The ACTUAL size of this array is 1<<nbits, despite what the declaration
below claims.
More information about the commits
mailing list