[xiph-commits] r17221 - in branches/theora-gumboot: . lib
gumboot at svn.xiph.org
gumboot at svn.xiph.org
Sun May 16 05:46:19 PDT 2010
Author: gumboot
Date: 2010-05-16 05:46:19 -0700 (Sun, 16 May 2010)
New Revision: 17221
Modified:
branches/theora-gumboot/
branches/theora-gumboot/CHANGES
branches/theora-gumboot/lib/analyze.c
branches/theora-gumboot/lib/bitpack.c
branches/theora-gumboot/lib/bitpack.h
branches/theora-gumboot/lib/decint.h
branches/theora-gumboot/lib/decode.c
branches/theora-gumboot/lib/huffdec.c
branches/theora-gumboot/lib/huffdec.h
Log:
Integrate latest changes from ptalarbvorm, thereby exaggerating the damage I've done while messing around.
Property changes on: branches/theora-gumboot
___________________________________________________________________
Modified: svn:mergeinfo
- /trunk/theora:14138-15320,15741
+ /experimental/derf/theora-ptalarbvorm:17164-17220
/trunk/theora:14138-15320,15741
Modified: branches/theora-gumboot/CHANGES
===================================================================
--- branches/theora-gumboot/CHANGES 2010-05-16 11:13:00 UTC (rev 17220)
+++ branches/theora-gumboot/CHANGES 2010-05-16 12:46:19 UTC (rev 17221)
@@ -136,7 +136,7 @@
- Granulepos scheme modified to match other codecs. This bumps
the bitstream revision to 3.2.1. Bitstreams marked 3.2.0 are
handled correctly by this decoder. Older decoders will show
- a one frame sync error in the less noticable direction.
+ a one frame sync error in the less noticeable direction.
libtheora 1.0alpha8 (2007 September 18)
Modified: branches/theora-gumboot/lib/analyze.c
===================================================================
--- branches/theora-gumboot/lib/analyze.c 2010-05-16 11:13:00 UTC (rev 17220)
+++ branches/theora-gumboot/lib/analyze.c 2010-05-16 12:46:19 UTC (rev 17221)
@@ -758,7 +758,6 @@
int nonzero;
unsigned uncoded_ssd;
unsigned coded_ssd;
- int coded_dc;
oc_token_checkpoint *checkpoint;
oc_fragment *frags;
int mb_mode;
@@ -918,11 +917,10 @@
{
/*In retrospect, should we have skipped this block?*/
oc_enc_frag_sub(_enc,data,src,dst,ystride);
- coded_ssd=coded_dc=0;
+ coded_ssd=0;
if(borderi<0){
for(pi=0;pi<64;pi++){
coded_ssd+=data[pi]*data[pi];
- coded_dc+=data[pi];
}
}
else{
@@ -930,13 +928,10 @@
mask=_enc->state.borders[borderi].mask;
for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){
coded_ssd+=data[pi]*data[pi];
- coded_dc+=data[pi];
}
}
/*Scale to match DCT domain.*/
coded_ssd<<=4;
- /*We actually only want the AC contribution to the SSD.*/
- coded_ssd-=coded_dc*coded_dc>>2;
#if defined(OC_COLLECT_METRICS)
_enc->frag_ssd[_fragi]=coded_ssd;
}
@@ -1924,7 +1919,6 @@
unsigned uncoded_ssd;
int uncoded_dc;
unsigned dc_dequant;
- int dc_flag;
int mapii;
int mapi;
int pli;
@@ -1949,7 +1943,6 @@
if(borderi<0){
for(pi=0;pi<64;pi++){
uncoded_ssd+=buffer[pi]*buffer[pi];
- uncoded_dc+=buffer[pi];
}
}
else{
@@ -1957,18 +1950,16 @@
mask=_enc->state.borders[borderi].mask;
for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){
uncoded_ssd+=buffer[pi]*buffer[pi];
- uncoded_dc+=buffer[pi];
}
}
/*Scale to match DCT domain.*/
uncoded_ssd<<=4;
- /*We actually only want the AC contribution to the SSD.*/
- uncoded_ssd-=uncoded_dc*uncoded_dc>>2;
uncoded_ssd=OC_RD_SCALE(uncoded_ssd,_rd_scale[bi]);
- /*DC is a special case; if there's more than a full-quantizer improvement
- in the effective DC component, always force-code the block.*/
- dc_flag=abs(uncoded_dc)>dc_dequant<<1;
- uncoded_ssd|=-dc_flag;
+ /*Motion is a special case; if there is more than a full-pixel motion
+ against the prior frame, penalize skipping. TODO: The factor of
+ two here is a kludge, but it tested out better than a hard limit*/
+ if(_enc->mb_info[_mbi].block_mv[bi][0]!=0||
+ _enc->mb_info[_mbi].block_mv[bi][1]!=0)uncoded_ssd*=2;
_pipe->skip_ssd[0][fragi-_pipe->froffset[0]]=_ssd[bi]=uncoded_ssd;
}
mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
@@ -1990,25 +1981,22 @@
if(borderi<0){
for(pi=0;pi<64;pi++){
uncoded_ssd+=buffer[pi]*buffer[pi];
- uncoded_dc+=buffer[pi];
}
}
else{
mask=_enc->state.borders[borderi].mask;
for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){
uncoded_ssd+=buffer[pi]*buffer[pi];
- uncoded_dc+=buffer[pi];
}
}
/*Scale to match DCT domain.*/
uncoded_ssd<<=4;
- /*We actually only want the AC contribution to the SSD.*/
- uncoded_ssd-=uncoded_dc*uncoded_dc>>2;
uncoded_ssd=OC_RD_SCALE(uncoded_ssd,_rd_scale[4]);
- /*DC is a special case; if there's more than a full-quantizer improvement
- in the effective DC component, always force-code the block.*/
- dc_flag=abs(uncoded_dc)>dc_dequant<<1;
- uncoded_ssd|=-dc_flag;
+ /*Motion is a special case; if there is more than a full-pixel motion
+ against the prior frame, penalize skipping. TODO: The factor of
+ two here is a kludge, but it tested out better than a hard limit*/
+ if(_enc->mb_info[_mbi].unref_mv[OC_FRAME_PREV][0]!=0||
+ _enc->mb_info[_mbi].unref_mv[OC_FRAME_PREV][1]!=0)uncoded_ssd*=2;
_pipe->skip_ssd[pli][fragi-_pipe->froffset[pli]]=_ssd[mapii]=uncoded_ssd;
}
map_nidxs=(map_nidxs-4<<1)+4;
Modified: branches/theora-gumboot/lib/bitpack.c
===================================================================
--- branches/theora-gumboot/lib/bitpack.c 2010-05-16 11:13:00 UTC (rev 17220)
+++ branches/theora-gumboot/lib/bitpack.c 2010-05-16 12:46:19 UTC (rev 17221)
@@ -32,10 +32,10 @@
const unsigned char *stop;
oc_pb_window window;
int available;
+ stop=_b->stop;
+ ptr=_b->ptr;
window=_b->window;
available=_b->bits;
- ptr=_b->ptr;
- stop=_b->stop;
while(available<=OC_PB_WINDOW_SIZE-8&&ptr<stop){
available+=8;
window|=(oc_pb_window)*ptr++<<OC_PB_WINDOW_SIZE-available;
@@ -82,8 +82,8 @@
available-=_bits;
window<<=1;
window<<=_bits-1;
+ _b->window=window;
_b->bits=available;
- _b->window=window;
return result;
}
@@ -100,8 +100,8 @@
result=window>>OC_PB_WINDOW_SIZE-1;
available--;
window<<=1;
+ _b->window=window;
_b->bits=available;
- _b->window=window;
return result;
}
Modified: branches/theora-gumboot/lib/bitpack.h
===================================================================
--- branches/theora-gumboot/lib/bitpack.h 2010-05-16 11:13:00 UTC (rev 17220)
+++ branches/theora-gumboot/lib/bitpack.h 2010-05-16 12:46:19 UTC (rev 17221)
@@ -35,9 +35,9 @@
struct oc_pack_buf{
+ const unsigned char *stop;
+ const unsigned char *ptr;
oc_pb_window window;
- const unsigned char *ptr;
- const unsigned char *stop;
int bits;
int eof;
};
Modified: branches/theora-gumboot/lib/decint.h
===================================================================
--- branches/theora-gumboot/lib/decint.h 2010-05-16 11:13:00 UTC (rev 17220)
+++ branches/theora-gumboot/lib/decint.h 2010-05-16 12:46:19 UTC (rev 17221)
@@ -37,7 +37,7 @@
struct th_setup_info{
/*The Huffman codes.*/
- oc_huff_node *huff_tables[TH_NHUFFMAN_TABLES];
+ ogg_int16_t *huff_tables[TH_NHUFFMAN_TABLES];
/*The quantization parameters.*/
th_quant_info qinfo;
};
@@ -55,7 +55,7 @@
/*Buffer in which to assemble packets.*/
oc_pack_buf opb;
/*Huffman decode trees.*/
- oc_huff_node *huff_tables[TH_NHUFFMAN_TABLES];
+ ogg_int16_t *huff_tables[TH_NHUFFMAN_TABLES];
/*The index of the first token in each plane for each coefficient.*/
ptrdiff_t ti0[3][64];
/*The number of outstanding EOB runs at the start of each coefficient in each
Modified: branches/theora-gumboot/lib/decode.c
===================================================================
--- branches/theora-gumboot/lib/decode.c 2010-05-16 11:13:00 UTC (rev 17220)
+++ branches/theora-gumboot/lib/decode.c 2010-05-16 12:46:19 UTC (rev 17221)
@@ -371,7 +371,7 @@
ret=oc_state_init(&_dec->state,_info,3);
if(ret<0)return ret;
ret=oc_huff_trees_copy(_dec->huff_tables,
- (const oc_huff_node *const *)_setup->huff_tables);
+ (const ogg_int16_t *const *)_setup->huff_tables);
if(ret<0){
oc_state_clear(&_dec->state);
return ret;
Modified: branches/theora-gumboot/lib/huffdec.c
===================================================================
--- branches/theora-gumboot/lib/huffdec.c 2010-05-16 11:13:00 UTC (rev 17220)
+++ branches/theora-gumboot/lib/huffdec.c 2010-05-16 12:46:19 UTC (rev 17221)
@@ -22,10 +22,60 @@
#include "decint.h"
-/*The ANSI offsetof macro is broken on some platforms (e.g., older DECs).*/
-#define _ogg_offsetof(_type,_field)\
- ((size_t)((char *)&((_type *)0)->_field-(char *)0))
+/*Instead of storing every branching in the tree, subtrees can be collapsed
+ into one node, with a table of size 1<<nbits pointing directly to its
+   descendants nbits levels down.
+ This allows more than one bit to be read at a time, and avoids following all
+ the intermediate branches with next to no increased code complexity once
+ the collapsed tree has been built.
+ We do _not_ require that a subtree be complete to be collapsed, but instead
+ store duplicate pointers in the table, and record the actual depth of the
+ node below its parent.
+ This tells us the number of bits to advance the stream after reaching it.
+
+ This turns out to be equivalent to the method described in \cite{Hash95},
+ without the requirement that codewords be sorted by length.
+ If the codewords were sorted by length (so-called ``canonical-codes''), they
+ could be decoded much faster via either Lindell and Moffat's approach or
+ Hashemian's Condensed Huffman Code approach, the latter of which has an
+ extremely small memory footprint.
+ We can't use Choueka et al.'s finite state machine approach, which is
+ extremely fast, because we can't allow multiple symbols to be output at a
+ time; the codebook can and does change between symbols.
+ It also has very large memory requirements, which impairs cache coherency.
+
+ We store the tree packed in an array of 16-bit integers (words).
+  Each node of the tree consists of a single word, followed consecutively by
+ zero or more indexes of its children.
+ Let n be the value of this first word.
+
+ If n is negative or zero, then this is a leaf node.
+ The token in the leaf node is (-n&255).
+ The number of bits that should have been read to reach this child, starting
+ from the current node, is (-n>>8).
+
+ If n is positive, then it is the number of bits that need to be read to
+ traverse the node.
+ 1<<n entries follow in the array, each an index to a child node.
+ As an optimization to save space, leaf nodes are stored directly, instead of
+ storing an index, since they only require a single word.
+  If a leaf node would have been reached before reading n bits, then it is duplicated
+ the necessary number of times in this table.
+
+ @ARTICLE{Hash95,
+ author="Reza Hashemian",
+ title="Memory Efficient and High-Speed Search {Huffman} Coding",
+ journal="{IEEE} Transactions on Communications",
+ volume=43,
+ number=10,
+ pages="2576--2581",
+ month=Oct,
+ year=1995
+ }*/
+
+
+
/*The number of internal tokens associated with each of the spec tokens.*/
static const unsigned char OC_DCT_TOKEN_MAP_ENTRIES[TH_NDCT_TOKENS]={
1,1,1,4,8,1,1,8,1,1,1,1,1,2,2,2,2,4,8,2,2,2,4,2,2,2,2,2,8,2,4,8
@@ -99,61 +149,7 @@
40
};
-/*These three functions are really part of the bitpack.c module, but
- they are only used here.
- Declaring local static versions so they can be inlined saves considerable
- function call overhead.*/
-static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){
- const unsigned char *ptr;
- const unsigned char *stop;
- oc_pb_window window;
- int available;
- window=_b->window;
- available=_b->bits;
- ptr=_b->ptr;
- stop=_b->stop;
- /*This version of _refill() doesn't bother setting eof because we won't
- check for it after we've started decoding DCT tokens.*/
- if(ptr>=stop)available=OC_LOTS_OF_BITS;
- while(available<=OC_PB_WINDOW_SIZE-8){
- available+=8;
- window|=(oc_pb_window)*ptr++<<OC_PB_WINDOW_SIZE-available;
- if(ptr>=stop)available=OC_LOTS_OF_BITS;
- }
- _b->ptr=ptr;
- if(_bits>available)window|=*ptr>>(available&7);
- _b->bits=available;
- return window;
-}
-
-
-/*Read in bits without advancing the bit pointer.
- Here we assume 0<=_bits&&_bits<=32.*/
-static long oc_pack_look(oc_pack_buf *_b,int _bits){
- oc_pb_window window;
- int available;
- long result;
- window=_b->window;
- available=_b->bits;
- if(_bits==0)return 0;
- if(_bits>available)_b->window=window=oc_pack_refill(_b,_bits);
- result=window>>OC_PB_WINDOW_SIZE-_bits;
- return result;
-}
-
-/*Advance the bit pointer.*/
-static void oc_pack_adv(oc_pack_buf *_b,int _bits){
- /*We ignore the special cases for _bits==0 and _bits==32 here, since they are
- never used actually used.
- OC_HUFF_SLUSH (defined below) would have to be at least 27 to actually read
- 32 bits in a single go, and would require a 32 GB lookup table (assuming
- 8 byte pointers, since 4 byte pointers couldn't fit such a table).*/
- _b->window<<=_bits;
- _b->bits-=_bits;
-}
-
-
/*The log_2 of the size of a lookup table is allowed to grow to relative to
the number of unique nodes it contains.
E.g., if OC_HUFF_SLUSH is 2, then at most 75% of the space in the tree is
@@ -169,47 +165,53 @@
10538563 read calls are required when OC_HUFF_SLUSH is 2 (32.7%).
10192578 read calls are required when OC_HUFF_SLUSH is 3 (31.6%).
Since a value of 1 gets us the vast majority of the speed-up with only a
- small amount of wasted memory, this is what we use.*/
+ small amount of wasted memory, this is what we use.
+ This value must be less than 7, or you could create a tree with more than
+ 32767 entries, which would overflow the 16-bit words used to index it.*/
#define OC_HUFF_SLUSH (1)
+/*The root of the tree is the fast path, and a larger value here is more
+ beneficial than elsewhere in the tree.
+ 3 appears to give the best performance, trading off between increased use of
+ the single-read fast path and cache footprint for the tables.
+   The VP3 tables are about 2.6x larger using 3 here than using 1.*/
+#define OC_ROOT_HUFF_SLUSH (3)
-/*Determines the size in bytes of a Huffman tree node that represents a
+/*Determines the size in words of a Huffman tree node that represents a
subtree of depth _nbits.
_nbits: The depth of the subtree.
- If this is 0, the node is a leaf node.
+ If this is <=0, the node is a leaf node.
Otherwise 1<<_nbits pointers are allocated for children.
- Return: The number of bytes required to store the node.*/
+ Return: The number of words required to store the node.*/
static size_t oc_huff_node_size(int _nbits){
- size_t size;
- size=_ogg_offsetof(oc_huff_node,nodes);
- if(_nbits>0)size+=sizeof(oc_huff_node *)*(1<<_nbits);
- return size;
+ return _nbits>0?1+(1<<_nbits):1;
}
-static oc_huff_node *oc_huff_node_init(char **_storage,size_t _size,int _nbits){
- oc_huff_node *ret;
- ret=(oc_huff_node *)*_storage;
- ret->nbits=(unsigned char)_nbits;
- (*_storage)+=_size;
- return ret;
-}
-
-/*Determines the size in bytes of a Huffman tree.
- _nbits: The depth of the subtree.
- If this is 0, the node is a leaf node.
- Otherwise storage for 1<<_nbits pointers are added for children.
- Return: The number of bytes required to store the tree.*/
-static size_t oc_huff_tree_size(const oc_huff_node *_node){
+/*Determines the size in words of a Huffman subtree.
+ _tree: The complete Huffman tree.
+ _node: The index of the root of the desired subtree.
+ Return: The number of words required to store the tree.*/
+static size_t oc_huff_tree_size(const ogg_int16_t *_tree,int _node){
size_t size;
- size=oc_huff_node_size(_node->nbits);
- if(_node->nbits){
+ int n;
+ n=_tree[_node];
+ size=oc_huff_node_size(n);
+ if(n>0){
int nchildren;
int i;
- nchildren=1<<_node->nbits;
- for(i=0;i<nchildren;i+=1<<_node->nbits-_node->nodes[i]->depth){
- size+=oc_huff_tree_size(_node->nodes[i]);
+ nchildren=1<<n;
+ i=0;
+ do{
+ int child;
+ child=_tree[_node+i+1];
+ if(child<=0)i+=1<<n-(-child>>8);
+ else{
+ size+=oc_huff_tree_size(_tree,child);
+ i++;
+ }
}
+ while(i<nchildren);
}
return size;
}
@@ -217,208 +219,283 @@
/*Unpacks a sub-tree from the given buffer.
_opb: The buffer to unpack from.
- _binodes: The nodes to store the sub-tree in.
- _nbinodes: The number of nodes available for the sub-tree.
- Return: 0 on success, or a negative value on error.*/
+ _bitree: The tree to store the sub-tree in.
+ _nbinodes: The number of nodes available for the tree.
+ _binode: The index of the tree to store it in.
+ This is updated to point to the first available slot past the
+ sub-tree on return.
+ If an error occurs, this is set larger than _nbinodes.
+ Return: The node index of the sub-tree, or leaf node value if this was a leaf
+ node and this wasn't the root of the tree.*/
static int oc_huff_tree_unpack(oc_pack_buf *_opb,
- oc_huff_node *_binodes,int _nbinodes){
- oc_huff_node *binode;
- long bits;
- int nused;
- if(_nbinodes<1)return TH_EBADHEADER;
- binode=_binodes;
- nused=0;
+ ogg_int16_t *_bitree,int _nbinodes,int *_binode){
+ long bits;
+ int binode;
+ binode=*_binode;
+ if(binode>_nbinodes)return 0;
bits=oc_pack_read1(_opb);
- if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
+ if(oc_pack_bytes_left(_opb)<0){
+ *_binode=_nbinodes+1;
+ return 0;
+ }
/*Read an internal node:*/
if(!bits){
- int ret;
- nused++;
- binode->nbits=1;
- binode->depth=1;
- binode->nodes[0]=_binodes+nused;
- ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused);
- if(ret>=0){
- nused+=ret;
- binode->nodes[1]=_binodes+nused;
- ret=oc_huff_tree_unpack(_opb,_binodes+nused,_nbinodes-nused);
- }
- if(ret<0)return ret;
- nused+=ret;
+ *_binode=binode+3;
+ if(binode+3>_nbinodes)return 0;
+ _bitree[binode+0]=1;
+ _bitree[binode+1]=
+ (ogg_int16_t)oc_huff_tree_unpack(_opb,_bitree,_nbinodes,_binode);
+ _bitree[binode+2]=
+ (ogg_int16_t)oc_huff_tree_unpack(_opb,_bitree,_nbinodes,_binode);
}
/*Read a leaf node:*/
else{
int ntokens;
int token;
- int i;
bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS);
- if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
- /*Find out how many internal tokens we translate this external token into.*/
+ if(oc_pack_bytes_left(_opb)<0){
+ *_binode=_nbinodes+1;
+ return 0;
+ }
+ /*Find out how many internal tokens we translate this external token into.
+ This must be a power of two.*/
ntokens=OC_DCT_TOKEN_MAP_ENTRIES[bits];
- if(_nbinodes<2*ntokens-1)return TH_EBADHEADER;
- /*Fill in a complete binary tree pointing to the internal tokens.*/
- for(i=1;i<ntokens;i<<=1){
+ token=OC_DCT_TOKEN_MAP[bits];
+ if(ntokens<=1){
+ /*If we only have the one leaf, return it directly.*/
+ if(binode>0)return -(1<<8|token);
+ /*Unless we are the root of the tree; then we have to store it as a
+ node explicitly.*/
+ *_binode=binode+1;
+ if(binode+1>_nbinodes)return 0;
+ _bitree[binode]=-token;
+ }
+ else{
+ int i;
int j;
- binode=_binodes+nused;
- nused+=i;
- for(j=0;j<i;j++){
- binode[j].nbits=1;
- binode[j].depth=1;
- binode[j].nodes[0]=_binodes+nused+2*j;
- binode[j].nodes[1]=_binodes+nused+2*j+1;
+ /*Fill in a complete binary tree pointing to the internal nodes.*/
+ *_binode=binode+3*(ntokens-1);
+ if(binode+3*(ntokens-1)>_nbinodes)return 0;
+ for(i=1;i<ntokens>>1;i<<=1){
+ int child;
+ child=binode+3*(2*i-1);
+ for(j=0;j<i;j++){
+ _bitree[binode+3*(i-1+j)+0]=1;
+ _bitree[binode+3*(i-1+j)+1]=(ogg_int16_t)child;
+ child+=3;
+ _bitree[binode+3*(i-1+j)+2]=(ogg_int16_t)child;
+ child+=3;
+ }
}
+ /*And now the last row pointing to the leaf nodes.*/
+ for(i=j=0;j<ntokens>>1;j++){
+ _bitree[binode+3*((ntokens>>1)-1+j)+0]=1;
+ _bitree[binode+3*((ntokens>>1)-1+j)+1]=(ogg_int16_t)-(1<<8|token+i++);
+ _bitree[binode+3*((ntokens>>1)-1+j)+2]=(ogg_int16_t)-(1<<8|token+i++);
+ }
}
- /*And now the leaf nodes with those tokens.*/
- token=OC_DCT_TOKEN_MAP[bits];
- for(i=0;i<ntokens;i++){
- binode=_binodes+nused++;
- binode->nbits=0;
- binode->depth=1;
- binode->token=token+i;
- }
}
- return nused;
+ return binode;
}
/*Finds the depth of shortest branch of the given sub-tree.
The tree must be binary.
+ _bitree: A packed binary Huffman tree.
_binode: The root of the given sub-tree.
- _binode->nbits must be 0 or 1.
+ _bitree[_binode] must be <=1.
Return: The smallest depth of a leaf node in this sub-tree.
0 indicates this sub-tree is a leaf node.*/
-static int oc_huff_tree_mindepth(oc_huff_node *_binode){
+static int oc_huff_tree_mindepth(const ogg_int16_t *_bitree,int _binode){
+ int child;
int depth0;
int depth1;
- if(_binode->nbits==0)return 0;
- depth0=oc_huff_tree_mindepth(_binode->nodes[0]);
- depth1=oc_huff_tree_mindepth(_binode->nodes[1]);
+ if(_bitree[_binode]<=0)return 0;
+ child=_bitree[_binode+1];
+ depth0=child<=0?0:oc_huff_tree_mindepth(_bitree,child);
+ child=_bitree[_binode+2];
+ depth1=child<=0?0:oc_huff_tree_mindepth(_bitree,child);
return OC_MINI(depth0,depth1)+1;
}
/*Finds the number of internal nodes at a given depth, plus the number of
leaves at that depth or shallower.
The tree must be binary.
+ _bitree: A packed binary Huffman tree.
_binode: The root of the given sub-tree.
- _binode->nbits must be 0 or 1.
- Return: The number of entries that would be contained in a jump table of the
- given depth.*/
-static int oc_huff_tree_occupancy(oc_huff_node *_binode,int _depth){
- if(_binode->nbits==0||_depth<=0)return 1;
+ _bitree[_binode] must be <=1.
+ Return: The number of unique entries that would be contained in a jump table
+ of the given depth.*/
+static int oc_huff_tree_occupancy(const ogg_int16_t *_bitree,int _binode,
+ int _depth){
+ if(_depth<=0||_bitree[_binode]<=0)return 1;
else{
- return oc_huff_tree_occupancy(_binode->nodes[0],_depth-1)+
- oc_huff_tree_occupancy(_binode->nodes[1],_depth-1);
+ int child1;
+ int child2;
+ child1=_bitree[_binode+1];
+ child2=_bitree[_binode+2];
+ return (child1<=0?1:oc_huff_tree_occupancy(_bitree,child1,_depth-1))+
+ (child2<=0?1:oc_huff_tree_occupancy(_bitree,child2,_depth-1));
}
}
-/*Makes a copy of the given Huffman tree.
- _node: The Huffman tree to copy.
- Return: The copy of the Huffman tree.*/
-static oc_huff_node *oc_huff_tree_copy(const oc_huff_node *_node,
- char **_storage){
- oc_huff_node *ret;
- ret=oc_huff_node_init(_storage,oc_huff_node_size(_node->nbits),_node->nbits);
- ret->depth=_node->depth;
- if(_node->nbits){
+/*Makes a copy of the given Huffman sub-tree.
+ _dst: The destination tree.
+  _src:  The source tree.
+  _node: The index of the Huffman sub-tree to copy.
+ Return: The index of the first available slot after the sub-tree copy.*/
+static int oc_huff_tree_copy(ogg_int16_t *_dst,
+ const ogg_int16_t *_src,int _node){
+ int n;
+ n=_src[_node];
+ _dst[_node++]=(ogg_int16_t)n;
+ if(n<=0)return _node;
+ else{
int nchildren;
+ int ret;
int i;
- int inext;
- nchildren=1<<_node->nbits;
- for(i=0;i<nchildren;){
- ret->nodes[i]=oc_huff_tree_copy(_node->nodes[i],_storage);
- inext=i+(1<<_node->nbits-ret->nodes[i]->depth);
- while(++i<inext)ret->nodes[i]=ret->nodes[i-1];
+ nchildren=1<<n;
+ ret=_node+nchildren;
+ for(i=0;i<nchildren;i++){
+ int child;
+ child=_src[_node+i];
+ _dst[_node+i]=(ogg_int16_t)child;
+ if(child>0){
+ /*Note: This assumes that storage for children is in order, and
+ consecutive.*/
+ ret=oc_huff_tree_copy(_dst,_src,child);
+ }
}
+ return ret;
}
- else ret->token=_node->token;
- return ret;
}
-static size_t oc_huff_tree_collapse_size(oc_huff_node *_binode,int _depth){
+static size_t oc_huff_tree_collapse_size(ogg_int16_t *_bitree,int _binode,
+ int _depth){
size_t size;
int mindepth;
int depth;
+ int threshold;
int loccupancy;
int occupancy;
- if(_binode->nbits!=0&&_depth>0){
- return oc_huff_tree_collapse_size(_binode->nodes[0],_depth-1)+
- oc_huff_tree_collapse_size(_binode->nodes[1],_depth-1);
+ int child1;
+ int child2;
+ int n;
+ n=_bitree[_binode];
+ if(n>0&&_depth>0){
+ child1=_bitree[_binode+1];
+ child2=_bitree[_binode+2];
+ return (child1<=0?0:oc_huff_tree_collapse_size(_bitree,child1,_depth-1))+
+ (child2<=0?0:oc_huff_tree_collapse_size(_bitree,child2,_depth-1));
}
- depth=mindepth=oc_huff_tree_mindepth(_binode);
+ depth=mindepth=oc_huff_tree_mindepth(_bitree,_binode);
occupancy=1<<mindepth;
do{
loccupancy=occupancy;
- occupancy=oc_huff_tree_occupancy(_binode,++depth);
+ occupancy=oc_huff_tree_occupancy(_bitree,_binode,++depth);
+ threshold=1<<OC_MAXI(depth-(_binode>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH),0);
}
- while(occupancy>loccupancy&&occupancy>=1<<OC_MAXI(depth-OC_HUFF_SLUSH,0));
+ while(occupancy>loccupancy&&occupancy>=threshold);
depth--;
size=oc_huff_node_size(depth);
if(depth>0){
- size+=oc_huff_tree_collapse_size(_binode->nodes[0],depth-1);
- size+=oc_huff_tree_collapse_size(_binode->nodes[1],depth-1);
+ child1=_bitree[_binode+1];
+ child2=_bitree[_binode+2];
+ size+=child1<=0?0:oc_huff_tree_collapse_size(_bitree,child1,depth-1);
+ size+=child2<=0?0:oc_huff_tree_collapse_size(_bitree,child2,depth-1);
}
return size;
}
-static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode,
- char **_storage);
+static int oc_huff_tree_collapse(ogg_int16_t *_tree,int *_node,
+ const ogg_int16_t *_bitree,int _binode);
/*Fills the given nodes table with all the children in the sub-tree at the
given depth.
The nodes in the sub-tree with a depth less than that stored in the table
are freed.
The sub-tree must be binary and complete up until the given depth.
- _nodes: The nodes table to fill.
- _binode: The root of the sub-tree to fill it with.
- _binode->nbits must be 0 or 1.
+ _tree: The storage for the collapsed Huffman tree.
+ _node: The index of the next available storage in the tree.
+ _offs: The index of the nodes to fill.
+ _bitree: The binary tree to collapse.
+ _binode: The index of the root of the sub-tree to fill it with.
+ _bitree[_binode] must be <=1 .
_level: The current level in the table.
0 indicates that the current node should be stored, regardless of
whether it is a leaf node or an internal node.
_depth: The depth of the nodes to fill the table with, relative to their
parent.*/
-static void oc_huff_node_fill(oc_huff_node **_nodes,
- oc_huff_node *_binode,int _level,int _depth,char **_storage){
- if(_level<=0||_binode->nbits==0){
- int i;
- _binode->depth=(unsigned char)(_depth-_level);
- _nodes[0]=oc_huff_tree_collapse(_binode,_storage);
- for(i=1;i<1<<_level;i++)_nodes[i]=_nodes[0];
+static void oc_huff_node_fill(ogg_int16_t *_tree,int *_node,int _offs,
+ const ogg_int16_t *_bitree,int _binode,int _level,int _depth){
+ int leaf;
+ int n;
+ int i;
+ n=_bitree[_binode];
+ if(n<=0){
+ leaf=-((_depth-_level<<8)|(-n&255));
+ for(i=0;i<1<<_level;i++)_tree[_offs+i]=(ogg_int16_t)leaf;
}
+ else if(_level<=0){
+ _tree[_offs]=oc_huff_tree_collapse(_tree,_node,_bitree,_binode);
+ }
else{
+ int child1;
+ int child2;
_level--;
- oc_huff_node_fill(_nodes,_binode->nodes[0],_level,_depth,_storage);
- _nodes+=1<<_level;
- oc_huff_node_fill(_nodes,_binode->nodes[1],_level,_depth,_storage);
+ child1=_bitree[_binode+1];
+ if(child1<=0){
+ leaf=-((_depth-_level<<8)|(-child1&255));
+ for(i=0;i<1<<_level;i++)_tree[_offs+i]=(ogg_int16_t)leaf;
+ }
+ else oc_huff_node_fill(_tree,_node,_offs,_bitree,child1,_level,_depth);
+ _offs+=1<<_level;
+ child2=_bitree[_binode+2];
+ if(child2<=0){
+ leaf=-((_depth-_level<<8)|(-child2&255));
+ for(i=0;i<1<<_level;i++)_tree[_offs+i]=(ogg_int16_t)leaf;
+ }
+ else oc_huff_node_fill(_tree,_node,_offs,_bitree,child2,_level,_depth);
}
}
/*Finds the largest complete sub-tree rooted at the current node and collapses
it into a single node.
This procedure is then applied recursively to all the children of that node.
- _binode: The root of the sub-tree to collapse.
- _binode->nbits must be 0 or 1.
+ _tree: The storage for the collapsed Huffman tree.
+ _node: The index of the next available storage in the tree.
+ _offs: The index of the nodes to fill.
+ _bitree: The binary tree to collapse.
+ _binode: The index of the root of the sub-tree to fill it with.
+ _bitree[_binode] must be <=1 .
Return: The new root of the collapsed sub-tree.*/
-static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode,
- char **_storage){
- oc_huff_node *root;
- size_t size;
- int mindepth;
- int depth;
- int loccupancy;
- int occupancy;
- depth=mindepth=oc_huff_tree_mindepth(_binode);
+static int oc_huff_tree_collapse(ogg_int16_t *_tree,int *_node,
+ const ogg_int16_t *_bitree,int _binode){
+ int mindepth;
+ int depth;
+ int loccupancy;
+ int occupancy;
+ int threshold;
+ int node;
+ depth=mindepth=oc_huff_tree_mindepth(_bitree,_binode);
occupancy=1<<mindepth;
do{
loccupancy=occupancy;
- occupancy=oc_huff_tree_occupancy(_binode,++depth);
+ occupancy=oc_huff_tree_occupancy(_bitree,_binode,++depth);
+ threshold=1<<OC_MAXI(depth-(_binode>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH),0);
}
- while(occupancy>loccupancy&&occupancy>=1<<OC_MAXI(depth-OC_HUFF_SLUSH,0));
+ while(occupancy>loccupancy&&occupancy>=threshold);
depth--;
- if(depth<=0)return oc_huff_tree_copy(_binode,_storage);
- size=oc_huff_node_size(depth);
- root=oc_huff_node_init(_storage,size,depth);
- root->depth=_binode->depth;
- oc_huff_node_fill(root->nodes,_binode,depth,depth,_storage);
- return root;
+ node=*_node;
+ if(depth<=0){
+ _tree[node]=_bitree[_binode];
+ *_node=node+1;
+ }
+ else{
+ _tree[node]=depth;
+ *_node=node+1+(1<<depth);
+ oc_huff_node_fill(_tree,_node,node+1,_bitree,_binode,depth,depth);
+ }
+ return node;
}
/*Unpacks a set of Huffman trees, and reduces them to a collapsed
@@ -427,22 +504,25 @@
_nodes: The table to fill with the Huffman trees.
Return: 0 on success, or a negative value on error.*/
int oc_huff_trees_unpack(oc_pack_buf *_opb,
- oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){
+ ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
int i;
for(i=0;i<TH_NHUFFMAN_TABLES;i++){
- oc_huff_node nodes[511];
- char *storage;
+ ogg_int16_t nodes[255*3];
+ ogg_int16_t *tree;
size_t size;
- int ret;
+ int node;
/*Unpack the full tree into a temporary buffer.*/
- ret=oc_huff_tree_unpack(_opb,nodes,sizeof(nodes)/sizeof(*nodes));
- if(ret<0)return ret;
+ node=0;
+ oc_huff_tree_unpack(_opb,nodes,sizeof(nodes)/sizeof(*nodes),&node);
+ if(node>sizeof(nodes)/sizeof(*nodes))return TH_EBADHEADER;
/*Figure out how big the collapsed tree will be.*/
- size=oc_huff_tree_collapse_size(nodes,0);
- storage=(char *)_ogg_calloc(1,size);
- if(storage==NULL)return TH_EFAULT;
+ size=oc_huff_tree_collapse_size(nodes,0,0);
+ tree=(ogg_int16_t *)_ogg_malloc(size*sizeof(*tree));
+ if(tree==NULL)return TH_EFAULT;
/*And collapse it.*/
- _nodes[i]=oc_huff_tree_collapse(nodes,&storage);
+ node=0;
+ oc_huff_tree_collapse(tree,&node,nodes,0);
+ _nodes[i]=tree;
}
return 0;
}
@@ -450,40 +530,100 @@
/*Makes a copy of the given set of Huffman trees.
_dst: The array to store the copy in.
_src: The array of trees to copy.*/
-int oc_huff_trees_copy(oc_huff_node *_dst[TH_NHUFFMAN_TABLES],
- const oc_huff_node *const _src[TH_NHUFFMAN_TABLES]){
+int oc_huff_trees_copy(ogg_int16_t *_dst[TH_NHUFFMAN_TABLES],
+ const ogg_int16_t *const _src[TH_NHUFFMAN_TABLES]){
+ int total;
int i;
+ total=0;
for(i=0;i<TH_NHUFFMAN_TABLES;i++){
- size_t size;
- char *storage;
- size=oc_huff_tree_size(_src[i]);
- storage=(char *)_ogg_calloc(1,size);
- if(storage==NULL){
+ size_t size;
+ ogg_int16_t *tree;
+ size=oc_huff_tree_size(_src[i],0);
+ total+=size;
+ tree=(ogg_int16_t *)_ogg_malloc(size*sizeof(*tree));
+ if(tree==NULL){
while(i-->0)_ogg_free(_dst[i]);
return TH_EFAULT;
}
- _dst[i]=oc_huff_tree_copy(_src[i],&storage);
+ oc_huff_tree_copy(tree,_src[i],0);
+ _dst[i]=tree;
}
return 0;
}
/*Frees the memory used by a set of Huffman trees.
_nodes: The array of trees to free.*/
-void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){
+void oc_huff_trees_clear(ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
int i;
for(i=0;i<TH_NHUFFMAN_TABLES;i++)_ogg_free(_nodes[i]);
}
+
/*Unpacks a single token using the given Huffman tree.
_opb: The buffer to unpack the token from.
_node: The tree to unpack the token with.
Return: The token value.*/
-int oc_huff_token_decode(oc_pack_buf *_opb,const oc_huff_node *_node){
- long bits;
- while(_node->nbits!=0){
- bits=oc_pack_look(_opb,_node->nbits);
- _node=_node->nodes[bits];
- oc_pack_adv(_opb,_node->depth);
+int oc_huff_token_decode(oc_pack_buf *_opb,const ogg_int16_t *_tree){
+ const unsigned char *ptr;
+ const unsigned char *stop;
+ oc_pb_window window;
+ int available;
+ long bits;
+ int node;
+ int n;
+ n=_tree[0];
+ ptr=_opb->ptr;
+ window=_opb->window;
+ stop=_opb->stop;
+ available=_opb->bits;
+ /*Handle the case of a codebook with a single value.
+ Branch prediction should make this esoteric case free.*/
+ if(n<=0)return -n&255;
+ if(n>available){
+ for(;;){
+ /*We don't bother setting eof because we won't check for it after we've
+ started decoding DCT tokens.*/
+ if(ptr>=stop){
+ available=OC_LOTS_OF_BITS;
+ break;
+ }
+ if(available>OC_PB_WINDOW_SIZE-8)break;
+ available+=8;
+ window|=(oc_pb_window)*ptr++<<OC_PB_WINDOW_SIZE-available;
+ }
+ /*Note: We never request more than 24 bits, so there's no need to fill in
+ the last partial byte here.*/
}
- return _node->token;
+ bits=window>>OC_PB_WINDOW_SIZE-n;
+ node=_tree[1+bits];
+ while(node>0){
+ window<<=n;
+ available-=n;
+ n=_tree[node];
+ if(n>available){
+ for(;;){
+ /*We don't bother setting eof because we won't check for it after we've
+ started decoding DCT tokens.*/
+ if(ptr>=stop){
+ available=OC_LOTS_OF_BITS;
+ break;
+ }
+ if(available>OC_PB_WINDOW_SIZE-8)break;
+ available+=8;
+ window|=(oc_pb_window)*ptr++<<OC_PB_WINDOW_SIZE-available;
+ }
+ /*Note: We never request more than 24 bits, so there's no need to fill in
+ the last partial byte here.*/
+ }
+ bits=window>>OC_PB_WINDOW_SIZE-n;
+ node=_tree[node+1+bits];
+ }
+ node=-node;
+ n=node>>8;
+ window<<=n;
+ available-=n;
+ _opb->ptr=ptr;
+ _opb->window=window;
+ _opb->bits=available;
+ return node&255;
}
Modified: branches/theora-gumboot/lib/huffdec.h
===================================================================
--- branches/theora-gumboot/lib/huffdec.h 2010-05-16 11:13:00 UTC (rev 17220)
+++ branches/theora-gumboot/lib/huffdec.h 2010-05-16 12:46:19 UTC (rev 17221)
@@ -22,71 +22,11 @@
-typedef struct oc_huff_node oc_huff_node;
-
-/*A node in the Huffman tree.
- Instead of storing every branching in the tree, subtrees can be collapsed
- into one node, with a table of size 1<<nbits pointing directly to its
- descedents nbits levels down.
- This allows more than one bit to be read at a time, and avoids following all
- the intermediate branches with next to no increased code complexity once
- the collapsed tree has been built.
- We do _not_ require that a subtree be complete to be collapsed, but instead
- store duplicate pointers in the table, and record the actual depth of the
- node below its parent.
- This tells us the number of bits to advance the stream after reaching it.
-
- This turns out to be equivalent to the method described in \cite{Hash95},
- without the requirement that codewords be sorted by length.
- If the codewords were sorted by length (so-called ``canonical-codes''), they
- could be decoded much faster via either Lindell and Moffat's approach or
- Hashemian's Condensed Huffman Code approach, the latter of which has an
- extremely small memory footprint.
- We can't use Choueka et al.'s finite state machine approach, which is
- extremely fast, because we can't allow multiple symbols to be output at a
- time; the codebook can and does change between symbols.
- It also has very large memory requirements, which impairs cache coherency.
-
- @ARTICLE{Hash95,
- author="Reza Hashemian",
- title="Memory Efficient and High-Speed Search {Huffman} Coding",
- journal="{IEEE} Transactions on Communications",
- volume=43,
- number=10,
- pages="2576--2581",
- month=Oct,
- year=1995
- }*/
-struct oc_huff_node{
- /*The number of bits of the code needed to descend through this node.
- 0 indicates a leaf node.
- Otherwise there are 1<<nbits nodes in the nodes table, which can be
- indexed by reading nbits bits from the stream.*/
- unsigned char nbits;
- /*The value of a token stored in a leaf node.
- The value in non-leaf nodes is undefined.*/
- unsigned char token;
- /*The depth of the current node, relative to its parent in the collapsed
- tree.
- This can be less than its parent's nbits value, in which case there are
- 1<<nbits-depth copies of this node in the table, and the bitstream should
- only be advanced depth bits after reaching this node.*/
- unsigned char depth;
- /*The table of child nodes.
- The ACTUAL size of this array is 1<<nbits, despite what the declaration
- below claims.
- The exception is that for leaf nodes the size is 0.*/
- oc_huff_node *nodes[2];
-};
-
-
-
int oc_huff_trees_unpack(oc_pack_buf *_opb,
- oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]);
-int oc_huff_trees_copy(oc_huff_node *_dst[TH_NHUFFMAN_TABLES],
- const oc_huff_node *const _src[TH_NHUFFMAN_TABLES]);
-void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]);
-int oc_huff_token_decode(oc_pack_buf *_opb,const oc_huff_node *_node);
+ ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]);
+int oc_huff_trees_copy(ogg_int16_t *_dst[TH_NHUFFMAN_TABLES],
+ const ogg_int16_t *const _src[TH_NHUFFMAN_TABLES]);
+void oc_huff_trees_clear(ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]);
+int oc_huff_token_decode(oc_pack_buf *_opb,const ogg_int16_t *_node);
-
#endif
More information about the commits
mailing list