[xiph-commits] r17328 - experimental/derf/theora-ptalarbvorm/lib
gmaxwell at svn.xiph.org
gmaxwell at svn.xiph.org
Thu Jul 8 11:03:11 PDT 2010
Author: gmaxwell
Date: 2010-07-08 11:03:11 -0700 (Thu, 08 Jul 2010)
New Revision: 17328
Modified:
experimental/derf/theora-ptalarbvorm/lib/analyze.c
experimental/derf/theora-ptalarbvorm/lib/encint.h
experimental/derf/theora-ptalarbvorm/lib/rate.c
experimental/derf/theora-ptalarbvorm/lib/tokenize.c
Log:
Rename speed level 2 to 3 and provide a new speed level 2. Provides a new simple and fast tokenizer for these speed levels. The new speed level 2 is intended to provide quality similar to speed level 1 but is currently 33% faster through the use of the new tokenizer, disabling 3qi, and 4mv. Speed level 3 also uses the new tokenizer and is about 33% faster than the prior speed level 2.
Modified: experimental/derf/theora-ptalarbvorm/lib/analyze.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/analyze.c 2010-07-07 22:38:00 UTC (rev 17327)
+++ experimental/derf/theora-ptalarbvorm/lib/analyze.c 2010-07-08 18:03:11 UTC (rev 17328)
@@ -761,9 +761,11 @@
int nmv_offs;
int ac_bits;
int borderi;
+ int nqis;
int qti;
int qii;
int dc;
+ nqis=_enc->state.nqis;
frags=_enc->state.frags;
frag_offs=_enc->state.frag_buf_offs[_fragi];
ystride=_enc->state.ref_ystride[_pli];
@@ -841,8 +843,14 @@
dc=data[0];
/*Tokenize.*/
checkpoint=*_stack;
- ac_bits=oc_enc_tokenize_ac(_enc,_pli,_fragi,data,dequant,dct,nonzero+1,
- _stack,OC_RD_ISCALE(_enc->lambda,_rd_iscale),qti?0:3);
+ if(_enc->sp_level<OC_SP_LEVEL_FAST_ANALYSIS){
+ ac_bits=oc_enc_tokenize_ac(_enc,_pli,_fragi,data,dequant,dct,nonzero+1,
+ _stack,OC_RD_ISCALE(_enc->lambda,_rd_iscale),qti?0:3);
+ }
+ else{
+ ac_bits=oc_enc_tokenize_ac_fast(_enc,_pli,_fragi,data,dequant,dct,nonzero+1,
+ _stack,OC_RD_ISCALE(_enc->lambda,_rd_iscale),qti?0:3);
+ }
/*Reconstruct.
TODO: nonzero may need to be adjusted after tokenization.*/
dequant_dc=dequant[0];
@@ -866,8 +874,10 @@
data[0]=dc*dequant_dc;
oc_idct8x8(&_enc->state,data,nonzero+1);
}
- oc_qii_state_advance(&qs,_pipe->qs+_pli,qii);
- ac_bits+=qs.bits-_pipe->qs[_pli].bits;
+ if(nqis>1){
+ oc_qii_state_advance(&qs,_pipe->qs+_pli,qii);
+ ac_bits+=qs.bits-_pipe->qs[_pli].bits;
+ }
if(!qti)oc_enc_frag_recon_intra(_enc,dst,ystride,data);
else{
oc_enc_frag_recon_inter(_enc,dst,
@@ -923,7 +933,10 @@
_mo->ac_bits+=ac_bits;
oc_fr_code_block(_fr);
}
- *(_pipe->qs+_pli)=*&qs;
+ /*GCC 4.4.4 generates a warning here because it can't tell that
+ the init code in the nqis check above will run anytime this
+ line runs.*/
+ if(nqis>1)*(_pipe->qs+_pli)=*&qs;
frags[_fragi].dc=dc;
frags[_fragi].coded=1;
return 1;
@@ -1267,6 +1280,23 @@
return luma;
}
+static unsigned oc_mb_activity_fast(oc_enc_ctx *_enc,unsigned _mbi,
+ unsigned _activity[4], unsigned _intra_satd[12]){
+ int bi;
+ for(bi=0;bi<4;bi++){
+ unsigned act;
+ act=(11*_intra_satd[bi]>>8)*_intra_satd[bi];
+ if(act<8<<12){
+ /*The region is flat.*/
+ act=OC_MINI(act,5<<12);
+ }
+ _activity[bi]=act;
+ }
+ /*TODO: Once frag_intra_satd returns the signed DC value instead
+ of the absolute value, this should pass it through.*/
+ return 1;
+}
+
/*Compute the masking scales for the blocks in a macro block.
All masking is computed from the luma blocks.
We derive scaling factors for the chroma blocks from these, and use the same
@@ -1374,6 +1404,46 @@
return activity_sum;
}
+static void oc_mb_intra_satd(oc_enc_ctx *_enc,unsigned _mbi,
+ unsigned _frag_satd[12]){
+ const unsigned char *src;
+ const ptrdiff_t *frag_buf_offs;
+ const ptrdiff_t *sb_map;
+ const oc_mb_map_plane *mb_map;
+ const unsigned char *map_idxs;
+ int map_nidxs;
+ int mapii;
+ int mapi;
+ int ystride;
+ int pli;
+ int bi;
+ ptrdiff_t fragi;
+ ptrdiff_t frag_offs;
+ unsigned dc;
+ frag_buf_offs=_enc->state.frag_buf_offs;
+ sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3];
+ src=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_IO]];
+ ystride=_enc->state.ref_ystride[0];
+ for(bi=0;bi<4;bi++){
+ fragi=sb_map[bi];
+ frag_offs=frag_buf_offs[fragi];
+ _frag_satd[bi]=oc_enc_frag_intra_satd(_enc,&dc,src+frag_offs,ystride);
+ }
+ mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
+ map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+ map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+ /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/
+ ystride=_enc->state.ref_ystride[1];
+ for(mapii=4;mapii<map_nidxs;mapii++){
+ mapi=map_idxs[mapii];
+ pli=mapi>>2;
+ bi=mapi&3;
+ fragi=mb_map[pli][bi];
+ frag_offs=frag_buf_offs[fragi];
+ _frag_satd[mapii]=oc_enc_frag_intra_satd(_enc,&dc,src+frag_offs,ystride);
+ }
+}
+
/*Select luma block-level quantizers for a MB in an INTRA frame.*/
static unsigned oc_analyze_intra_mb_luma(oc_enc_ctx *_enc,
const oc_qii_state *_qs,unsigned _mbi,const unsigned _rd_scale[4]){
@@ -1612,6 +1682,7 @@
oc_sb_flags *sb_flags;
signed char *mb_modes;
const oc_mb_map *mb_maps;
+ const oc_sb_map *sb_maps;
oc_fragment *frags;
unsigned stripe_sby;
unsigned mcu_nvsbs;
@@ -1639,6 +1710,7 @@
sb_flags=_enc->state.sb_flags;
mb_modes=_enc->state.mb_modes;
mb_maps=(const oc_mb_map *)_enc->state.mb_maps;
+ sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
frags=_enc->state.frags;
notstart=0;
notdone=1;
@@ -1665,7 +1737,15 @@
ptrdiff_t fragi;
mbi=sbi<<2|quadi;
/*Activity masking.*/
- luma=oc_mb_activity(_enc,mbi,activity);
+ if(_enc->sp_level<OC_SP_LEVEL_FAST_ANALYSIS){
+ luma=oc_mb_activity(_enc,mbi,activity);
+ }
+ else{
+ unsigned intra_satd[12];
+ oc_mb_intra_satd(_enc,mbi,intra_satd);
+ luma=oc_mb_activity_fast(_enc,mbi,activity,intra_satd);
+ for(bi=0;bi<4;bi++)frags[sb_maps[mbi>>2][mbi&3][bi]].qii=0;
+ }
activity_sum+=oc_mb_masking(rd_scale,rd_iscale,
chroma_rd_scale,activity,activity_avg,luma,luma_avg);
luma_sum+=luma;
@@ -1676,7 +1756,8 @@
_enc->sp_level<OC_SP_LEVEL_NOMC&&_enc->keyframe_frequency_force>1){
oc_mcenc_search(_enc,mbi);
}
- oc_analyze_intra_mb_luma(_enc,pipe.qs+0,mbi,rd_scale);
+ if(_enc->sp_level<OC_SP_LEVEL_FAST_ANALYSIS)
+ oc_analyze_intra_mb_luma(_enc,pipe.qs+0,mbi,rd_scale);
mb_modes[mbi]=OC_MODE_INTRA;
oc_enc_mb_transform_quantize_intra_luma(_enc,&pipe,
mbi,rd_scale,rd_iscale);
@@ -1789,10 +1870,13 @@
satd=_frag_satd[bi];
*(ft+0)=*&fr;
oc_fr_code_block(ft+0);
- oc_qii_state_advance(qt+0,&qs,0);
cur_overhead=ft[0].bits-fr.bits;
best_rate=oc_dct_cost2(_enc,&best_ssd,0,0,_qti,satd)
- +(cur_overhead+qt[0].bits-qs.bits<<OC_BIT_SCALE);
+ +(cur_overhead<<OC_BIT_SCALE);
+ if(nqis>1){
+ oc_qii_state_advance(qt+0,&qs,0);
+ best_rate+=qt[0].bits-qs.bits<<OC_BIT_SCALE;
+ }
best_ssd=OC_RD_SCALE(best_ssd,_rd_scale[bi]);
best_cost=OC_MODE_RD_COST(ssd+best_ssd,rate+best_rate,lambda);
best_fri=0;
@@ -1910,7 +1994,7 @@
_modec->rate=rate;
}
-static void oc_skip_cost(oc_enc_ctx *_enc,oc_enc_pipeline_state *_pipe,
+static unsigned oc_skip_cost(oc_enc_ctx *_enc,oc_enc_pipeline_state *_pipe,
unsigned _mbi,const unsigned _rd_scale[4],unsigned _ssd[12]){
const unsigned char *src;
const unsigned char *ref;
@@ -1923,6 +2007,7 @@
oc_mv *mvs;
int map_nidxs;
unsigned uncoded_ssd;
+ unsigned total_ssd;
int mapii;
int mapi;
int pli;
@@ -1937,6 +2022,7 @@
frag_buf_offs=_enc->state.frag_buf_offs;
sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3];
mvs=_enc->mb_info[_mbi].block_mv;
+ total_ssd=0;
for(bi=0;bi<4;bi++){
fragi=sb_map[bi];
borderi=frags[fragi].borderi;
@@ -1956,6 +2042,7 @@
hard limit.*/
if(mvs[bi][0]!=0||mvs[bi][1]!=0)uncoded_ssd*=2;
_pipe->skip_ssd[0][fragi-_pipe->froffset[0]]=_ssd[bi]=uncoded_ssd;
+ total_ssd+=uncoded_ssd>>4;
}
mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
@@ -1986,50 +2073,13 @@
a hard limit*/
if(mvs[OC_FRAME_PREV][0]!=0||mvs[OC_FRAME_PREV][1]!=0)uncoded_ssd*=2;
_pipe->skip_ssd[pli][fragi-_pipe->froffset[pli]]=_ssd[mapii]=uncoded_ssd;
+ total_ssd+=uncoded_ssd>>4;
}
map_nidxs=(map_nidxs-4<<1)+4;
}
+ return total_ssd;
}
-static void oc_mb_intra_satd(oc_enc_ctx *_enc,unsigned _mbi,
- unsigned _frag_satd[12]){
- const unsigned char *src;
- const ptrdiff_t *frag_buf_offs;
- const ptrdiff_t *sb_map;
- const oc_mb_map_plane *mb_map;
- const unsigned char *map_idxs;
- int map_nidxs;
- int mapii;
- int mapi;
- int ystride;
- int pli;
- int bi;
- ptrdiff_t fragi;
- ptrdiff_t frag_offs;
- unsigned dc;
- frag_buf_offs=_enc->state.frag_buf_offs;
- sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3];
- src=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_IO]];
- ystride=_enc->state.ref_ystride[0];
- for(bi=0;bi<4;bi++){
- fragi=sb_map[bi];
- frag_offs=frag_buf_offs[fragi];
- _frag_satd[bi]=oc_enc_frag_intra_satd(_enc,&dc,src+frag_offs,ystride);
- }
- mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
- map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
- map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
- /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/
- ystride=_enc->state.ref_ystride[1];
- for(mapii=4;mapii<map_nidxs;mapii++){
- mapi=map_idxs[mapii];
- pli=mapi>>2;
- bi=mapi&3;
- fragi=mb_map[pli][bi];
- frag_offs=frag_buf_offs[fragi];
- _frag_satd[mapii]=oc_enc_frag_intra_satd(_enc,&dc,src+frag_offs,ystride);
- }
-}
static void oc_cost_intra(oc_enc_ctx *_enc,oc_mode_choice *_modec,
unsigned _mbi,const oc_fr_state *_fr,const oc_qii_state *_qs,
@@ -2297,6 +2347,8 @@
unsigned sbi_end;
int refi;
int pli;
+ int sp_level;
+ sp_level=_enc->sp_level;
set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt];
_enc->state.frame_type=OC_INTER_FRAME;
oc_mode_scheme_chooser_reset(&_enc->chooser);
@@ -2350,6 +2402,7 @@
unsigned skip_ssd[12];
unsigned intra_satd[12];
unsigned luma;
+ unsigned uncoded_ssd;
int mb_mv_bits_0;
int mb_gmv_bits_0;
int inter_mv_pref;
@@ -2362,15 +2415,21 @@
int bi;
ptrdiff_t fragi;
mbi=sbi<<2|quadi;
+ oc_mb_intra_satd(_enc,mbi,intra_satd);
/*Activity masking.*/
- luma=oc_mb_activity(_enc,mbi,activity);
+ if(sp_level<OC_SP_LEVEL_FAST_ANALYSIS){
+ luma=oc_mb_activity(_enc,mbi,activity);
+ }
+ else{
+ luma=oc_mb_activity_fast(_enc,mbi,activity,intra_satd);
+ }
luma_sum+=luma;
activity_sum+=oc_mb_masking(rd_scale,rd_iscale,
chroma_rd_scale,activity,activity_avg,luma,luma_avg);
/*Motion estimation:
We always do a basic 1MV search for all macroblocks, coded or not,
keyframe or not.*/
- if(!_recode&&_enc->sp_level<OC_SP_LEVEL_NOMC)oc_mcenc_search(_enc,mbi);
+ if(!_recode&&sp_level<OC_SP_LEVEL_NOMC)oc_mcenc_search(_enc,mbi);
dx=dy=0;
/*Find the block choice with the lowest estimated coding cost.
If a Cb or Cr block is coded but no Y' block from a macro block then
@@ -2384,7 +2443,6 @@
embs[mbi].analysis_mv[0],sizeof(embs[mbi].unref_mv));
embs[mbi].refined=0;
}
- oc_mb_intra_satd(_enc,mbi,intra_satd);
/*Estimate the cost of coding this MB in a keyframe.*/
if(_allow_keyframe){
oc_cost_intra(_enc,modes+OC_MODE_INTRA,mbi,
@@ -2396,10 +2454,10 @@
}
}
/*Estimate the cost in a delta frame for various modes.*/
- oc_skip_cost(_enc,&pipe,mbi,rd_scale,skip_ssd);
- oc_cost_inter_nomv(_enc,modes+OC_MODE_INTER_NOMV,mbi,
- OC_MODE_INTER_NOMV,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
- if(_enc->sp_level<OC_SP_LEVEL_NOMC){
+ uncoded_ssd=oc_skip_cost(_enc,&pipe,mbi,rd_scale,skip_ssd);
+ if(sp_level<OC_SP_LEVEL_NOMC){
+ oc_cost_inter_nomv(_enc,modes+OC_MODE_INTER_NOMV,mbi,
+ OC_MODE_INTER_NOMV,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
oc_cost_intra(_enc,modes+OC_MODE_INTRA,mbi,
pipe.fr+0,pipe.qs+0,intra_satd,skip_ssd,rd_scale);
mb_mv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_INTER_MV,mbi,
@@ -2409,8 +2467,6 @@
OC_MODE_INTER_MV_LAST,last_mv,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
oc_cost_inter(_enc,modes+OC_MODE_INTER_MV_LAST2,mbi,
OC_MODE_INTER_MV_LAST2,prior_mv,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
- oc_cost_inter4mv(_enc,modes+OC_MODE_INTER_MV_FOUR,mbi,
- embs[mbi].block_mv,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
oc_cost_inter_nomv(_enc,modes+OC_MODE_GOLDEN_NOMV,mbi,
OC_MODE_GOLDEN_NOMV,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
mb_gmv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_GOLDEN_MV,mbi,
@@ -2423,8 +2479,15 @@
We have to be careful to remember which ones we've refined so that
we don't refine it again if we re-encode this frame.*/
inter_mv_pref=_enc->lambda*3;
+ if(sp_level<OC_SP_LEVEL_FAST_ANALYSIS){
+ oc_cost_inter4mv(_enc,modes+OC_MODE_INTER_MV_FOUR,mbi,
+ embs[mbi].block_mv,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
+ }
+ else{
+ modes[OC_MODE_INTER_MV_FOUR].cost=UINT_MAX;
+ }
if(modes[OC_MODE_INTER_MV_FOUR].cost<modes[OC_MODE_INTER_MV].cost&&
- modes[OC_MODE_INTER_MV_FOUR].cost<modes[OC_MODE_GOLDEN_MV].cost){
+ modes[OC_MODE_INTER_MV_FOUR].cost<modes[OC_MODE_GOLDEN_MV].cost){
if(!(embs[mbi].refined&0x80)){
oc_mcenc_refine4mv(_enc,mbi);
embs[mbi].refined|=0x80;
@@ -2477,7 +2540,11 @@
mb_mode=OC_MODE_INTER_MV;
}
}
- else{
+ else {
+ oc_cost_inter_nomv(_enc,modes+OC_MODE_INTER_NOMV,mbi,
+ OC_MODE_INTER_NOMV,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
+ oc_cost_intra(_enc,modes+OC_MODE_INTRA,mbi,
+ pipe.fr+0,pipe.qs+0,intra_satd,skip_ssd,rd_scale);
oc_cost_inter_nomv(_enc,modes+OC_MODE_GOLDEN_NOMV,mbi,
OC_MODE_GOLDEN_NOMV,pipe.fr+0,pipe.qs+0,skip_ssd,rd_scale);
mb_mode=OC_MODE_INTER_NOMV;
Modified: experimental/derf/theora-ptalarbvorm/lib/encint.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/encint.h 2010-07-07 22:38:00 UTC (rev 17327)
+++ experimental/derf/theora-ptalarbvorm/lib/encint.h 2010-07-08 18:03:11 UTC (rev 17328)
@@ -49,13 +49,15 @@
#define OC_PACKET_READY (1)
/*All features enabled.*/
-#define OC_SP_LEVEL_SLOW (0)
+#define OC_SP_LEVEL_SLOW (0)
/*Enable early skip.*/
-#define OC_SP_LEVEL_EARLY_SKIP (1)
+#define OC_SP_LEVEL_EARLY_SKIP (1)
+/*Use analysis shortcuts, single quantizer, and faster tokenization.*/
+#define OC_SP_LEVEL_FAST_ANALYSIS (2)
/*Disable motion compensation.*/
-#define OC_SP_LEVEL_NOMC (2)
+#define OC_SP_LEVEL_NOMC (3)
/*Maximum valid speed level.*/
-#define OC_SP_LEVEL_MAX (2)
+#define OC_SP_LEVEL_MAX (3)
/*The number of extra bits of precision at which to store rate metrics.*/
@@ -522,6 +524,9 @@
int oc_enc_tokenize_ac(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi,
ogg_int16_t *_qdct,const ogg_uint16_t *_dequant,const ogg_int16_t *_dct,
int _zzi,oc_token_checkpoint **_stack,int _lambda,int _acmin);
+int oc_enc_tokenize_ac_fast(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi,
+ ogg_int16_t *_qdct,const ogg_uint16_t *_dequant,const ogg_int16_t *_dct,
+ int _zzi,oc_token_checkpoint **_stack,int _lambda,int _acmin);
void oc_enc_tokenlog_rollback(oc_enc_ctx *_enc,
const oc_token_checkpoint *_stack,int _n);
void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc,
Modified: experimental/derf/theora-ptalarbvorm/lib/rate.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/rate.c 2010-07-07 22:38:00 UTC (rev 17327)
+++ experimental/derf/theora-ptalarbvorm/lib/rate.c 2010-07-08 18:03:11 UTC (rev 17328)
@@ -190,7 +190,8 @@
This may need to be revised if the R-D cost estimation or qii flag
optimization strategies change.*/
nqis=1;
- if(lq<(OC_Q57(56)>>3)&&!_enc->vp3_compatible){
+ if(lq<(OC_Q57(56)>>3)&&!_enc->vp3_compatible&&
+ _enc->sp_level<OC_SP_LEVEL_FAST_ANALYSIS){
qi1=oc_enc_find_qi_for_target(_enc,_qti,OC_MAXI(qi-1,0),0,
lq+(OC_Q57(7)+5)/10);
if(qi1!=qi)_enc->state.qis[nqis++]=qi1;
Modified: experimental/derf/theora-ptalarbvorm/lib/tokenize.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/tokenize.c 2010-07-07 22:38:00 UTC (rev 17327)
+++ experimental/derf/theora-ptalarbvorm/lib/tokenize.c 2010-07-08 18:03:11 UTC (rev 17328)
@@ -80,7 +80,7 @@
21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,
21,21,21,21,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,
19,19,19,19,19,19,19,19,18,18,18,18,17,17,16,15,14,13,12,10,
- -1,
+ 7,
9,11,13,14,15,16,17,17,18,18,18,18,19,19,19,19,19,19,19,19,
20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,21,21,21,21,
21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,
@@ -171,7 +171,7 @@
25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
15, 14, 13, 12, 11, 10, 9, 8, 7, 6,
5, 4, 3, 2, 1, 1, 1, 1, 0, 0,
- -1,
+ 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
2, 3, 0, 1, 2, 3, 4, 5, 6, 7,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
@@ -746,6 +746,240 @@
return bits;
}
+/*Simplistic R/D tokenizer.
+ This could be made more accurate by using more sophisticated
+ rate predictions for zeros.
+ It could be made faster by switching from R/D decisions to static
+ lambda-derived rounding biases.*/
+int oc_enc_tokenize_ac_fast(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi,
+ ogg_int16_t *_qdct,const ogg_uint16_t *_dequant,const ogg_int16_t *_dct,
+ int _zzi,oc_token_checkpoint **_stack,int _lambda,int _acmin){
+ OC_ALIGN16(ogg_int16_t coef[64]);
+ const unsigned char *dct_fzig_zag;
+ ogg_uint16_t *eob_run;
+ oc_token_checkpoint *stack;
+ int huffi;
+ int zzi;
+ int zzj;
+ int zzk;
+ int total_bits;
+ int zr[4];
+ stack=*_stack;
+ total_bits=0;
+ /*The apparent bit-cost of coding a zero from observing the trellis
+ quantizer is pre-combined with lambda.
+ Four predictive cases are considered: The last optimized value is zero (+2) or
+ non-zero and the non-optimized value is zero (+1) or non-zero.*/
+ zr[0]=3*_lambda>>1;
+ zr[1]=_lambda;
+ zr[2]=4*_lambda;
+ zr[3]=7*_lambda>>1;
+ eob_run=_enc->eob_run[_pli];
+ dct_fzig_zag=_enc->state.opt_data.dct_fzig_zag;
+ huffi=_enc->huff_idxs[_enc->state.frame_type][1][_pli+1>>1];
+ memcpy(coef,_qdct,_zzi*sizeof(*coef));
+ for(zzj=zzi=1;zzj<_zzi&&!coef[zzj];zzj++);
+ while(zzj<_zzi){
+ int v;
+ int d0;
+ int d1;
+ int sign;
+ int k;
+ int eob;
+ int dq0;
+ int dq1;
+ int dd0;
+ int dd1;
+ int next_zero;
+ int eob_bits;
+ int dct_fzig_zzj;
+ dct_fzig_zzj=dct_fzig_zag[zzj];
+ v=_dct[OC_FZIG_ZAG[zzj]];
+ d0=coef[zzj];
+ eob=eob_run[zzi];
+ for(zzk=zzj+1;zzk<_zzi&&!coef[zzk];zzk++);
+ next_zero=zzk-zzj+62>>6;
+ dq0=d0*_dequant[zzj];
+ dd0=dq0-v;
+ dd0*=dd0;
+ sign=-(d0<0);
+ k=d0+sign^sign;
+ d1=(k-(zzj>_acmin))+sign^sign;
+ dq1=d1*_dequant[zzj];
+ dd1=dq1-v;
+ dd1*=dd1;
+ /*The cost of ending an eob run is included when the alternative is to
+ extend this eob run.
+ A per qi/zzi weight would probably be useful.
+ Including it in the overall tokenization cost was not helpful.
+ The same is true at the far end of the zero run plus token case.*/
+ if(eob>0&&d1==0&&zzk==_zzi){
+ eob_bits=oc_token_bits(_enc,huffi,zzi,OC_DCT_EOB1_TOKEN);
+ }
+ else eob_bits=0;
+ if(zzj==zzi){
+ /*No active zero run.*/
+ int best_token;
+ int best_eb;
+ int token;
+ int best_bits;
+ int bits;
+ int cost;
+ best_token=*(OC_DCT_VALUE_TOKEN_PTR+d0);
+ best_bits=oc_token_bits(_enc,huffi,zzi,best_token);
+ if(d1!=0){
+ token=*(OC_DCT_VALUE_TOKEN_PTR+d1);
+ bits=oc_token_bits(_enc,huffi,zzi,token);
+ cost=dd1+(bits+eob_bits)*_lambda;
+ }
+ else{
+ token=bits=0;
+ cost=dd1+zr[next_zero];
+ }
+ if((dd0+(best_bits+eob_bits)*_lambda)>cost){
+ _qdct[dct_fzig_zzj]=dq1;
+ if(d1==0){
+ zzj=zzk;
+ continue;
+ }
+ best_bits=bits;
+ best_token=token;
+ best_eb=*(OC_DCT_VALUE_EB_PTR+d1);
+ }
+ else{
+ best_eb=*(OC_DCT_VALUE_EB_PTR+d0);
+ _qdct[dct_fzig_zzj]=dq0;
+ }
+ oc_enc_tokenlog_checkpoint(_enc,stack++,_pli,zzi);
+ if(eob>0){
+ oc_enc_eob_log(_enc,_pli,zzi,eob);
+ eob_run[zzi]=0;
+ }
+ oc_enc_token_log(_enc,_pli,zzi,best_token,best_eb);
+ total_bits+=best_bits;
+ }
+ else{
+ int d;
+ int dc_reserve;
+ int best_token;
+ int best_eb;
+ int best_bits;
+ int best_cost;
+ int best_bits1;
+ int best_token1;
+ int best_eb1;
+ int zr_bits;
+ int eob2;
+ int eob_bits2;
+ int bits;
+ int token;
+ int nzeros;
+ nzeros=zzj-zzi;
+ dc_reserve=zzi+62>>6;
+ /*A zero run, followed by the value alone.*/
+ best_token=best_token1=OC_DCT_SHORT_ZRL_TOKEN+(nzeros+55>>6);
+ best_eb=best_eb1=nzeros-1;
+ eob2=eob_run[zzj];
+ if(eob2>0){
+ eob_bits2=oc_token_bits(_enc,huffi,zzj,OC_DCT_EOB1_TOKEN);
+ }
+ else eob_bits2=0;
+ zr_bits=oc_token_bits(_enc,huffi,zzi,best_token)+eob_bits2;
+ best_bits=zr_bits
+ +oc_token_bits(_enc,huffi,zzj,*(OC_DCT_VALUE_TOKEN_PTR+d0));
+ d=d0;
+
+ best_bits1=0;
+ if(d1!=0){
+ best_bits1=zr_bits
+ +oc_token_bits(_enc,huffi,zzj,*(OC_DCT_VALUE_TOKEN_PTR+d1));
+ }
+ if(nzeros<17+dc_reserve){
+ if(k<=2){
+ /*+/- 1 combo token.*/
+ token=OC_DCT_RUN_CAT1_TOKEN[nzeros-1];
+ bits=oc_token_bits(_enc,huffi,zzi,token);
+ if(k==2&&bits<=best_bits1){
+ best_bits1=bits;
+ best_token1=token;
+ best_eb1=OC_DCT_RUN_CAT1_EB[nzeros-1][-sign];
+ }
+ if(k==1&&bits<=best_bits){
+ best_bits=bits;
+ best_token=token;
+ best_eb=OC_DCT_RUN_CAT1_EB[nzeros-1][-sign];
+ }
+ }
+ if(nzeros<3+dc_reserve&&2<=k&&k<=4){
+ /*+/- 2/3 combo token.*/
+ token=OC_DCT_RUN_CAT2A+(nzeros>>1);
+ bits=oc_token_bits(_enc,huffi,zzi,token);
+ if(k==4&&bits<=best_bits1){
+ best_bits1=bits;
+ best_token1=token;
+ best_eb1=OC_DCT_RUN_CAT2_EB[nzeros-1][-sign][1];
+ }
+ if(k!=4&&bits<=best_bits){
+ best_bits=bits;
+ best_token=token;
+ best_eb=OC_DCT_RUN_CAT2_EB[nzeros-1][-sign][k-2];
+ }
+ }
+ }
+ best_cost=dd0+(best_bits+eob_bits)*_lambda;
+ if(d1==0&&(dd1+zr[2+next_zero])<=best_cost){
+ _qdct[dct_fzig_zzj]=0;
+ zzj=zzk;
+ continue;
+ }
+ if(d1!=0&&dd1+(best_bits1+eob_bits)*_lambda<best_cost){
+ best_bits=best_bits1;
+ best_token=best_token1;
+ best_eb=best_eb1;
+ d=d1;
+ _qdct[dct_fzig_zzj]=dq1;
+ }
+ else _qdct[dct_fzig_zzj]=dq0;
+ oc_enc_tokenlog_checkpoint(_enc,stack++,_pli,zzi);
+ if(eob){
+ oc_enc_eob_log(_enc,_pli,zzi,eob);
+ eob_run[zzi]=0;
+ }
+ oc_enc_token_log(_enc,_pli,zzi,best_token,best_eb);
+ /*If a zero run won vs. the combo token we still need to code this value.*/
+ if(best_token<=OC_DCT_ZRL_TOKEN){
+ oc_enc_tokenlog_checkpoint(_enc,stack++,_pli,zzj);
+ if(eob2){
+ oc_enc_eob_log(_enc,_pli,zzj,eob2);
+ /*The cost of any EOB run we disrupted is ignored because doing so
+ improved PSNR/SSIM by a small amount.*/
+ best_bits-=eob_bits2;
+ eob_run[zzj]=0;
+ }
+ oc_enc_token_log(_enc,_pli,zzj,*(OC_DCT_VALUE_TOKEN_PTR+d),*(OC_DCT_VALUE_EB_PTR+d));
+ }
+ total_bits+=best_bits;
+ }
+ zzi=zzj+1;
+ zzj=zzk;
+ }
+ /*Code an EOB run to complete this block.
+ The cost of the EOB run is not included in the total as explained in
+ in a comment in the trellis tokenizer above.*/
+ if(zzi<64){
+ int eob;
+ eob=eob_run[zzi]+1;
+ oc_enc_tokenlog_checkpoint(_enc,stack++,_pli,zzi);
+ if(eob>=4095){
+ oc_enc_token_log(_enc,_pli,zzi,OC_DCT_REPEAT_RUN3_TOKEN,eob);
+ eob=0;
+ }
+ eob_run[zzi]=eob;
+ }
+ *_stack=stack;
+ return total_bits;
+}
+
void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc,
int _pli,int _fragy0,int _frag_yend){
const oc_fragment_plane *fplane;
More information about the commits
mailing list