[xiph-commits] r15732 - branches/theora-thusnelda/lib/dec
tterribe at svn.xiph.org
tterribe at svn.xiph.org
Mon Mar 2 17:23:55 PST 2009
Author: tterribe
Date: 2009-03-02 17:23:55 -0800 (Mon, 02 Mar 2009)
New Revision: 15732
Modified:
branches/theora-thusnelda/lib/dec/state.c
Log:
Port Monty's faster oc_state_get_mv_offsets() back to the decoder, and fix an
erroneous duplicated statement.
Modified: branches/theora-thusnelda/lib/dec/state.c
===================================================================
--- branches/theora-thusnelda/lib/dec/state.c 2009-03-03 01:21:06 UTC (rev 15731)
+++ branches/theora-thusnelda/lib/dec/state.c 2009-03-03 01:23:55 UTC (rev 15732)
@@ -527,7 +527,6 @@
void oc_state_vtable_init_c(oc_theora_state *_state){
- _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c;
_state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c;
_state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c;
_state->opt_vtable.state_frag_copy=oc_state_frag_copy_c;
@@ -736,10 +735,6 @@
Return: The number of offsets returned: 1 or 2.*/
int oc_state_get_mv_offsets(oc_theora_state *_state,int _offsets[2],
int _dx,int _dy,int _ystride,int _pli){
- int xprec;
- int yprec;
- int xfrac;
- int yfrac;
/*Here is a brief description of how Theora handles motion vectors:
Motion vector components are specified to half-pixel accuracy in
undecimated directions of each plane, and quarter-pixel accuracy in
@@ -754,36 +749,89 @@
non-zero fractional parts.
The second offset is computed by dividing (not shifting) by the
appropriate amount, always truncating _away_ from zero.*/
+#if 0
+ /*This version of the code doesn't use any tables, but is slower.*/
+ int xprec;
+ int yprec;
+ int xfrac;
+ int yfrac;
+ int offs;
/*These two variables decide whether we are in half- or quarter-pixel
precision in each component.*/
xprec=1+(!(_state->info.pixel_fmt&1)&&_pli);
yprec=1+(!(_state->info.pixel_fmt&2)&&_pli);
- /*These two variables are either 0 if all the fractional bits are 0 or 1 if
- any of them are non-zero.*/
- xfrac=!!(_dx&(1<<xprec)-1);
- yfrac=!!(_dy&(1<<yprec)-1);
- _offsets[0]=(_dx>>xprec)+(_dy>>yprec)*_ystride;
+ /*These two variables are either 0 if all the fractional bits are zero or -1
+ if any of them are non-zero.*/
+ xfrac=OC_SIGNMASK(-(_dx&(xprec|1)));
+ yfrac=OC_SIGNMASK(-(_dy&(yprec|1)));
+ offs=(_dx>>xprec)+(_dy>>yprec)*_ystride;
if(xfrac||yfrac){
- /*This branchless code is equivalent to:
- if(_dx<0)_offests[0]=-(-_dx>>xprec);
- else _offsets[0]=(_dx>>xprec);
- if(_dy<0)_offsets[0]-=(-_dy>>yprec)*_ystride;
- else _offsets[0]+=(_dy>>yprec)*_ystride;
- _offsets[1]=_offsets[0];
- if(xfrac){
- if(_dx<0)_offsets[1]++;
- else _offsets[1]--;
+ int xmask;
+ int ymask;
+ xmask=OC_SIGNMASK(_dx);
+ ymask=OC_SIGNMASK(_dy);
+ yfrac&=_ystride;
+ _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask);
+ _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask);
+ return 2;
+ }
+ else{
+ _offsets[0]=offs;
+ return 1;
+ }
+#else
+ /*Using tables simplifies the code, and there's enough arithmetic to hide the
+ latencies of the memory references.*/
+ static const signed char OC_MVMAP[2][64]={
+ {
+ -15,-15,-14,-14,-13,-13,-12,-12,-11,-11,-10,-10, -9, -9, -8,
+ -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1, 0,
+ 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
+ 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15
+ },
+ {
+ -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, -5, -5, -4, -4, -4,
+ -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1, 0, 0, 0,
+ 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7
}
- if(yfrac){
- if(_dy<0)_offsets[1]+=_ystride;
- else _offsets[1]-=_ystride;
- }*/
- _offsets[1]=_offsets[0];
- _offsets[_dx>=0]+=xfrac;
- _offsets[_dy>=0]+=_ystride&-yfrac;
+ };
+ static const signed char OC_MVMAP2[2][64]={
+ {
+ -1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1,
+ 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1, 0,-1,
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+ },
+ {
+ -1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1,
+ 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1, 0,-1,-1,-1,
+ 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
+ 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1
+ }
+ };
+ int qpx;
+ int qpy;
+ int mx;
+ int my;
+ int mx2;
+ int my2;
+ int offs;
+ qpy=!(_state->info.pixel_format&2)&&_pli;
+ my=OC_MVMAP[qpy][_dy+31];
+ my2=OC_MVMAP2[qpy][_dy+31];
+ qpx=!(_state->info.pixel_format&1)&&_pli;
+ mx=OC_MVMAP[qpx][_dx+31];
+ mx2=OC_MVMAP2[qpx][_dx+31];
+ offs=my*_ystride+mx;
+ if(mx2||my2){
+ _offsets[1]=offs+my2*_ystride+mx2;
+ _offsets[0]=offs;
return 2;
}
- else return 1;
+ _offsets[0]=offs;
+ return 1;
+#endif
}
void oc_state_frag_recon(oc_theora_state *_state,oc_fragment *_frag,
More information about the commits mailing list