[xiph-commits] r17237 - experimental/derf/theora-ptalarbvorm/lib

tterribe at svn.xiph.org tterribe at svn.xiph.org
Fri May 21 20:20:49 PDT 2010


Author: tterribe
Date: 2010-05-21 20:20:49 -0700 (Fri, 21 May 2010)
New Revision: 17237

Modified:
   experimental/derf/theora-ptalarbvorm/lib/decode.c
Log:
Replace most of the bit parsing for run lengths, MB modes, and MVs with calls
 to oc_huff_token_decode() using statically constructed tables.
This means there's just a single function to optimize that covers most of the
 bit reading, and eliminates the virtual function calls used for MB modes and
 MVs.
This gives a small (<1%) speed-up on x86-32, mostly because the decoder spent
 a fairly small amount of time in these functions to begin with.


Modified: experimental/derf/theora-ptalarbvorm/lib/decode.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/decode.c	2010-05-22 02:52:06 UTC (rev 17236)
+++ experimental/derf/theora-ptalarbvorm/lib/decode.c	2010-05-22 03:20:49 UTC (rev 17237)
@@ -129,7 +129,7 @@
    is not yet available everywhere; this should be equivalent.*/
 #define OC_DCT_EOB_FINISH (~(size_t)0>>1)
 
-/*The location of the (6) run legth bits in the code word.
+/*The location of the (6) run length bits in the code word.
   These are placed at index 0 and given 8 bits (even though 6 would suffice)
    because it may be faster to extract the lower byte on some platforms.*/
 #define OC_DCT_CW_RLEN_SHIFT (0)
@@ -297,8 +297,6 @@
 
 
 static int oc_sb_run_unpack(oc_pack_buf *_opb){
-  long bits;
-  int ret;
   /*Coding scheme:
        Codeword            Run Length
      0                       1
@@ -308,32 +306,26 @@
      11110xxx                10-17
      111110xxxx              18-33
      111111xxxxxxxxxxxx      34-4129*/
-  bits=oc_pack_read1(_opb);
-  if(bits==0)return 1;
-  bits=oc_pack_read(_opb,2);
-  if((bits&2)==0)return 2+(int)bits;
-  else if((bits&1)==0){
-    bits=oc_pack_read1(_opb);
-    return 4+(int)bits;
+  static const ogg_int16_t OC_SB_RUN_TREE[22]={
+    4,
+     -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
+     -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
+     -(3<<8|2),-(3<<8|2),-(3<<8|3),-(3<<8|3),
+     -(4<<8|4),-(4<<8|5),-(4<<8|2<<4|6-6),17,
+      2,
+       -(2<<8|2<<4|10-6),-(2<<8|2<<4|14-6),-(2<<8|4<<4|18-6),-(2<<8|12<<4|34-6)
+  };
+  int ret;
+  ret=oc_huff_token_decode(_opb,OC_SB_RUN_TREE);
+  if(ret>=0x10){
+    int offs;
+    offs=ret&0x1F;
+    ret=6+offs+(int)oc_pack_read(_opb,ret-offs>>4);
   }
-  bits=oc_pack_read(_opb,3);
-  if((bits&4)==0)return 6+(int)bits;
-  else if((bits&2)==0){
-    ret=10+((bits&1)<<2);
-    bits=oc_pack_read(_opb,2);
-    return ret+(int)bits;
-  }
-  else if((bits&1)==0){
-    bits=oc_pack_read(_opb,4);
-    return 18+(int)bits;
-  }
-  bits=oc_pack_read(_opb,12);
-  return 34+(int)bits;
+  return ret;
 }
 
 static int oc_block_run_unpack(oc_pack_buf *_opb){
-  long bits;
-  long bits2;
   /*Coding scheme:
      Codeword             Run Length
      0x                      1-2
@@ -342,22 +334,26 @@
      1110xx                  7-10
      11110xx                 11-14
      11111xxxx               15-30*/
-  bits=oc_pack_read(_opb,2);
-  if((bits&2)==0)return 1+(int)bits;
-  else if((bits&1)==0){
-    bits=oc_pack_read1(_opb);
-    return 3+(int)bits;
-  }
-  bits=oc_pack_read(_opb,2);
-  if((bits&2)==0)return 5+(int)bits;
-  else if((bits&1)==0){
-    bits=oc_pack_read(_opb,2);
-    return 7+(int)bits;
-  }
-  bits=oc_pack_read(_opb,3);
-  if((bits&4)==0)return 11+bits;
-  bits2=oc_pack_read(_opb,2);
-  return 15+((bits&3)<<2)+bits2;
+  static const ogg_int16_t OC_BLOCK_RUN_TREE[61]={
+    5,
+     -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
+     -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
+     -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
+     -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
+     -(3<<8|3),-(3<<8|3),-(3<<8|3),-(3<<8|3),
+     -(3<<8|4),-(3<<8|4),-(3<<8|4),-(3<<8|4),
+     -(4<<8|5),-(4<<8|5),-(4<<8|6),-(4<<8|6),
+     33,       36,       39,       44,
+      1,-(1<<8|7),-(1<<8|8),
+      1,-(1<<8|9),-(1<<8|10),
+      2,-(2<<8|11),-(2<<8|12),-(2<<8|13),-(2<<8|14),
+      4,
+       -(4<<8|15),-(4<<8|16),-(4<<8|17),-(4<<8|18),
+       -(4<<8|19),-(4<<8|20),-(4<<8|21),-(4<<8|22),
+       -(4<<8|23),-(4<<8|24),-(4<<8|25),-(4<<8|26),
+       -(4<<8|27),-(4<<8|28),-(4<<8|29),-(4<<8|30)
+  };
+  return oc_huff_token_decode(_opb,OC_BLOCK_RUN_TREE);
 }
 
 
@@ -659,25 +655,33 @@
 }
 
 
+/*Coding scheme:
+   Codeword            Mode Index
+   0                       0
+   10                      1
+   110                     2
+   1110                    3
+   11110                   4
+   111110                  5
+   1111110                 6
+   1111111                 7*/
+static const ogg_int16_t OC_VLC_MODE_TREE[26]={
+  4,
+   -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
+   -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
+   -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
+   -(3<<8|2),-(3<<8|2),-(4<<8|3),17,
+    3,
+     -(1<<8|4),-(1<<8|4),-(1<<8|4),-(1<<8|4),
+     -(2<<8|5),-(2<<8|5),-(3<<8|6),-(3<<8|7)
+};
 
-typedef int (*oc_mode_unpack_func)(oc_pack_buf *_opb);
+static const ogg_int16_t OC_CLC_MODE_TREE[9]={
+  3,
+   -(3<<8|0),-(3<<8|1),-(3<<8|2),-(3<<8|3),
+   -(3<<8|4),-(3<<8|5),-(3<<8|6),-(3<<8|7)
+};
 
-static int oc_vlc_mode_unpack(oc_pack_buf *_opb){
-  long val;
-  int  i;
-  for(i=0;i<7;i++){
-    val=oc_pack_read1(_opb);
-    if(!val)break;
-  }
-  return i;
-}
-
-static int oc_clc_mode_unpack(oc_pack_buf *_opb){
-  long val;
-  val=oc_pack_read(_opb,3);
-  return (int)val;
-}
-
 /*Unpacks the list of macro block modes for INTER frames.*/
 static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
   const oc_mb_map     *mb_maps;
@@ -685,7 +689,7 @@
   const oc_fragment   *frags;
   const unsigned char *alphabet;
   unsigned char        scheme0_alphabet[8];
-  oc_mode_unpack_func  mode_unpack;
+  const ogg_int16_t   *mode_tree;
   size_t               nmbs;
   size_t               mbi;
   long                 val;
@@ -707,8 +711,7 @@
     alphabet=scheme0_alphabet;
   }
   else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
-  if(mode_scheme==7)mode_unpack=oc_clc_mode_unpack;
-  else mode_unpack=oc_vlc_mode_unpack;
+  mode_tree=mode_scheme==7?OC_CLC_MODE_TREE:OC_VLC_MODE_TREE;
   mb_modes=_dec->state.mb_modes;
   mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
   nmbs=_dec->state.nmbs;
@@ -719,7 +722,9 @@
       /*Check for a coded luma block in this macro block.*/
       for(bi=0;bi<4&&!frags[mb_maps[mbi][0][bi]].coded;bi++);
       /*We found one, decode a mode.*/
-      if(bi<4)mb_modes[mbi]=alphabet[(*mode_unpack)(&_dec->opb)];
+      if(bi<4){
+        mb_modes[mbi]=alphabet[oc_huff_token_decode(&_dec->opb,mode_tree)];
+      }
       /*There were none: INTER_NOMV is forced.*/
       else mb_modes[mbi]=OC_MODE_INTER_NOMV;
     }
@@ -728,44 +733,62 @@
 
 
 
-typedef int (*oc_mv_comp_unpack_func)(oc_pack_buf *_opb);
+static const ogg_int16_t OC_VLC_MV_COMP_TREE[101]={
+  5,
+   -(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),
+   -(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),
+   -(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),
+   -(4<<8|32+2),-(4<<8|32+2),-(4<<8|32-2),-(4<<8|32-2),
+   -(4<<8|32+3),-(4<<8|32+3),-(4<<8|32-3),-(4<<8|32-3),
+   33,          36,          39,          42,
+   45,          50,          55,          60,
+   65,          74,          83,          92,
+    1,-(1<<8|32+4),-(1<<8|32-4),
+    1,-(1<<8|32+5),-(1<<8|32-5),
+    1,-(1<<8|32+6),-(1<<8|32-6),
+    1,-(1<<8|32+7),-(1<<8|32-7),
+    2,-(2<<8|32+8),-(2<<8|32-8),-(2<<8|32+9),-(2<<8|32-9),
+    2,-(2<<8|32+10),-(2<<8|32-10),-(2<<8|32+11),-(2<<8|32-11),
+    2,-(2<<8|32+12),-(2<<8|32-12),-(2<<8|32+13),-(2<<8|32-13),
+    2,-(2<<8|32+14),-(2<<8|32-14),-(2<<8|32+15),-(2<<8|32-15),
+    3,
+     -(3<<8|32+16),-(3<<8|32-16),-(3<<8|32+17),-(3<<8|32-17),
+     -(3<<8|32+18),-(3<<8|32-18),-(3<<8|32+19),-(3<<8|32-19),
+    3,
+     -(3<<8|32+20),-(3<<8|32-20),-(3<<8|32+21),-(3<<8|32-21),
+     -(3<<8|32+22),-(3<<8|32-22),-(3<<8|32+23),-(3<<8|32-23),
+    3,
+     -(3<<8|32+24),-(3<<8|32-24),-(3<<8|32+25),-(3<<8|32-25),
+     -(3<<8|32+26),-(3<<8|32-26),-(3<<8|32+27),-(3<<8|32-27),
+    3,
+     -(3<<8|32+28),-(3<<8|32-28),-(3<<8|32+29),-(3<<8|32-29),
+     -(3<<8|32+30),-(3<<8|32-30),-(3<<8|32+31),-(3<<8|32-31)
+};
 
-static int oc_vlc_mv_comp_unpack(oc_pack_buf *_opb){
-  long bits;
-  int  mask;
-  int  mv;
-  bits=oc_pack_read(_opb,3);
-  switch(bits){
-    case  0:return 0;
-    case  1:return 1;
-    case  2:return -1;
-    case  3:
-    case  4:{
-      mv=(int)(bits-1);
-      bits=oc_pack_read1(_opb);
-    }break;
-    /*case  5:
-    case  6:
-    case  7:*/
-    default:{
-      mv=1<<bits-3;
-      bits=oc_pack_read(_opb,bits-2);
-      mv+=(int)(bits>>1);
-      bits&=1;
-    }break;
-  }
-  mask=-(int)bits;
-  return mv+mask^mask;
-}
+static const ogg_int16_t OC_CLC_MV_COMP_TREE[65]={
+  6,
+   -(6<<8|32 +0),-(6<<8|32 -0),-(6<<8|32 +1),-(6<<8|32 -1),
+   -(6<<8|32 +2),-(6<<8|32 -2),-(6<<8|32 +3),-(6<<8|32 -3),
+   -(6<<8|32 +4),-(6<<8|32 -4),-(6<<8|32 +5),-(6<<8|32 -5),
+   -(6<<8|32 +6),-(6<<8|32 -6),-(6<<8|32 +7),-(6<<8|32 -7),
+   -(6<<8|32 +8),-(6<<8|32 -8),-(6<<8|32 +9),-(6<<8|32 -9),
+   -(6<<8|32+10),-(6<<8|32-10),-(6<<8|32+11),-(6<<8|32-11),
+   -(6<<8|32+12),-(6<<8|32-12),-(6<<8|32+13),-(6<<8|32-13),
+   -(6<<8|32+14),-(6<<8|32-14),-(6<<8|32+15),-(6<<8|32-15),
+   -(6<<8|32+16),-(6<<8|32-16),-(6<<8|32+17),-(6<<8|32-17),
+   -(6<<8|32+18),-(6<<8|32-18),-(6<<8|32+19),-(6<<8|32-19),
+   -(6<<8|32+20),-(6<<8|32-20),-(6<<8|32+21),-(6<<8|32-21),
+   -(6<<8|32+22),-(6<<8|32-22),-(6<<8|32+23),-(6<<8|32-23),
+   -(6<<8|32+24),-(6<<8|32-24),-(6<<8|32+25),-(6<<8|32-25),
+   -(6<<8|32+26),-(6<<8|32-26),-(6<<8|32+27),-(6<<8|32-27),
+   -(6<<8|32+28),-(6<<8|32-28),-(6<<8|32+29),-(6<<8|32-29),
+   -(6<<8|32+30),-(6<<8|32-30),-(6<<8|32+31),-(6<<8|32-31)
+};
 
-static int oc_clc_mv_comp_unpack(oc_pack_buf *_opb){
-  long bits;
-  int  mask;
-  int  mv;
-  bits=oc_pack_read(_opb,6);
-  mv=(int)bits>>1;
-  mask=-((int)bits&1);
-  return mv+mask^mask;
+
+static void oc_mv_unpack(oc_pack_buf *_opb,const ogg_int16_t *_tree,oc_mv _mv){
+  _mv[0]=(signed char)(oc_huff_token_decode(_opb,_tree)-32);
+  _mv[1]=(signed char)(oc_huff_token_decode(_opb,_tree)-32);
 }
 
 /*Unpacks the list of motion vectors for INTER frames, and propagtes the macro
@@ -774,7 +797,7 @@
   const oc_mb_map        *mb_maps;
   const signed char      *mb_modes;
   oc_set_chroma_mvs_func  set_chroma_mvs;
-  oc_mv_comp_unpack_func  mv_comp_unpack;
+  const ogg_int16_t      *mv_comp_tree;
   oc_fragment            *frags;
   oc_mv                  *frag_mvs;
   const unsigned char    *map_idxs;
@@ -786,7 +809,7 @@
   long                    val;
   set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
   val=oc_pack_read1(&_dec->opb);
-  mv_comp_unpack=val?oc_clc_mv_comp_unpack:oc_vlc_mv_comp_unpack;
+  mv_comp_tree=val?OC_CLC_MV_COMP_TREE:OC_VLC_MV_COMP_TREE;
   map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
   map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
   memset(last_mv,0,sizeof(last_mv));
@@ -826,8 +849,7 @@
               codedi++;
               fragi=mb_maps[mbi][0][bi];
               frags[fragi].mb_mode=mb_mode;
-              lbmvs[bi][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
-              lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+              oc_mv_unpack(&_dec->opb,mv_comp_tree,lbmvs[bi]);
               memcpy(frag_mvs[fragi],lbmvs[bi],sizeof(lbmvs[bi]));
             }
             else lbmvs[bi][0]=lbmvs[bi][1]=0;
@@ -849,8 +871,8 @@
         }break;
         case OC_MODE_INTER_MV:{
           memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
-          mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
-          mbmv[1]=last_mv[0][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+          oc_mv_unpack(&_dec->opb,mv_comp_tree,mbmv);
+          memcpy(last_mv[0],mbmv,sizeof(last_mv[0]));
         }break;
         case OC_MODE_INTER_MV_LAST:memcpy(mbmv,last_mv[0],sizeof(mbmv));break;
         case OC_MODE_INTER_MV_LAST2:{
@@ -859,8 +881,7 @@
           memcpy(last_mv[0],mbmv,sizeof(last_mv[0]));
         }break;
         case OC_MODE_GOLDEN_MV:{
-          mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
-          mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
+          oc_mv_unpack(&_dec->opb,mv_comp_tree,mbmv);
         }break;
         default:memset(mbmv,0,sizeof(mbmv));break;
       }



More information about the commits mailing list