[xiph-commits] r17257 - in branches/theora-gumboot/lib: . x86

gumboot at svn.xiph.org gumboot at svn.xiph.org
Sat May 29 16:46:16 PDT 2010


Author: gumboot
Date: 2010-05-29 16:46:16 -0700 (Sat, 29 May 2010)
New Revision: 17257

Modified:
   branches/theora-gumboot/lib/decode.c
   branches/theora-gumboot/lib/state.c
   branches/theora-gumboot/lib/x86/mmxstate.c
Log:
Get mb_modes and motion vectors into a more easily indexed format so that chroma can access the same memory as luma.  Push other things forward so that the old tables needn't be populated.  Unroll more code for greater ugliness.


Modified: branches/theora-gumboot/lib/decode.c
===================================================================
--- branches/theora-gumboot/lib/decode.c	2010-05-29 23:38:46 UTC (rev 17256)
+++ branches/theora-gumboot/lib/decode.c	2010-05-29 23:46:16 UTC (rev 17257)
@@ -709,7 +709,6 @@
   int                  mode_scheme;
   ogg_uint16_t        *sb_masks;
   signed char         *rmb_modes[4];
-  signed char         *mb_modes;
   int                  sbx,sby;
   val=oc_pack_read(&_dec->opb,3);
   mode_scheme=(int)val;
@@ -736,17 +735,14 @@
   rmb_modes[1]=rmb_modes[0]+nhmbs;
   rmb_modes[2]=rmb_modes[1]+1;
   rmb_modes[3]=rmb_modes[0]+1;
-  mb_modes=_dec->state.mb_modes;
   for (mbi=sby=0;sby<_dec->state.fplanes[0].nsbs;sby+=nhsbs,mbi+=nhmbs)
     for (sbx=0;sbx<nhsbs;sbx++,mbi+=2){
       int i,mask=sb_masks[sby+sbx];
-      for (i=0;i<4;i++,mask>>=4){
+      for (i=0;i<4;i++,mask>>=4)
         if (rmb_modes[i][mbi]!=OC_MODE_INVALID)
           rmb_modes[i][mbi]=(mask&15)
            ?alphabet[oc_huff_token_decode(&_dec->opb,mode_tree)]
            :OC_MODE_INTER_NOMV;
-        *mb_modes++=rmb_modes[i][mbi];/*legacy*/
-      }
     }
 }
 
@@ -814,17 +810,17 @@
    block modes and motion vectors to the individual fragments.*/
 static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
   const oc_mb_map        *mb_maps;
-  const signed char      *mb_modes;
+  oc_mv                 (*rmb_mvs[4])[4];
+  signed char            *rmb_modes[4];
   oc_set_chroma_mvs_func  set_chroma_mvs;
   const ogg_int16_t      *mv_comp_tree;
   oc_fragment            *frags;
-  oc_mv                  *frag_mvs;
   const unsigned char    *map_idxs;
   int                     map_nidxs;
   oc_mv                   last_mv[2];
-  oc_mv                   cbmvs[4];
-  size_t                  nmbs;
-  size_t                  mbi;
+  size_t                  nhmbs;
+  int                     sbx,sby;
+  size_t                  old_mbi,mbi,i;
   long                    val;
   set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
   val=oc_pack_read1(&_dec->opb);
@@ -833,89 +829,104 @@
   map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
   memset(last_mv,0,sizeof(last_mv));
   frags=_dec->state.frags;
-  frag_mvs=_dec->state.frag_mvs;
   mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
-  mb_modes=_dec->state.mb_modes;
-  nmbs=_dec->state.nmbs;
-  for(mbi=0;mbi<nmbs;mbi++){
-    int          mb_mode;
-    mb_mode=mb_modes[mbi];
-    if(mb_mode!=OC_MODE_INVALID){
-      oc_mv        mbmv;
-      ptrdiff_t    fragi;
-      int          coded[13];
-      int          codedi;
-      int          ncoded;
-      int          mapi;
-      int          mapii;
-      /*Search for at least one coded fragment.*/
-      ncoded=mapii=0;
-      do{
-        mapi=map_idxs[mapii];
-        fragi=mb_maps[mbi][mapi>>2][mapi&3];
-        if(frags[fragi].coded)coded[ncoded++]=mapi;
-      }
-      while(++mapii<map_nidxs);
-      if(ncoded<=0)continue;
-      switch(mb_mode){
-        case OC_MODE_INTER_MV_FOUR:{
-          oc_mv       lbmvs[4];
-          int         bi;
-          /*Mark the tail of the list, so we don't accidentally go past it.*/
-          coded[ncoded]=-1;
-          for(bi=codedi=0;bi<4;bi++){
-            if(coded[codedi]==bi){
-              codedi++;
-              fragi=mb_maps[mbi][0][bi];
-              frags[fragi].mb_mode=mb_mode;
-              oc_mv_unpack(&_dec->opb,mv_comp_tree,lbmvs[bi]);
-              memcpy(frag_mvs[fragi],lbmvs[bi],sizeof(lbmvs[bi]));
-            }
-            else lbmvs[bi][0]=lbmvs[bi][1]=0;
+  nhmbs=_dec->state.nhmbs;
+  rmb_modes[0]=_dec->state.raster_mb_modes;
+  rmb_modes[1]=rmb_modes[0]+nhmbs;
+  rmb_modes[2]=rmb_modes[1]+1;
+  rmb_modes[3]=rmb_modes[0]+1;
+  rmb_mvs[0]=_dec->state.raster_mb_mvs;
+  rmb_mvs[1]=rmb_mvs[0]+nhmbs;
+  rmb_mvs[2]=rmb_mvs[1]+1;
+  rmb_mvs[3]=rmb_mvs[0]+1;
+  old_mbi=0;
+  for (mbi=sby=0;sby<_dec->state.fplanes[0].nsbs;sby+=_dec->state.fplanes[0].nhsbs,mbi+=nhmbs)
+    for (sbx=0;sbx<_dec->state.fplanes[0].nhsbs;sbx++,mbi+=2){
+//      int mask=_dec->state.sb_masks[sby+sbx];
+//      int umask=_dec->state.sb_masks[_dec->state.fplanes[1].sboffset+sby_uv+(sbx>>XDECI)]&SOMETHINGCLEVER;
+//      int vmask=_dec->state.sb_masks[_dec->state.fplanes[2].sboffset+sby_uv+(sbx>>XDECI)]&SOMETHINGCLEVER;
+      /* TODO: use the superblock masks directly rather than iterating through
+       * frags[].  Also use these bitmaps to update frags[].mb_mode in whatever
+       * order is convenient.  This should be faster (if there's enough
+       * cleverness in the implementation) and it will eliminate use of
+       * mb_maps[].
+       */
+
+      for (i=0;i<4;i++,old_mbi++){
+        int          mb_mode;
+        mb_mode=rmb_modes[i][mbi];
+        if(mb_mode!=OC_MODE_INVALID){
+          oc_mv       *mbmv;
+          ptrdiff_t    fragi;
+          int          coded[13];
+          int          codedi;
+          int          ncoded;
+          int          mapi;
+          int          mapii;
+          /*Search for at least one coded fragment.*/
+          ncoded=mapii=0;
+          do{
+            mapi=map_idxs[mapii];
+            fragi=mb_maps[old_mbi][mapi>>2][mapi&3];
+            if(frags[fragi].coded)coded[ncoded++]=mapi;
           }
-          if(codedi>0){
-            memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
-            memcpy(last_mv[0],lbmvs[coded[codedi-1]],sizeof(last_mv[0]));
+          while(++mapii<map_nidxs);
+          if(ncoded<=0)continue;
+          mbmv=rmb_mvs[i][mbi];
+          switch(mb_mode){
+            case OC_MODE_INTER_MV_FOUR:{
+              int         bi;
+              /*Mark the tail of the list, so we don't accidentally go past it.*/
+              coded[ncoded]=-1;
+              for(bi=codedi=0;bi<4;bi++){
+                if(coded[codedi]==bi){
+                  codedi++;
+                  fragi=mb_maps[old_mbi][0][bi];
+                  frags[fragi].mb_mode=mb_mode;
+                  oc_mv_unpack(&_dec->opb,mv_comp_tree,mbmv[bi]);
+                }
+                else mbmv[bi][0]=mbmv[bi][1]=0;
+              }
+              if(codedi>0){
+                memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
+                memcpy(last_mv[0],mbmv[coded[codedi-1]],sizeof(last_mv[0]));
+              }
+              if(codedi<ncoded){
+                for(;codedi<ncoded;codedi++){
+                  mapi=coded[codedi];
+                  bi=mapi&3;
+                  fragi=mb_maps[old_mbi][mapi>>2][bi];
+                  frags[fragi].mb_mode=mb_mode;
+                }
+              }
+            }break;
+            case OC_MODE_INTER_MV:
+              memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
+              oc_mv_unpack(&_dec->opb,mv_comp_tree,last_mv[0]);
+              /*@FALLTHROUGH@*/
+            case OC_MODE_INTER_MV_LAST:memcpy(*mbmv,last_mv[0],sizeof(*mbmv));break;
+            case OC_MODE_INTER_MV_LAST2:{
+              memcpy(*mbmv,last_mv[1],sizeof(*mbmv));
+              memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
+              memcpy(last_mv[0],*mbmv,sizeof(last_mv[0]));
+            }break;
+            case OC_MODE_GOLDEN_MV:{
+              oc_mv_unpack(&_dec->opb,mv_comp_tree,*mbmv);
+            }break;
+            default:memset(*mbmv,0,sizeof(*mbmv));break;
           }
-          if(codedi<ncoded){
-            (*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs);
-            for(;codedi<ncoded;codedi++){
+          /*4MV mode fills in the fragments itself.
+            For all other modes we can use this common code.*/
+          if(mb_mode!=OC_MODE_INTER_MV_FOUR){
+            for(codedi=0;codedi<ncoded;codedi++){
               mapi=coded[codedi];
-              bi=mapi&3;
-              fragi=mb_maps[mbi][mapi>>2][bi];
+              fragi=mb_maps[old_mbi][mapi>>2][mapi&3];
               frags[fragi].mb_mode=mb_mode;
-              memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(cbmvs[bi]));
             }
           }
-        }break;
-        case OC_MODE_INTER_MV:{
-          memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
-          oc_mv_unpack(&_dec->opb,mv_comp_tree,mbmv);
-          memcpy(last_mv[0],mbmv,sizeof(last_mv[0]));
-        }break;
-        case OC_MODE_INTER_MV_LAST:memcpy(mbmv,last_mv[0],sizeof(mbmv));break;
-        case OC_MODE_INTER_MV_LAST2:{
-          memcpy(mbmv,last_mv[1],sizeof(mbmv));
-          memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
-          memcpy(last_mv[0],mbmv,sizeof(last_mv[0]));
-        }break;
-        case OC_MODE_GOLDEN_MV:{
-          oc_mv_unpack(&_dec->opb,mv_comp_tree,mbmv);
-        }break;
-        default:memset(mbmv,0,sizeof(mbmv));break;
-      }
-      /*4MV mode fills in the fragments itself.
-        For all other modes we can use this common code.*/
-      if(mb_mode!=OC_MODE_INTER_MV_FOUR){
-        for(codedi=0;codedi<ncoded;codedi++){
-          mapi=coded[codedi];
-          fragi=mb_maps[mbi][mapi>>2][mapi&3];
-          frags[fragi].mb_mode=mb_mode;
-          memcpy(frag_mvs[fragi],mbmv,sizeof(mbmv));
         }
       }
     }
-  }
 }
 
 static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
@@ -1610,8 +1621,15 @@
    counts.*/
 static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
  oc_dec_pipeline_state *_pipe,int _pli){
+  static const char rasterise[16]={
+    0, 1, 3, 2,
+    0, 2, 3, 1,
+    0, 2, 3, 1,
+    3, 2, 0, 1,
+  };
   oc_fragment             *frags;
   ogg_uint16_t            *sb_masks;
+  int                      nhmbs;
   int                      mbi,
                            mb_stepx,
                            mb_stepy;
@@ -1619,31 +1637,36 @@
                            sb_end,
                            sb_newline;
   int                      pixel_fmt;
-  int                      mbo[4]={0,_dec->state.nhmbs,_dec->state.nhmbs+1,1};
+  int                      mbo[4];
 
   sb_masks = _dec->state.sb_masks;
   frags=_dec->state.frags;
+  nhmbs=_dec->state.nhmbs;
 
-  pixel_fmt=_pli?TH_PF_NFORMATS/*_dec->state.info.pixel_fmt*/:TH_PF_444;
+  pixel_fmt=_pli?_dec->state.info.pixel_fmt:TH_PF_444;
 
   sbi = _dec->state.fplanes[_pli].sboffset + (_pipe->fragy0[_pli] >> 2) * _dec->state.fplanes[_pli].nhsbs;
   sb_newline=sbi+_dec->state.fplanes[_pli].nhsbs;
   sb_end = _dec->state.fplanes[_pli].sboffset + (_pipe->fragy_end[_pli] + 3 >> 2) * _dec->state.fplanes[_pli].nhsbs;
 
-  mb_stepx=!(pixel_fmt&2)?1:2;
-  mb_stepy=!(pixel_fmt&1)?1:2;
+  mb_stepx=!(pixel_fmt&1)?2:1;
+  mb_stepy=!(pixel_fmt&2)?2:1;
 
-  mbi=(_pipe->fragy0[_pli]>>mb_stepy-1)*_dec->state.nhmbs;
+  mbo[0]=0;
+  mbo[1]=mbo[0]+nhmbs*mb_stepy;
+  mbo[2]=mbo[1]+mb_stepx;
+  mbo[3]=mbo[0]+mb_stepx;
 
-  for ( ; sbi < sb_end; sbi++,mbi+=1<<3-mb_stepx)
+  mbi=(_pipe->fragy0[_pli]>>2-mb_stepy)*nhmbs;
+
+  for ( ; sbi < sb_end; sbi++,mbi+=1<<mb_stepx)
   {
     ptrdiff_t *fragip;
     ogg_uint16_t bmask;
     int quadi;
 
     if(sbi>=sb_newline){
-      mbi-=_dec->state.nhmbs;
-      mbi+=_dec->state.nhmbs<<3-mb_stepy;
+      mbi+=(nhmbs<<mb_stepy)-nhmbs;
       sb_newline+=_dec->state.fplanes[_pli].nhsbs;
     }
 
@@ -1659,62 +1682,133 @@
          to protect us from buffer overflow.*/
       OC_ALIGN8(ogg_int16_t dct_coeffs[4][64 + 8]);
       int bi;
+      int last_zzi[4];
+      int mask = 0;
+      int mb_mode;
+      ogg_uint16_t dc_quant;
+      oc_mv *mb_mvs;
+      int frag_buf_off;
+      oc_mv cmv[4];
 
       if ((bmask & 15) == 0)
         continue;
 
-      switch (pixel_fmt){
-      case TH_PF_444:{
-        static const char rasterise[16] =
-        {
-          0, 1, 3, 2,
-          0, 2, 3, 1,
-          0, 2, 3, 1,
-          3, 2, 0, 1,
-        };
-        int last_zzi[4];
-        int mask = 0;
-        int mb_mode = _dec->state.frame_type==OC_INTRA_FRAME?OC_MODE_INTRA:_dec->state.mb_modes[(sbi<<2)+(quadi>>2)];
-        ogg_uint16_t dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
-        int frag_buf_off = _dec->state.frag_buf_offs[fragip[quadi==12?2:0]];
-        oc_mv *mv;
+      mb_mode = _dec->state.frame_type==OC_INTRA_FRAME?OC_MODE_INTRA:_dec->state.raster_mb_modes[mbi+mbo[quadi>>2]];
+      dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
+      mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi>>2]];
+      frag_buf_off = _dec->state.frag_buf_offs[fragip[quadi==12?2:0]];
 
-        for (bi = 0; bi < 4; bi++)
-        {
-          ptrdiff_t fragi;
-          int obi;
-          if ((bmask & (1 << bi)) == 0) continue;
-          fragi = fragip[bi];
-          obi = rasterise[quadi + bi];
-          assert(fragi >= 0 && frags[fragi].coded);
-          assert(frags[fragi].mb_mode == mb_mode);
+      for (bi = 0; bi < 4; bi++)
+      {
+        ptrdiff_t fragi;
+        int obi;
+        if ((bmask & (1 << bi)) == 0) continue;
+        fragi = fragip[bi];
+        obi = rasterise[quadi + bi];
 
-          last_zzi[obi] = oc_dec_get_dct_coeffs(dct_coeffs[obi], _dec, _pipe, _pli, frags + fragi);
-          mask |= 1 << obi;
-          mv = &_dec->state.frag_mvs[fragi]; /* this just captures any valid pointer for the moment */
-        }
+        last_zzi[obi] = oc_dec_get_dct_coeffs(dct_coeffs[obi], _dec, _pipe, _pli, frags + fragi);
+        mask |= 1 << obi;
+      }
 
-        assert(_dec->state.frame_type==OC_INTRA_FRAME||mb_mode==_dec->state.raster_mb_modes[mbi+mbo[quadi>>2]]);
+      switch (pixel_fmt){
+      case TH_PF_444:
         if (mb_mode==OC_MODE_INTER_MV_FOUR)
-          oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask,&_dec->state.frag_mvs[fragip[quadi==12?2:0]]);
-        else{
-//        assert(mv[0]==_dec->state.raster_mvs[mbi][0][0]&&mv[1]==_dec->state.raster_mvs[mbi][0][1]);
-          oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask,OC_FRAME_FOR_MODE(mb_mode),*mv);
+          oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask,mb_mvs);
+        else
+          oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
+        break;
+
+      case TH_PF_422:
+        /* TODO: code the reference frame index and the motion vector into a
+         * single word and then compare left and right copies -- if they're the
+         * same then do things quickly instead of like this:
+         */
+        if (mask&5){
+          if (mb_mode==OC_MODE_INTER_MV_FOUR){
+            cmv[0][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0],1,1);
+            cmv[0][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1],1,1);
+            cmv[2][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][0]+mb_mvs[3][0],1,1);
+            cmv[2][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][1]+mb_mvs[3][1],1,1);
+            oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&5,cmv);
+          }
+          else
+            oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&5,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
         }
-      }break;
-      default:
-        for (bi = 0; bi < 4; bi++)
-        {
-          ptrdiff_t fragi;
-          int last_zzi;
-          if ((bmask & (1 << bi)) == 0) continue;
-          fragi = fragip[bi];
-          assert(fragi >= 0 && frags[fragi].coded);
 
-          last_zzi = oc_dec_get_dct_coeffs(dct_coeffs[0], _dec, _pipe, _pli, frags + fragi);
-          ogg_uint16_t dc_quant = _pipe->dequant[_pli][0][frags[fragi].mb_mode!=OC_MODE_INTRA][0];
-          oc_state_frag_recon(&_dec->state,fragi,_pli, dct_coeffs[0],last_zzi,dc_quant);
+        mb_mode = _dec->state.frame_type==OC_INTRA_FRAME?OC_MODE_INTRA:_dec->state.raster_mb_modes[mbi+mbo[quadi>>2]+1];
+        dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
+        mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi>>2]+1];
+
+        if (mask&10){
+          if (mb_mode==OC_MODE_INTER_MV_FOUR){
+            cmv[1][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0],1,1);
+            cmv[1][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1],1,1);
+            cmv[3][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][0]+mb_mvs[3][0],1,1);
+            cmv[3][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][1]+mb_mvs[3][1],1,1);
+            oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&10,cmv);
+          }
+          else
+            oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&10,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
         }
+        break;
+
+      case TH_PF_420:
+        /* TODO: code the reference frame index and the motion vector into a
+         * single word and then compare left and right copies -- if they're the
+         * same then do things quickly instead of like this:
+         */
+        if (mask&1)
+          if (mb_mode==OC_MODE_INTER_MV_FOUR){
+            cmv[0][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
+            cmv[0][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
+            oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&1,cmv);
+          }
+          else
+            oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&1,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
+
+        mb_mode = _dec->state.frame_type==OC_INTRA_FRAME?OC_MODE_INTRA:_dec->state.raster_mb_modes[mbi+mbo[quadi>>2]+1];
+        dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
+        mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi>>2]+1];
+
+        if (mask&2)
+          if (mb_mode==OC_MODE_INTER_MV_FOUR){
+            cmv[1][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
+            cmv[1][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
+            oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&2,cmv);
+          }
+          else
+            oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&2,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
+
+        mb_mode = _dec->state.frame_type==OC_INTRA_FRAME?OC_MODE_INTRA:_dec->state.raster_mb_modes[mbi+mbo[quadi>>2]+nhmbs];
+        dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
+        mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi>>2]+nhmbs];
+
+        /* TODO: code the reference frame index and the motion vector into a
+         * single word and then compare left and right copies -- if they're the
+         * same then do things quickly instead of like this:
+         */
+        if (mask&4)
+          if (mb_mode==OC_MODE_INTER_MV_FOUR){
+            cmv[2][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
+            cmv[2][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
+            oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&4,cmv);
+          }
+          else
+            oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&4,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
+
+        mb_mode = _dec->state.frame_type==OC_INTRA_FRAME?OC_MODE_INTRA:_dec->state.raster_mb_modes[mbi+mbo[quadi>>2]+nhmbs+1];
+        dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
+        mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi>>2]+nhmbs+1];
+
+        if (mask&8)
+          if (mb_mode==OC_MODE_INTER_MV_FOUR){
+            cmv[3][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
+            cmv[3][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
+            oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&8,cmv);
+          }
+          else
+            oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&8,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
+        break;
       }
     }
   }
@@ -2479,9 +2573,8 @@
     {
       cairo_t           *c;
       const oc_fragment *frags;
-      oc_mv             *frag_mvs;
       const signed char *mb_modes;
-      oc_mb_map         *mb_maps;
+      size_t             nhfrags;
       size_t             nmbs;
       size_t             mbi;
       int                row2;
@@ -2529,9 +2622,8 @@
       }
       c=cairo_create(cs);
       frags=_dec->state.frags;
-      frag_mvs=_dec->state.frag_mvs;
-      mb_modes=_dec->state.mb_modes;
-      mb_maps=_dec->state.mb_maps;
+      mb_modes=_dec->state.raster_mb_modes;
+      nhfrags=_dec->state.fplanes[0].nhfrags;
       nmbs=_dec->state.nmbs;
       row2=0;
       col2=0;
@@ -2539,8 +2631,8 @@
         float x;
         float y;
         int   bi;
-        y=h-(row2+((col2+1>>1)&1))*16-16;
-        x=(col2>>1)*16;
+        y=h-row2*16-16;
+        x=col2*16;
         cairo_set_line_width(c,1.);
         /*Keyframe (all intra) red box.*/
         if(_dec->state.frame_type==OC_INTRA_FRAME){
@@ -2553,16 +2645,9 @@
           }
         }
         else{
-          const signed char *frag_mv;
-          ptrdiff_t          fragi;
-          for(bi=0;bi<4;bi++){
-            fragi=mb_maps[mbi][0][bi];
-            if(fragi>=0&&frags[fragi].coded){
-              frag_mv=frag_mvs[fragi];
-              break;
-            }
-          }
-          if(bi<4){
+          const oc_mv (*mb_mvs)[4]=_dec->state.raster_mb_mvs[mbi];
+          if(_dec->state.sb_masks[(row2>>1)*_dec->state.nhsbs+(col2>>1)]
+           &((row2&1)?0x0ff0:0xf00f)&((col2&1)?0xff00:0x00ff)){
             switch(mb_modes[mbi]){
               case OC_MODE_INTRA:{
                 if(_dec->telemetry_mbmode&0x02){
@@ -2589,13 +2674,13 @@
                   cairo_stroke(c);
                 }
                 if(_dec->telemetry_mv&0x04){
-                  cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
+                  cairo_move_to(c,x+8+mb_mvs[0][0],y+8-mb_mvs[0][1]);
                   cairo_set_source_rgba(c,1.,1.,1.,.9);
                   cairo_set_line_width(c,3.);
-                  cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
+                  cairo_line_to(c,x+8+mb_mvs[0][0]*.66,y+8-mb_mvs[0][1]*.66);
                   cairo_stroke_preserve(c);
                   cairo_set_line_width(c,2.);
-                  cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
+                  cairo_line_to(c,x+8+mb_mvs[0][0]*.33,y+8-mb_mvs[0][1]*.33);
                   cairo_stroke_preserve(c);
                   cairo_set_line_width(c,1.);
                   cairo_line_to(c,x+8,y+8);
@@ -2612,13 +2697,13 @@
                   cairo_stroke(c);
                 }
                 if(_dec->telemetry_mv&0x08){
-                  cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
+                  cairo_move_to(c,x+8+mb_mvs[0][0],y+8-mb_mvs[0][1]);
                   cairo_set_source_rgba(c,1.,1.,1.,.9);
                   cairo_set_line_width(c,3.);
-                  cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
+                  cairo_line_to(c,x+8+mb_mvs[0][0]*.66,y+8-mb_mvs[0][1]*.66);
                   cairo_stroke_preserve(c);
                   cairo_set_line_width(c,2.);
-                  cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
+                  cairo_line_to(c,x+8+mb_mvs[0][0]*.33,y+8-mb_mvs[0][1]*.33);
                   cairo_stroke_preserve(c);
                   cairo_set_line_width(c,1.);
                   cairo_line_to(c,x+8,y+8);
@@ -2638,13 +2723,13 @@
                   cairo_stroke(c);
                 }
                 if(_dec->telemetry_mv&0x10){
-                  cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
+                  cairo_move_to(c,x+8+mb_mvs[0][0],y+8-mb_mvs[0][1]);
                   cairo_set_source_rgba(c,1.,1.,1.,.9);
                   cairo_set_line_width(c,3.);
-                  cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
+                  cairo_line_to(c,x+8+mb_mvs[0][0]*.66,y+8-mb_mvs[0][1]*.66);
                   cairo_stroke_preserve(c);
                   cairo_set_line_width(c,2.);
-                  cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
+                  cairo_line_to(c,x+8+mb_mvs[0][0]*.33,y+8-mb_mvs[0][1]*.33);
                   cairo_stroke_preserve(c);
                   cairo_set_line_width(c,1.);
                   cairo_line_to(c,x+8,y+8);
@@ -2667,13 +2752,13 @@
                   cairo_stroke(c);
                 }
                 if(_dec->telemetry_mv&0x40){
-                  cairo_move_to(c,x+8+frag_mv[0],y+8-frag_mv[1]);
+                  cairo_move_to(c,x+8+mb_mvs[0][0],y+8-mb_mvs[0][1]);
                   cairo_set_source_rgba(c,1.,1.,1.,.9);
                   cairo_set_line_width(c,3.);
-                  cairo_line_to(c,x+8+frag_mv[0]*.66,y+8-frag_mv[1]*.66);
+                  cairo_line_to(c,x+8+mb_mvs[0][0]*.66,y+8-mb_mvs[0][1]*.66);
                   cairo_stroke_preserve(c);
                   cairo_set_line_width(c,2.);
-                  cairo_line_to(c,x+8+frag_mv[0]*.33,y+8-frag_mv[1]*.33);
+                  cairo_line_to(c,x+8+mb_mvs[0][0]*.33,y+8-mb_mvs[0][1]*.33);
                   cairo_stroke_preserve(c);
                   cairo_set_line_width(c,1.);
                   cairo_line_to(c,x+8,y+8);
@@ -2690,61 +2775,57 @@
                   cairo_stroke(c);
                 }
                 /*4mv is odd, coded in raster order.*/
-                fragi=mb_maps[mbi][0][0];
+                fragi=row2*2*nhfrags+col2*2;
                 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
-                  frag_mv=frag_mvs[fragi];
-                  cairo_move_to(c,x+4+frag_mv[0],y+12-frag_mv[1]);
+                  cairo_move_to(c,x+4+mb_mvs[0][0],y+12-mb_mvs[0][1]);
                   cairo_set_source_rgba(c,1.,1.,1.,.9);
                   cairo_set_line_width(c,3.);
-                  cairo_line_to(c,x+4+frag_mv[0]*.66,y+12-frag_mv[1]*.66);
+                  cairo_line_to(c,x+4+mb_mvs[0][0]*.66,y+12-mb_mvs[0][1]*.66);
                   cairo_stroke_preserve(c);
                   cairo_set_line_width(c,2.);
-                  cairo_line_to(c,x+4+frag_mv[0]*.33,y+12-frag_mv[1]*.33);
+                  cairo_line_to(c,x+4+mb_mvs[0][0]*.33,y+12-mb_mvs[0][1]*.33);
                   cairo_stroke_preserve(c);
                   cairo_set_line_width(c,1.);
                   cairo_line_to(c,x+4,y+12);
                   cairo_stroke(c);
                 }
-                fragi=mb_maps[mbi][0][1];
+                fragi=row2*2*nhfrags+col2*2+1;
                 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
-                  frag_mv=frag_mvs[fragi];
-                  cairo_move_to(c,x+12+frag_mv[0],y+12-frag_mv[1]);
+                  cairo_move_to(c,x+12+mb_mvs[1][0],y+12-mb_mvs[1][1]);
                   cairo_set_source_rgba(c,1.,1.,1.,.9);
                   cairo_set_line_width(c,3.);
-                  cairo_line_to(c,x+12+frag_mv[0]*.66,y+12-frag_mv[1]*.66);
+                  cairo_line_to(c,x+12+mb_mvs[1][0]*.66,y+12-mb_mvs[1][1]*.66);
                   cairo_stroke_preserve(c);
                   cairo_set_line_width(c,2.);
-                  cairo_line_to(c,x+12+frag_mv[0]*.33,y+12-frag_mv[1]*.33);
+                  cairo_line_to(c,x+12+mb_mvs[1][0]*.33,y+12-mb_mvs[1][1]*.33);
                   cairo_stroke_preserve(c);
                   cairo_set_line_width(c,1.);
                   cairo_line_to(c,x+12,y+12);
                   cairo_stroke(c);
                 }
-                fragi=mb_maps[mbi][0][2];
+                fragi=(row2*2+1)*nhfrags+col2*2;
                 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
-                  frag_mv=frag_mvs[fragi];
-                  cairo_move_to(c,x+4+frag_mv[0],y+4-frag_mv[1]);
+                  cairo_move_to(c,x+4+mb_mvs[2][0],y+4-mb_mvs[2][1]);
                   cairo_set_source_rgba(c,1.,1.,1.,.9);
                   cairo_set_line_width(c,3.);
-                  cairo_line_to(c,x+4+frag_mv[0]*.66,y+4-frag_mv[1]*.66);
+                  cairo_line_to(c,x+4+mb_mvs[2][0]*.66,y+4-mb_mvs[2][1]*.66);
                   cairo_stroke_preserve(c);
                   cairo_set_line_width(c,2.);
-                  cairo_line_to(c,x+4+frag_mv[0]*.33,y+4-frag_mv[1]*.33);
+                  cairo_line_to(c,x+4+mb_mvs[2][0]*.33,y+4-mb_mvs[2][1]*.33);
                   cairo_stroke_preserve(c);
                   cairo_set_line_width(c,1.);
                   cairo_line_to(c,x+4,y+4);
                   cairo_stroke(c);
                 }
-                fragi=mb_maps[mbi][0][3];
+                fragi=(row2*2+1)*nhfrags+col2*2+1;
                 if(frags[fragi].coded&&_dec->telemetry_mv&0x80){
-                  frag_mv=frag_mvs[fragi];
-                  cairo_move_to(c,x+12+frag_mv[0],y+4-frag_mv[1]);
+                  cairo_move_to(c,x+12+mb_mvs[3][0],y+4-mb_mvs[3][1]);
                   cairo_set_source_rgba(c,1.,1.,1.,.9);
                   cairo_set_line_width(c,3.);
-                  cairo_line_to(c,x+12+frag_mv[0]*.66,y+4-frag_mv[1]*.66);
+                  cairo_line_to(c,x+12+mb_mvs[3][0]*.66,y+4-mb_mvs[3][1]*.66);
                   cairo_stroke_preserve(c);
                   cairo_set_line_width(c,2.);
-                  cairo_line_to(c,x+12+frag_mv[0]*.33,y+4-frag_mv[1]*.33);
+                  cairo_line_to(c,x+12+mb_mvs[3][0]*.33,y+4-mb_mvs[3][1]*.33);
                   cairo_stroke_preserve(c);
                   cairo_set_line_width(c,1.);
                   cairo_line_to(c,x+12,y+4);
@@ -2764,8 +2845,8 @@
             int       yp;
             xp=x+(bi&1)*8;
             yp=y+8-(bi&2)*4;
-            fragi=mb_maps[mbi][0][bi];
-            if(fragi>=0&&frags[fragi].coded){
+            fragi=(row2*2+1)*nhfrags+col2*2+1;
+            if(frags[fragi].coded){
               qiv=qim[frags[fragi].qii];
               cairo_set_line_width(c,3.);
               cairo_set_source_rgba(c,0.,0.,0.,.5);
@@ -2834,9 +2915,9 @@
           }
         }
         col2++;
-        if((col2>>1)>=_dec->state.nhmbs){
+        if(col2>=_dec->state.nhmbs){
           col2=0;
-          row2+=2;
+          row2++;
         }
       }
       /*Bit usage indicator[s]:*/

Modified: branches/theora-gumboot/lib/state.c
===================================================================
--- branches/theora-gumboot/lib/state.c	2010-05-29 23:38:46 UTC (rev 17256)
+++ branches/theora-gumboot/lib/state.c	2010-05-29 23:46:16 UTC (rev 17257)
@@ -435,7 +435,7 @@
   _state->mb_maps=_ogg_calloc(nmbs,sizeof(*_state->mb_maps));
   _state->mb_modes=_ogg_calloc(nmbs,sizeof(*_state->mb_modes));
   _state->raster_mb_modes=_ogg_calloc(nmbs,sizeof(*_state->raster_mb_modes));
-  _state->raster_mb_mvs=_ogg_calloc(nmbs,sizeof(*_state->raster_mb_modes));
+  _state->raster_mb_mvs=_ogg_calloc(nmbs,sizeof(*_state->raster_mb_mvs));
   _state->coded_fragis=_ogg_malloc(nfrags*sizeof(*_state->coded_fragis));
   if(_state->frags==NULL||_state->frag_mvs==NULL||_state->sb_maps==NULL||
    _state->sb_flags==NULL||_state->sb_masks==NULL||_state->mb_maps==NULL||

Modified: branches/theora-gumboot/lib/x86/mmxstate.c
===================================================================
--- branches/theora-gumboot/lib/x86/mmxstate.c	2010-05-29 23:38:46 UTC (rev 17256)
+++ branches/theora-gumboot/lib/x86/mmxstate.c	2010-05-29 23:46:16 UTC (rev 17257)
@@ -346,7 +346,6 @@
     }
     else oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,_dct_coeffs[1]);
   }
-  /*TODO:fix this*/ _mvs+=_state->fplanes[_pli].nhfrags-2;
   dst+=ystride*8;
   ref+=ystride*8;
   if (_mask & 4){



More information about the commits mailing list