[xiph-commits] r17272 - in branches/theora-gumboot/lib: . x86

gumboot at svn.xiph.org gumboot at svn.xiph.org
Fri Jun 4 06:43:43 PDT 2010


Author: gumboot
Date: 2010-06-04 06:43:43 -0700 (Fri, 04 Jun 2010)
New Revision: 17272

Modified:
   branches/theora-gumboot/lib/decode.c
   branches/theora-gumboot/lib/internal.h
   branches/theora-gumboot/lib/state.c
   branches/theora-gumboot/lib/x86/mmxstate.c
   branches/theora-gumboot/lib/x86/x86int.h
   branches/theora-gumboot/lib/x86/x86state.c
Log:
Separate the iDCT from motion compensation (MC) and perform it after MC.
Clean some muck out of oc_dec_mv_unpack_and_frag_modes_fill().



Modified: branches/theora-gumboot/lib/decode.c
===================================================================
--- branches/theora-gumboot/lib/decode.c	2010-06-04 06:41:11 UTC (rev 17271)
+++ branches/theora-gumboot/lib/decode.c	2010-06-04 13:43:43 UTC (rev 17272)
@@ -842,63 +842,57 @@
   old_mbi=0;
   for (mbi=sby=0;sby<_dec->state.fplanes[0].nsbs;sby+=_dec->state.fplanes[0].nhsbs,mbi+=nhmbs)
     for (sbx=0;sbx<_dec->state.fplanes[0].nhsbs;sbx++,mbi+=2){
-//      int mask=_dec->state.sb_masks[sby+sbx];
-//      int umask=_dec->state.sb_masks[_dec->state.fplanes[1].sboffset+sby_uv+(sbx>>XDECI)]&SOMETHINGCLEVER;
-//      int vmask=_dec->state.sb_masks[_dec->state.fplanes[2].sboffset+sby_uv+(sbx>>XDECI)]&SOMETHINGCLEVER;
+#if 0 /* no genius today */
+       ogg_uint16_t lmask         [4]=  {0x000F,0x00F0,0x0F00,0xF000};
+       ogg_uint16_t cmask444      [4]=  {0x000F,0x00F0,0x0F00,0xF000};
+       ogg_uint16_t cmaskRSV[2]   [4]= {{0x0003,0x000C,0x3000,0xC000},{0x0090,0x0060,0x0600,0x0900}};
+       ogg_uint16_t cmask422   [2][4]= {{0x0009,0x0030,0x00C0,0x0006},{0x6000,0x0300,0x0C00,0x9000}};
+       ogg_uint16_t cmask420[2][2][4]={{{0x0001,0x0008,0x0004,0x0002},{0x4000,0x2000,0x1000,0x8000}},
+                                       {{0x0010,0x0020,0x0040,0x0080},{0x0100,0x0200,0x0400,0x0800}}};
+
+//      int mask=_dec->state.sb_masks[sby+sbx]&lmask[i];
+//      int umask=_dec->state.sb_masks[_dec->state.fplanes[1].sboffset+sby_uv+(sbx>>XDECI)]&cmask[row&YDECI][col&XDECI][i];
+//      int vmask=_dec->state.sb_masks[_dec->state.fplanes[2].sboffset+sby_uv+(sbx>>XDECI)]&cmask[row&YDECI][col&XDECI][i];
       /* TODO: use the superblock masks directly rather than iterating through
        * frags[].  Also use these bitmaps to update frags[].mb_mode in whatever
        * order is convenient.  This should be faster (if there's enough
        * cleverness in the implementation) and it will eliminate use of
        * mb_maps[].
        */
-
+#endif
       for (i=0;i<4;i++,old_mbi++){
         int          mb_mode;
         mb_mode=rmb_modes[i][mbi];
         if(mb_mode!=OC_MODE_INVALID){
           oc_mv       *mbmv;
           ptrdiff_t    fragi;
-          int          coded[13];
-          int          codedi;
-          int          ncoded;
+          int          lastcoded;
+          int          coded;
           int          mapi;
           int          mapii;
           /*Search for at least one coded fragment.*/
-          ncoded=mapii=0;
+          coded=mapii=0;
           do{
             mapi=map_idxs[mapii];
             fragi=mb_maps[old_mbi][mapi>>2][mapi&3];
-            if(frags[fragi].coded)coded[ncoded++]=mapi;
+            if(frags[fragi].coded)
+              frags[fragi].mb_mode=mb_mode,coded|=1<<mapii;
           }
           while(++mapii<map_nidxs);
-          if(ncoded<=0)continue;
+          if(coded==0)continue;
           mbmv=rmb_mvs[i][mbi];
           switch(mb_mode){
             case OC_MODE_INTER_MV_FOUR:{
               int         bi;
-              /*Mark the tail of the list, so we don't accidentally go past it.*/
-              coded[ncoded]=-1;
-              for(bi=codedi=0;bi<4;bi++){
-                if(coded[codedi]==bi){
-                  codedi++;
-                  fragi=mb_maps[old_mbi][0][bi];
-                  frags[fragi].mb_mode=mb_mode;
+              for(bi=lastcoded=0;bi<4;bi++){
+                if(coded&1<<bi){
+                  lastcoded=bi;
                   oc_mv_unpack(&_dec->opb,mv_comp_tree,mbmv[bi]);
                 }
                 else mbmv[bi][0]=mbmv[bi][1]=0;
               }
-              if(codedi>0){
-                memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
-                memcpy(last_mv[0],mbmv[coded[codedi-1]],sizeof(last_mv[0]));
-              }
-              if(codedi<ncoded){
-                for(;codedi<ncoded;codedi++){
-                  mapi=coded[codedi];
-                  bi=mapi&3;
-                  fragi=mb_maps[old_mbi][mapi>>2][bi];
-                  frags[fragi].mb_mode=mb_mode;
-                }
-              }
+              memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
+              memcpy(last_mv[0],mbmv[lastcoded],sizeof(last_mv[0]));
             }break;
             case OC_MODE_INTER_MV:
               memcpy(last_mv[1],last_mv[0],sizeof(last_mv[1]));
@@ -915,15 +909,6 @@
             }break;
             default:memset(*mbmv,0,sizeof(*mbmv));break;
           }
-          /*4MV mode fills in the fragments itself.
-            For all other modes we can use this common code.*/
-          if(mb_mode!=OC_MODE_INTER_MV_FOUR){
-            for(codedi=0;codedi<ncoded;codedi++){
-              mapi=coded[codedi];
-              fragi=mb_maps[old_mbi][mapi>>2][mapi&3];
-              frags[fragi].mb_mode=mb_mode;
-            }
-          }
         }
       }
     }
@@ -1621,11 +1606,11 @@
    counts.*/
 static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
  oc_dec_pipeline_state *_pipe,int _pli){
-  static const char rasterise[16]={
-    0, 1, 3, 2,
-    0, 2, 3, 1,
-    0, 2, 3, 1,
-    3, 2, 0, 1,
+  static const char bitraster[4][16]={
+    {0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15, },
+    {0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15, },
+    {0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15, },
+    {0, 8, 4, 12, 1, 9, 5, 13, 2, 10, 6, 14, 3, 11, 7, 15, },
   };
   oc_fragment             *frags;
   ogg_uint16_t            *sb_masks;
@@ -1675,15 +1660,14 @@
 
     fragip = _dec->state.sb_maps[sbi][0];
 
-    for (quadi = 0; quadi < 16; quadi += 4, bmask >>= 4, fragip += 4)
+    for (quadi = 0; quadi < 4; quadi++, bmask >>= 4, fragip += 4)
     {
       /*This array is made one element larger because the zig-zag index array
          uses the final element as a dumping ground for out-of-range indices
          to protect us from buffer overflow.*/
       OC_ALIGN8(ogg_int16_t dct_coeffs[4][64 + 8]);
       int bi;
-      int last_zzi[4];
-      int mask = 0;
+      int mask;
       int mb_mode;
       ogg_uint16_t dc_quant;
       oc_mv *mb_mvs;
@@ -1693,122 +1677,128 @@
       if ((bmask & 15) == 0)
         continue;
 
-      mb_mode = _dec->state.frame_type==OC_INTRA_FRAME?OC_MODE_INTRA:_dec->state.raster_mb_modes[mbi+mbo[quadi>>2]];
-      dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
-      mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi>>2]];
-      frag_buf_off = _dec->state.frag_buf_offs[fragip[quadi==12?2:0]];
+      mask = bitraster[quadi][bmask&15];
 
-      for (bi = 0; bi < 4; bi++)
-      {
-        ptrdiff_t fragi;
-        int obi;
-        if ((bmask & (1 << bi)) == 0) continue;
-        fragi = fragip[bi];
-        obi = rasterise[quadi + bi];
+      if (_dec->state.frame_type!=OC_INTRA_FRAME){
+        mb_mode = _dec->state.raster_mb_modes[mbi+mbo[quadi]];
+        dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
+        mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi]];
+        frag_buf_off = _dec->state.frag_buf_offs[fragip[quadi==3?2:0]];
 
-        last_zzi[obi] = oc_dec_get_dct_coeffs(dct_coeffs[obi], _dec, _pipe, _pli, frags + fragi);
-        mask |= 1 << obi;
-      }
+        switch (pixel_fmt){
+        case TH_PF_444:
+          if (mb_mode==OC_MODE_INTER_MV_FOUR)
+            oc_state_4mv_predict(&_dec->state,frag_buf_off,_pli,mask,mb_mvs);
+          else
+            oc_state_quad_predict(&_dec->state,frag_buf_off,_pli,mask,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
+          break;
 
-      switch (pixel_fmt){
-      case TH_PF_444:
-        if (mb_mode==OC_MODE_INTER_MV_FOUR)
-          oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask,mb_mvs);
-        else
-          oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
-        break;
-
-      case TH_PF_422:
-        /* TODO: code the reference frame index and the motion vector into a
-         * single word and then compare left and right copies -- if they're the
-         * same then do things quickly instead of like this:
-         */
-        if (mask&5){
-          if (mb_mode==OC_MODE_INTER_MV_FOUR){
-            cmv[0][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0],1,1);
-            cmv[0][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1],1,1);
-            cmv[2][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][0]+mb_mvs[3][0],1,1);
-            cmv[2][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][1]+mb_mvs[3][1],1,1);
-            oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&5,cmv);
+        case TH_PF_422:
+          /* TODO: code the reference frame index and the motion vector into a
+           * single word and then compare left and right copies -- if they're the
+           * same then do things quickly instead of like this:
+           */
+          if (mask&5){
+            if (mb_mode==OC_MODE_INTER_MV_FOUR){
+              cmv[0][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0],1,1);
+              cmv[0][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1],1,1);
+              cmv[2][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][0]+mb_mvs[3][0],1,1);
+              cmv[2][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][1]+mb_mvs[3][1],1,1);
+              oc_state_4mv_predict(&_dec->state,frag_buf_off,_pli,mask&5,cmv);
+            }
+            else
+              oc_state_quad_predict(&_dec->state,frag_buf_off,_pli,mask&5,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
           }
-          else
-            oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&5,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
-        }
 
-        mb_mode = _dec->state.frame_type==OC_INTRA_FRAME?OC_MODE_INTRA:_dec->state.raster_mb_modes[mbi+mbo[quadi>>2]+1];
-        dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
-        mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi>>2]+1];
+          mb_mode = _dec->state.raster_mb_modes[mbi+mbo[quadi]+1];
+          dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
+          mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi]+1];
 
-        if (mask&10){
-          if (mb_mode==OC_MODE_INTER_MV_FOUR){
-            cmv[1][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0],1,1);
-            cmv[1][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1],1,1);
-            cmv[3][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][0]+mb_mvs[3][0],1,1);
-            cmv[3][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][1]+mb_mvs[3][1],1,1);
-            oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&10,cmv);
+          if (mask&10){
+            if (mb_mode==OC_MODE_INTER_MV_FOUR){
+              cmv[1][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0],1,1);
+              cmv[1][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1],1,1);
+              cmv[3][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][0]+mb_mvs[3][0],1,1);
+              cmv[3][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[2][1]+mb_mvs[3][1],1,1);
+              oc_state_4mv_predict(&_dec->state,frag_buf_off,_pli,mask&10,cmv);
+            }
+            else
+              oc_state_quad_predict(&_dec->state,frag_buf_off,_pli,mask&10,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
           }
-          else
-            oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&10,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
-        }
-        break;
+          break;
 
-      case TH_PF_420:
-        /* TODO: code the reference frame index and the motion vector into a
-         * single word and then compare left and right copies -- if they're the
-         * same then do things quickly instead of like this:
-         */
-        if (mask&1)
-          if (mb_mode==OC_MODE_INTER_MV_FOUR){
-            cmv[0][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
-            cmv[0][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
-            oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&1,cmv);
-          }
-          else
-            oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&1,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
+        case TH_PF_420:
+          /* TODO: code the reference frame index and the motion vector into a
+           * single word and then compare left and right copies -- if they're the
+           * same then do things quickly instead of like this:
+           */
+          if (mask&1)
+            if (mb_mode==OC_MODE_INTER_MV_FOUR){
+              cmv[0][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
+              cmv[0][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
+              oc_state_4mv_predict(&_dec->state,frag_buf_off,_pli,mask&1,cmv);
+            }
+            else
+              oc_state_quad_predict(&_dec->state,frag_buf_off,_pli,mask&1,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
 
-        mb_mode = _dec->state.frame_type==OC_INTRA_FRAME?OC_MODE_INTRA:_dec->state.raster_mb_modes[mbi+mbo[quadi>>2]+1];
-        dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
-        mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi>>2]+1];
+          mb_mode = _dec->state.raster_mb_modes[mbi+mbo[quadi]+1];
+          dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
+          mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi]+1];
 
-        if (mask&2)
-          if (mb_mode==OC_MODE_INTER_MV_FOUR){
-            cmv[1][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
-            cmv[1][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
-            oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&2,cmv);
-          }
-          else
-            oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&2,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
+          if (mask&2)
+            if (mb_mode==OC_MODE_INTER_MV_FOUR){
+              cmv[1][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
+              cmv[1][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
+              oc_state_4mv_predict(&_dec->state,frag_buf_off,_pli,mask&2,cmv);
+            }
+            else
+              oc_state_quad_predict(&_dec->state,frag_buf_off,_pli,mask&2,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
 
-        mb_mode = _dec->state.frame_type==OC_INTRA_FRAME?OC_MODE_INTRA:_dec->state.raster_mb_modes[mbi+mbo[quadi>>2]+nhmbs];
-        dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
-        mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi>>2]+nhmbs];
+          mb_mode = _dec->state.raster_mb_modes[mbi+mbo[quadi]+nhmbs];
+          dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
+          mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi]+nhmbs];
 
-        /* TODO: code the reference frame index and the motion vector into a
-         * single word and then compare left and right copies -- if they're the
-         * same then do things quickly instead of like this:
-         */
-        if (mask&4)
-          if (mb_mode==OC_MODE_INTER_MV_FOUR){
-            cmv[2][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
-            cmv[2][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
-            oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&4,cmv);
-          }
-          else
-            oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&4,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
+          /* TODO: code the reference frame index and the motion vector into a
+           * single word and then compare left and right copies -- if they're the
+           * same then do things quickly instead of like this:
+           */
+          if (mask&4)
+            if (mb_mode==OC_MODE_INTER_MV_FOUR){
+              cmv[2][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
+              cmv[2][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
+              oc_state_4mv_predict(&_dec->state,frag_buf_off,_pli,mask&4,cmv);
+            }
+            else
+              oc_state_quad_predict(&_dec->state,frag_buf_off,_pli,mask&4,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
 
-        mb_mode = _dec->state.frame_type==OC_INTRA_FRAME?OC_MODE_INTRA:_dec->state.raster_mb_modes[mbi+mbo[quadi>>2]+nhmbs+1];
+          mb_mode = _dec->state.raster_mb_modes[mbi+mbo[quadi]+nhmbs+1];
+          dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
+          mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi]+nhmbs+1];
+
+          if (mask&8)
+            if (mb_mode==OC_MODE_INTER_MV_FOUR){
+              cmv[3][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
+              cmv[3][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
+              oc_state_4mv_predict(&_dec->state,frag_buf_off,_pli,mask&8,cmv);
+            }
+            else
+              oc_state_quad_predict(&_dec->state,frag_buf_off,_pli,mask&8,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
+          break;
+        }
+      }
+
+      for (bi = 0; bi < 4; bi++)
+      {
+        ptrdiff_t fragi;
+        int last_zzi;
+        if ((bmask & (1 << bi)) == 0) continue;
+        fragi = fragip[bi];
+        mb_mode=frags[fragi].mb_mode;
         dc_quant = _pipe->dequant[_pli][0][mb_mode!=OC_MODE_INTRA][0];
-        mb_mvs = _dec->state.raster_mb_mvs[mbi+mbo[quadi>>2]+nhmbs+1];
+        frag_buf_off = _dec->state.frag_buf_offs[fragi];
 
-        if (mask&8)
-          if (mb_mode==OC_MODE_INTER_MV_FOUR){
-            cmv[3][0]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][0]+mb_mvs[1][0]+mb_mvs[2][0]+mb_mvs[3][0],2,2);
-            cmv[3][1]=(signed char)OC_DIV_ROUND_POW2(mb_mvs[0][1]+mb_mvs[1][1]+mb_mvs[2][1]+mb_mvs[3][1],2,2);
-            oc_state_4mv_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&8,cmv);
-          }
-          else
-            oc_state_quad_recon(&_dec->state,frag_buf_off,_pli,dct_coeffs,last_zzi,dc_quant,mask&8,OC_FRAME_FOR_MODE(mb_mode),mb_mvs[0]);
-        break;
+        last_zzi = oc_dec_get_dct_coeffs(dct_coeffs[bi], _dec, _pipe, _pli, frags + fragi);
+        oc_state_frag_residual(&_dec->state,frag_buf_off,_pli,dct_coeffs[bi],last_zzi,dc_quant,mb_mode);
       }
     }
   }

Modified: branches/theora-gumboot/lib/internal.h
===================================================================
--- branches/theora-gumboot/lib/internal.h	2010-06-04 06:41:11 UTC (rev 17271)
+++ branches/theora-gumboot/lib/internal.h	2010-06-04 13:43:43 UTC (rev 17272)
@@ -285,12 +285,12 @@
   void (*idct8x8)(ogg_int16_t _y[64],int _last_zzi);
   void (*state_frag_recon)(const oc_theora_state *_state,ptrdiff_t _fragi,
    int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
-  void (*state_quad_recon)(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
-   int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
-   ogg_uint16_t _dc_quant,int _mask,int _ref_frame,oc_mv _mv);
-  void (*state_4mv_recon)(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
-   int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
-   ogg_uint16_t _dc_quant,int _mask,oc_mv _mv[4]);
+  void (*state_quad_predict)(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+   int _pli,int _mask,int _ref_frame,oc_mv _mv);
+  void (*state_4mv_predict)(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+   int _pli,int _mask,oc_mv _mv[4]);
+  void (*state_frag_residual)(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+   int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant,int _mb_mode);
   void (*state_frag_copy_list)(const oc_theora_state *_state,
    const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
    int _dst_frame,int _src_frame,int _pli);
@@ -473,12 +473,12 @@
 void oc_idct8x8(const oc_theora_state *_state,ogg_int16_t _y[64],int _last_zzi);
 void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi,
  int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
-void oc_state_quad_recon(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
- int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
- ogg_uint16_t _dc_quant,int _mask,int _ref_frame,oc_mv _mv);
-void oc_state_4mv_recon(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
- int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
- ogg_uint16_t _dc_quant,int _mask,oc_mv _mv[4]);
+void oc_state_quad_predict(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,int _mask,int _ref_frame,oc_mv _mv);
+void oc_state_4mv_predict(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,int _mask,oc_mv _mv[4]);
+void oc_state_frag_residual(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant,int _mb_mode);
 void oc_state_frag_copy_list(const oc_theora_state *_state,
  const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
  int _dst_frame,int _src_frame,int _pli);

Modified: branches/theora-gumboot/lib/state.c
===================================================================
--- branches/theora-gumboot/lib/state.c	2010-06-04 06:41:11 UTC (rev 17271)
+++ branches/theora-gumboot/lib/state.c	2010-06-04 13:43:43 UTC (rev 17272)
@@ -608,8 +608,9 @@
   _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c;
   _state->opt_vtable.idct8x8=oc_idct8x8_c;
   _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c;
-  //_state->opt_vtable.state_frag_recon=oc_state_quad_recon_c;
-  //_state->opt_vtable.state_frag_recon=oc_state_4mv_recon_c;
+  //_state->opt_vtable.state_quad_predict=oc_state_quad_predict_c;
+  //_state->opt_vtable.state_4mv_predict=oc_state_4mv_predict_c;
+  //_state->opt_vtable.state_frag_residual=oc_state_frag_residual_c;
   _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_c;
   _state->opt_vtable.state_loop_filter_frag_rows=
    oc_state_loop_filter_frag_rows_c;
@@ -885,20 +886,22 @@
    _last_zzi,_dc_quant);
 }
 
-void oc_state_quad_recon(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
- int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
- ogg_uint16_t _dc_quant,int _mask,int _ref_frame,oc_mv _mv){
-  _state->opt_vtable.state_quad_recon(_state,_frag_buf_off,_pli,_dct_coeffs,
-   _last_zzi,_dc_quant,_mask,_ref_frame,_mv);
+void oc_state_quad_predict(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,int _mask,int _ref_frame,oc_mv _mv){
+  _state->opt_vtable.state_quad_predict(_state,_frag_buf_off,_pli,_mask,_ref_frame,_mv);
 }
 
-void oc_state_4mv_recon(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
- int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
- ogg_uint16_t _dc_quant,int _mask,oc_mv _mvs[4]){
-  _state->opt_vtable.state_4mv_recon(_state,_frag_buf_off,_pli,_dct_coeffs,
-   _last_zzi,_dc_quant,_mask,_mvs);
+void oc_state_4mv_predict(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,int _mask,oc_mv _mvs[4]){
+  _state->opt_vtable.state_4mv_predict(_state,_frag_buf_off,_pli,_mask,_mvs);
 }
 
+void oc_state_frag_residual(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant,int _mb_mode){
+   _state->opt_vtable.state_frag_residual(_state,_frag_buf_off,_pli,_dct_coeffs,
+    _last_zzi,_dc_quant,_mb_mode);
+}
+
 void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
  int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){
   unsigned char *dst;

Modified: branches/theora-gumboot/lib/x86/mmxstate.c
===================================================================
--- branches/theora-gumboot/lib/x86/mmxstate.c	2010-06-04 06:41:11 UTC (rev 17271)
+++ branches/theora-gumboot/lib/x86/mmxstate.c	2010-06-04 13:43:43 UTC (rev 17272)
@@ -145,154 +145,145 @@
   }
 }
 
-void oc_state_quad_recon_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
- int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
- ogg_uint16_t _dc_quant,int _mask,int _ref_frame, oc_mv _mv){
+static const ogg_int16_t zeroes[64]={0};
+
+void oc_state_quad_predict_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,int _mask,int _ref_frame, oc_mv _mv){
   unsigned char *dst;
   int            ystride;
   int            nhfrags;
-  int i;
 
   ystride=_state->ref_ystride[_pli];
   nhfrags=_state->fplanes[_pli].nhfrags;
   dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+_frag_buf_off;
 
-  for (i=0;i<4;i++){
-    if ((_mask & 1 << i) == 0)
-      continue;
-    /*Apply the inverse transform.*/
-    /*Special case only having a DC component.*/
-    if(_last_zzi[i]<2){
-      /*Note that this value must be unsigned, to keep the __asm__ block from
-         sign-extending it when it puts it in a register.*/
-      ogg_uint16_t p;
-      /*We round this dequant product (and not any of the others) because there's
-         no iDCT rounding.*/
-      p=(ogg_int16_t)(_dct_coeffs[i][0]*(ogg_int32_t)_dc_quant+15>>5);
-      /*Fill _dct_coeffs[i] with p.*/
-      __asm__ __volatile__(
-        /*mm0=0000 0000 0000 AAAA*/
-        "movd %[p],%%mm0\n\t"
-        /*mm0=0000 0000 AAAA AAAA*/
-        "punpcklwd %%mm0,%%mm0\n\t"
-        /*mm0=AAAA AAAA AAAA AAAA*/
-        "punpckldq %%mm0,%%mm0\n\t"
-        "movq %%mm0,(%[y])\n\t"
-        "movq %%mm0,8(%[y])\n\t"
-        "movq %%mm0,16(%[y])\n\t"
-        "movq %%mm0,24(%[y])\n\t"
-        "movq %%mm0,32(%[y])\n\t"
-        "movq %%mm0,40(%[y])\n\t"
-        "movq %%mm0,48(%[y])\n\t"
-        "movq %%mm0,56(%[y])\n\t"
-        "movq %%mm0,64(%[y])\n\t"
-        "movq %%mm0,72(%[y])\n\t"
-        "movq %%mm0,80(%[y])\n\t"
-        "movq %%mm0,88(%[y])\n\t"
-        "movq %%mm0,96(%[y])\n\t"
-        "movq %%mm0,104(%[y])\n\t"
-        "movq %%mm0,112(%[y])\n\t"
-        "movq %%mm0,120(%[y])\n\t"
-        :
-        :[y]"r"(_dct_coeffs[i]),[p]"r"((unsigned)p)
-        :"memory"
-      );
-    }
-    else{
-      /*Dequantize the DC coefficient.*/
-      _dct_coeffs[i][0]=(ogg_int16_t)(_dct_coeffs[i][0]*(int)_dc_quant);
-      oc_idct8x8(_state,_dct_coeffs[i],_last_zzi[i]);
-    }
-  }
-
   /*Fill in the target buffer.*/
-  if(_ref_frame==OC_FRAME_SELF) {
-    if (_mask & 1) oc_frag_recon_intra_mmx(dst+0,ystride,_dct_coeffs[0]);
-    if (_mask & 2) oc_frag_recon_intra_mmx(dst+8,ystride,_dct_coeffs[1]);
-    dst += 8 * ystride;
-    if (_mask & 4) oc_frag_recon_intra_mmx(dst+0,ystride,_dct_coeffs[2]);
-    if (_mask & 8) oc_frag_recon_intra_mmx(dst+8,ystride,_dct_coeffs[3]);
-  }
-  else{
+  if(_ref_frame!=OC_FRAME_SELF){
     const unsigned char *ref;
     int                  mvoffsets[2];
     ref=
      _state->ref_frame_data[_state->ref_frame_idx[_ref_frame]]
      +_frag_buf_off;
     if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,_mv[0],_mv[1])>1){
-      if ((_mask&3)==3){
+      switch(_mask&3){
+      case 3:
         oc_int_fragx2_copy2_sse2(dst,ystride,ref+mvoffsets[0],ref+mvoffsets[1],ystride);
-        if (_dct_coeffs[0][0]||_last_zzi[0]>1)
-          oc_frag_recon_inter_mmx(dst+0,dst+0,ystride,_dct_coeffs[0]);
-        if (_dct_coeffs[1][0]||_last_zzi[1]>1)
-          oc_frag_recon_inter_mmx(dst+8,dst+8,ystride,_dct_coeffs[1]);
+        break;
+      case 1:
+        oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],
+         ystride,zeroes);
+        break;
+      case 2:
+        oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],
+         ystride,zeroes);
       }
-      else{
-        if (_mask & 1)
-            oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],
-             ystride,_dct_coeffs[0]);
-        if (_mask & 2)
-            oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],
-             ystride,_dct_coeffs[1]);
-      }
       dst+=ystride*8;
       ref+=ystride*8;
-      if ((_mask&12)==12){
+      switch(_mask>>2){
+      case 3:
         oc_int_fragx2_copy2_sse2(dst,ystride,ref+mvoffsets[0],ref+mvoffsets[1],ystride);
-        if (_dct_coeffs[2][0]||_last_zzi[2]>1)
-          oc_frag_recon_inter_mmx(dst+0,dst+0,ystride,_dct_coeffs[2]);
-        if (_dct_coeffs[3][0]||_last_zzi[3]>1)
-          oc_frag_recon_inter_mmx(dst+8,dst+8,ystride,_dct_coeffs[3]);
+        break;
+      case 1:
+        oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],
+         ystride,zeroes);
+        break;
+      case 2:
+        oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],
+         ystride,zeroes);
       }
-      else{
-        if (_mask & 4)
-            oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],
-             ystride,_dct_coeffs[2]);
-        if (_mask & 8)
-            oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],
-             ystride,_dct_coeffs[3]);
-      }
     }
     else{
-      if (_mask & 1)
-        oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,_dct_coeffs[0]);
-      if (_mask & 2)
-        oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,_dct_coeffs[1]);
+      switch(_mask&3){
+      case 3:
+        oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,zeroes);
+        oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,zeroes);
+        break;
+      case 1:
+        oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,zeroes);
+        break;
+      case 2:
+        oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,zeroes);
+        break;
+      }
       dst+=ystride*8;
       ref+=ystride*8;
-      if (_mask & 4)
-        oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,_dct_coeffs[2]);
-      if (_mask & 8)
-        oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,_dct_coeffs[3]);
+      switch(_mask>>2){
+      case 3:
+        oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,zeroes);
+        oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,zeroes);
+        break;
+      case 1:
+        oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,zeroes);
+        break;
+      case 2:
+        oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,zeroes);
+        break;
+      };
     }
   }
 }
 
-void oc_state_4mv_recon_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
- int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
- ogg_uint16_t _dc_quant,int _mask, oc_mv _mvs[4]){
+void oc_state_4mv_predict_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,int _mask, oc_mv _mvs[4]){
   unsigned char       *dst;
   int                  ystride;
   const unsigned char *ref;
   int                  mvoffsets[2];
-  int i;
 
   ystride=_state->ref_ystride[_pli];
   dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+_frag_buf_off;
 
-  for (i=0;i<4;i++){
-    if ((_mask & 1 << i) == 0)
-      continue;
-    /*Apply the inverse transform.*/
-    /*Special case only having a DC component.*/
-    if(_last_zzi[i]<2){
-      /*Note that this value must be unsigned, to keep the __asm__ block from
-         sign-extending it when it puts it in a register.*/
+  /*Fill in the target buffer.*/
+  ref=
+   _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_PREV]]
+   +_frag_buf_off;
+  if (_mask & 1){
+    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,_mvs[0][0],_mvs[0][1])>1){
+      oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],
+       ystride,zeroes);
+    }
+    else oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,zeroes);
+  }
+  if (_mask & 2){
+    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,_mvs[1][0],_mvs[1][1])>1){
+      oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],
+       ystride,zeroes);
+    }
+    else oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,zeroes);
+  }
+  dst+=ystride*8;
+  ref+=ystride*8;
+  if (_mask & 4){
+    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,_mvs[2][0],_mvs[2][1])>1){
+      oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],
+       ystride,zeroes);
+    }
+    else oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,zeroes);
+  }
+  if (_mask & 8){
+    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,_mvs[3][0],_mvs[3][1])>1){
+      oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],
+       ystride,zeroes);
+    }
+    else oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,zeroes);
+  }
+}
+
+void oc_state_frag_residual_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant,int _mb_mode){
+  unsigned char *dst;
+  int            ystride;
+  /*Apply the inverse transform.*/
+  /*Special case only having a DC component.*/
+  if(_last_zzi<2){
+    /*Note that this value must be unsigned, to keep the __asm__ block from
+       sign-extending it when it puts it in a register.*/
+    if (_dct_coeffs[0]){
       ogg_uint16_t p;
       /*We round this dequant product (and not any of the others) because there's
          no iDCT rounding.*/
-      p=(ogg_int16_t)(_dct_coeffs[i][0]*(ogg_int32_t)_dc_quant+15>>5);
-      /*Fill _dct_coeffs[i] with p.*/
+      p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
+      /*Fill _dct_coeffs with p.*/
       __asm__ __volatile__(
         /*mm0=0000 0000 0000 AAAA*/
         "movd %[p],%%mm0\n\t"
@@ -317,51 +308,21 @@
         "movq %%mm0,112(%[y])\n\t"
         "movq %%mm0,120(%[y])\n\t"
         :
-        :[y]"r"(_dct_coeffs[i]),[p]"r"((unsigned)p)
+        :[y]"r"(_dct_coeffs),[p]"r"((unsigned)p)
         :"memory"
       );
     }
-    else{
-      /*Dequantize the DC coefficient.*/
-      _dct_coeffs[i][0]=(ogg_int16_t)(_dct_coeffs[i][0]*(int)_dc_quant);
-      oc_idct8x8(_state,_dct_coeffs[i],_last_zzi[i]);
-    }
   }
-
-  /*Fill in the target buffer.*/
-  ref=
-   _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_PREV]]
-   +_frag_buf_off;
-  if (_mask & 1){
-    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,_mvs[0][0],_mvs[0][1])>1){
-      oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],
-       ystride,_dct_coeffs[0]);
-    }
-    else oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,_dct_coeffs[0]);
+  else{
+    /*Dequantize the DC coefficient.*/
+    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
+    oc_idct8x8(_state,_dct_coeffs,_last_zzi);
   }
-  if (_mask & 2){
-    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,_mvs[1][0],_mvs[1][1])>1){
-      oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],
-       ystride,_dct_coeffs[1]);
-    }
-    else oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,_dct_coeffs[1]);
-  }
-  dst+=ystride*8;
-  ref+=ystride*8;
-  if (_mask & 4){
-    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,_mvs[2][0],_mvs[2][1])>1){
-      oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],
-       ystride,_dct_coeffs[2]);
-    }
-    else oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,_dct_coeffs[2]);
-  }
-  if (_mask & 8){
-    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,_mvs[3][0],_mvs[3][1])>1){
-      oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],
-       ystride,_dct_coeffs[3]);
-    }
-    else oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,_dct_coeffs[3]);
-  }
+  /*Fill in the target buffer.*/
+  ystride=_state->ref_ystride[_pli];
+  dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+_frag_buf_off;
+  if(_mb_mode==OC_MODE_INTRA)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs);
+  else if(_dct_coeffs[0]||_last_zzi>1)oc_frag_recon_inter_mmx(dst,dst,ystride,_dct_coeffs);
 }
 
 /*We copy these entire function to inline the actual MMX routines so that we

Modified: branches/theora-gumboot/lib/x86/x86int.h
===================================================================
--- branches/theora-gumboot/lib/x86/x86int.h	2010-06-04 06:41:11 UTC (rev 17271)
+++ branches/theora-gumboot/lib/x86/x86int.h	2010-06-04 13:43:43 UTC (rev 17272)
@@ -62,12 +62,12 @@
 void oc_idct8x8_sse2(ogg_int16_t _y[64],int _last_zzi);
 void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
  int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
-void oc_state_quad_recon_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
- int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
- ogg_uint16_t _dc_quant,int _mask,int _ref_frame,oc_mv _mv);
-void oc_state_4mv_recon_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
- int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
- ogg_uint16_t _dc_quant,int _mask,oc_mv _mvs[4]);
+void oc_state_quad_predict_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,int _mask,int _ref_frame,oc_mv _mv);
+void oc_state_4mv_predict_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,int _mask,oc_mv _mvs[4]);
+void oc_state_frag_residual_mmx(const oc_theora_state *_state,ptrdiff_t _frag_buf_off,
+ int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant,int _mb_mode);
 void oc_state_frag_copy_list_mmx(const oc_theora_state *_state,
  const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
  int _dst_frame,int _src_frame,int _pli);

Modified: branches/theora-gumboot/lib/x86/x86state.c
===================================================================
--- branches/theora-gumboot/lib/x86/x86state.c	2010-06-04 06:41:11 UTC (rev 17271)
+++ branches/theora-gumboot/lib/x86/x86state.c	2010-06-04 13:43:43 UTC (rev 17272)
@@ -72,8 +72,9 @@
     _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_mmx;
     _state->opt_vtable.idct8x8=oc_idct8x8_mmx;
     _state->opt_vtable.state_frag_recon=oc_state_frag_recon_mmx;
-    _state->opt_vtable.state_quad_recon=oc_state_quad_recon_mmx;
-    _state->opt_vtable.state_4mv_recon=oc_state_4mv_recon_mmx;
+    _state->opt_vtable.state_quad_predict=oc_state_quad_predict_mmx;
+    _state->opt_vtable.state_4mv_predict=oc_state_4mv_predict_mmx;
+    _state->opt_vtable.state_frag_residual=oc_state_frag_residual_mmx;
     _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_mmx;
     _state->opt_vtable.state_loop_filter_frag_rows=
      oc_state_loop_filter_frag_rows_mmx;



More information about the commits mailing list