[xiph-commits] r17241 - in branches/theora-gumboot/lib: . x86

gumboot at svn.xiph.org gumboot at svn.xiph.org
Sat May 22 15:50:43 PDT 2010


Author: gumboot
Date: 2010-05-22 15:50:43 -0700 (Sat, 22 May 2010)
New Revision: 17241

Modified:
   branches/theora-gumboot/lib/decode.c
   branches/theora-gumboot/lib/internal.h
   branches/theora-gumboot/lib/state.c
   branches/theora-gumboot/lib/x86/mmxstate.c
   branches/theora-gumboot/lib/x86/x86int.h
   branches/theora-gumboot/lib/x86/x86state.c
Log:
Add oc_state_mb_recon() and call it sometimes.  It doesn't really do a macroblock, though.  Just a quad of blocks.

Now to think about what information we really need, and where to find it, and whether or not it should be put somewhere more convenient.

Also need to accelerate some of the wide operations that have been opened up, and to recognise cases (most obviously 444) where chroma can do quads too.


Modified: branches/theora-gumboot/lib/decode.c
===================================================================
--- branches/theora-gumboot/lib/decode.c	2010-05-22 10:39:47 UTC (rev 17240)
+++ branches/theora-gumboot/lib/decode.c	2010-05-22 22:50:43 UTC (rev 17241)
@@ -1530,7 +1530,7 @@
 #include <stdio.h>
 #include <assert.h>
 static int oc_dec_get_dct_coeffs(ogg_int16_t dct_coeffs[65],
- oc_dec_ctx *_dec,oc_dec_pipeline_state *_pipe,int _pli, const oc_fragment *fragp){
+ oc_dec_ctx *_dec,oc_dec_pipeline_state *_pipe,int _pli, const oc_fragment *_fragp){
   unsigned char       *dct_tokens;
   const unsigned char *dct_fzig_zag;
   ptrdiff_t           *ti;
@@ -1544,11 +1544,11 @@
   ti=_pipe->ti[_pli];
   eob_runs=_pipe->eob_runs[_pli];
 
-  assert(fragp == _dec->state.frags + *_pipe->coded_fragis[_pli]++); /*XXX:DEBUG*/
+  assert(_fragp == _dec->state.frags + *_pipe->coded_fragis[_pli]++); /*XXX:DEBUG*/
 
   for(zzi=0;zzi<64;zzi++)dct_coeffs[zzi]=0;
-  qti=fragp->mb_mode!=OC_MODE_INTRA;
-  ac_quant=_pipe->dequant[_pli][fragp->qii][qti];
+  qti=_fragp->mb_mode!=OC_MODE_INTRA;
+  ac_quant=_pipe->dequant[_pli][_fragp->qii][qti];
   /*Decode the AC coefficients.*/
   for(zzi=0;zzi<64;){
     int token;
@@ -1590,7 +1590,7 @@
   /*TODO: zzi should be exactly 64 here.
     If it's not, we should report some kind of warning.*/
   zzi=OC_MINI(zzi,64);
-  dct_coeffs[0]=(ogg_int16_t)fragp->dc;
+  dct_coeffs[0]=(ogg_int16_t)_fragp->dc;
   /*last_zzi is always initialized.
     If your compiler thinks otherwise, it is dumb.*/
   return last_zzi;
@@ -1639,18 +1639,46 @@
       if ((bmask & 15) == 0)
         continue;
 
-      for (bi = 0; bi < 4; bi++)
+      if (_pli == 0) /* or if the mode and subblocks and quantisation are compatible */
       {
-        ptrdiff_t fragi;
-        int last_zzi;
-        if ((bmask & (1 << bi)) == 0) continue;
-        fragi = fragip[bi];
-        assert(fragi >= 0 && frags[fragi].coded);
+        static const char rasterise[16] =
+        {
+          0, 1, 3, 2,
+          0, 2, 3, 1,
+          0, 2, 3, 1,
+          3, 2, 0, 1,
+        };
+        int last_zzi[4];
+        ogg_uint16_t dc_quant[4];
+        int mask = 0;
+        for (bi = 0; bi < 4; bi++)
+        {
+          ptrdiff_t fragi;
+          int obi;
+          if ((bmask & (1 << bi)) == 0) continue;
+          fragi = fragip[bi];
+          obi = rasterise[quadi | bi];
+          assert(fragi >= 0 && frags[fragi].coded);
 
-        last_zzi = oc_dec_get_dct_coeffs(dct_coeffs[bi], _dec, _pipe, _pli, frags + fragi);
-        ogg_uint16_t dc_quant = _pipe->dequant[_pli][0][frags[fragi].mb_mode!=OC_MODE_INTRA][0];
-        oc_state_frag_recon(&_dec->state,fragi,_pli, dct_coeffs[bi],last_zzi,dc_quant);
+          last_zzi[obi] = oc_dec_get_dct_coeffs(dct_coeffs[obi], _dec, _pipe, _pli, frags + fragi);
+          dc_quant[obi] = _pipe->dequant[_pli][0][frags[fragi].mb_mode!=OC_MODE_INTRA][0];
+          mask |= 1 << obi;
+        }
+        oc_state_mb_recon(&_dec->state,fragip[quadi==12?2:0],_pli,dct_coeffs,last_zzi,dc_quant,mask);
       }
+      else
+        for (bi = 0; bi < 4; bi++)
+        {
+          ptrdiff_t fragi;
+          int last_zzi;
+          if ((bmask & (1 << bi)) == 0) continue;
+          fragi = fragip[bi];
+          assert(fragi >= 0 && frags[fragi].coded);
+
+          last_zzi = oc_dec_get_dct_coeffs(dct_coeffs[0], _dec, _pipe, _pli, frags + fragi);
+          ogg_uint16_t dc_quant = _pipe->dequant[_pli][0][frags[fragi].mb_mode!=OC_MODE_INTRA][0];
+          oc_state_frag_recon(&_dec->state,fragi,_pli, dct_coeffs[0],last_zzi,dc_quant);
+        }
     }
   }
 

Modified: branches/theora-gumboot/lib/internal.h
===================================================================
--- branches/theora-gumboot/lib/internal.h	2010-05-22 10:39:47 UTC (rev 17240)
+++ branches/theora-gumboot/lib/internal.h	2010-05-22 22:50:43 UTC (rev 17241)
@@ -285,6 +285,9 @@
   void (*idct8x8)(ogg_int16_t _y[64],int _last_zzi);
   void (*state_frag_recon)(const oc_theora_state *_state,ptrdiff_t _fragi,
    int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
+  void (*state_mb_recon)(const oc_theora_state *_state,ptrdiff_t _fragi,
+   int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
+   ogg_uint16_t _dc_quant[4],int _mask);
   void (*state_frag_copy_list)(const oc_theora_state *_state,
    const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
    int _dst_frame,int _src_frame,int _pli);
@@ -463,6 +466,9 @@
 void oc_idct8x8(const oc_theora_state *_state,ogg_int16_t _y[64],int _last_zzi);
 void oc_state_frag_recon(const oc_theora_state *_state,ptrdiff_t _fragi,
  int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
+void oc_state_mb_recon(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
+ ogg_uint16_t _dc_quant[4],int _mask);
 void oc_state_frag_copy_list(const oc_theora_state *_state,
  const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
  int _dst_frame,int _src_frame,int _pli);

Modified: branches/theora-gumboot/lib/state.c
===================================================================
--- branches/theora-gumboot/lib/state.c	2010-05-22 10:39:47 UTC (rev 17240)
+++ branches/theora-gumboot/lib/state.c	2010-05-22 22:50:43 UTC (rev 17241)
@@ -601,6 +601,7 @@
   _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c;
   _state->opt_vtable.idct8x8=oc_idct8x8_c;
   _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c;
+  //_state->opt_vtable.state_frag_recon=oc_state_mb_recon_c;
   _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_c;
   _state->opt_vtable.state_loop_filter_frag_rows=
    oc_state_loop_filter_frag_rows_c;
@@ -876,6 +877,13 @@
    _last_zzi,_dc_quant);
 }
 
+void oc_state_mb_recon(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
+ ogg_uint16_t _dc_quant[4],int _mask){ /*Thin dispatcher: forwards every argument unchanged to the state_mb_recon entry of the opt_vtable.*/
+  _state->opt_vtable.state_mb_recon(_state,_fragi,_pli,_dct_coeffs,
+   _last_zzi,_dc_quant,_mask); /*NOTE(review): the C vtable setup visible in this change does not assign state_mb_recon (only the MMX init does); confirm this pointer can never be NULL here.*/
+}
+
 void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
  int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant){
   unsigned char *dst;

Modified: branches/theora-gumboot/lib/x86/mmxstate.c
===================================================================
--- branches/theora-gumboot/lib/x86/mmxstate.c	2010-05-22 10:39:47 UTC (rev 17240)
+++ branches/theora-gumboot/lib/x86/mmxstate.c	2010-05-22 22:50:43 UTC (rev 17241)
@@ -94,6 +94,162 @@
   }
 }
 
+void oc_state_mb_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
+ ogg_uint16_t _dc_quant[4],int _mask){ /*Reconstructs up to four blocks of a 2x2 quad whose top-left fragment is _fragi; bit i of _mask enables block i (0=top-left, 1=top-right, 2=bottom-left, 3=bottom-right).*/
+  unsigned char *dst;
+  ptrdiff_t      frag_buf_off;
+  int            ystride;
+  int            nhfrags;
+  int            mb_mode;
+  int            good_fragi;
+  int i;
+
+  frag_buf_off=_state->frag_buf_offs[_fragi]; /*Buffer offset of the top-left fragment, looked up even when bit 0 of _mask is clear.*/
+  ystride=_state->ref_ystride[_pli];
+  nhfrags=_state->fplanes[_pli].nhfrags; /*Added to a fragment index below to step down one fragment row.*/
+  dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
+  good_fragi=_fragi; /*Becomes the index of the first fragment in the quad whose _mask bit is set.*/
+  if((_mask&3)==0) good_fragi+=nhfrags+((_mask&4)==0); /*Top row empty: use the bottom-left fragment, or the bottom-right one if bit 2 is clear too.*/
+  else             good_fragi+=((_mask&1)==0); /*Top row in use: the top-left fragment, or the top-right one if bit 0 is clear.*/
+  mb_mode=_state->frags[good_fragi].mb_mode; /*This single mode selects the reconstruction path for every enabled block below.*/
+
+  for (i=0;i<4;i++){
+    if ((_mask & 1 << i) == 0)
+      continue;
+    /*Apply the inverse transform in place on _dct_coeffs[i].*/
+    /*Special case only having a DC component (_last_zzi[i] below 2).*/
+    if(_last_zzi[i]<2){
+      /*Note that this value must be unsigned, to keep the __asm__ block from
+         sign-extending it when it puts it in a register.*/
+      ogg_uint16_t p;
+      /*We round this dequant product (and not any of the others) because there's
+         no iDCT rounding; + binds tighter than >>, so this is (product+15)>>5.*/
+      p=(ogg_int16_t)(_dct_coeffs[i][0]*(ogg_int32_t)_dc_quant[i]+15>>5);
+      /*Fill the first 128 bytes of _dct_coeffs[i] with p (sixteen 8-byte stores).*/
+      __asm__ __volatile__(
+        /*mm0=0000 0000 0000 AAAA*/
+        "movd %[p],%%mm0\n\t"
+        /*mm0=0000 0000 AAAA AAAA*/
+        "punpcklwd %%mm0,%%mm0\n\t"
+        /*mm0=AAAA AAAA AAAA AAAA*/
+        "punpckldq %%mm0,%%mm0\n\t"
+        "movq %%mm0,(%[y])\n\t"
+        "movq %%mm0,8(%[y])\n\t"
+        "movq %%mm0,16(%[y])\n\t"
+        "movq %%mm0,24(%[y])\n\t"
+        "movq %%mm0,32(%[y])\n\t"
+        "movq %%mm0,40(%[y])\n\t"
+        "movq %%mm0,48(%[y])\n\t"
+        "movq %%mm0,56(%[y])\n\t"
+        "movq %%mm0,64(%[y])\n\t"
+        "movq %%mm0,72(%[y])\n\t"
+        "movq %%mm0,80(%[y])\n\t"
+        "movq %%mm0,88(%[y])\n\t"
+        "movq %%mm0,96(%[y])\n\t"
+        "movq %%mm0,104(%[y])\n\t"
+        "movq %%mm0,112(%[y])\n\t"
+        "movq %%mm0,120(%[y])\n\t"
+        :
+        :[y]"r"(_dct_coeffs[i]),[p]"r"((unsigned)p)
+        :"memory"
+      );
+    }
+    else{
+      /*Dequantize the DC coefficient (no rounding on this path), then run the full iDCT.*/
+      _dct_coeffs[i][0]=(ogg_int16_t)(_dct_coeffs[i][0]*(int)_dc_quant[i]);
+      oc_idct8x8_mmx(_dct_coeffs[i],_last_zzi[i]);
+    }
+  }
+
+  /*Fill in the target buffer: right-hand blocks are 8 bytes across, bottom blocks 8 rows down.*/
+  if(mb_mode==OC_MODE_INTRA) {
+    if (_mask & 1) oc_frag_recon_intra_mmx(dst+0,ystride,_dct_coeffs[0]);
+    if (_mask & 2) oc_frag_recon_intra_mmx(dst+8,ystride,_dct_coeffs[1]);
+    dst += 8 * ystride;
+    if (_mask & 4) oc_frag_recon_intra_mmx(dst+0,ystride,_dct_coeffs[2]);
+    if (_mask & 8) oc_frag_recon_intra_mmx(dst+8,ystride,_dct_coeffs[3]);
+  }
+  else if(mb_mode==OC_MODE_INTER_MV_FOUR){ /*Each enabled block uses its own frag_mvs entry, so the offsets are recomputed per block.*/
+    const unsigned char *ref;
+    int                  mvoffsets[2];
+    ref=
+     _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_PREV]]
+     +frag_buf_off;
+    if (_mask & 1) {
+      if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
+       _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){ /*A result above 1 means both mvoffsets entries are used (two-reference predictor).*/
+        oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],ystride,
+         _dct_coeffs[0]);
+      }
+      else oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,_dct_coeffs[0]);
+    }
+    if (_mask & 2) {
+      if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
+       _state->frag_mvs[_fragi+1][0],_state->frag_mvs[_fragi+1][1])>1){
+        oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],ystride,
+         _dct_coeffs[1]);
+      }
+      else oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,_dct_coeffs[1]);
+    }
+    _fragi+=nhfrags; /*From here on _fragi indexes the bottom-left fragment of the quad.*/
+    dst+=ystride*8;
+    ref+=ystride*8;
+    if (_mask & 4) {
+      if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
+       _state->frag_mvs[_fragi][0],_state->frag_mvs[_fragi][1])>1){
+        oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],ystride,
+         _dct_coeffs[2]);
+      }
+      else oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,_dct_coeffs[2]);
+    }
+    if (_mask & 8) {
+      if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
+       _state->frag_mvs[_fragi+1][0],_state->frag_mvs[_fragi+1][1])>1){
+        oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],ystride,
+         _dct_coeffs[3]);
+      }
+      else oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,_dct_coeffs[3]);
+    }
+  }
+  else{ /*All remaining modes: one reference frame and one motion vector, both taken from good_fragi, serve every enabled block.*/
+    const unsigned char *ref;
+    int                  mvoffsets[2];
+    ref=
+     _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]
+     +frag_buf_off;
+    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
+         _state->frag_mvs[good_fragi][0],_state->frag_mvs[good_fragi][1])>1){ /*Offsets are computed once and reused for all four positions.*/
+      if (_mask & 1)
+          oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],ystride,
+           _dct_coeffs[0]);
+      if (_mask & 2)
+          oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],ystride,
+           _dct_coeffs[1]);
+      dst+=ystride*8;
+      ref+=ystride*8;
+      if (_mask & 4)
+          oc_frag_recon_inter2_mmx(dst+0,ref+0+mvoffsets[0],ref+0+mvoffsets[1],ystride,
+           _dct_coeffs[2]);
+      if (_mask & 8)
+          oc_frag_recon_inter2_mmx(dst+8,ref+8+mvoffsets[0],ref+8+mvoffsets[1],ystride,
+           _dct_coeffs[3]);
+    }
+    else{
+      if (_mask & 1)
+        oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,_dct_coeffs[0]);
+      if (_mask & 2)
+        oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,_dct_coeffs[1]);
+      dst+=ystride*8;
+      ref+=ystride*8;
+      if (_mask & 4)
+        oc_frag_recon_inter_mmx(dst+0,ref+0+mvoffsets[0],ystride,_dct_coeffs[2]);
+      if (_mask & 8)
+        oc_frag_recon_inter_mmx(dst+8,ref+8+mvoffsets[0],ystride,_dct_coeffs[3]);
+    }
+  }
+}
+
 /*We copy these entire function to inline the actual MMX routines so that we
    use only a single indirect call.*/
 

Modified: branches/theora-gumboot/lib/x86/x86int.h
===================================================================
--- branches/theora-gumboot/lib/x86/x86int.h	2010-05-22 10:39:47 UTC (rev 17240)
+++ branches/theora-gumboot/lib/x86/x86int.h	2010-05-22 22:50:43 UTC (rev 17241)
@@ -32,6 +32,9 @@
 void oc_idct8x8_mmx(ogg_int16_t _y[64],int _last_zzi);
 void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
  int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,ogg_uint16_t _dc_quant);
+void oc_state_mb_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[][64+8],int _last_zzi[4],
+ ogg_uint16_t _dc_quant[4],int _mask);
 void oc_state_frag_copy_list_mmx(const oc_theora_state *_state,
  const ptrdiff_t *_fragis,ptrdiff_t _nfragis,
  int _dst_frame,int _src_frame,int _pli);

Modified: branches/theora-gumboot/lib/x86/x86state.c
===================================================================
--- branches/theora-gumboot/lib/x86/x86state.c	2010-05-22 10:39:47 UTC (rev 17240)
+++ branches/theora-gumboot/lib/x86/x86state.c	2010-05-22 22:50:43 UTC (rev 17241)
@@ -51,6 +51,7 @@
     _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_mmx;
     _state->opt_vtable.idct8x8=oc_idct8x8_mmx;
     _state->opt_vtable.state_frag_recon=oc_state_frag_recon_mmx;
+    _state->opt_vtable.state_mb_recon=oc_state_mb_recon_mmx;
     _state->opt_vtable.state_frag_copy_list=oc_state_frag_copy_list_mmx;
     _state->opt_vtable.state_loop_filter_frag_rows=
      oc_state_loop_filter_frag_rows_mmx;



More information about the commits mailing list