[xiph-commits] r9864 - in experimental/derf/theora-exp/lib: . x86

tterribe at svn.xiph.org tterribe at svn.xiph.org
Tue Aug 30 11:15:25 PDT 2005


Author: tterribe
Date: 2005-08-30 11:15:23 -0700 (Tue, 30 Aug 2005)
New Revision: 9864

Modified:
   experimental/derf/theora-exp/lib/decode.c
   experimental/derf/theora-exp/lib/internal.h
   experimental/derf/theora-exp/lib/state.c
   experimental/derf/theora-exp/lib/x86/mmxstate.c
   experimental/derf/theora-exp/lib/x86/x86int.h
Log:
Store dct_coeffs in zig-zag order, and undo this right before the actual iDCT.
Patch from Rudolf Marek.
This gives about a 1% speed-up.


Modified: experimental/derf/theora-exp/lib/decode.c
===================================================================
--- experimental/derf/theora-exp/lib/decode.c	2005-08-30 18:12:46 UTC (rev 9863)
+++ experimental/derf/theora-exp/lib/decode.c	2005-08-30 18:15:23 UTC (rev 9864)
@@ -1077,59 +1077,57 @@
   It CANNOT be called for any of the EOB tokens.
   _token:      The token value to expand.
   _extra_bits: The extra bits associated with the token.
-  _dct_coeffs: The current list of coefficients, NOT in zig-zag order, but in
-                the natural order.
+  _dct_coeffs: The current list of coefficients, in zig-zag order.
   _zzi:        A pointer to the zig-zag index of the next coefficient to write
                 to.
                This is updated before the function returns.*/
 typedef void (*oc_token_expand_func)(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[64],int *_zzi);
+ ogg_int16_t _dct_coeffs[128],int *_zzi);
 
 /*Expands a zero run token.*/
 void oc_token_expand_zrl(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[64],int *_zzi){
+ ogg_int16_t _dct_coeffs[128],int *_zzi){
   int zzi;
   zzi=*_zzi;
-  do _dct_coeffs[OC_FZIG_ZAG[zzi++]]=0;
+  do _dct_coeffs[zzi++]=0;
   while(_extra_bits-->0);
   *_zzi=zzi;
 }
 
 /*Expands a constant, single-value token.*/
 void oc_token_expand_const(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[64],int *_zzi){
-  _dct_coeffs[OC_FZIG_ZAG[(*_zzi)++]]=
-   (ogg_int16_t)oc_token_dec1val_const(_token);
+ ogg_int16_t _dct_coeffs[128],int *_zzi){
+  _dct_coeffs[(*_zzi)++]=(ogg_int16_t)oc_token_dec1val_const(_token);
 }
 
 /*Expands category 2 single-valued tokens.*/
 void oc_token_expand_cat2(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[64],int *_zzi){
-  _dct_coeffs[OC_FZIG_ZAG[(*_zzi)++]]=
+ ogg_int16_t _dct_coeffs[128],int *_zzi){
+  _dct_coeffs[(*_zzi)++]=
    (ogg_int16_t)oc_token_dec1val_cat2(_token,_extra_bits);
 }
 
 /*Expands category 3 through 8 single-valued tokens.*/
 void oc_token_expand_cati(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[64],int *_zzi){
-  _dct_coeffs[OC_FZIG_ZAG[(*_zzi)++]]=
+ ogg_int16_t _dct_coeffs[128],int *_zzi){
+  _dct_coeffs[(*_zzi)++]=
    (ogg_int16_t)oc_token_dec1val_cati(_token,_extra_bits);
 }
 
 /*Expands a category 1a zero run/value combo token.*/
 void oc_token_expand_run_cat1a(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[64],int *_zzi){
+ ogg_int16_t _dct_coeffs[128],int *_zzi){
   int zzi;
   int rl;
   zzi=*_zzi;
-  for(rl=_token-OC_DCT_RUN_CAT1A+1;rl-->0;)_dct_coeffs[OC_FZIG_ZAG[zzi++]]=0;
-  _dct_coeffs[OC_FZIG_ZAG[zzi++]]=(ogg_int16_t)(1-(_extra_bits<<1));
+  for(rl=_token-OC_DCT_RUN_CAT1A+1;rl-->0;)_dct_coeffs[zzi++]=0;
+  _dct_coeffs[zzi++]=(ogg_int16_t)(1-(_extra_bits<<1));
   *_zzi=zzi;
 }
 
 /*Expands all other zero run/value combo tokens.*/
 void oc_token_expand_run(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[64],int *_zzi){
+ ogg_int16_t _dct_coeffs[128],int *_zzi){
   static const int NZEROS_ADJUST[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
     6,10,1,2
   };
@@ -1154,11 +1152,11 @@
   _token-=OC_DCT_RUN_CAT1B;
   rl=(_extra_bits&NZEROS_MASK[_token])+NZEROS_ADJUST[_token];
   zzi=*_zzi;
-  while(rl-->0)_dct_coeffs[OC_FZIG_ZAG[zzi++]]=0;
+  while(rl-->0)_dct_coeffs[zzi++]=0;
   valsigned[0]=VALUE_ADJUST[_token]+
    (_extra_bits>>VALUE_SHIFT[_token]&VALUE_MASK[_token]);
   valsigned[1]=-valsigned[0];
-  _dct_coeffs[OC_FZIG_ZAG[zzi++]]=(ogg_int16_t)valsigned[
+  _dct_coeffs[zzi++]=(ogg_int16_t)valsigned[
    _extra_bits>>SIGN_SHIFT[_token]];
   *_zzi=zzi;
 }
@@ -1445,7 +1443,7 @@
        run cannot cause a buffer overflow.
       The inverse zig-zag mapping sends all out of range indices to the last
        entry of this array, where they are ignored.*/
-    ogg_int16_t    dct_coeffs[65];
+    ogg_int16_t    dct_coeffs[128];
     int            fragi;
     int            zzi;
     int            last_zzi;

Modified: experimental/derf/theora-exp/lib/internal.h
===================================================================
--- experimental/derf/theora-exp/lib/internal.h	2005-08-30 18:12:46 UTC (rev 9863)
+++ experimental/derf/theora-exp/lib/internal.h	2005-08-30 18:15:23 UTC (rev 9864)
@@ -239,7 +239,7 @@
   void (*state_frag_copy)(const oc_theora_state *_state,
    const int *_fragis,int _nfragis,int _dst_frame,int _src_frame,int _pli);
   void (*state_frag_recon)(oc_theora_state *_state,const oc_fragment *_frag,
-   int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
+   int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
    ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
   void (*restore_fpu)(void);
 }oc_base_opt_vtable;
@@ -409,7 +409,7 @@
 void oc_state_frag_copy(const oc_theora_state *_state,const int *_fragis,
  int _nfragis,int _dst_frame,int _src_frame,int _pli);
 void oc_state_frag_recon(oc_theora_state *_state,const oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
  ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
 void oc_restore_fpu(const oc_theora_state *_state);
 
@@ -424,7 +424,7 @@
 void oc_state_frag_copy_c(const oc_theora_state *_state,const int *_fragis,
  int _nfragis,int _dst_frame,int _src_frame,int _pli);
 void oc_state_frag_recon_c(oc_theora_state *_state,const oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
  ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
 void oc_restore_fpu_c(void);
 

Modified: experimental/derf/theora-exp/lib/state.c
===================================================================
--- experimental/derf/theora-exp/lib/state.c	2005-08-30 18:12:46 UTC (rev 9863)
+++ experimental/derf/theora-exp/lib/state.c	2005-08-30 18:15:23 UTC (rev 9864)
@@ -786,14 +786,14 @@
 }
 
 void oc_state_frag_recon(oc_theora_state *_state,const oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
  ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]){
   _state->opt_vtable.state_frag_recon(_state,_frag,_pli,_dct_coeffs,
    _last_zzi,_ncoefs,_dc_iquant,_ac_iquant);
 }
 
 void oc_state_frag_recon_c(oc_theora_state *_state,const oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
  ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]){
   ogg_int16_t dct_buf[64];
   ogg_int16_t res_buf[64];
@@ -839,7 +839,7 @@
     for(zzi=1;zzi<_ncoefs;zzi++){
       int ci;
       ci=OC_FZIG_ZAG[zzi];
-      dct_buf[ci]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[ci]*_ac_iquant[ci]);
+      dct_buf[ci]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[zzi]*_ac_iquant[ci]);
     }
     /*Then, fill in the remainder of the coefficients with 0's, and perform
        the iDCT.*/

Modified: experimental/derf/theora-exp/lib/x86/mmxstate.c
===================================================================
--- experimental/derf/theora-exp/lib/x86/mmxstate.c	2005-08-30 18:12:46 UTC (rev 9863)
+++ experimental/derf/theora-exp/lib/x86/mmxstate.c	2005-08-30 18:15:23 UTC (rev 9864)
@@ -29,7 +29,7 @@
 
 
 void oc_state_frag_recon_mmx(oc_theora_state *_state,const oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
  ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]){
   ogg_int16_t  __attribute__((aligned(8))) res_buf[64];
   int dst_framei;
@@ -131,7 +131,7 @@
     for(zzi=1;zzi<_ncoefs;zzi++){
       int ci;
       ci=OC_FZIG_ZAG[zzi];
-      res_buf[OC_FZIG_ZAGMMX[zzi]]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[ci]*
+      res_buf[OC_FZIG_ZAGMMX[zzi]]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[zzi]*
        _ac_iquant[ci]);
     }
     if(_last_zzi<10){

Modified: experimental/derf/theora-exp/lib/x86/x86int.h
===================================================================
--- experimental/derf/theora-exp/lib/x86/x86int.h	2005-08-30 18:12:46 UTC (rev 9863)
+++ experimental/derf/theora-exp/lib/x86/x86int.h	2005-08-30 18:15:23 UTC (rev 9864)
@@ -14,7 +14,7 @@
 void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
  int _nfragis,int _dst_frame,int _src_frame,int _pli);
 void oc_state_frag_recon_mmx(oc_theora_state *_state,const oc_fragment *_frag,                                               
- int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,                                                             
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,                                                             
  ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
 void oc_restore_fpu_mmx(void);
 void oc_idct8x8_mmx(ogg_int16_t _y[64]);



More information about the commits mailing list