[Theora-dev] [PATCH] remove some FZIGZAG
Rudolf Marek
r.marek at sh.cvut.cz
Sat Aug 20 02:54:03 PDT 2005
Hello,
As we discussed with derf some time ago, it seems it is not necessary to enforce the "forward" order of dct_coeffs.
This patch gains 0.99366902855226196000%, so approximately a 1% speedup.
Measurement method:
time nice -n -19 ./dump /mnt/disc4/theora/unix/gripen.ogg > /dev/null
Ogg logical stream 310b2968 is Theora 720x480 29.97 fps video
Encoded frame content is 720x480 with 0x0 offset
12460 frames
This patch is as I had it working. I just want to submit it before I forget about it :)
Derf, please consider applying this to SVN in some form.
Thanks,
Regards
Rudolf
-------------- next part --------------
diff -Naur ../mergeSTATE/test/lib/decode.c test/lib/decode.c
--- ../mergeSTATE/test/lib/decode.c 2005-08-17 09:58:23.000000000 +0200
+++ test/lib/decode.c 2005-08-20 11:19:04.052143250 +0200
@@ -1083,53 +1083,53 @@
to.
This is updated before the function returns.*/
typedef void (*oc_token_expand_func)(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[64],int *_zzi);
+ ogg_int16_t _dct_coeffs[128],int *_zzi);
/*Expands a zero run token.*/
void oc_token_expand_zrl(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[64],int *_zzi){
+ ogg_int16_t _dct_coeffs[128],int *_zzi){
int zzi;
zzi=*_zzi;
- do _dct_coeffs[OC_FZIG_ZAG[zzi++]]=0;
+ do _dct_coeffs[zzi++]=0;
while(_extra_bits-->0);
*_zzi=zzi;
}
/*Expands a constant, single-value token.*/
void oc_token_expand_const(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[64],int *_zzi){
- _dct_coeffs[OC_FZIG_ZAG[(*_zzi)++]]=
+ ogg_int16_t _dct_coeffs[128],int *_zzi){
+ _dct_coeffs[(*_zzi)++]=
(ogg_int16_t)oc_token_dec1val_const(_token);
}
/*Expands category 2 single-valued tokens.*/
void oc_token_expand_cat2(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[64],int *_zzi){
- _dct_coeffs[OC_FZIG_ZAG[(*_zzi)++]]=
+ ogg_int16_t _dct_coeffs[128],int *_zzi){
+ _dct_coeffs[(*_zzi)++]=
(ogg_int16_t)oc_token_dec1val_cat2(_token,_extra_bits);
}
/*Expands category 3 through 8 single-valued tokens.*/
void oc_token_expand_cati(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[64],int *_zzi){
- _dct_coeffs[OC_FZIG_ZAG[(*_zzi)++]]=
+ ogg_int16_t _dct_coeffs[128],int *_zzi){
+ _dct_coeffs[(*_zzi)++]=
(ogg_int16_t)oc_token_dec1val_cati(_token,_extra_bits);
}
/*Expands a category 1a zero run/value combo token.*/
void oc_token_expand_run_cat1a(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[64],int *_zzi){
+ ogg_int16_t _dct_coeffs[128],int *_zzi){
int zzi;
int rl;
zzi=*_zzi;
- for(rl=_token-OC_DCT_RUN_CAT1A+1;rl-->0;)_dct_coeffs[OC_FZIG_ZAG[zzi++]]=0;
- _dct_coeffs[OC_FZIG_ZAG[zzi++]]=(ogg_int16_t)(1-(_extra_bits<<1));
+ for(rl=_token-OC_DCT_RUN_CAT1A+1;rl-->0;)_dct_coeffs[zzi++]=0;
+ _dct_coeffs[zzi++]=(ogg_int16_t)(1-(_extra_bits<<1));
*_zzi=zzi;
}
/*Expands all other zero run/value combo tokens.*/
void oc_token_expand_run(int _token,int _extra_bits,
- ogg_int16_t _dct_coeffs[64],int *_zzi){
+ ogg_int16_t _dct_coeffs[128],int *_zzi){
static const int NZEROS_ADJUST[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
6,10,1,2
};
@@ -1154,11 +1154,11 @@
_token-=OC_DCT_RUN_CAT1B;
rl=(_extra_bits&NZEROS_MASK[_token])+NZEROS_ADJUST[_token];
zzi=*_zzi;
- while(rl-->0)_dct_coeffs[OC_FZIG_ZAG[zzi++]]=0;
+ while(rl-->0)_dct_coeffs[zzi++]=0;
valsigned[0]=VALUE_ADJUST[_token]+
(_extra_bits>>VALUE_SHIFT[_token]&VALUE_MASK[_token]);
valsigned[1]=-valsigned[0];
- _dct_coeffs[OC_FZIG_ZAG[zzi++]]=(ogg_int16_t)valsigned[
+ _dct_coeffs[zzi++]=(ogg_int16_t)valsigned[
_extra_bits>>SIGN_SHIFT[_token]];
*_zzi=zzi;
}
diff -Naur ../mergeSTATE/test/lib/internal.h test/lib/internal.h
--- ../mergeSTATE/test/lib/internal.h 2005-08-17 10:05:34.000000000 +0200
+++ test/lib/internal.h 2005-08-20 11:39:38.797310000 +0200
@@ -239,7 +239,7 @@
void (*state_frag_copy)(const oc_theora_state *_state,
const int *_fragis,int _nfragis,int _dst_frame,int _src_frame,int _pli);
void (*state_frag_recon)(oc_theora_state *_state,const oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
void (*restore_fpu)(void);
void (*oc_state_loop_filter_frag_rows)(oc_theora_state *_state,int *_bv,
@@ -409,7 +409,7 @@
void oc_state_frag_copy(const oc_theora_state *_state,const int *_fragis,
int _nfragis,int _dst_frame,int _src_frame,int _pli);
void oc_state_frag_recon(oc_theora_state *_state,const oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
void oc_restore_fpu(const oc_theora_state *_state);
void oc_state_loop_filter_frag_rows(oc_theora_state *_state,int *_bv,
@@ -426,7 +426,7 @@
void oc_state_frag_copy_c(const oc_theora_state *_state,const int *_fragis,
int _nfragis,int _dst_frame,int _src_frame,int _pli);
void oc_state_frag_recon_c(oc_theora_state *_state,const oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
void oc_restore_fpu_c(void);
void oc_state_loop_filter_frag_rows_c(oc_theora_state *_state,int *_bv,
diff -Naur ../mergeSTATE/test/lib/state.c test/lib/state.c
--- ../mergeSTATE/test/lib/state.c 2005-08-17 10:29:23.797763500 +0200
+++ test/lib/state.c 2005-08-20 11:22:10.211777500 +0200
@@ -788,14 +788,14 @@
}
void oc_state_frag_recon(oc_theora_state *_state,const oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]){
_state->opt_vtable.state_frag_recon(_state,_frag,_pli,_dct_coeffs,
_last_zzi,_ncoefs,_dc_iquant,_ac_iquant);
}
void oc_state_frag_recon_c(oc_theora_state *_state,const oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]){
ogg_int16_t dct_buf[64];
ogg_int16_t res_buf[64];
@@ -841,7 +841,7 @@
for(zzi=1;zzi<_ncoefs;zzi++){
int ci;
ci=OC_FZIG_ZAG[zzi];
- dct_buf[ci]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[ci]*_ac_iquant[ci]);
+ dct_buf[ci]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[zzi]*_ac_iquant[ci]);
}
/*Then, fill in the remainder of the coefficients with 0's, and perform
the iDCT.*/
diff -Naur ../mergeSTATE/test/lib/x86/mmxstate.c test/lib/x86/mmxstate.c
--- ../mergeSTATE/test/lib/x86/mmxstate.c 2005-08-17 21:03:14.000000000 +0200
+++ test/lib/x86/mmxstate.c 2005-08-20 11:39:04.899191500 +0200
@@ -29,7 +29,7 @@
void oc_state_frag_recon_mmx(oc_theora_state *_state,const oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]){
ogg_int16_t __attribute__((aligned(8))) res_buf[64];
int dst_framei;
@@ -131,7 +131,7 @@
for(zzi=1;zzi<_ncoefs;zzi++){
int ci;
ci=OC_FZIG_ZAG[zzi];
- res_buf[OC_FZIG_ZAGMMX[zzi]]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[ci]*
+ res_buf[OC_FZIG_ZAGMMX[zzi]]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[zzi]*
_ac_iquant[ci]);
}
if(_last_zzi<10){
diff -Naur ../mergeSTATE/test/lib/x86/x86int.h test/lib/x86/x86int.h
--- ../mergeSTATE/test/lib/x86/x86int.h 2005-08-17 10:11:36.000000000 +0200
+++ test/lib/x86/x86int.h 2005-08-20 11:38:53.890503500 +0200
@@ -14,7 +14,7 @@
void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
int _nfragis,int _dst_frame,int _src_frame,int _pli);
void oc_state_frag_recon_mmx(oc_theora_state *_state,const oc_fragment *_frag,
- int _pli,ogg_int16_t _dct_coeffs[64],int _last_zzi,int _ncoefs,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
void oc_restore_fpu_mmx(void);
void oc_idct8x8_mmx(ogg_int16_t _y[64]);
More information about the Theora-dev
mailing list