[xiph-commits] r16852 - in experimental/derf/theora-ptalarbvorm/lib: . x86 x86_vc
tterribe at svn.xiph.org
tterribe at svn.xiph.org
Fri Jan 29 13:18:10 PST 2010
Author: tterribe
Date: 2010-01-29 13:18:10 -0800 (Fri, 29 Jan 2010)
New Revision: 16852
Modified:
experimental/derf/theora-ptalarbvorm/lib/analyze.c
experimental/derf/theora-ptalarbvorm/lib/encfrag.c
experimental/derf/theora-ptalarbvorm/lib/encint.h
experimental/derf/theora-ptalarbvorm/lib/encode.c
experimental/derf/theora-ptalarbvorm/lib/mcenc.c
experimental/derf/theora-ptalarbvorm/lib/x86/mmxencfrag.c
experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.c
experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.h
experimental/derf/theora-ptalarbvorm/lib/x86_vc/mmxencfrag.c
experimental/derf/theora-ptalarbvorm/lib/x86_vc/x86enc.c
experimental/derf/theora-ptalarbvorm/lib/x86_vc/x86enc.h
Log:
First step towards separating out DC and AC prediction error for better RDO.
This should produce bit-identical results to the previous version.
Modified: experimental/derf/theora-ptalarbvorm/lib/analyze.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/analyze.c 2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/analyze.c 2010-01-29 21:18:10 UTC (rev 16852)
@@ -732,14 +732,17 @@
#if defined(OC_COLLECT_METRICS)
{
unsigned satd;
+ unsigned dc;
switch(nmv_offs){
- case 0:satd=oc_enc_frag_intra_satd(_enc,src,ystride);break;
+ case 0:satd=oc_enc_frag_intra_satd(_enc,&dc,src,ystride);break;
case 1:{
- satd=oc_enc_frag_satd_thresh(_enc,src,ref+mv_offs[0],ystride,UINT_MAX);
+ satd=oc_enc_frag_satd(_enc,&dc,src,ref+mv_offs[0],ystride);
+ satd+=dc;
}break;
default:{
- satd=oc_enc_frag_satd_thresh(_enc,src,dst,ystride,UINT_MAX);
- }
+ satd=oc_enc_frag_satd(_enc,&dc,src,dst,ystride);
+ satd+=dc;
+ }break;
}
_enc->frag_satd[_fragi]=satd;
}
@@ -1286,6 +1289,7 @@
unsigned rate[4][3];
int prev[3][3];
unsigned satd;
+ unsigned dc;
unsigned best_cost;
unsigned best_ssd;
unsigned best_rate;
@@ -1301,7 +1305,7 @@
ystride=_enc->state.ref_ystride[0];
fragi=sb_maps[_mbi>>2][_mbi&3][0];
frag_offs=frag_buf_offs[fragi];
- satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
+ satd=oc_enc_frag_intra_satd(_enc,&dc,src+frag_offs,ystride);
nqis=_enc->state.nqis;
lambda=_enc->lambda;
for(qii=0;qii<nqis;qii++){
@@ -1314,7 +1318,7 @@
for(bi=1;bi<4;bi++){
fragi=sb_maps[_mbi>>2][_mbi&3][bi];
frag_offs=frag_buf_offs[fragi];
- satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
+ satd=oc_enc_frag_intra_satd(_enc,&dc,src+frag_offs,ystride);
for(qii=0;qii<nqis;qii++){
oc_qii_state qt[3];
unsigned cur_ssd;
@@ -1379,6 +1383,7 @@
oc_qii_state qt[3];
unsigned cost[3];
unsigned satd;
+ unsigned dc;
unsigned best_cost;
int best_qii;
int qii;
@@ -1388,7 +1393,7 @@
src=_enc->state.ref_frame_data[OC_FRAME_IO];
ystride=_enc->state.ref_ystride[_pli];
frag_offs=_enc->state.frag_buf_offs[_fragi];
- satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
+ satd=oc_enc_frag_intra_satd(_enc,&dc,src+frag_offs,ystride);
/*Most chroma blocks have no AC coefficients to speak of anyway, so it's not
worth spending the bits to change the AC quantizer.
TODO: This may be worth revisiting when we separate out DC and AC
@@ -1893,6 +1898,7 @@
int bi;
ptrdiff_t fragi;
ptrdiff_t frag_offs;
+ unsigned dc;
frag_buf_offs=_enc->state.frag_buf_offs;
sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3];
src=_enc->state.ref_frame_data[OC_FRAME_IO];
@@ -1900,7 +1906,7 @@
for(bi=0;bi<4;bi++){
fragi=sb_map[bi];
frag_offs=frag_buf_offs[fragi];
- _frag_satd[bi]=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
+ _frag_satd[bi]=oc_enc_frag_intra_satd(_enc,&dc,src+frag_offs,ystride);
}
mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
@@ -1913,7 +1919,7 @@
bi=mapi&3;
fragi=mb_map[pli][bi];
frag_offs=frag_buf_offs[fragi];
- _frag_satd[mapii]=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
+ _frag_satd[mapii]=oc_enc_frag_intra_satd(_enc,&dc,src+frag_offs,ystride);
}
}
@@ -1951,6 +1957,7 @@
int bi;
ptrdiff_t fragi;
ptrdiff_t frag_offs;
+ unsigned dc;
src=_enc->state.ref_frame_data[OC_FRAME_IO];
ref=_enc->state.ref_frame_data[
_enc->state.ref_frame_idx[OC_FRAME_FOR_MODE(_mb_mode)]];
@@ -1964,16 +1971,18 @@
for(bi=0;bi<4;bi++){
fragi=sb_map[bi];
frag_offs=frag_buf_offs[fragi];
- frag_satd[bi]=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
- ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
+ frag_satd[bi]=oc_enc_frag_satd2(_enc,&dc,src+frag_offs,
+ ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride);
+ frag_satd[bi]+=dc;
}
}
else{
for(bi=0;bi<4;bi++){
fragi=sb_map[bi];
frag_offs=frag_buf_offs[fragi];
- frag_satd[bi]=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
- ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
+ frag_satd[bi]=oc_enc_frag_satd(_enc,&dc,src+frag_offs,
+ ref+frag_offs+mv_offs[0],ystride);
+ frag_satd[bi]+=dc;
}
}
mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
@@ -1988,8 +1997,9 @@
bi=mapi&3;
fragi=mb_map[pli][bi];
frag_offs=frag_buf_offs[fragi];
- frag_satd[mapii]=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
- ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
+ frag_satd[mapii]=oc_enc_frag_satd2(_enc,&dc,src+frag_offs,
+ ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride);
+ frag_satd[mapii]+=dc;
}
}
else{
@@ -1999,8 +2009,9 @@
bi=mapi&3;
fragi=mb_map[pli][bi];
frag_offs=frag_buf_offs[fragi];
- frag_satd[mapii]=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
- ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
+ frag_satd[mapii]=oc_enc_frag_satd(_enc,&dc,src+frag_offs,
+ ref+frag_offs+mv_offs[0],ystride);
+ frag_satd[mapii]+=dc;
}
}
oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,_rd_scale,1);
@@ -2064,6 +2075,7 @@
int bits0;
int bits1;
unsigned satd;
+ unsigned dc;
src=_enc->state.ref_frame_data[OC_FRAME_IO];
ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]];
ystride=_enc->state.ref_ystride[0];
@@ -2081,14 +2093,14 @@
frag_mvs[fragi][1]=(signed char)dy;
frag_offs=frag_buf_offs[fragi];
if(oc_state_get_mv_offsets(&_enc->state,mv_offs,0,dx,dy)>1){
- satd=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
- ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
+ satd=oc_enc_frag_satd2(_enc,&dc,src+frag_offs,
+ ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride);
}
else{
- satd=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
- ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
+ satd=oc_enc_frag_satd(_enc,&dc,src+frag_offs,
+ ref+frag_offs+mv_offs[0],ystride);
}
- frag_satd[OC_MB_PHASE[_mbi&3][bi]]=satd;
+ frag_satd[OC_MB_PHASE[_mbi&3][bi]]=satd+dc;
}
oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,frag_satd,
_enc->vp3_compatible?OC_NOSKIP:_skip_ssd,_rd_scale,1);
@@ -2123,14 +2135,14 @@
/*TODO: We could save half these calls by re-using the results for the Cb
and Cr planes; is it worth it?*/
if(oc_state_get_mv_offsets(&_enc->state,mv_offs,pli,dx,dy)>1){
- satd=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
- ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
+ satd=oc_enc_frag_satd2(_enc,&dc,src+frag_offs,
+ ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride);
}
else{
- satd=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
- ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
+ satd=oc_enc_frag_satd(_enc,&dc,src+frag_offs,
+ ref+frag_offs+mv_offs[0],ystride);
}
- frag_satd[mapii]=satd;
+ frag_satd[mapii]=satd+dc;
}
oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,
frag_satd,_skip_ssd,_rd_scale[4],1);
Modified: experimental/derf/theora-ptalarbvorm/lib/encfrag.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/encfrag.c 2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/encfrag.c 2010-01-29 21:18:10 UTC (rev 16852)
@@ -269,8 +269,9 @@
}
}
-unsigned oc_hadamard_sad_thresh(const ogg_int16_t _buf[64],unsigned _thresh){
+unsigned oc_hadamard_sad(unsigned *_dc,const ogg_int16_t _buf[64]){
unsigned sad;
+ unsigned dc;
int t0;
int t1;
int t2;
@@ -281,7 +282,7 @@
int t7;
int r;
int i;
- sad=0;
+ sad=dc=0;
for(i=0;i<8;i++){
/*Hadamard stage 1:*/
t0=_buf[i*8+0]+_buf[i*8+4];
@@ -306,8 +307,8 @@
t5+=t7;
t7=r-t7;
/*Hadamard stage 3:*/
- r=abs(t0+t1);
- r+=abs(t0-t1);
+ dc+=abs(t0+t1);
+ r=abs(t0-t1);
r+=abs(t2+t3);
r+=abs(t2-t3);
r+=abs(t4+t5);
@@ -315,49 +316,46 @@
r+=abs(t6+t7);
r+=abs(t6-t7);
sad+=r;
- if(sad>_thresh)break;
}
+ *_dc=dc;
return sad;
}
-unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc,
- const unsigned char *_src,const unsigned char *_ref,int _ystride,
- unsigned _thresh){
- return (*_enc->opt_vtable.frag_satd_thresh)(_src,_ref,_ystride,_thresh);
+unsigned oc_enc_frag_satd(const oc_enc_ctx *_enc,unsigned *_dc,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride){
+ return (*_enc->opt_vtable.frag_satd)(_dc,_src,_ref,_ystride);
}
-unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src,
- const unsigned char *_ref,int _ystride,unsigned _thresh){
+unsigned oc_enc_frag_satd_c(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
ogg_int16_t buf[64];
oc_diff_hadamard(buf,_src,_ref,_ystride);
- return oc_hadamard_sad_thresh(buf,_thresh);
+ return oc_hadamard_sad(_dc,buf);
}
-unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc,
+unsigned oc_enc_frag_satd2(const oc_enc_ctx *_enc,unsigned *_dc,
const unsigned char *_src,const unsigned char *_ref1,
- const unsigned char *_ref2,int _ystride,unsigned _thresh){
- return (*_enc->opt_vtable.frag_satd2_thresh)(_src,_ref1,_ref2,_ystride,
- _thresh);
+ const unsigned char *_ref2,int _ystride){
+ return (*_enc->opt_vtable.frag_satd2)(_dc,_src,_ref1,_ref2,_ystride);
}
-unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src,
- const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
- unsigned _thresh){
+unsigned oc_enc_frag_satd2_c(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride){
ogg_int16_t buf[64];
oc_diff_hadamard2(buf,_src,_ref1,_ref2,_ystride);
- return oc_hadamard_sad_thresh(buf,_thresh);
+ return oc_hadamard_sad(_dc,buf);
}
-unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc,
+unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc,unsigned *_dc,
const unsigned char *_src,int _ystride){
- return (*_enc->opt_vtable.frag_intra_satd)(_src,_ystride);
+ return (*_enc->opt_vtable.frag_intra_satd)(_dc,_src,_ystride);
}
-unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride){
+unsigned oc_enc_frag_intra_satd_c(unsigned *_dc,
+ const unsigned char *_src,int _ystride){
ogg_int16_t buf[64];
oc_intra_hadamard(buf,_src,_ystride);
- return oc_hadamard_sad_thresh(buf,UINT_MAX)
- -abs(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]);
+ return oc_hadamard_sad(_dc,buf);
}
void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst,
Modified: experimental/derf/theora-ptalarbvorm/lib/encint.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/encint.h 2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/encint.h 2010-01-29 21:18:10 UTC (rev 16852)
@@ -82,12 +82,12 @@
unsigned (*frag_sad2_thresh)(const unsigned char *_src,
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
unsigned _thresh);
- unsigned (*frag_satd_thresh)(const unsigned char *_src,
- const unsigned char *_ref,int _ystride,unsigned _thresh);
- unsigned (*frag_satd2_thresh)(const unsigned char *_src,
- const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
- unsigned _thresh);
- unsigned (*frag_intra_satd)(const unsigned char *_src,int _ystride);
+ unsigned (*frag_satd)(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+ unsigned (*frag_satd2)(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride);
+ unsigned (*frag_intra_satd)(unsigned *_dc,const unsigned char *_src,
+ int _ystride);
void (*frag_sub)(ogg_int16_t _diff[64],const unsigned char *_src,
const unsigned char *_ref,int _ystride);
void (*frag_sub_128)(ogg_int16_t _diff[64],
@@ -453,13 +453,12 @@
unsigned oc_enc_frag_sad2_thresh(const oc_enc_ctx *_enc,
const unsigned char *_src,const unsigned char *_ref1,
const unsigned char *_ref2,int _ystride,unsigned _thresh);
-unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc,
- const unsigned char *_src,const unsigned char *_ref,int _ystride,
- unsigned _thresh);
-unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc,
+unsigned oc_enc_frag_satd(const oc_enc_ctx *_enc,unsigned *_dc,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_satd2(const oc_enc_ctx *_enc,unsigned *_dc,
const unsigned char *_src,const unsigned char *_ref1,
- const unsigned char *_ref2,int _ystride,unsigned _thresh);
-unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc,
+ const unsigned char *_ref2,int _ystride);
+unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc,unsigned *_dc,
const unsigned char *_src,int _ystride);
void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst,
const unsigned char *_src1,const unsigned char *_src2,int _ystride);
@@ -486,12 +485,12 @@
unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src,
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
unsigned _thresh);
-unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src,
- const unsigned char *_ref,int _ystride,unsigned _thresh);
-unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src,
- const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
- unsigned _thresh);
-unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride);
+unsigned oc_enc_frag_satd_c(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_satd2_c(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride);
+unsigned oc_enc_frag_intra_satd_c(unsigned *_dc,const unsigned char *_src,
+ int _ystride);
void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
#endif
Modified: experimental/derf/theora-ptalarbvorm/lib/encode.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/encode.c 2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/encode.c 2010-01-29 21:18:10 UTC (rev 16852)
@@ -894,8 +894,8 @@
_enc->opt_vtable.frag_sad=oc_enc_frag_sad_c;
_enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_c;
_enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_c;
- _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_c;
- _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_c;
+ _enc->opt_vtable.frag_satd=oc_enc_frag_satd_c;
+ _enc->opt_vtable.frag_satd2=oc_enc_frag_satd2_c;
_enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_c;
_enc->opt_vtable.frag_sub=oc_enc_frag_sub_c;
_enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_c;
Modified: experimental/derf/theora-ptalarbvorm/lib/mcenc.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/mcenc.c 2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/mcenc.c 2010-01-29 21:18:10 UTC (rev 16852)
@@ -177,13 +177,15 @@
int _mvoffset0,int _mvoffset1,const unsigned char *_src,
const unsigned char *_ref,int _ystride,unsigned _best_err){
unsigned err;
+ unsigned dc;
int bi;
err=0;
for(bi=0;bi<4;bi++){
ptrdiff_t frag_offs;
frag_offs=_frag_buf_offs[_fragis[bi]];
- err+=oc_enc_frag_satd2_thresh(_enc,_src+frag_offs,_ref+frag_offs+_mvoffset0,
- _ref+frag_offs+_mvoffset1,_ystride,_best_err-err);
+ err+=oc_enc_frag_satd2(_enc,&dc,_src+frag_offs,
+ _ref+frag_offs+_mvoffset0,_ref+frag_offs+_mvoffset1,_ystride);
+ err+=dc;
}
return err;
}
@@ -219,9 +221,11 @@
err=0;
for(bi=0;bi<4;bi++){
ptrdiff_t frag_offs;
+ unsigned dc;
frag_offs=_frag_buf_offs[_fragis[bi]];
- err+=oc_enc_frag_satd_thresh(_enc,
- _src+frag_offs,_ref+frag_offs+mvoffset,_ystride,UINT_MAX);
+ err+=oc_enc_frag_satd(_enc,&dc,
+ _src+frag_offs,_ref+frag_offs+mvoffset,_ystride);
+ err+=dc;
}
return err;
}
@@ -229,8 +233,11 @@
static unsigned oc_mcenc_ysatd_check_bcandidate_fullpel(const oc_enc_ctx *_enc,
ptrdiff_t _frag_offs,int _dx,int _dy,
const unsigned char *_src,const unsigned char *_ref,int _ystride){
- return oc_enc_frag_satd_thresh(_enc,
- _src+_frag_offs,_ref+_frag_offs+_dx+_dy*_ystride,_ystride,UINT_MAX);
+ unsigned err;
+ unsigned dc;
+ err=oc_enc_frag_satd(_enc,&dc,
+ _src+_frag_offs,_ref+_frag_offs+_dx+_dy*_ystride,_ystride);
+ return err+dc;
}
/*Perform a motion vector search for this macro block against a single
@@ -704,6 +711,7 @@
best_site=4;
for(sitei=0;sitei<8;sitei++){
unsigned err;
+ unsigned dc;
int site;
int xmask;
int ymask;
@@ -723,8 +731,9 @@
ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask);
mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask);
- err=oc_enc_frag_satd2_thresh(_enc,_src,
- _ref+mvoffset0,_ref+mvoffset1,_ystride,_best_err);
+ err=oc_enc_frag_satd2(_enc,&dc,_src,
+ _ref+mvoffset0,_ref+mvoffset1,_ystride);
+ err+=dc;
if(err<_best_err){
_best_err=err;
best_site=site;
Modified: experimental/derf/theora-ptalarbvorm/lib/x86/mmxencfrag.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86/mmxencfrag.c 2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/x86/mmxencfrag.c 2010-01-29 21:18:10 UTC (rev 16852)
@@ -447,12 +447,14 @@
mm6 = d2 c2 b2 a2 \
mm7 = d3 c3 b3 a3*/ \
-static unsigned oc_int_frag_satd_thresh_mmxext(const unsigned char *_src,
- int _src_ystride,const unsigned char *_ref,int _ref_ystride,unsigned _thresh){
- OC_ALIGN8(ogg_int16_t buf[64]);
+static unsigned oc_int_frag_satd_mmxext(unsigned *_dc,
+ const unsigned char *_src,int _src_ystride,
+ const unsigned char *_ref,int _ref_ystride){
+ OC_ALIGN8(ogg_int16_t buf[64]);
ogg_int16_t *bufp;
unsigned ret;
unsigned ret2;
+ unsigned dc;
bufp=buf;
__asm__ __volatile__(
OC_LOAD_SUB_8x4("0x00")
@@ -475,14 +477,18 @@
"movq 0x20(%[buf]),%%mm2\n\t"
"movq 0x30(%[buf]),%%mm3\n\t"
"movq 0x00(%[buf]),%%mm0\n\t"
- OC_HADAMARD_ABS_ACCUM_8x4("0x28","0x38")
+ /*We split out the stages here so we can save the DC coefficient in the
+ middle.*/
+ OC_HADAMARD_AB_8x4
+ OC_HADAMARD_C_ABS_ACCUM_A_8x4("0x28","0x38")
+ "movd %%mm1,%[dc]\n\t"
+ OC_HADAMARD_C_ABS_ACCUM_B_8x4("0x28","0x38")
/*Up to this point, everything fit in 16 bits (8 input + 1 for the
difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1
for the factor of two we dropped + 3 for the vertical accumulation).
Now we finally have to promote things to dwords.
We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long
latency of pmaddwd by starting the next series of loads now.*/
- "mov %[thresh],%[ret2]\n\t"
"pmaddwd %%mm7,%%mm0\n\t"
"movq 0x50(%[buf]),%%mm1\n\t"
"movq 0x58(%[buf]),%%mm5\n\t"
@@ -492,29 +498,27 @@
"movq 0x68(%[buf]),%%mm6\n\t"
"paddd %%mm0,%%mm4\n\t"
"movq 0x70(%[buf]),%%mm3\n\t"
- "movd %%mm4,%[ret]\n\t"
+ "movd %%mm4,%[ret2]\n\t"
"movq 0x78(%[buf]),%%mm7\n\t"
/*The sums produced by OC_HADAMARD_ABS_ACCUM_8x4 each have an extra 4
added to them, and a factor of two removed; correct the final sum here.*/
- "lea -32(%[ret],%[ret]),%[ret]\n\t"
"movq 0x40(%[buf]),%%mm0\n\t"
- "cmp %[ret2],%[ret]\n\t"
"movq 0x48(%[buf]),%%mm4\n\t"
- "jae 1f\n\t"
OC_HADAMARD_ABS_ACCUM_8x4("0x68","0x78")
"pmaddwd %%mm7,%%mm0\n\t"
- /*There isn't much to stick in here to hide the latency this time, but the
- alternative to pmaddwd is movq->punpcklwd->punpckhwd->paddd, whose
- latency is even worse.*/
- "sub $32,%[ret]\n\t"
+ /*Compute abs(dc).*/
+ "movsx %w[dc],%[ret]\n\t"
+ "cdq\n\t"
+ "add %[ret2],%[ret2]\n\t"
+ "add %[dc],%[ret]\n\t"
"movq %%mm0,%%mm4\n\t"
"punpckhdq %%mm0,%%mm0\n\t"
+ "xor %[ret],%[dc]\n\t"
"paddd %%mm0,%%mm4\n\t"
- "movd %%mm4,%[ret2]\n\t"
- "lea (%[ret],%[ret2],2),%[ret]\n\t"
- ".p2align 4,,15\n\t"
- "1:\n\t"
- /*Although it looks like we're using 7 registers here, gcc can alias %[ret]
+ "sub %[dc],%[ret2]\n\t"
+ "movd %%mm4,%[ret]\n\t"
+ "lea -64(%[ret2],%[ret],2),%[ret]\n\t"
+ /*Although it looks like we're using 8 registers here, gcc can alias %[ret]
and %[ret2] with some of the inputs, since for once we don't write to
them until after we're done using everything but %[buf] (which is also
listed as an output to ensure gcc _doesn't_ alias them against it).*/
@@ -522,24 +526,24 @@
constraints, otherwise if gcc can prove they're equal it will allocate
them to the same register (which is bad); _src and _ref face a similar
problem, though those are never actually the same.*/
- :[ret]"=a"(ret),[ret2]"=r"(ret2),[buf]"+r"(bufp)
+ :[ret]"=a"(ret),[ret2]"=r"(ret2),[dc]"=d"(dc),[buf]"+r"(bufp)
:[src]"r"(_src),[src_ystride]"c"((ptrdiff_t)_src_ystride),
- [ref]"r"(_ref),[ref_ystride]"d"((ptrdiff_t)_ref_ystride),
- [thresh]"m"(_thresh)
+ [ref]"r"(_ref),[ref_ystride]"d"((ptrdiff_t)_ref_ystride)
/*We have to use neg, so we actually clobber the condition codes for once
(not to mention cmp, sub, and add).*/
:"cc"
);
+ *_dc=dc;
return ret;
}
-unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src,
- const unsigned char *_ref,int _ystride,unsigned _thresh){
- return oc_int_frag_satd_thresh_mmxext(_src,_ystride,_ref,_ystride,_thresh);
+unsigned oc_enc_frag_satd_mmxext(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
+ return oc_int_frag_satd_mmxext(_dc,_src,_ystride,_ref,_ystride);
}
/*Our internal implementation of frag_copy2 takes an extra stride parameter so
- we can share code with oc_enc_frag_satd2_thresh_mmxext().*/
+ we can share code with oc_enc_frag_satd2_mmxext().*/
static void oc_int_frag_copy2_mmxext(unsigned char *_dst,int _dst_ystride,
const unsigned char *_src1,const unsigned char *_src2,int _src_ystride){
__asm__ __volatile__(
@@ -656,20 +660,20 @@
);
}
-unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src,
- const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
- unsigned _thresh){
+unsigned oc_enc_frag_satd2_mmxext(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride){
OC_ALIGN8(unsigned char ref[64]);
oc_int_frag_copy2_mmxext(ref,8,_ref1,_ref2,_ystride);
- return oc_int_frag_satd_thresh_mmxext(_src,_ystride,ref,8,_thresh);
+ return oc_int_frag_satd_mmxext(_dc,_src,_ystride,ref,8);
}
-unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,
- int _ystride){
- OC_ALIGN8(ogg_int16_t buf[64]);
+unsigned oc_enc_frag_intra_satd_mmxext(unsigned *_dc,
+ const unsigned char *_src,int _ystride){
+ OC_ALIGN8(ogg_int16_t buf[64]);
ogg_int16_t *bufp;
unsigned ret;
unsigned ret2;
+ unsigned dc;
bufp=buf;
__asm__ __volatile__(
OC_LOAD_8x4("0x00")
@@ -696,7 +700,7 @@
middle.*/
OC_HADAMARD_AB_8x4
OC_HADAMARD_C_ABS_ACCUM_A_8x4("0x28","0x38")
- "movd %%mm1,%[ret]\n\t"
+ "movd %%mm1,%[dc]\n\t"
OC_HADAMARD_C_ABS_ACCUM_B_8x4("0x28","0x38")
/*Up to this point, everything fit in 16 bits (8 input + 1 for the
difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1
@@ -714,32 +718,33 @@
"movq 0x70(%[buf]),%%mm3\n\t"
"paddd %%mm0,%%mm4\n\t"
"movq 0x78(%[buf]),%%mm7\n\t"
- "movd %%mm4,%[ret2]\n\t"
+ "movd %%mm4,%[ret]\n\t"
"movq 0x40(%[buf]),%%mm0\n\t"
"movq 0x48(%[buf]),%%mm4\n\t"
OC_HADAMARD_ABS_ACCUM_8x4("0x68","0x78")
"pmaddwd %%mm7,%%mm0\n\t"
/*We assume that the DC coefficient is always positive (which is true,
because the input to the INTRA transform was not a difference).*/
- "movzx %w[ret],%[ret]\n\t"
- "add %[ret2],%[ret2]\n\t"
- "sub %[ret],%[ret2]\n\t"
+ "movzx %w[dc],%[dc]\n\t"
+ "add %[ret],%[ret]\n\t"
+ "sub %[dc],%[ret]\n\t"
"movq %%mm0,%%mm4\n\t"
"punpckhdq %%mm0,%%mm0\n\t"
"paddd %%mm0,%%mm4\n\t"
- "movd %%mm4,%[ret]\n\t"
- "lea -64(%[ret2],%[ret],2),%[ret]\n\t"
- /*Although it looks like we're using 7 registers here, gcc can alias %[ret]
+ "movd %%mm4,%[ret2]\n\t"
+ "lea -64(%[ret],%[ret2],2),%[ret]\n\t"
+ /*Although it looks like we're using 8 registers here, gcc can alias %[ret]
and %[ret2] with some of the inputs, since for once we don't write to
them until after we're done using everything but %[buf] (which is also
listed as an output to ensure gcc _doesn't_ alias them against it).*/
- :[ret]"=a"(ret),[ret2]"=r"(ret2),[buf]"+r"(bufp)
+ :[ret]"=a"(ret),[ret2]"=r"(ret2),[dc]"=r"(dc),[buf]"+r"(bufp)
:[src]"r"(_src),[src4]"r"(_src+4*_ystride),
[ystride]"r"((ptrdiff_t)_ystride),[ystride3]"r"((ptrdiff_t)3*_ystride)
/*We have to use sub, so we actually clobber the condition codes for once
(not to mention add).*/
:"cc"
);
+ *_dc=dc;
return ret;
}
Modified: experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.c 2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.c 2010-01-29 21:18:10 UTC (rev 16852)
@@ -35,8 +35,8 @@
_enc->opt_vtable.frag_sad=oc_enc_frag_sad_mmxext;
_enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_mmxext;
_enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_mmxext;
- _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_mmxext;
- _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_mmxext;
+ _enc->opt_vtable.frag_satd=oc_enc_frag_satd_mmxext;
+ _enc->opt_vtable.frag_satd2=oc_enc_frag_satd2_mmxext;
_enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_mmxext;
_enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_mmxext;
}
Modified: experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.h 2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.h 2010-01-29 21:18:10 UTC (rev 16852)
@@ -29,12 +29,12 @@
unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src,
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
unsigned _thresh);
-unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src,
- const unsigned char *_ref,int _ystride,unsigned _thresh);
-unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src,
- const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
- unsigned _thresh);
-unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,int _ystride);
+unsigned oc_enc_frag_satd_mmxext(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_satd2_mmxext(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride);
+unsigned oc_enc_frag_intra_satd_mmxext(unsigned *_dc,
+ const unsigned char *_src,int _ystride);
void oc_enc_frag_sub_mmx(ogg_int16_t _diff[64],
const unsigned char *_x,const unsigned char *_y,int _stride);
void oc_enc_frag_sub_128_mmx(ogg_int16_t _diff[64],
Modified: experimental/derf/theora-ptalarbvorm/lib/x86_vc/mmxencfrag.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86_vc/mmxencfrag.c 2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/x86_vc/mmxencfrag.c 2010-01-29 21:18:10 UTC (rev 16852)
@@ -468,12 +468,14 @@
mm7 = d3 c3 b3 a3*/ \
}
-static unsigned oc_int_frag_satd_thresh_mmxext(const unsigned char *_src,
- int _src_ystride,const unsigned char *_ref,int _ref_ystride,unsigned _thresh){
- OC_ALIGN8(ogg_int16_t buf[64]);
- ogg_int16_t *bufp;
- unsigned ret1;
- unsigned ret2;
+static unsigned oc_int_frag_satd_mmxext(unsigned *_dc,
+ const unsigned char *_src,int _src_ystride,
+ const unsigned char *_ref,int _ref_ystride){
+ OC_ALIGN8(ogg_int16_t buf[64]);
+ ogg_int16_t *bufp;
+ unsigned ret1;
+ unsigned ret2;
+ unsigned dc;
bufp=buf;
__asm{
#define SRC esi
@@ -482,7 +484,9 @@
#define REF_YSTRIDE edx
#define BUF edi
#define RET eax
-#define RET2 edx
+#define RET2 ecx
+#define DC edx
+#define DC_WORD dx
mov SRC,_src
mov SRC_YSTRIDE,_src_ystride
mov REF,_ref
@@ -508,14 +512,18 @@
movq mm2,[0x20+BUF]
movq mm3,[0x30+BUF]
movq mm0,[0x00+BUF]
- OC_HADAMARD_ABS_ACCUM_8x4(0x28,0x38)
+ /*We split out the stages here so we can save the DC coefficient in the
+ middle.*/
+ OC_HADAMARD_AB_8x4
+ OC_HADAMARD_C_ABS_ACCUM_A_8x4(0x28,0x38)
+ movd DC,mm1
+ OC_HADAMARD_C_ABS_ACCUM_B_8x4(0x28,0x38)
/*Up to this point, everything fit in 16 bits (8 input + 1 for the
difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1
for the factor of two we dropped + 3 for the vertical accumulation).
Now we finally have to promote things to dwords.
We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long
latency of pmaddwd by starting the next series of loads now.*/
- mov RET2,_thresh
pmaddwd mm0,mm7
movq mm1,[0x50+BUF]
movq mm5,[0x58+BUF]
@@ -525,29 +533,29 @@
movq mm6,[0x68+BUF]
paddd mm4,mm0
movq mm3,[0x70+BUF]
- movd RET,mm4
+ movd RET2,mm4
movq mm7,[0x78+BUF]
/*The sums produced by OC_HADAMARD_ABS_ACCUM_8x4 each have an extra 4
added to them, and a factor of two removed; correct the final sum here.*/
lea RET,[RET+RET-32]
movq mm0,[0x40+BUF]
- cmp RET,RET2
movq mm4,[0x48+BUF]
- jae at_end
OC_HADAMARD_ABS_ACCUM_8x4(0x68,0x78)
pmaddwd mm0,mm7
- /*There isn't much to stick in here to hide the latency this time, but the
- alternative to pmaddwd is movq->punpcklwd->punpckhwd->paddd, whose
- latency is even worse.*/
- sub RET,32
+ /*Compute abs(dc).*/
+ movsx RET,DC_WORD
+ cdq
+ add RET2,RET2
+ add RET,DC
movq mm4,mm0
punpckhdq mm0,mm0
+ xor DC,RET
paddd mm4,mm0
- movd RET2,mm4
- lea RET,[RET+RET2*2]
- align 16
-at_end:
+ sub RET2,DC
+ movd RET,mm4
+ lea RET,[RET2+RET*2-64]
mov ret1,RET
+ mov dc,DC
#undef SRC
#undef REF
#undef SRC_YSTRIDE
@@ -555,18 +563,21 @@
#undef BUF
#undef RET
#undef RET2
+#undef DC
+#undef DC_WORD
}
+ *_dc=dc;
return ret1;
}
-unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src,
- const unsigned char *_ref,int _ystride,unsigned _thresh){
- return oc_int_frag_satd_thresh_mmxext(_src,_ystride,_ref,_ystride,_thresh);
+unsigned oc_enc_frag_satd_mmxext(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
+ return oc_int_frag_satd_mmxext(_dc,_src,_ystride,_ref,_ystride);
}
/*Our internal implementation of frag_copy2 takes an extra stride parameter so
- we can share code with oc_enc_frag_satd2_thresh_mmxext().*/
+ we can share code with oc_enc_frag_satd2_mmxext().*/
static void oc_int_frag_copy2_mmxext(unsigned char *_dst,int _dst_ystride,
const unsigned char *_src1,const unsigned char *_src2,int _src_ystride){
__asm{
@@ -694,30 +705,31 @@
}
}
-unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src,
- const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
- unsigned _thresh){
+unsigned oc_enc_frag_satd2_mmxext(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride){
OC_ALIGN8(unsigned char ref[64]);
oc_int_frag_copy2_mmxext(ref,8,_ref1,_ref2,_ystride);
- return oc_int_frag_satd_thresh_mmxext(_src,_ystride,ref,8,_thresh);
+ return oc_int_frag_satd_thresh_mmxext(dc,_src,_ystride,ref,8);
}
-unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,
+unsigned oc_enc_frag_intra_satd_mmxext(unsigned *_dc,const unsigned char *_src,
int _ystride){
- OC_ALIGN8(ogg_int16_t buf[64]);
- ogg_int16_t *bufp;
- unsigned ret1;
- unsigned ret2;
+ OC_ALIGN8(ogg_int16_t buf[64]);
+ ogg_int16_t *bufp;
+ unsigned ret1;
+ unsigned ret2;
+ unsigned dc;
bufp=buf;
__asm{
#define SRC eax
#define SRC4 esi
#define BUF edi
+#define YSTRIDE edx
+#define YSTRIDE3 ecx
#define RET eax
-#define RET_WORD ax
#define RET2 ecx
-#define YSTRIDE edx
-#define YSTRIDE3 ecx
+#define DC edx
+#define DC_WORD dx
mov SRC,_src
mov BUF,bufp
mov YSTRIDE,_ystride
@@ -749,7 +761,7 @@
middle.*/
OC_HADAMARD_AB_8x4
OC_HADAMARD_C_ABS_ACCUM_A_8x4(0x28,0x38)
- movd RET,mm1
+ movd DC,mm1
OC_HADAMARD_C_ABS_ACCUM_B_8x4(0x28,0x38)
/*Up to this point, everything fit in 16 bits (8 input + 1 for the
difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1
@@ -767,31 +779,34 @@
movq mm3,[0x70+BUF]
paddd mm4,mm0
movq mm7,[0x78+BUF]
- movd RET2,mm4
+ movd RET,mm4
movq mm0,[0x40+BUF]
movq mm4,[0x48+BUF]
OC_HADAMARD_ABS_ACCUM_8x4(0x68,0x78)
pmaddwd mm0,mm7
/*We assume that the DC coefficient is always positive (which is true,
because the input to the INTRA transform was not a difference).*/
- movzx RET,RET_WORD
- add RET2,RET2
- sub RET2,RET
+ movzx DC,DC_WORD
+ add RET,RET
+ sub DC,RET
movq mm4,mm0
punpckhdq mm0,mm0
paddd mm4,mm0
- movd RET,mm4
- lea RET,[-64+RET2+RET*2]
+ movd RET2,mm4
+ lea RET,[-64+RET+RET2*2]
+ mov [dc],DC
mov [ret1],RET
#undef SRC
#undef SRC4
#undef BUF
+#undef YSTRIDE
+#undef YSTRIDE3
#undef RET
-#undef RET_WORD
#undef RET2
-#undef YSTRIDE
-#undef YSTRIDE3
+#undef DC
+#undef DC_WORD
}
+ *_dc=dc;
return ret1;
}
Modified: experimental/derf/theora-ptalarbvorm/lib/x86_vc/x86enc.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86_vc/x86enc.c 2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/x86_vc/x86enc.c 2010-01-29 21:18:10 UTC (rev 16852)
@@ -35,8 +35,8 @@
_enc->opt_vtable.frag_sad=oc_enc_frag_sad_mmxext;
_enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_mmxext;
_enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_mmxext;
- _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_mmxext;
- _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_mmxext;
+ _enc->opt_vtable.frag_satd=oc_enc_frag_satd_mmxext;
+ _enc->opt_vtable.frag_satd2=oc_enc_frag_satd2_mmxext;
_enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_mmxext;
_enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_mmxext;
}
Modified: experimental/derf/theora-ptalarbvorm/lib/x86_vc/x86enc.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86_vc/x86enc.h 2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/x86_vc/x86enc.h 2010-01-29 21:18:10 UTC (rev 16852)
@@ -29,12 +29,12 @@
unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src,
const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
unsigned _thresh);
-unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src,
- const unsigned char *_ref,int _ystride,unsigned _thresh);
-unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src,
- const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
- unsigned _thresh);
-unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,int _ystride);
+unsigned oc_enc_frag_satd_mmxext(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_satd2_mmxext(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride);
+unsigned oc_enc_frag_intra_satd_mmxext(unsigned *_dc,
+ const unsigned char *_src,int _ystride);
void oc_enc_frag_sub_mmx(ogg_int16_t _diff[64],
const unsigned char *_x,const unsigned char *_y,int _stride);
void oc_enc_frag_sub_128_mmx(ogg_int16_t _diff[64],
More information about the commits mailing list