[xiph-commits] r16852 - in experimental/derf/theora-ptalarbvorm/lib: . x86 x86_vc

tterribe at svn.xiph.org tterribe at svn.xiph.org
Fri Jan 29 13:18:10 PST 2010


Author: tterribe
Date: 2010-01-29 13:18:10 -0800 (Fri, 29 Jan 2010)
New Revision: 16852

Modified:
   experimental/derf/theora-ptalarbvorm/lib/analyze.c
   experimental/derf/theora-ptalarbvorm/lib/encfrag.c
   experimental/derf/theora-ptalarbvorm/lib/encint.h
   experimental/derf/theora-ptalarbvorm/lib/encode.c
   experimental/derf/theora-ptalarbvorm/lib/mcenc.c
   experimental/derf/theora-ptalarbvorm/lib/x86/mmxencfrag.c
   experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.c
   experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.h
   experimental/derf/theora-ptalarbvorm/lib/x86_vc/mmxencfrag.c
   experimental/derf/theora-ptalarbvorm/lib/x86_vc/x86enc.c
   experimental/derf/theora-ptalarbvorm/lib/x86_vc/x86enc.h
Log:
First step towards separating out DC and AC prediction error for better
 rate-distortion optimization (RDO).
This should produce bit-identical results to the previous version.


Modified: experimental/derf/theora-ptalarbvorm/lib/analyze.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/analyze.c	2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/analyze.c	2010-01-29 21:18:10 UTC (rev 16852)
@@ -732,14 +732,17 @@
 #if defined(OC_COLLECT_METRICS)
   {
     unsigned satd;
+    unsigned dc;
     switch(nmv_offs){
-      case 0:satd=oc_enc_frag_intra_satd(_enc,src,ystride);break;
+      case 0:satd=oc_enc_frag_intra_satd(_enc,&dc,src,ystride);break;
       case 1:{
-        satd=oc_enc_frag_satd_thresh(_enc,src,ref+mv_offs[0],ystride,UINT_MAX);
+        satd=oc_enc_frag_satd(_enc,&dc,src,ref+mv_offs[0],ystride);
+        satd+=dc;
       }break;
       default:{
-        satd=oc_enc_frag_satd_thresh(_enc,src,dst,ystride,UINT_MAX);
-      }
+        satd=oc_enc_frag_satd(_enc,&dc,src,dst,ystride);
+        satd+=dc;
+      }break;
     }
     _enc->frag_satd[_fragi]=satd;
   }
@@ -1286,6 +1289,7 @@
   unsigned             rate[4][3];
   int                  prev[3][3];
   unsigned             satd;
+  unsigned             dc;
   unsigned             best_cost;
   unsigned             best_ssd;
   unsigned             best_rate;
@@ -1301,7 +1305,7 @@
   ystride=_enc->state.ref_ystride[0];
   fragi=sb_maps[_mbi>>2][_mbi&3][0];
   frag_offs=frag_buf_offs[fragi];
-  satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
+  satd=oc_enc_frag_intra_satd(_enc,&dc,src+frag_offs,ystride);
   nqis=_enc->state.nqis;
   lambda=_enc->lambda;
   for(qii=0;qii<nqis;qii++){
@@ -1314,7 +1318,7 @@
   for(bi=1;bi<4;bi++){
     fragi=sb_maps[_mbi>>2][_mbi&3][bi];
     frag_offs=frag_buf_offs[fragi];
-    satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
+    satd=oc_enc_frag_intra_satd(_enc,&dc,src+frag_offs,ystride);
     for(qii=0;qii<nqis;qii++){
       oc_qii_state qt[3];
       unsigned     cur_ssd;
@@ -1379,6 +1383,7 @@
   oc_qii_state         qt[3];
   unsigned             cost[3];
   unsigned             satd;
+  unsigned             dc;
   unsigned             best_cost;
   int                  best_qii;
   int                  qii;
@@ -1388,7 +1393,7 @@
   src=_enc->state.ref_frame_data[OC_FRAME_IO];
   ystride=_enc->state.ref_ystride[_pli];
   frag_offs=_enc->state.frag_buf_offs[_fragi];
-  satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
+  satd=oc_enc_frag_intra_satd(_enc,&dc,src+frag_offs,ystride);
   /*Most chroma blocks have no AC coefficients to speak of anyway, so it's not
      worth spending the bits to change the AC quantizer.
     TODO: This may be worth revisiting when we separate out DC and AC
@@ -1893,6 +1898,7 @@
   int                    bi;
   ptrdiff_t              fragi;
   ptrdiff_t              frag_offs;
+  unsigned               dc;
   frag_buf_offs=_enc->state.frag_buf_offs;
   sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3];
   src=_enc->state.ref_frame_data[OC_FRAME_IO];
@@ -1900,7 +1906,7 @@
   for(bi=0;bi<4;bi++){
     fragi=sb_map[bi];
     frag_offs=frag_buf_offs[fragi];
-    _frag_satd[bi]=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
+    _frag_satd[bi]=oc_enc_frag_intra_satd(_enc,&dc,src+frag_offs,ystride);
   }
   mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
   map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
@@ -1913,7 +1919,7 @@
     bi=mapi&3;
     fragi=mb_map[pli][bi];
     frag_offs=frag_buf_offs[fragi];
-    _frag_satd[mapii]=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
+    _frag_satd[mapii]=oc_enc_frag_intra_satd(_enc,&dc,src+frag_offs,ystride);
   }
 }
 
@@ -1951,6 +1957,7 @@
   int                    bi;
   ptrdiff_t              fragi;
   ptrdiff_t              frag_offs;
+  unsigned               dc;
   src=_enc->state.ref_frame_data[OC_FRAME_IO];
   ref=_enc->state.ref_frame_data[
    _enc->state.ref_frame_idx[OC_FRAME_FOR_MODE(_mb_mode)]];
@@ -1964,16 +1971,18 @@
     for(bi=0;bi<4;bi++){
       fragi=sb_map[bi];
       frag_offs=frag_buf_offs[fragi];
-      frag_satd[bi]=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
-       ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
+      frag_satd[bi]=oc_enc_frag_satd2(_enc,&dc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride);
+      frag_satd[bi]+=dc;
     }
   }
   else{
     for(bi=0;bi<4;bi++){
       fragi=sb_map[bi];
       frag_offs=frag_buf_offs[fragi];
-      frag_satd[bi]=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
-       ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
+      frag_satd[bi]=oc_enc_frag_satd(_enc,&dc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ystride);
+      frag_satd[bi]+=dc;
     }
   }
   mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
@@ -1988,8 +1997,9 @@
       bi=mapi&3;
       fragi=mb_map[pli][bi];
       frag_offs=frag_buf_offs[fragi];
-      frag_satd[mapii]=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
-       ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
+      frag_satd[mapii]=oc_enc_frag_satd2(_enc,&dc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride);
+      frag_satd[mapii]+=dc;
     }
   }
   else{
@@ -1999,8 +2009,9 @@
       bi=mapi&3;
       fragi=mb_map[pli][bi];
       frag_offs=frag_buf_offs[fragi];
-      frag_satd[mapii]=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
-       ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
+      frag_satd[mapii]=oc_enc_frag_satd(_enc,&dc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ystride);
+      frag_satd[mapii]+=dc;
     }
   }
   oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,_rd_scale,1);
@@ -2064,6 +2075,7 @@
   int                    bits0;
   int                    bits1;
   unsigned               satd;
+  unsigned               dc;
   src=_enc->state.ref_frame_data[OC_FRAME_IO];
   ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]];
   ystride=_enc->state.ref_ystride[0];
@@ -2081,14 +2093,14 @@
     frag_mvs[fragi][1]=(signed char)dy;
     frag_offs=frag_buf_offs[fragi];
     if(oc_state_get_mv_offsets(&_enc->state,mv_offs,0,dx,dy)>1){
-      satd=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
-       ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
+      satd=oc_enc_frag_satd2(_enc,&dc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride);
     }
     else{
-      satd=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
-       ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
+      satd=oc_enc_frag_satd(_enc,&dc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ystride);
     }
-    frag_satd[OC_MB_PHASE[_mbi&3][bi]]=satd;
+    frag_satd[OC_MB_PHASE[_mbi&3][bi]]=satd+dc;
   }
   oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,frag_satd,
    _enc->vp3_compatible?OC_NOSKIP:_skip_ssd,_rd_scale,1);
@@ -2123,14 +2135,14 @@
     /*TODO: We could save half these calls by re-using the results for the Cb
        and Cr planes; is it worth it?*/
     if(oc_state_get_mv_offsets(&_enc->state,mv_offs,pli,dx,dy)>1){
-      satd=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
-       ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
+      satd=oc_enc_frag_satd2(_enc,&dc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride);
     }
     else{
-      satd=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
-       ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
+      satd=oc_enc_frag_satd(_enc,&dc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ystride);
     }
-    frag_satd[mapii]=satd;
+    frag_satd[mapii]=satd+dc;
   }
   oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,
    frag_satd,_skip_ssd,_rd_scale[4],1);

Modified: experimental/derf/theora-ptalarbvorm/lib/encfrag.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/encfrag.c	2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/encfrag.c	2010-01-29 21:18:10 UTC (rev 16852)
@@ -269,8 +269,9 @@
   }
 }
 
-unsigned oc_hadamard_sad_thresh(const ogg_int16_t _buf[64],unsigned _thresh){
+unsigned oc_hadamard_sad(unsigned *_dc,const ogg_int16_t _buf[64]){
   unsigned    sad;
+  unsigned    dc;
   int         t0;
   int         t1;
   int         t2;
@@ -281,7 +282,7 @@
   int         t7;
   int         r;
   int         i;
-  sad=0;
+  sad=dc=0;
   for(i=0;i<8;i++){
     /*Hadamard stage 1:*/
     t0=_buf[i*8+0]+_buf[i*8+4];
@@ -306,8 +307,8 @@
     t5+=t7;
     t7=r-t7;
     /*Hadamard stage 3:*/
-    r=abs(t0+t1);
-    r+=abs(t0-t1);
+    dc+=abs(t0+t1);
+    r=abs(t0-t1);
     r+=abs(t2+t3);
     r+=abs(t2-t3);
     r+=abs(t4+t5);
@@ -315,49 +316,46 @@
     r+=abs(t6+t7);
     r+=abs(t6-t7);
     sad+=r;
-    if(sad>_thresh)break;
   }
+  *_dc=dc;
   return sad;
 }
 
-unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc,
- const unsigned char *_src,const unsigned char *_ref,int _ystride,
- unsigned _thresh){
-  return (*_enc->opt_vtable.frag_satd_thresh)(_src,_ref,_ystride,_thresh);
+unsigned oc_enc_frag_satd(const oc_enc_ctx *_enc,unsigned *_dc,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride){
+  return (*_enc->opt_vtable.frag_satd)(_dc,_src,_ref,_ystride);
 }
 
-unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src,
- const unsigned char *_ref,int _ystride,unsigned _thresh){
+unsigned oc_enc_frag_satd_c(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
   ogg_int16_t buf[64];
   oc_diff_hadamard(buf,_src,_ref,_ystride);
-  return oc_hadamard_sad_thresh(buf,_thresh);
+  return oc_hadamard_sad(_dc,buf);
 }
 
-unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc,
+unsigned oc_enc_frag_satd2(const oc_enc_ctx *_enc,unsigned *_dc,
  const unsigned char *_src,const unsigned char *_ref1,
- const unsigned char *_ref2,int _ystride,unsigned _thresh){
-  return (*_enc->opt_vtable.frag_satd2_thresh)(_src,_ref1,_ref2,_ystride,
-   _thresh);
+ const unsigned char *_ref2,int _ystride){
+  return (*_enc->opt_vtable.frag_satd2)(_dc,_src,_ref1,_ref2,_ystride);
 }
 
-unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src,
- const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
- unsigned _thresh){
+unsigned oc_enc_frag_satd2_c(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride){
   ogg_int16_t buf[64];
   oc_diff_hadamard2(buf,_src,_ref1,_ref2,_ystride);
-  return oc_hadamard_sad_thresh(buf,_thresh);
+  return oc_hadamard_sad(_dc,buf);
 }
 
-unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc,
+unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc,unsigned *_dc,
  const unsigned char *_src,int _ystride){
-  return (*_enc->opt_vtable.frag_intra_satd)(_src,_ystride);
+  return (*_enc->opt_vtable.frag_intra_satd)(_dc,_src,_ystride);
 }
 
-unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride){
+unsigned oc_enc_frag_intra_satd_c(unsigned *_dc,
+ const unsigned char *_src,int _ystride){
   ogg_int16_t buf[64];
   oc_intra_hadamard(buf,_src,_ystride);
-  return oc_hadamard_sad_thresh(buf,UINT_MAX)
-   -abs(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]);
+  return oc_hadamard_sad(_dc,buf);
 }
 
 void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst,

Modified: experimental/derf/theora-ptalarbvorm/lib/encint.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/encint.h	2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/encint.h	2010-01-29 21:18:10 UTC (rev 16852)
@@ -82,12 +82,12 @@
   unsigned (*frag_sad2_thresh)(const unsigned char *_src,
    const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
    unsigned _thresh);
-  unsigned (*frag_satd_thresh)(const unsigned char *_src,
-   const unsigned char *_ref,int _ystride,unsigned _thresh);
-  unsigned (*frag_satd2_thresh)(const unsigned char *_src,
-   const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
-   unsigned _thresh);
-  unsigned (*frag_intra_satd)(const unsigned char *_src,int _ystride);
+  unsigned (*frag_satd)(unsigned *_dc,const unsigned char *_src,
+   const unsigned char *_ref,int _ystride);
+  unsigned (*frag_satd2)(unsigned *_dc,const unsigned char *_src,
+   const unsigned char *_ref1,const unsigned char *_ref2,int _ystride);
+  unsigned (*frag_intra_satd)(unsigned *_dc,const unsigned char *_src,
+   int _ystride);
   void     (*frag_sub)(ogg_int16_t _diff[64],const unsigned char *_src,
    const unsigned char *_ref,int _ystride);
   void     (*frag_sub_128)(ogg_int16_t _diff[64],
@@ -453,13 +453,12 @@
 unsigned oc_enc_frag_sad2_thresh(const oc_enc_ctx *_enc,
  const unsigned char *_src,const unsigned char *_ref1,
  const unsigned char *_ref2,int _ystride,unsigned _thresh);
-unsigned oc_enc_frag_satd_thresh(const oc_enc_ctx *_enc,
- const unsigned char *_src,const unsigned char *_ref,int _ystride,
- unsigned _thresh);
-unsigned oc_enc_frag_satd2_thresh(const oc_enc_ctx *_enc,
+unsigned oc_enc_frag_satd(const oc_enc_ctx *_enc,unsigned *_dc,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_satd2(const oc_enc_ctx *_enc,unsigned *_dc,
  const unsigned char *_src,const unsigned char *_ref1,
- const unsigned char *_ref2,int _ystride,unsigned _thresh);
-unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc,
+ const unsigned char *_ref2,int _ystride);
+unsigned oc_enc_frag_intra_satd(const oc_enc_ctx *_enc,unsigned *_dc,
  const unsigned char *_src,int _ystride);
 void oc_enc_frag_copy2(const oc_enc_ctx *_enc,unsigned char *_dst,
  const unsigned char *_src1,const unsigned char *_src2,int _ystride);
@@ -486,12 +485,12 @@
 unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src,
  const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
  unsigned _thresh);
-unsigned oc_enc_frag_satd_thresh_c(const unsigned char *_src,
- const unsigned char *_ref,int _ystride,unsigned _thresh);
-unsigned oc_enc_frag_satd2_thresh_c(const unsigned char *_src,
- const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
- unsigned _thresh);
-unsigned oc_enc_frag_intra_satd_c(const unsigned char *_src,int _ystride);
+unsigned oc_enc_frag_satd_c(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_satd2_c(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride);
+unsigned oc_enc_frag_intra_satd_c(unsigned *_dc,const unsigned char *_src,
+ int _ystride);
 void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
 
 #endif

Modified: experimental/derf/theora-ptalarbvorm/lib/encode.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/encode.c	2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/encode.c	2010-01-29 21:18:10 UTC (rev 16852)
@@ -894,8 +894,8 @@
   _enc->opt_vtable.frag_sad=oc_enc_frag_sad_c;
   _enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_c;
   _enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_c;
-  _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_c;
-  _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_c;
+  _enc->opt_vtable.frag_satd=oc_enc_frag_satd_c;
+  _enc->opt_vtable.frag_satd2=oc_enc_frag_satd2_c;
   _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_c;
   _enc->opt_vtable.frag_sub=oc_enc_frag_sub_c;
   _enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_c;

Modified: experimental/derf/theora-ptalarbvorm/lib/mcenc.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/mcenc.c	2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/mcenc.c	2010-01-29 21:18:10 UTC (rev 16852)
@@ -177,13 +177,15 @@
  int _mvoffset0,int _mvoffset1,const unsigned char *_src,
  const unsigned char *_ref,int _ystride,unsigned _best_err){
   unsigned err;
+  unsigned dc;
   int      bi;
   err=0;
   for(bi=0;bi<4;bi++){
     ptrdiff_t frag_offs;
     frag_offs=_frag_buf_offs[_fragis[bi]];
-    err+=oc_enc_frag_satd2_thresh(_enc,_src+frag_offs,_ref+frag_offs+_mvoffset0,
-     _ref+frag_offs+_mvoffset1,_ystride,_best_err-err);
+    err+=oc_enc_frag_satd2(_enc,&dc,_src+frag_offs,
+     _ref+frag_offs+_mvoffset0,_ref+frag_offs+_mvoffset1,_ystride);
+    err+=dc;
   }
   return err;
 }
@@ -219,9 +221,11 @@
   err=0;
   for(bi=0;bi<4;bi++){
     ptrdiff_t frag_offs;
+    unsigned  dc;
     frag_offs=_frag_buf_offs[_fragis[bi]];
-    err+=oc_enc_frag_satd_thresh(_enc,
-     _src+frag_offs,_ref+frag_offs+mvoffset,_ystride,UINT_MAX);
+    err+=oc_enc_frag_satd(_enc,&dc,
+     _src+frag_offs,_ref+frag_offs+mvoffset,_ystride);
+    err+=dc;
   }
   return err;
 }
@@ -229,8 +233,11 @@
 static unsigned oc_mcenc_ysatd_check_bcandidate_fullpel(const oc_enc_ctx *_enc,
  ptrdiff_t _frag_offs,int _dx,int _dy,
  const unsigned char *_src,const unsigned char *_ref,int _ystride){
-  return oc_enc_frag_satd_thresh(_enc,
-   _src+_frag_offs,_ref+_frag_offs+_dx+_dy*_ystride,_ystride,UINT_MAX);
+  unsigned err;
+  unsigned dc;
+  err=oc_enc_frag_satd(_enc,&dc,
+   _src+_frag_offs,_ref+_frag_offs+_dx+_dy*_ystride,_ystride);
+  return err+dc;
 }
 
 /*Perform a motion vector search for this macro block against a single
@@ -704,6 +711,7 @@
   best_site=4;
   for(sitei=0;sitei<8;sitei++){
     unsigned err;
+    unsigned dc;
     int      site;
     int      xmask;
     int      ymask;
@@ -723,8 +731,9 @@
     ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
     mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask);
     mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask);
-    err=oc_enc_frag_satd2_thresh(_enc,_src,
-     _ref+mvoffset0,_ref+mvoffset1,_ystride,_best_err);
+    err=oc_enc_frag_satd2(_enc,&dc,_src,
+     _ref+mvoffset0,_ref+mvoffset1,_ystride);
+    err+=dc;
     if(err<_best_err){
       _best_err=err;
       best_site=site;

Modified: experimental/derf/theora-ptalarbvorm/lib/x86/mmxencfrag.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86/mmxencfrag.c	2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/x86/mmxencfrag.c	2010-01-29 21:18:10 UTC (rev 16852)
@@ -447,12 +447,14 @@
    mm6 = d2 c2 b2 a2 \
    mm7 = d3 c3 b3 a3*/ \
 
-static unsigned oc_int_frag_satd_thresh_mmxext(const unsigned char *_src,
- int _src_ystride,const unsigned char *_ref,int _ref_ystride,unsigned _thresh){
-  OC_ALIGN8(ogg_int16_t  buf[64]);
+static unsigned oc_int_frag_satd_mmxext(unsigned *_dc,
+ const unsigned char *_src,int _src_ystride,
+ const unsigned char *_ref,int _ref_ystride){
+  OC_ALIGN8(ogg_int16_t buf[64]);
   ogg_int16_t *bufp;
   unsigned     ret;
   unsigned     ret2;
+  unsigned     dc;
   bufp=buf;
   __asm__ __volatile__(
     OC_LOAD_SUB_8x4("0x00")
@@ -475,14 +477,18 @@
     "movq 0x20(%[buf]),%%mm2\n\t"
     "movq 0x30(%[buf]),%%mm3\n\t"
     "movq 0x00(%[buf]),%%mm0\n\t"
-    OC_HADAMARD_ABS_ACCUM_8x4("0x28","0x38")
+    /*We split out the stages here so we can save the DC coefficient in the
+       middle.*/
+    OC_HADAMARD_AB_8x4
+    OC_HADAMARD_C_ABS_ACCUM_A_8x4("0x28","0x38")
+    "movd %%mm1,%[dc]\n\t"
+    OC_HADAMARD_C_ABS_ACCUM_B_8x4("0x28","0x38")
     /*Up to this point, everything fit in 16 bits (8 input + 1 for the
        difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1
        for the factor of two we dropped + 3 for the vertical accumulation).
       Now we finally have to promote things to dwords.
       We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long
        latency of pmaddwd by starting the next series of loads now.*/
-    "mov %[thresh],%[ret2]\n\t"
     "pmaddwd %%mm7,%%mm0\n\t"
     "movq 0x50(%[buf]),%%mm1\n\t"
     "movq 0x58(%[buf]),%%mm5\n\t"
@@ -492,29 +498,27 @@
     "movq 0x68(%[buf]),%%mm6\n\t"
     "paddd %%mm0,%%mm4\n\t"
     "movq 0x70(%[buf]),%%mm3\n\t"
-    "movd %%mm4,%[ret]\n\t"
+    "movd %%mm4,%[ret2]\n\t"
     "movq 0x78(%[buf]),%%mm7\n\t"
     /*The sums produced by OC_HADAMARD_ABS_ACCUM_8x4 each have an extra 4
        added to them, and a factor of two removed; correct the final sum here.*/
-    "lea -32(%[ret],%[ret]),%[ret]\n\t"
     "movq 0x40(%[buf]),%%mm0\n\t"
-    "cmp %[ret2],%[ret]\n\t"
     "movq 0x48(%[buf]),%%mm4\n\t"
-    "jae 1f\n\t"
     OC_HADAMARD_ABS_ACCUM_8x4("0x68","0x78")
     "pmaddwd %%mm7,%%mm0\n\t"
-    /*There isn't much to stick in here to hide the latency this time, but the
-       alternative to pmaddwd is movq->punpcklwd->punpckhwd->paddd, whose
-       latency is even worse.*/
-    "sub $32,%[ret]\n\t"
+    /*Compute abs(dc).*/
+    "movsx %w[dc],%[ret]\n\t"
+    "cdq\n\t"
+    "add %[ret2],%[ret2]\n\t"
+    "add %[dc],%[ret]\n\t"
     "movq %%mm0,%%mm4\n\t"
     "punpckhdq %%mm0,%%mm0\n\t"
+    "xor %[ret],%[dc]\n\t"
     "paddd %%mm0,%%mm4\n\t"
-    "movd %%mm4,%[ret2]\n\t"
-    "lea (%[ret],%[ret2],2),%[ret]\n\t"
-    ".p2align 4,,15\n\t"
-    "1:\n\t"
-    /*Although it looks like we're using 7 registers here, gcc can alias %[ret]
+    "sub %[dc],%[ret2]\n\t"
+    "movd %%mm4,%[ret]\n\t"
+    "lea -64(%[ret2],%[ret],2),%[ret]\n\t"
+    /*Although it looks like we're using 8 registers here, gcc can alias %[ret]
        and %[ret2] with some of the inputs, since for once we don't write to
        them until after we're done using everything but %[buf] (which is also
        listed as an output to ensure gcc _doesn't_ alias them against it).*/
@@ -522,24 +526,24 @@
        constraints, otherewise if gcc can prove they're equal it will allocate
        them to the same register (which is bad); _src and _ref face a similar
        problem, though those are never actually the same.*/
-    :[ret]"=a"(ret),[ret2]"=r"(ret2),[buf]"+r"(bufp)
+    :[ret]"=a"(ret),[ret2]"=r"(ret2),[dc]"=d"(dc),[buf]"+r"(bufp)
     :[src]"r"(_src),[src_ystride]"c"((ptrdiff_t)_src_ystride),
-     [ref]"r"(_ref),[ref_ystride]"d"((ptrdiff_t)_ref_ystride),
-     [thresh]"m"(_thresh)
+     [ref]"r"(_ref),[ref_ystride]"d"((ptrdiff_t)_ref_ystride)
     /*We have to use neg, so we actually clobber the condition codes for once
        (not to mention cmp, sub, and add).*/
     :"cc"
   );
+  *_dc=dc;
   return ret;
 }
 
-unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src,
- const unsigned char *_ref,int _ystride,unsigned _thresh){
-  return oc_int_frag_satd_thresh_mmxext(_src,_ystride,_ref,_ystride,_thresh);
+unsigned oc_enc_frag_satd_mmxext(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
+  return oc_int_frag_satd_mmxext(_dc,_src,_ystride,_ref,_ystride);
 }
 
 /*Our internal implementation of frag_copy2 takes an extra stride parameter so
-   we can share code with oc_enc_frag_satd2_thresh_mmxext().*/
+   we can share code with oc_enc_frag_satd2_mmxext().*/
 static void oc_int_frag_copy2_mmxext(unsigned char *_dst,int _dst_ystride,
  const unsigned char *_src1,const unsigned char *_src2,int _src_ystride){
   __asm__ __volatile__(
@@ -656,20 +660,20 @@
   );
 }
 
-unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src,
- const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
- unsigned _thresh){
+unsigned oc_enc_frag_satd2_mmxext(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride){
   OC_ALIGN8(unsigned char ref[64]);
   oc_int_frag_copy2_mmxext(ref,8,_ref1,_ref2,_ystride);
-  return oc_int_frag_satd_thresh_mmxext(_src,_ystride,ref,8,_thresh);
+  return oc_int_frag_satd_mmxext(_dc,_src,_ystride,ref,8);
 }
 
-unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,
- int _ystride){
-  OC_ALIGN8(ogg_int16_t  buf[64]);
+unsigned oc_enc_frag_intra_satd_mmxext(unsigned *_dc,
+ const unsigned char *_src,int _ystride){
+  OC_ALIGN8(ogg_int16_t buf[64]);
   ogg_int16_t *bufp;
   unsigned     ret;
   unsigned     ret2;
+  unsigned     dc;
   bufp=buf;
   __asm__ __volatile__(
     OC_LOAD_8x4("0x00")
@@ -696,7 +700,7 @@
        middle.*/
     OC_HADAMARD_AB_8x4
     OC_HADAMARD_C_ABS_ACCUM_A_8x4("0x28","0x38")
-    "movd %%mm1,%[ret]\n\t"
+    "movd %%mm1,%[dc]\n\t"
     OC_HADAMARD_C_ABS_ACCUM_B_8x4("0x28","0x38")
     /*Up to this point, everything fit in 16 bits (8 input + 1 for the
        difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1
@@ -714,32 +718,33 @@
     "movq 0x70(%[buf]),%%mm3\n\t"
     "paddd %%mm0,%%mm4\n\t"
     "movq 0x78(%[buf]),%%mm7\n\t"
-    "movd %%mm4,%[ret2]\n\t"
+    "movd %%mm4,%[ret]\n\t"
     "movq 0x40(%[buf]),%%mm0\n\t"
     "movq 0x48(%[buf]),%%mm4\n\t"
     OC_HADAMARD_ABS_ACCUM_8x4("0x68","0x78")
     "pmaddwd %%mm7,%%mm0\n\t"
     /*We assume that the DC coefficient is always positive (which is true,
        because the input to the INTRA transform was not a difference).*/
-    "movzx %w[ret],%[ret]\n\t"
-    "add %[ret2],%[ret2]\n\t"
-    "sub %[ret],%[ret2]\n\t"
+    "movzx %w[dc],%[dc]\n\t"
+    "add %[ret],%[ret]\n\t"
+    "sub %[dc],%[ret]\n\t"
     "movq %%mm0,%%mm4\n\t"
     "punpckhdq %%mm0,%%mm0\n\t"
     "paddd %%mm0,%%mm4\n\t"
-    "movd %%mm4,%[ret]\n\t"
-    "lea -64(%[ret2],%[ret],2),%[ret]\n\t"
-    /*Although it looks like we're using 7 registers here, gcc can alias %[ret]
+    "movd %%mm4,%[ret2]\n\t"
+    "lea -64(%[ret],%[ret2],2),%[ret]\n\t"
+    /*Although it looks like we're using 8 registers here, gcc can alias %[ret]
        and %[ret2] with some of the inputs, since for once we don't write to
        them until after we're done using everything but %[buf] (which is also
        listed as an output to ensure gcc _doesn't_ alias them against it).*/
-    :[ret]"=a"(ret),[ret2]"=r"(ret2),[buf]"+r"(bufp)
+    :[ret]"=a"(ret),[ret2]"=r"(ret2),[dc]"=r"(dc),[buf]"+r"(bufp)
     :[src]"r"(_src),[src4]"r"(_src+4*_ystride),
      [ystride]"r"((ptrdiff_t)_ystride),[ystride3]"r"((ptrdiff_t)3*_ystride)
     /*We have to use sub, so we actually clobber the condition codes for once
        (not to mention add).*/
     :"cc"
   );
+  *_dc=dc;
   return ret;
 }
 

Modified: experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.c	2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.c	2010-01-29 21:18:10 UTC (rev 16852)
@@ -35,8 +35,8 @@
     _enc->opt_vtable.frag_sad=oc_enc_frag_sad_mmxext;
     _enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_mmxext;
     _enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_mmxext;
-    _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_mmxext;
-    _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_mmxext;
+    _enc->opt_vtable.frag_satd=oc_enc_frag_satd_mmxext;
+    _enc->opt_vtable.frag_satd2=oc_enc_frag_satd2_mmxext;
     _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_mmxext;
     _enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_mmxext;
   }

Modified: experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.h	2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/x86/x86enc.h	2010-01-29 21:18:10 UTC (rev 16852)
@@ -29,12 +29,12 @@
 unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src,
  const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
  unsigned _thresh);
-unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src,
- const unsigned char *_ref,int _ystride,unsigned _thresh);
-unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src,
- const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
- unsigned _thresh);
-unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,int _ystride);
+unsigned oc_enc_frag_satd_mmxext(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_satd2_mmxext(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride);
+unsigned oc_enc_frag_intra_satd_mmxext(unsigned *_dc,
+ const unsigned char *_src,int _ystride);
 void oc_enc_frag_sub_mmx(ogg_int16_t _diff[64],
  const unsigned char *_x,const unsigned char *_y,int _stride);
 void oc_enc_frag_sub_128_mmx(ogg_int16_t _diff[64],

Modified: experimental/derf/theora-ptalarbvorm/lib/x86_vc/mmxencfrag.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86_vc/mmxencfrag.c	2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/x86_vc/mmxencfrag.c	2010-01-29 21:18:10 UTC (rev 16852)
@@ -468,12 +468,14 @@
     mm7 = d3 c3 b3 a3*/ \
 }
 
-static unsigned oc_int_frag_satd_thresh_mmxext(const unsigned char *_src,
- int _src_ystride,const unsigned char *_ref,int _ref_ystride,unsigned _thresh){
-  OC_ALIGN8(ogg_int16_t  buf[64]);
-  ogg_int16_t           *bufp;
-  unsigned               ret1;
-  unsigned               ret2;
+static unsigned oc_int_frag_satd_mmxext(unsigned *_dc,
+ const unsigned char *_src,int _src_ystride,
+ const unsigned char *_ref,int _ref_ystride){
+  OC_ALIGN8(ogg_int16_t buf[64]);
+  ogg_int16_t *bufp;
+  unsigned     ret1;
+  unsigned     ret2;
+  unsigned     dc;
   bufp=buf;
   __asm{
 #define SRC esi
@@ -482,7 +484,9 @@
 #define REF_YSTRIDE edx
 #define BUF edi
 #define RET eax
-#define RET2 edx
+#define RET2 ecx
+#define DC edx
+#define DC_WORD dx
     mov SRC,_src
     mov SRC_YSTRIDE,_src_ystride
     mov REF,_ref
@@ -508,14 +512,18 @@
     movq mm2,[0x20+BUF]
     movq mm3,[0x30+BUF]
     movq mm0,[0x00+BUF]
-    OC_HADAMARD_ABS_ACCUM_8x4(0x28,0x38)
+    /*We split out the stages here so we can save the DC coefficient in the
+       middle.*/
+    OC_HADAMARD_AB_8x4
+    OC_HADAMARD_C_ABS_ACCUM_A_8x4(0x28,0x38)
+    movd DC,mm1
+    OC_HADAMARD_C_ABS_ACCUM_B_8x4(0x28,0x38)
     /*Up to this point, everything fit in 16 bits (8 input + 1 for the
        difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1
        for the factor of two we dropped + 3 for the vertical accumulation).
       Now we finally have to promote things to dwords.
       We break this part out of OC_HADAMARD_ABS_ACCUM_8x4 to hide the long
        latency of pmaddwd by starting the next series of loads now.*/
-    mov RET2,_thresh
     pmaddwd mm0,mm7
     movq mm1,[0x50+BUF]
     movq mm5,[0x58+BUF]
@@ -525,29 +533,29 @@
     movq mm6,[0x68+BUF]
     paddd mm4,mm0
     movq mm3,[0x70+BUF]
-    movd RET,mm4
+    movd RET2,mm4
     movq mm7,[0x78+BUF]
     /*The sums produced by OC_HADAMARD_ABS_ACCUM_8x4 each have an extra 4
        added to them, and a factor of two removed; correct the final sum here.*/
     lea RET,[RET+RET-32]
     movq mm0,[0x40+BUF]
-    cmp RET,RET2
     movq mm4,[0x48+BUF]
-    jae at_end
     OC_HADAMARD_ABS_ACCUM_8x4(0x68,0x78)
     pmaddwd mm0,mm7
-    /*There isn't much to stick in here to hide the latency this time, but the
-       alternative to pmaddwd is movq->punpcklwd->punpckhwd->paddd, whose
-       latency is even worse.*/
-    sub RET,32
+    /*Compute abs(dc).*/
+    movsx RET,DC_WORD
+    cdq
+    add RET2,RET2
+    add RET,DC
     movq mm4,mm0
     punpckhdq mm0,mm0
+    xor DC,RET
     paddd mm4,mm0
-    movd RET2,mm4
-    lea RET,[RET+RET2*2]
-    align 16
-at_end:
+    sub RET2,DC
+    movd RET,mm4
+    lea RET,[RET2+RET*2-64]
     mov ret1,RET
+    mov dc,DC
 #undef SRC
 #undef REF
 #undef SRC_YSTRIDE
@@ -555,18 +563,21 @@
 #undef BUF
 #undef RET
 #undef RET2
+#undef DC
+#undef DC_WORD
   }
+  *_dc=dc;
   return ret1;
 }
 
-unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src,
- const unsigned char *_ref,int _ystride,unsigned _thresh){
-  return oc_int_frag_satd_thresh_mmxext(_src,_ystride,_ref,_ystride,_thresh);
+unsigned oc_enc_frag_satd_mmxext(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
+  return oc_int_frag_satd_mmxext(_dc,_src,_ystride,_ref,_ystride);
 }
 
 
 /*Our internal implementation of frag_copy2 takes an extra stride parameter so
-   we can share code with oc_enc_frag_satd2_thresh_mmxext().*/
+   we can share code with oc_enc_frag_satd2_mmxext().*/
 static void oc_int_frag_copy2_mmxext(unsigned char *_dst,int _dst_ystride,
  const unsigned char *_src1,const unsigned char *_src2,int _src_ystride){
   __asm{
@@ -694,30 +705,31 @@
   }
 }
 
-unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src,
- const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
- unsigned _thresh){
+unsigned oc_enc_frag_satd2_mmxext(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride){
   OC_ALIGN8(unsigned char ref[64]);
   oc_int_frag_copy2_mmxext(ref,8,_ref1,_ref2,_ystride);
-  return oc_int_frag_satd_thresh_mmxext(_src,_ystride,ref,8,_thresh);
+  return oc_int_frag_satd_mmxext(_dc,_src,_ystride,ref,8);
 }
 
-unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,
+unsigned oc_enc_frag_intra_satd_mmxext(unsigned *_dc,const unsigned char *_src,
  int _ystride){
-  OC_ALIGN8(ogg_int16_t  buf[64]);
-  ogg_int16_t           *bufp;
-  unsigned               ret1;
-  unsigned               ret2;
+  OC_ALIGN8(ogg_int16_t buf[64]);
+  ogg_int16_t *bufp;
+  unsigned     ret1;
+  unsigned     ret2;
+  unsigned     dc;
   bufp=buf;
   __asm{
 #define SRC eax
 #define SRC4 esi
 #define BUF edi
+#define YSTRIDE edx
+#define YSTRIDE3 ecx
 #define RET eax
-#define RET_WORD ax
 #define RET2 ecx
-#define YSTRIDE edx
-#define YSTRIDE3 ecx
+#define DC edx
+#define DC_WORD dx
     mov SRC,_src
     mov BUF,bufp
     mov YSTRIDE,_ystride
@@ -749,7 +761,7 @@
       middle.*/
     OC_HADAMARD_AB_8x4
     OC_HADAMARD_C_ABS_ACCUM_A_8x4(0x28,0x38)
-    movd RET,mm1
+    movd DC,mm1
     OC_HADAMARD_C_ABS_ACCUM_B_8x4(0x28,0x38)
     /*Up to this point, everything fit in 16 bits (8 input + 1 for the
       difference + 2*3 for the two 8-point 1-D Hadamards - 1 for the abs - 1
@@ -767,31 +779,34 @@
     movq mm3,[0x70+BUF]
     paddd mm4,mm0
     movq mm7,[0x78+BUF]
-    movd RET2,mm4
+    movd RET,mm4
     movq mm0,[0x40+BUF]
     movq mm4,[0x48+BUF]
     OC_HADAMARD_ABS_ACCUM_8x4(0x68,0x78)
     pmaddwd mm0,mm7
     /*We assume that the DC coefficient is always positive (which is true,
     because the input to the INTRA transform was not a difference).*/
-    movzx RET,RET_WORD
-    add RET2,RET2
-    sub RET2,RET
+    movzx DC,DC_WORD
+    add RET,RET
+    sub RET,DC
     movq mm4,mm0
     punpckhdq mm0,mm0
     paddd mm4,mm0
-    movd RET,mm4
-    lea RET,[-64+RET2+RET*2]
+    movd RET2,mm4
+    lea RET,[-64+RET+RET2*2]
+    mov [dc],DC
     mov [ret1],RET
 #undef SRC
 #undef SRC4
 #undef BUF
+#undef YSTRIDE
+#undef YSTRIDE3
 #undef RET
-#undef RET_WORD
 #undef RET2
-#undef YSTRIDE
-#undef YSTRIDE3
+#undef DC
+#undef DC_WORD
   }
+  *_dc=dc;
   return ret1;
 }
 

Modified: experimental/derf/theora-ptalarbvorm/lib/x86_vc/x86enc.c
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86_vc/x86enc.c	2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/x86_vc/x86enc.c	2010-01-29 21:18:10 UTC (rev 16852)
@@ -35,8 +35,8 @@
     _enc->opt_vtable.frag_sad=oc_enc_frag_sad_mmxext;
     _enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_mmxext;
     _enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_mmxext;
-    _enc->opt_vtable.frag_satd_thresh=oc_enc_frag_satd_thresh_mmxext;
-    _enc->opt_vtable.frag_satd2_thresh=oc_enc_frag_satd2_thresh_mmxext;
+    _enc->opt_vtable.frag_satd=oc_enc_frag_satd_mmxext;
+    _enc->opt_vtable.frag_satd2=oc_enc_frag_satd2_mmxext;
     _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_mmxext;
     _enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_mmxext;
   }

Modified: experimental/derf/theora-ptalarbvorm/lib/x86_vc/x86enc.h
===================================================================
--- experimental/derf/theora-ptalarbvorm/lib/x86_vc/x86enc.h	2010-01-29 14:35:34 UTC (rev 16851)
+++ experimental/derf/theora-ptalarbvorm/lib/x86_vc/x86enc.h	2010-01-29 21:18:10 UTC (rev 16852)
@@ -29,12 +29,12 @@
 unsigned oc_enc_frag_sad2_thresh_mmxext(const unsigned char *_src,
  const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
  unsigned _thresh);
-unsigned oc_enc_frag_satd_thresh_mmxext(const unsigned char *_src,
- const unsigned char *_ref,int _ystride,unsigned _thresh);
-unsigned oc_enc_frag_satd2_thresh_mmxext(const unsigned char *_src,
- const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
- unsigned _thresh);
-unsigned oc_enc_frag_intra_satd_mmxext(const unsigned char *_src,int _ystride);
+unsigned oc_enc_frag_satd_mmxext(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_satd2_mmxext(unsigned *_dc,const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride);
+unsigned oc_enc_frag_intra_satd_mmxext(unsigned *_dc,
+ const unsigned char *_src,int _ystride);
 void oc_enc_frag_sub_mmx(ogg_int16_t _diff[64],
  const unsigned char *_x,const unsigned char *_y,int _stride);
 void oc_enc_frag_sub_128_mmx(ogg_int16_t _diff[64],



More information about the commits mailing list