[xiph-commits] r14652 - branches/theora-thusnelda/lib/enc

xiphmont at svn.xiph.org xiphmont at svn.xiph.org
Wed Apr 2 15:38:58 PDT 2008


Author: xiphmont
Date: 2008-04-02 15:38:58 -0700 (Wed, 02 Apr 2008)
New Revision: 14652

Modified:
   branches/theora-thusnelda/lib/enc/codec_internal.h
   branches/theora-thusnelda/lib/enc/mcenc.c
   branches/theora-thusnelda/lib/enc/mode.c
Log:
Second attempt at the halfpel optimization; it is not the win I thought it
would be, and it also carries a bitrate penalty.



Modified: branches/theora-thusnelda/lib/enc/codec_internal.h
===================================================================
--- branches/theora-thusnelda/lib/enc/codec_internal.h	2008-04-02 21:31:38 UTC (rev 14651)
+++ branches/theora-thusnelda/lib/enc/codec_internal.h	2008-04-02 22:38:58 UTC (rev 14652)
@@ -328,12 +328,23 @@
 extern void oc_mcenc_start(CP_INSTANCE *cpi,
 			   mc_state *mcenc);
 
-extern void oc_mcenc_search(CP_INSTANCE *cpi, 
-			    mc_state *_mcenc,
-			    int _mbi,
-			    int _goldenp,
-			    mv_t *_bmvs);
+extern int oc_mcenc_search(CP_INSTANCE *cpi, 
+			   mc_state *_mcenc,
+			   int _mbi,
+			   int _goldenp,
+			   mv_t *_bmvs,
+			   int *best_err,
+			   int best_block_err[4]);
 
+extern void oc_mcenc_refine1mv(CP_INSTANCE *cpi, 
+			      int _mbi,
+			      int _goldenp,
+			      int err);
+
+extern void oc_mcenc_refine4mv(CP_INSTANCE *cpi, 
+			      int _mbi,
+			      int err[4]);
+
 extern int PickModes(CP_INSTANCE *cpi, int recode);
 
 extern void InitFrameInfo(CP_INSTANCE *cpi);

Modified: branches/theora-thusnelda/lib/enc/mcenc.c
===================================================================
--- branches/theora-thusnelda/lib/enc/mcenc.c	2008-04-02 21:31:38 UTC (rev 14651)
+++ branches/theora-thusnelda/lib/enc/mcenc.c	2008-04-02 22:38:58 UTC (rev 14652)
@@ -246,7 +246,6 @@
 }
 
 static int oc_mcenc_ysad_halfpel_mbrefine(CP_INSTANCE *cpi, 
-					  mc_state *_mcenc,
 					  int _mbi,
 					  mv_t *_vec,
 					  int _best_err,
@@ -278,11 +277,7 @@
     site=OC_SQUARE_SITES[0][sitei];
     dx=OC_SQUARE_DX[site];
     dy=OC_SQUARE_DY[site];
-    /*The following code SHOULD be equivalent to
-      oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
-       (_vec->x<<1)+dx,(_vec->y<<1)+dy,ref_ystride,0);
-      However, it should also be much faster, as it involves no multiplies and
-       doesn't have to handle chroma vectors.*/
+
     xmask=-((((_vec->x<<1)+dx)^dx)<0);
     ymask=-((((_vec->y<<1)+dy)^dy)<0);
     mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask);
@@ -301,7 +296,6 @@
 }
 
 static int oc_mcenc_ysad_halfpel_brefine(CP_INSTANCE *cpi, 
-					 mc_state *_mcenc,
 					 int _mbi,
 					 int _bi,
 					 mv_t *_vec,
@@ -341,11 +335,7 @@
     site=OC_SQUARE_SITES[0][sitei];
     dx=OC_SQUARE_DX[site];
     dy=OC_SQUARE_DY[site];
-    /*The following code SHOULD be equivalent to
-      oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
-       (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
-      However, it should also be much faster, as it involves no multiplies and
-       doesn't have to handle chroma vectors.*/
+
     xmask=-((((_vec->x<<1)+dx)^dx)<0);
     ymask=-((((_vec->y<<1)+dy)^dy)<0);
     mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask);
@@ -378,11 +368,13 @@
    _frame:    The frame to search, either OC_FRAME_PREV or OC_FRAME_GOLD.
    _bmvs:     Returns the individual block motion vectors. */
 
-void oc_mcenc_search(CP_INSTANCE *cpi, 
-		     mc_state *_mcenc,
-		     int _mbi,
-		     int _goldenp,
-		     mv_t *_bmvs){
+int oc_mcenc_search(CP_INSTANCE *cpi, 
+		    mc_state *_mcenc,
+		    int _mbi,
+		    int _goldenp,
+		    mv_t *_bmvs,
+		    int *best_err,
+		    int best_block_err[4]){
   
   /*TODO: customize error function for speed/(quality+size) tradeoff.*/
 
@@ -390,8 +382,6 @@
   ogg_int32_t     hitbit;
   int             block_err[4];
   mv_t            best_vec;
-  int             best_err;
-  int             best_block_err[4];
   mv_t            best_block_vec[4];
   mv_t            cand;
   int             bi;
@@ -407,16 +397,17 @@
   /*Start with the median predictor.*/
   cand=_mcenc->candidates[0];
   hit_cache[cand.y+15]|=(ogg_int32_t)1<<cand.x+15;
-  best_err = oc_mcenc_ysad_check_mbcandidate_fullpel(cpi,_mcenc,_mbi,cand,
+  *best_err = oc_mcenc_ysad_check_mbcandidate_fullpel(cpi,_mcenc,_mbi,cand,
 						     _goldenp,block_err);
   best_vec=cand;
-  for(bi=0;bi<4;bi++){
-    best_block_err[bi]=block_err[bi];
-    best_block_vec[bi]=cand;
-  }
-
+  if(_bmvs)
+    for(bi=0;bi<4;bi++){
+      best_block_err[bi]=block_err[bi];
+      best_block_vec[bi]=cand;
+    }
+  
   /*If this predictor fails, move on to set A.*/
-  if(best_err>OC_YSAD_THRESH1){
+  if(*best_err>OC_YSAD_THRESH1){
     int err;
     int ci;
     int ncs;
@@ -438,18 +429,19 @@
       if(hit_cache[cand.y+15]&hitbit)continue;
       hit_cache[cand.y+15]|=hitbit;
       err=oc_mcenc_ysad_check_mbcandidate_fullpel(cpi,_mcenc,_mbi,cand,_goldenp,block_err);
-      if(err<best_err){
-        best_err=err;
+      if(err<*best_err){
+        *best_err=err;
         best_vec=cand;
       }
-      for(bi=0;bi<4;bi++)
-	if(block_err[bi]<best_block_err[bi]){
-	  best_block_err[bi]=block_err[bi];
-	  best_block_vec[bi]=cand;
-	}
+      if(_bmvs)
+	for(bi=0;bi<4;bi++)
+	  if(block_err[bi]<best_block_err[bi]){
+	    best_block_err[bi]=block_err[bi];
+	    best_block_vec[bi]=cand;
+	  }
     }
 
-    if(best_err>t2){
+    if(*best_err>t2){
       /*Examine the candidates in set B.*/
       for(;ci<_mcenc->ncandidates;ci++){
         cand=_mcenc->candidates[ci];
@@ -457,19 +449,20 @@
         if(hit_cache[cand.y+15]&hitbit)continue;
         hit_cache[cand.y+15]|=hitbit;
         err=oc_mcenc_ysad_check_mbcandidate_fullpel(cpi,_mcenc,_mbi,cand,_goldenp,block_err);
-        if(err<best_err){
-          best_err=err;
+        if(err<*best_err){
+          *best_err=err;
           best_vec=cand;
         }
-        for(bi=0;bi<4;bi++)
-	  if(block_err[bi]<best_block_err[bi]){
-	    best_block_err[bi]=block_err[bi];
-	    best_block_vec[bi]=cand;
-	  }
+	if(_bmvs)
+	  for(bi=0;bi<4;bi++)
+	    if(block_err[bi]<best_block_err[bi]){
+	      best_block_err[bi]=block_err[bi];
+	      best_block_vec[bi]=cand;
+	    }
       }
 
       /*Use the same threshold for set B as in set A.*/
-      if(best_err>t2){
+      if(*best_err>t2){
         int best_site;
         int nsites;
         int sitei;
@@ -490,15 +483,16 @@
             if(hit_cache[cand.y+15]&hitbit)continue;
             hit_cache[cand.y+15]|=hitbit;
             err=oc_mcenc_ysad_check_mbcandidate_fullpel(cpi,_mcenc,_mbi,cand,_goldenp,block_err);
-            if(err<best_err){
-              best_err=err;
+            if(err<*best_err){
+              *best_err=err;
               best_site=site;
             }
-            for(bi=0;bi<4;bi++)
-	      if(block_err[bi]<best_block_err[bi]){
-		best_block_err[bi]=block_err[bi];
-		best_block_vec[bi]=cand;
-	      }
+	    if(_bmvs)
+	      for(bi=0;bi<4;bi++)
+		if(block_err[bi]<best_block_err[bi]){
+		  best_block_err[bi]=block_err[bi];
+		  best_block_vec[bi]=cand;
+		}
           }
           if(best_site==4)break;
           best_vec.x+=OC_SQUARE_DX[best_site];
@@ -551,8 +545,8 @@
 		  if(hit_cache[cand.y+15]&hitbit)continue;
 		  hit_cache[cand.y+15]|=hitbit;
 		  err=oc_mcenc_ysad_check_mbcandidate_fullpel(cpi,_mcenc,_mbi,cand,_goldenp,block_err);
-		  if(err<best_err){
-		    best_err=err;
+		  if(err<*best_err){
+		    *best_err=err;
 		    best_vec=cand;
 		    bflag=1;
 		  }
@@ -571,32 +565,50 @@
     }
   }
 
-  {
+  if(!_goldenp) 
+    mb->aerror = *best_err;
+  mb->analysis_mv[0][_goldenp].x=best_vec.x<<1;;
+  mb->analysis_mv[0][_goldenp].y=best_vec.y<<1;;
 
-    int error=oc_mcenc_ysad_halfpel_mbrefine(cpi,_mcenc,_mbi,&best_vec,best_err,_goldenp);
-    if(!_goldenp) mb->aerror = error;
-    mb->analysis_mv[0][_goldenp]=best_vec;
-
-    if(_bmvs){
-      if(bflag){
-	for(bi=0;bi<4;bi++){
-	  oc_mcenc_ysad_halfpel_brefine(cpi,_mcenc,_mbi,bi,
-					&best_block_vec[bi],
-					best_block_err[bi],
-					_goldenp);
-	  _bmvs[bi]=best_block_vec[bi];
-	}
-      }else{
-	for(bi=0;bi<4;bi++){
-	  _bmvs[bi].x=best_block_vec[bi].x<<1;
-	  _bmvs[bi].y=best_block_vec[bi].y<<1;
-	}
-      }
+  if(_bmvs && bflag){
+    for(bi=0;bi<4;bi++){
+      _bmvs[bi].x=best_block_vec[bi].x<<1;
+      _bmvs[bi].y=best_block_vec[bi].y<<1;
     }
   }
+
+  return bflag;
 }
 
 
+void oc_mcenc_refine1mv(CP_INSTANCE *cpi, 
+		       int _mbi,
+		       int _goldenp,
+		       int err){
+
+  macroblock_t *mb = &cpi->macro[_mbi];
+  mv_t mv;
+  mv.x = mb->analysis_mv[0][_goldenp].x>>1;
+  mv.y = mb->analysis_mv[0][_goldenp].y>>1;
+  
+  oc_mcenc_ysad_halfpel_mbrefine(cpi,_mbi,&mv,err,_goldenp);
+  mb->analysis_mv[0][_goldenp]=mv;
+}
+
+void oc_mcenc_refine4mv(CP_INSTANCE *cpi, 
+		       int _mbi,
+		       int err[4]){
+  macroblock_t *mb = &cpi->macro[_mbi];
+  int bi;
+  for(bi=0;bi<4;bi++){
+    mv_t mv;
+    mv.x = mb->mv[bi].x>>1;
+    mv.y = mb->mv[bi].y>>1;
+    oc_mcenc_ysad_halfpel_brefine(cpi,_mbi,bi,&mv,err[bi],0);
+    mb->mv[bi]=mv;
+  }
+}
+
 void oc_mcenc_start(CP_INSTANCE *cpi,
                     mc_state *mcenc){
 

Modified: branches/theora-thusnelda/lib/enc/mode.c
===================================================================
--- branches/theora-thusnelda/lib/enc/mode.c	2008-04-02 21:31:38 UTC (rev 14651)
+++ branches/theora-thusnelda/lib/enc/mode.c	2008-04-02 22:38:58 UTC (rev 14652)
@@ -570,6 +570,10 @@
       int mb_4mv_bits_0;
       int mb_4mv_bits_1;
       int mode,bi;
+      int aerror;
+      int gerror;
+      int block_err[4];
+      int flag4mv=0;
 
       macroblock_t *mb = &cpi->macro[mbi];
 
@@ -580,7 +584,7 @@
 	memmove(mb->analysis_mv+1,mb->analysis_mv,2*sizeof(mb->analysis_mv[0]));
 	
 	/* basic 1MV search always done for all macroblocks, coded or not, keyframe or not */
-	oc_mcenc_search(cpi, &mcenc, mbi, 0, mb->mv);
+	flag4mv = oc_mcenc_search(cpi, &mcenc, mbi, 0, mb->mv, &aerror, block_err);
 	
 	/* replace the block MVs for not-coded blocks with (0,0).*/   
 	mb->coded = 0;
@@ -593,7 +597,7 @@
 	}
 	
 	/* search golden frame */
-	oc_mcenc_search(cpi, &mcenc, mbi, 1, NULL);
+	oc_mcenc_search(cpi, &mcenc, mbi, 1, NULL, &gerror, NULL);
 	
       }
 
@@ -619,12 +623,31 @@
 	cost[CODE_INTER_PRIOR_LAST] = cost_inter(cpi, qi, mbi, prior_mv, CODE_INTER_PRIOR_LAST);
 	cost[CODE_USING_GOLDEN] = cost_inter(cpi, qi, mbi, (mv_t){0,0},CODE_USING_GOLDEN);
 	cost[CODE_GOLDEN_MV] = cost_inter1mv(cpi, qi, mbi, 1, &mb_gmv_bits_0);
-	cost[CODE_INTER_FOURMV] = cost_inter4mv(cpi, qi, mbi, &mb_4mv_bits_0, &mb_4mv_bits_1);
+	if(flag4mv)
+	  cost[CODE_INTER_FOURMV] = cost_inter4mv(cpi, qi, mbi, &mb_4mv_bits_0, &mb_4mv_bits_1);
+	else
+	  cost[CODE_INTER_FOURMV] = 99999999;
 	
 	/* train this too... because the bit cost of an MV should be
 	   considered in the context of LAST_MV and PRIOR_LAST. */
 	cost[CODE_INTER_PLUS_MV] -= 384;
 	
+	/* the explicit MV modes (2,6,7) have not yet gone through
+	   halfpel refinement as it's a relatively expensive
+	   operation.  We choose the explicit mv mode that's already
+	   furthest ahead on bits and refine only that one */
+	if(flag4mv && cost[CODE_INTER_FOURMV]<cost[CODE_INTER_PLUS_MV] && cost[CODE_INTER_FOURMV]<cost[CODE_GOLDEN_MV]){
+	  oc_mcenc_refine4mv(cpi, mbi, block_err);
+	  cost[CODE_INTER_FOURMV] = cost_inter4mv(cpi, qi, mbi, &mb_4mv_bits_0, &mb_4mv_bits_1);
+	}else if (cost[CODE_GOLDEN_MV]<cost[CODE_INTER_PLUS_MV]){
+	  oc_mcenc_refine1mv(cpi, mbi, 1, gerror);
+	  cost[CODE_GOLDEN_MV] = cost_inter1mv(cpi, qi, mbi, 1, &mb_gmv_bits_0);
+	}else{
+	  oc_mcenc_refine1mv(cpi, mbi, 0, aerror);
+	  cost[CODE_INTER_PLUS_MV] = cost_inter1mv(cpi, qi, mbi, 0, &mb_mv_bits_0);
+	  cost[CODE_INTER_PLUS_MV] -= 384;
+	}
+
 	/* Finally, pick the mode with the cheapest estimated bit cost.*/
 	mode=0;
 	for(i=1;i<8;i++)



More information about the commits mailing list