[xiph-commits] r14876 - trunk/cdparanoia/paranoia

Mon May 12 14:20:52 PDT 2008

Author: xiphmont
Date: 2008-05-12 14:20:49 -0700 (Mon, 12 May 2008)
New Revision: 14876

Modified:
   trunk/cdparanoia/paranoia/gap.c
   trunk/cdparanoia/paranoia/overlap.c
   trunk/cdparanoia/paranoia/paranoia.c
Log:
Large batch of potential bugfixes and general code-review comments
from Peter Creath.  Each has been reviewed, but not yet tested.



Modified: trunk/cdparanoia/paranoia/gap.c
===================================================================

--- trunk/cdparanoia/paranoia/gap.c	2008-05-12 19:19:13 UTC (rev 14875)
+++ trunk/cdparanoia/paranoia/gap.c	2008-05-12 21:20:49 UTC (rev 14876)
@@ -36,13 +36,6 @@
   for(;beginA>=0 && beginB>=0;beginA--,beginB--)
     if(buffA[beginA]!=buffB[beginB])break;
 
-  /* These values will either point to the first mismatching sample, or
-   * -1 if we hit the beginning of a vector.  So increment to point to the
-   * last matching sample.
-   */
-  beginA++;
-  beginB++;
-  
   return(offsetA-beginA);
 }
 
@@ -180,7 +173,7 @@
    * to fix the rift.
    */
   
-  for(i=0;;i++){
+  for(i=1;;i++){
     /* Search for whatever case we hit first, so as to end up with the
      * smallest rift.
      */
@@ -335,7 +328,7 @@
    * to fix the rift.
    */
   
-  for(i=0;;i++){
+  for(i=1;;i++){
     /* Search for whatever case we hit first, so as to end up with the
      * smallest rift.
      */
@@ -414,7 +407,16 @@
      *
      * If the rift doesn't match the good samples in A (and hence in B),
      * it's not a stutter, and the rift should be inserted into A.
+     *
+     * ???BUG??? It's possible for aoffset+1+*matchA to be > sizeA, in
+     * which case the comparison in i_stutter_or_gap() will extend beyond
+     * the bounds of A.  Thankfully, this isn't writing data and thus
+     * trampling memory, but it's still a memory access error that should
+     * be fixed.
+     *
+     * This bug is not fixed yet.
      */
+
     if(i_stutter_or_gap(A,B,aoffset+1,boffset-*matchA+1,*matchA))
       return;
 

Modified: trunk/cdparanoia/paranoia/overlap.c
===================================================================
--- trunk/cdparanoia/paranoia/overlap.c	2008-05-12 19:19:13 UTC (rev 14875)
+++ trunk/cdparanoia/paranoia/overlap.c	2008-05-12 21:20:49 UTC (rev 14876)
@@ -186,6 +186,21 @@
  * compensation settings (e.g. dynoverlap).  This allows us to narrow our
  * search for matching runs (in both stages) in low-jitter conditions
  * and also widen our search appropriately when there is jitter.
+ *
+ *
+ * "???BUG???:
+ * Note that there is a bug in the way that this is called by try_sort_sync().
+ * Silence looks like zero jitter, and dynoverlap may be incorrectly reduced
+ * when there's lots of silence but also jitter."
+ *
+ * Strictly speaking, this is only sort-of true.  The silence will
+ * incorrectly somewhat reduce dynoverlap.  However, it will increase
+ * again once past the silence (even if reduced to zero, it will be
+ * increased by the block read loop if we're not getting matches).
+ * In reality, silence usually passes rapidly.  Anyway, long streaks
+ * of silence benefit performance-wise from having dynoverlap decrease
+ * momentarily. There is no correctness bug. --Monty
+ *
  */
 void offset_add_value(cdrom_paranoia *p,offsets *o,long value,
 			     void(*callback)(long,int)){

Modified: trunk/cdparanoia/paranoia/paranoia.c
===================================================================
--- trunk/cdparanoia/paranoia/paranoia.c	2008-05-12 19:19:13 UTC (rev 14875)
+++ trunk/cdparanoia/paranoia/paranoia.c	2008-05-12 21:20:49 UTC (rev 14876)
@@ -194,9 +194,13 @@
   for(;beginA>=0 && beginB>=0;beginA--,beginB--){
     if(buffA[beginA]!=buffB[beginB])break;
 
-    /* don't allow matching across matching sector boundaries */
-    /* Stop if both samples were at the edges of a low-level read.
-     */
+    /* don't allow matching across matching sector boundaries. The
+       liklihood of the drive skipping identically in two
+       different reads with the same sector read boundary is actually
+       relatively very high compared to the liklihood of it skipping
+       when one read is continuous across the boundary and other was
+       discontinuous */
+    /* Stop if both samples were at the edges of a low-level read. */
     if((flagsA[beginA]&flagsB[beginB]&FLAGS_EDGE)){
       beginA--;
       beginB--;
@@ -373,6 +377,7 @@
     if(do_const_sync(B,A,Aflags,
 		     post-cb(B),ipos(A,ptr),
 		     begin,end,offset)){
+
       offset_add_value(p,&(p->stage1),*offset,callback);
       return(1);
     }
@@ -438,6 +443,15 @@
 
   /* Provide feedback via the callback about the samples we've just
    * verified.
+   *
+   * "???: How can matchbegin ever be < cb(old)?"
+   *      Sorry, old bulletproofing habit.  I often use <= to mean "not >" 
+   *      --Monty
+   *
+   * "???: Why do edge samples get logged only when there's jitter
+   * between the matched runs (matchoffset != 0)?"
+   *      FIXUP_EDGE is actually logging a jitter event, not a rift-- 
+   *      a rift is FIXUP_ATOM --Monty
    */
   if(matchbegin-matchoffset<=cb(new) ||
      matchbegin<=cb(old) ||
@@ -549,6 +563,18 @@
 
   long matchbegin=-1,matchend=-1,matchoffset;
 
+  /* "???: Why do we limit our search only to the samples with overlapping
+   * absolute positions?  It could be because it eliminates some further
+   * bounds checking."
+   *  Short answer is yes --Monty
+   *
+   * "Why do we "no longer try to spread the ... search" as mentioned
+   * below?"  
+   * The search is normally much faster without the spread,
+   * even in heavy jitter.  Dynoverlap tends to be a bigger deal in
+   * stage 2. --Monty
+   */
+
   /* we no longer try to spread the stage one search area by dynoverlap */
   long searchend=min(ce(old),ce(new));
   long searchbegin=max(cb(old),cb(new));
@@ -562,6 +588,7 @@
   if(searchsize<=0)return(0);
   
   /* match return values are in terms of the new vector, not old */
+  /* "???: Why 23?" Odd, prime number --Monty  */
 
   for(j=searchbegin;j<searchend;j+=23){
 
@@ -808,6 +835,8 @@
    * overlaps with the root.  If we can't find a match within 256 samples,
    * there's probably no match to be found (because this fragment doesn't
    * overlap with the root).
+   *
+   * "??? Is this why?  Why 256?" 256 is simply a 'large enough number'. --Monty 
    */
   fev=min(min(fbv+256,re(root)+p->dynoverlap),fe(v));
   
@@ -1033,6 +1062,21 @@
 
       /* We're going to append the non-silent samples of the fragment
        * to the root where its silence begins.
+       *
+       * "??? This seems to be a very strange approach.  At this point
+       *  the root has a lot of trailing silence, and the fragment has
+       *  the lot of leading silence.  This merge will drop the silence
+       *  and just splice the non-silence together.
+       *
+       *  In theory, rift analysis will either confirm or fix this result.
+       *  What circumstances motivated this approach?"
+       *
+       * This is an 'all bets are off' situation and we choose to make
+       * the best guess we can, based on absolute position being
+       * returned by the most recent reads.  There are drives that
+       * will randomly lose what they're doing during a read and just
+       * pad out the results with zeros and return no error.  This at
+       * least has a shot of addressing that situation. --Monty
        */
 
       /* Compute the amount of silence at the beginning of the fragment.
@@ -1124,6 +1168,8 @@
 
   cdrom_paranoia *p=v->p;
   long dynoverlap=p->dynoverlap/2*2;
+  /* "??? Why do we round down to an even dynoverlap?" Dynoverlap is
+     in samples, not stereo frames --Monty */
   
   /* If this fragment has already been merged & freed, abort. */
   if(!v || !v->one)return(0);
@@ -1246,7 +1292,11 @@
 	fprintf(stderr,"matching rootR: matchA:%ld matchB:%ld matchC:%ld\n",
 		matchA,matchB,matchC);
 #endif		
-	
+	/* "??? The root.returnedlimit checks below are presently a mystery." */
+	/* Those are for the case where our backtracking wants to take
+	   us to back before bytes we've already returned to the
+	   application.  In short, it's a "we're screwed"
+	   check. --Monty */
 	if(matchA){
 	  /* There's a problem with the root */
 
@@ -1350,8 +1400,11 @@
 	   * as either a stutter or dropped samples, and we have no way of
 	   * telling whether the fragment or the root is right.
 	   *
-	   * The original comment indicated that we set "disagree" flags
-	   * in the root, but it seems to be historical.
+	   * "The original comment indicated that we set "disagree"
+	   * flags in the root, but it seems to be historical."  The
+	   * disagree flags were from a time when we did interpolation
+	   * over samples we simply couldn't get to agree.  Yes,
+	   * historical functionality that didn;t work well. --Monty
 	   */
 
 	  if(rb(root)+begin-matchC<p->root.returnedlimit)
@@ -1359,7 +1412,13 @@
 
 	  /* Overwrite the mismatching (matchC) samples in root with the
 	   * samples from the fixed up fragment.
+	   *
+	   * "??? Do we think the fragment is more likely correct, is this
+	   * just arbitrary, or is there some other reason for overwriting
+	   * the root?"
+	   *   We think these samples are more likely to be correct --Monty
 	   */
+	   */
 	  c_overwrite(rc(root),begin-matchC,
 			cv(l)+beginL-matchC,matchC);
 	  
@@ -1367,6 +1426,17 @@
       
 	  /* We may have had a mismatch because we ran into leading silence.
 	   *
+	   * "??? To be studied: why would this cause a mismatch?
+	   * Neither i_analyze_rift_r nor i_iterate_stage2() nor
+	   * i_paranoia_overlap() appear to take silence into
+	   * consideration in this regard.  It could be due to our
+	   * skipping of silence when searching for a match."  Jitter
+	   * and or skipping in sections of silence could end up with
+	   * two sets of verified vectors that agree completely except
+	   * for the length of the silence.  Silence is a huge bugaboo
+	   * in general because there's no entropy within it to base
+	   * verification on. --Monty
+	   *
 	   * Since we don't extend the root in that direction, we don't
 	   * do anything, just move on to trailing rifts.
 	   */
@@ -1381,6 +1451,10 @@
 
 	/* Recalculate the offset of the edge of the rift in the fixed
 	 * up fragment, in case it changed.
+	 *
+	 * "??? Why is this done here rather than in the (matchB) case above,
+	 * which should be the only time beginL will change."
+	 * Because there's no reason not to? --Monty
 	 */
 	beginL=begin+offset;
 
@@ -1627,7 +1701,8 @@
 			   begin,beginL,
 			   rs(root),cs(l),
 			   NULL,&end);
-
+	
+	temp=cs(l);
       } /* end while (trailing rift) */
 
 
@@ -1642,6 +1717,17 @@
        * This is generally fine, since the verified root is supposed to
        * slide from earlier samples to later samples across multiple calls
        * to paranoia_read().
+       *
+       * "??? But, is this actually right?  Because of this, we don't
+       * extend the root to hold the earliest read sample, if we
+       * happened to initialize the root with a later sample due to
+       * jitter.  There are probably some ugly side effects from
+       * extending the root backward, in the general case, but it may
+       * not be so dire if we're near sample 0.  To be investigated."
+       * In the begin case, any start position is arbitrary due to
+       * inexact seeking.  Later, we can't back-extend the root as the
+       * samples preceeding the beginning have already been returned
+       * to the application! --Monty
        */
       {
 	long sizeA=rs(root);
@@ -1666,12 +1752,23 @@
 	 * relative to the root (A), and see if the offset is past the
 	 * end of the root (> sizeA).  If it is, this fragment will extend
 	 * our root.
+	 *
+	 * "??? Why do we check for v->lastsector separately?" Because
+	 * of the case where root extends *too* far; if we never get a
+	 * read that accidentally extends that far again, we could
+	 * hang and loop forever. --Monty
 	 */
 	if(sizeB-offset>sizeA || v->lastsector){	  
 	  if(v->lastsector){
 	    root->lastsector=1;
 	  }
 	  
+	  /* "??? Why would end be < sizeA? Why do we truncate root?"
+	     Because it can happen (seeking is very very inexact) and
+	     end of disk tends to be very problematic in terms of
+	     stopping point.  We also generally believe more recent
+	     information over previous information when they disagree
+	     and both are 'verified'. --Monty */
 	  if(end<sizeA)c_remove(rc(root),end,-1);
 	  
 	  /* Extend the root with the samples from the end of the
@@ -1859,6 +1956,26 @@
 	  /* If we don't have a verified root yet, just promote the first
 	   * fragment (with lowest beginning sample) to be the verified
 	   * root.
+	   *
+	   * "??? It seems that this could be fairly arbitrary if jitter
+	   * is an issue.  If we've verified two fragments allegedly
+	   * beginning at "0" (which are actually slightly offset due to
+	   * jitter), the root might not begin at the earliest read
+	   * sample.  Additionally, because subsequent fragments are
+	   * only merged at the tail end of the root, this situation
+	   * won't be fixed by merging the earlier samples.
+	   *
+	   * Practically, this ends up not being critical since most
+	   * drives insert some extra silent samples at the beginning
+	   * of the stream.  Missing a few of them doesn't cause any
+	   * real lost data.  But it is non-deterministic." 
+	   *
+	   * On such a drive, the entire act of CDDA read is highly
+	   * nondeterministic.  All redbook says is +/- 75 sectors.
+	   * If you insist on the earliest possible sample, you can
+	   * get into a situation where the first read was far earlier
+	   * than all the others and no other read ever repeats the
+	   * early positioning. --Monty */
 	   */
 	  if(rv(root)==NULL){
 	    if(i_init_root(&(p->root),first,beginword,callback)){
@@ -2261,6 +2378,17 @@
        * with null data and mark it as invalid (FLAGS_UNREAD).  We pad
        * because we're going to be appending further reads to the current
        * c_block.
+       *
+       * "???: Why not re-read?  It might be to keep you from getting
+       * hung up on a bad sector.  Or it might be to avoid
+       * interrupting the streaming as much as possible."  
+       *
+       * There are drives on which you will never get a full read in
+       * some positions.  They always abort out early due to firmware
+       * boundary cases.  Reread will cause exactly the same thing to
+       * happen again.  NEC MultiSpeed 4x is one such drive. In these
+       * cases, you take what part of the read you know is good, and
+       * you get substantially better performance. --Monty
        */
       if((thisread=cdda_read(p->d,buffer+sofar*CD_FRAMEWORDS,adjread,
 			    secread))<secread){
@@ -2303,6 +2431,10 @@
 
       /* Move the read cursor ahead by the number of sectors we attempted
        * to read.
+       *
+       * "???: Again, why not move it ahead by the number actually
+       * read?"  Because adding zero would not be moving
+       * ahead. --Monty
        */
       p->lastread=adjread+secread;
       
@@ -2474,7 +2606,7 @@
 	    long begin=0,end=0;
 	    
 	    while(begin<cs(new)){
-	      while(end<cs(new)&&(new->flags[begin]&FLAGS_EDGE))begin++;
+	      while(begin<cs(new)&&(new->flags[begin]&FLAGS_EDGE))begin++;
 	      end=begin+1;
 	      while(end<cs(new)&&(new->flags[end]&FLAGS_EDGE)==0)end++;
 	      {