[xiph-commits] r14875 - trunk/cdparanoia/paranoia

xiphmont at svn.xiph.org xiphmont at svn.xiph.org
Mon May 12 12:19:14 PDT 2008


Author: xiphmont
Date: 2008-05-12 12:19:13 -0700 (Mon, 12 May 2008)
New Revision: 14875

Modified:
   trunk/cdparanoia/paranoia/gap.c
   trunk/cdparanoia/paranoia/isort.c
   trunk/cdparanoia/paranoia/isort.h
   trunk/cdparanoia/paranoia/overlap.c
   trunk/cdparanoia/paranoia/p_block.c
   trunk/cdparanoia/paranoia/paranoia.c
Log:
Commit comment patches


Modified: trunk/cdparanoia/paranoia/gap.c
===================================================================
--- trunk/cdparanoia/paranoia/gap.c	2008-05-12 19:04:53 UTC (rev 14874)
+++ trunk/cdparanoia/paranoia/gap.c	2008-05-12 19:19:13 UTC (rev 14875)
@@ -12,46 +12,138 @@
 
 /**** Gap analysis code ***************************************************/
 
+/* ===========================================================================
+ * i_paranoia_overlap_r (internal)
+ *
+ * This function seeks backward through two vectors (starting at the given
+ * offsets) to determine how many consecutive samples agree.  It returns
+ * the number of matching samples, which may be 0.
+ *
+ * Unlike its sibling, i_paranoia_overlap_f, this function doesn't need to
+ * be given the size of the vectors (all vectors stop at offset 0).
+ *
+ * This function is used by i_analyze_rift_r() below to find where a
+ * leading rift ends.
+ */
 long i_paranoia_overlap_r(int16_t *buffA,int16_t *buffB,
 			  long offsetA, long offsetB){
   long beginA=offsetA;
   long beginB=offsetB;
 
+  /* Start at the given offsets and work our way backwards until we hit
+   * the beginning of one of the vectors.
+   */
   for(;beginA>=0 && beginB>=0;beginA--,beginB--)
     if(buffA[beginA]!=buffB[beginB])break;
+
+  /* These values will either point to the first mismatching sample, or
+   * -1 if we hit the beginning of a vector.  So increment to point to the
+   * last matching sample.
+   */
   beginA++;
   beginB++;
   
   return(offsetA-beginA);
 }
 
+
+/* ===========================================================================
+ * i_paranoia_overlap_f (internal)
+ *
+ * This function seeks forward through two vectors (starting at the given
+ * offsets) to determine how many consecutive samples agree.  It returns
+ * the number of matching samples, which may be 0.
+ *
+ * Unlike its sibling, i_paranoia_overlap_r, this function needs to given
+ * the size of the vectors.
+ *
+ * This function is used by i_analyze_rift_f() below to find where a
+ * trailing rift ends.
+ */
 long i_paranoia_overlap_f(int16_t *buffA,int16_t *buffB,
 			  long offsetA, long offsetB,
 			  long sizeA,long sizeB){
   long endA=offsetA;
   long endB=offsetB;
   
+  /* Start at the given offsets and work our way forward until we hit
+   * the end of one of the vectors.
+   */
   for(;endA<sizeA && endB<sizeB;endA++,endB++)
     if(buffA[endA]!=buffB[endB])break;
   
   return(endA-offsetA);
 }
 
+
+/* ===========================================================================
+ * i_stutter_or_gap (internal)
+ *
+ * This function compares (gap) samples of two vectors at the given offsets.
+ * It returns 0 if all the samples are identical, or nonzero if they differ.
+ *
+ * This is used by i_analyze_rift_[rf] below to determine whether a rift
+ * contains samples dropped by the other vector (that should be inserted),
+ * or whether the rift contains a stutter (that should be dropped).  See
+ * i_analyze_rift_[rf] for more details.
+ */
 int i_stutter_or_gap(int16_t *A, int16_t *B,long offA, long offB,
 		     long gap){
   long a1=offA;
   long b1=offB;
   
+  /* If the rift was so big that there aren't enough samples in the other
+   * vector to compare against the full gap, then just compare what we
+   * have available.  E.g.:
+   *
+   *            (5678)|(newly matching run ...)
+   *    (... 12345678)| (345678) |(newly matching run ...)
+   *
+   * In this case, a1 would be -2, since we'd want to compare 6 samples
+   * against a vector that had only 4.  So we start 2 samples later, and
+   * compare the 4 available samples.
+   *
+   * Again, this approach to identifying stutters is simply a heuristic,
+   * so this may not produce correct results in all cases.
+   */
   if(a1<0){
+    /* Note that a1 is negative, so we're increasing b1 and decreasing (gap).
+     */
     b1-=a1;
     gap+=a1;
     a1=0;
   }
   
+  /* Note that we don't have an equivalent adjustment for leading rifts.
+   * Thus, it's possible for the following memcmp() to run off the end
+   * of A.  See the bug note in i_analyze_rift_r().
+   */
+  
+  /* Multiply gap by 2 because samples are 2 bytes long and memcmp compares
+   * at the byte level.
+   */
   return(memcmp(A+a1,B+b1,gap*2));
 }
 
 /* riftv is the first value into the rift -> or <- */
+
+
+/* ===========================================================================
+ * i_analyze_rift_f (internal)
+ *
+ * This function examines a trailing rift to see how far forward the rift goes
+ * and to determine what kind of rift it is.  This function is called by
+ * i_stage2_each() when a trailing rift is detected.  (aoffset,boffset) are
+ * the offsets into (A,B) of the first mismatching sample.
+ *
+ * This function returns:
+ *  matchA  > 0 if there are (matchA) samples missing from A
+ *  matchA  < 0 if there are (-matchA) duplicate samples (stuttering) in A
+ *  matchB  > 0 if there are (matchB) samples missing from B
+ *  matchB  < 0 if there are (-matchB) duplicate samples in B
+ *  matchC != 0 if there are (matchC) samples of garbage, after which
+ *              both A and B are in sync again
+ */
 void i_analyze_rift_f(int16_t *A,int16_t *B,
 		      long sizeA, long sizeB,
 		      long aoffset, long boffset, 
@@ -63,51 +155,150 @@
   
   *matchA=0, *matchB=0, *matchC=0;
   
-  /* Look for three possible matches... (A) Ariftv->B, (B) Briftv->A and 
-     (c) AB->AB. */
+  /* Look forward to see where we regain agreement between vectors
+   * A and B (of at least MIN_WORDS_RIFT samples).  We look for one of
+   * the following possible matches:
+   * 
+   *                         edge
+   *                          v
+   * (1)  (... A matching run)|(aoffset matches ...)
+   *      (... B matching run)| (rift) |(boffset+i matches ...)
+   *
+   * (2)  (... A matching run)| (rift) |(aoffset+i matches ...)
+   *      (... B matching run)|(boffset matches ...)
+   *
+   * (3)  (... A matching run)| (rift) |(aoffset+i matches ...)
+   *      (... B matching run)| (rift) |(boffset+i matches ...)
+   *
+   * Anything that doesn't match one of these three is too corrupt to
+   * for us to recover from.  E.g.:
+   *
+   *      (... A matching run)| (rift) |(eventual match ...)
+   *      (... B matching run)| (big rift) |(eventual match ...)
+   *
+   * We won't find the eventual match, since we wouldn't be sure how
+   * to fix the rift.
+   */
   
   for(i=0;;i++){
-    if(i<bpast) /* A */
+    /* Search for whatever case we hit first, so as to end up with the
+     * smallest rift.
+     */
+
+    /* Don't search for (1) past the end of B */
+    if(i<bpast)
+
+      /* See if we match case (1) above, which either means that A dropped
+       * samples at the rift, or that B stuttered.
+       */
       if(i_paranoia_overlap_f(A,B,aoffset,boffset+i,sizeA,sizeB)>=MIN_WORDS_RIFT){
 	*matchA=i;
 	break;
       }
     
-    if(i<apast){ /* B */
+    /* Don't search for (2) or (3) past the end of A */
+    if(i<apast){
+
+      /* See if we match case (2) above, which either means that B dropped
+       * samples at the rift, or that A stuttered.
+       */
       if(i_paranoia_overlap_f(A,B,aoffset+i,boffset,sizeA,sizeB)>=MIN_WORDS_RIFT){
 	*matchB=i;
 	break;
       }
-      if(i<bpast) /* C */
+
+      /* Don't search for (3) past the end of B */
+      if(i<bpast)
+
+        /* See if we match case (3) above, which means that a fixed-length
+         * rift of samples is getting read unreliably.
+         */
 	if(i_paranoia_overlap_f(A,B,aoffset+i,boffset+i,sizeA,sizeB)>=MIN_WORDS_RIFT){
 	  *matchC=i;
 	  break;
 	}
     }else
+
+      /* Stop searching when we've reached the end of both vectors.
+       * In theory we could stop when there aren't MIN_WORDS_RIFT samples
+       * left in both vectors, but this case should happen fairly rarely.
+       */
       if(i>=bpast)break;
     
+    /* Try the search again with a larger tentative rift. */
   }
   
   if(*matchA==0 && *matchB==0 && *matchC==0)return;
   
   if(*matchC)return;
+
+  /* For case (1) or (2), we need to determine whether the rift contains
+   * samples dropped by the other vector (that should be inserted), or
+   * whether the rift contains a stutter (that should be dropped).  To
+   * distinguish, we check the contents of the rift against the good samples
+   * just before the rift.  If the contents match, then the rift contains
+   * a stutter.
+   *
+   * A stutter in the second vector:
+   *     (...good samples... 1234)|(567 ...newly matched run...)
+   *     (...good samples... 1234)| (1234) | (567 ...newly matched run)
+   *
+   * Samples missing from the first vector:
+   *     (...good samples... 1234)|(901 ...newly matched run...)
+   *     (...good samples... 1234)| (5678) |(901 ...newly matched run...)
+   *
+   * Of course, there's no theoretical guarantee that a non-stutter
+   * truly represents missing samples, but given that we're dealing with
+   * verified fragments in stage 2, we can have some confidence that this
+   * is the case.
+   */
   if(*matchA){
+    /* For case (1), we need to determine whether A dropped samples at the
+     * rift or whether B stuttered.
+     *
+     * If the rift doesn't match the good samples in A (and hence in B),
+     * it's not a stutter, and the rift should be inserted into A.
+     */
     if(i_stutter_or_gap(A,B,aoffset-*matchA,boffset,*matchA))
       return;
-    *matchB=-*matchA; /* signify we need to remove n bytes from B */
+
+    /* It is a stutter, so we need to signal that we need to remove
+     * (matchA) bytes from B.
+     */
+    *matchB=-*matchA;
     *matchA=0;
     return;
+
   }else{
+    /* Case (2) is the inverse of case (1) above. */
     if(i_stutter_or_gap(B,A,boffset-*matchB,aoffset,*matchB))
       return;
+
     *matchA=-*matchB;
     *matchB=0;
     return;
   }
 }
 
+
 /* riftv must be first even val of rift moving back */
 
+/* ===========================================================================
+ * i_analyze_rift_r (internal)
+ *
+ * This function examines a leading rift to see how far back the rift goes
+ * and to determine what kind of rift it is.  This function is called by
+ * i_stage2_each() when a leading rift is detected.  (aoffset,boffset) are
+ * the offsets into (A,B) of the first mismatching sample.
+ *
+ * This function returns:
+ *  matchA  > 0 if there are (matchA) samples missing from A
+ *  matchA  < 0 if there are (-matchA) duplicate samples (stuttering) in A
+ *  matchB  > 0 if there are (matchB) samples missing from B
+ *  matchB  < 0 if there are (-matchB) duplicate samples in B
+ *  matchC != 0 if there are (matchC) samples of garbage, after which
+ *              both A and B are in sync again
+ */
 void i_analyze_rift_r(int16_t *A,int16_t *B,
 		      long sizeA, long sizeB,
 		      long aoffset, long boffset, 
@@ -119,61 +310,171 @@
   
   *matchA=0, *matchB=0, *matchC=0;
   
-  /* Look for three possible matches... (A) Ariftv->B, (B) Briftv->A and 
-     (c) AB->AB. */
+  /* Look backward to see where we regain agreement between vectors
+   * A and B (of at least MIN_WORDS_RIFT samples).  We look for one of
+   * the following possible matches:
+   * 
+   *                                    edge
+   *                                      v
+   * (1)             (... aoffset matches)|(A matching run ...)
+   *      (... boffset-i matches)| (rift) |(B matching run ...)
+   *
+   * (2)  (... aoffset-i matches)| (rift) |(A matching run ...)
+   *                (... boffset matches)|(B matching run ...)
+   *
+   * (3)  (... aoffset-i matches)| (rift) |(A matching run ...)
+   *      (... boffset-i matches)| (rift) |(B matching run ...)
+   *
+   * Anything that doesn't match one of these three is too corrupt to
+   * for us to recover from.  E.g.:
+   *
+   *         (... eventual match)| (rift) |(A matching run ...)
+   *    (... eventual match) | (big rift) |(B matching run ...)
+   *
+   * We won't find the eventual match, since we wouldn't be sure how
+   * to fix the rift.
+   */
   
   for(i=0;;i++){
-    if(i<bpast) /* A */
+    /* Search for whatever case we hit first, so as to end up with the
+     * smallest rift.
+     */
+
+    /* Don't search for (1) past the beginning of B */
+    if(i<bpast)
+
+      /* See if we match case (1) above, which either means that A dropped
+       * samples at the rift, or that B stuttered.
+       */
       if(i_paranoia_overlap_r(A,B,aoffset,boffset-i)>=MIN_WORDS_RIFT){
 	*matchA=i;
 	break;
       }
-    if(i<apast){ /* B */
+
+    /* Don't search for (2) or (3) past the beginning of A */
+    if(i<apast){
+
+      /* See if we match case (2) above, which either means that B dropped
+       * samples at the rift, or that A stuttered.
+       */
       if(i_paranoia_overlap_r(A,B,aoffset-i,boffset)>=MIN_WORDS_RIFT){
 	*matchB=i;
 	break;
       }      
-      if(i<bpast) /* C */
+
+      /* Don't search for (3) past the beginning of B */
+      if(i<bpast)
+
+	/* See if we match case (3) above, which means that a fixed-length
+	 * rift of samples is getting read unreliably.
+	 */
 	if(i_paranoia_overlap_r(A,B,aoffset-i,boffset-i)>=MIN_WORDS_RIFT){
 	  *matchC=i;
 	  break;
 	}
     }else
+
+      /* Stop searching when we've reached the end of both vectors.
+       * In theory we could stop when there aren't MIN_WORDS_RIFT samples
+       * left in both vectors, but this case should happen fairly rarely.
+       */
       if(i>=bpast)break;
     
+    /* Try the search again with a larger tentative rift. */
   }
   
   if(*matchA==0 && *matchB==0 && *matchC==0)return;
   
   if(*matchC)return;
   
+  /* For case (1) or (2), we need to determine whether the rift contains
+   * samples dropped by the other vector (that should be inserted), or
+   * whether the rift contains a stutter (that should be dropped).  To
+   * distinguish, we check the contents of the rift against the good samples
+   * just after the rift.  If the contents match, then the rift contains
+   * a stutter.
+   *
+   * A stutter in the second vector:
+   *              (...newly matched run... 234)|(5678 ...good samples...)
+   *     (...newly matched run... 234)| (5678) |(5678 ...good samples...)
+   *
+   * Samples missing from the first vector:
+   *              (...newly matched run... 890)|(5678 ...good samples...)
+   *     (...newly matched run... 890)| (1234) |(5678 ...good samples...)
+   *
+   * Of course, there's no theoretical guarantee that a non-stutter
+   * truly represents missing samples, but given that we're dealing with
+   * verified fragments in stage 2, we can have some confidence that this
+   * is the case.
+   */
+
   if(*matchA){
+    /* For case (1), we need to determine whether A dropped samples at the
+     * rift or whether B stuttered.
+     *
+     * If the rift doesn't match the good samples in A (and hence in B),
+     * it's not a stutter, and the rift should be inserted into A.
+     */
     if(i_stutter_or_gap(A,B,aoffset+1,boffset-*matchA+1,*matchA))
       return;
-    *matchB=-*matchA; /* signify we need to remove n bytes from B */
+
+    /* It is a stutter, so we need to signal that we need to remove
+     * (matchA) bytes from B.
+     */
+    *matchB=-*matchA;
     *matchA=0;
     return;
+
   }else{
+    /* Case (2) is the inverse of case (1) above. */
     if(i_stutter_or_gap(B,A,boffset+1,aoffset-*matchB+1,*matchB))
       return;
+
     *matchA=-*matchB;
     *matchB=0;
     return;
   }
 }
 
+
+/* ===========================================================================
+ * analyze_rift_silence_f (internal)
+ *
+ * This function examines the fragment and root from the rift onward to
+ * see if they have a rift's worth of silence (or if they end with silence).
+ * It sets (*matchA) to -1 if A's rift is silence, (*matchB) to -1 if B's
+ * rift is silence, and sets them to 0 otherwise.
+ *
+ * Note that, unlike every other function in cdparanoia, this function
+ * considers any repeated value to be silence (which, in effect, it is).
+ * All other functions only consider repeated zeroes to be silence.
+ *
+ * This function is called by i_stage2_each() if it runs into a trailing rift
+ * that i_analyze_rift_f couldn't diagnose.  This checks for another variant:
+ * where one vector has silence and the other doesn't.  We then assume
+ * that the silence (and anything following it) is garbage.
+ *
+ * Note that while this function checks both A and B for silence, the caller
+ * assumes that only one or the other has silence.
+ */
 void  analyze_rift_silence_f(int16_t *A,int16_t *B,long sizeA,long sizeB,
 			     long aoffset, long boffset,
 			     long *matchA, long *matchB){
   *matchA=-1;
   *matchB=-1;
 
+  /* Search for MIN_WORDS_RIFT samples, or to the end of the vector,
+   * whichever comes first.
+   */
   sizeA=min(sizeA,aoffset+MIN_WORDS_RIFT);
   sizeB=min(sizeB,boffset+MIN_WORDS_RIFT);
 
   aoffset++;
   boffset++;
 
+  /* Check whether A has only "silence" within the search range.  Note
+   * that "silence" here is a single, repeated value (zero or not).
+   */
   while(aoffset<sizeA){
     if(A[aoffset]!=A[aoffset-1]){
       *matchA=0;
@@ -182,6 +483,12 @@
     aoffset++;
   }
 
+  /* Check whether B has only "silence" within the search range.  Note
+   * that "silence" here is a single, repeated value (zero or not).
+   *
+   * Also note that while the caller assumes that only matchA or matchB
+   * is set, we check both vectors here.
+   */
   while(boffset<sizeB){
     if(B[boffset]!=B[boffset-1]){
       *matchB=0;

Modified: trunk/cdparanoia/paranoia/isort.c
===================================================================
--- trunk/cdparanoia/paranoia/isort.c	2008-05-12 19:04:53 UTC (rev 14874)
+++ trunk/cdparanoia/paranoia/isort.c	2008-05-12 19:19:13 UTC (rev 14875)
@@ -9,11 +9,30 @@
 /* Old isort got a bit complex.  This re-constrains complexity to
    give a go at speed through a more alpha-6-like mechanism. */
 
+
+/* "Sort" is a bit of a misnomer in this implementation.  It's actually
+ * basically a hash table of sample values (with a linked-list collision
+ * resolution), which lets you quickly determine where in a vector a
+ * particular sample value occurs.
+ *
+ * Collisions aren't due to hash collisions, as the table has one bucket
+ * for each possible sample value.  Instead, the "collisions" represent
+ * multiple occurrences of a given value.
+ */
+
 #include <stdlib.h>
 #include <string.h>
 #include "p_block.h"
 #include "isort.h"
 
+
+/* ===========================================================================
+ * sort_alloc()
+ *
+ * Allocates and initializes a new, empty sort_info object, which can be
+ * used to index up to (size) samples from a vector.
+ */
+
 sort_info *sort_alloc(long size){
   sort_info *ret=calloc(1,sizeof(sort_info));
 
@@ -30,7 +49,21 @@
   return(ret);
 }
 
+
+/* ===========================================================================
+ * sort_unsortall() (internal)
+ *
+ * This function resets the index for further use with a different vector
+ * or range, without the overhead of an unnecessary free/alloc.
+ */
+
 void sort_unsortall(sort_info *i){
+  /* If there were few enough different samples encountered (and hence few
+   * enough buckets used), we can just zero out those buckets.  If there
+   * were many (2000 is picked somewhat arbitrarily), it's faster simply to
+   * zero out all buckets with a memset() rather than walking the data
+   * structure and zeroing them out one by one.
+   */
   if(i->lastbucket>2000){ /* a guess */
     memset(i->head,0,65536*sizeof(sort_link *));
   }else{
@@ -41,8 +74,20 @@
 
   i->lastbucket=0;
   i->sortbegin=-1;
+
+  /* Curiously, this function preserves the vector association created
+   * by sort_setup(), but it is used only internally by sort_setup, so
+   * preserving this association is unnecessary.
+   */
 }
 
+
+/* ===========================================================================
+ * sort_free()
+ *
+ * Releases all memory consumed by a sort_info object.
+ */
+
 void sort_free(sort_info *i){
   free(i->revindex);
   free(i->head);
@@ -50,49 +95,135 @@
   free(i);
 }
  
+
+/* ===========================================================================
+ * sort_sort() (internal)
+ *
+ * This function builds the index to allow for fast searching for sample
+ * values within a portion (sortlo - sorthi) of the object's associated
+ * vector.  It is called internally and only when needed.
+ */
+
 static void sort_sort(sort_info *i,long sortlo,long sorthi){
   long j;
 
+  /* We walk backward through the range to index because we insert new
+   * samples at the head of each bucket's list.  At the end, they'll be
+   * sorted from first to last occurrence.
+   */
   for(j=sorthi-1;j>=sortlo;j--){
+    /* i->vector[j] = the signed 16-bit sample to index.
+     * hv           = pointer to the head of the sorted list of occurences
+     *                of this sample
+     * l            = the node to associate with this sample
+     *
+     * We add 32768 to convert the signed 16-bit integer to an unsigned
+     * range from 0 to 65535.
+     *
+     * Note that l is located within i->revindex at a position
+     * corresponding to the sample's position in the vector.  This allows
+     * ipos() to determine the sample position from a returned sort_link.
+     */
     sort_link **hv=i->head+i->vector[j]+32768;
     sort_link *l=i->revindex+j;
 
+    /* If this is the first time we've encountered this sample, add its
+     * bucket to the list of buckets used.  This list is used only for
+     * resetting the index quickly.
+     */
     if(*hv==NULL){
       i->bucketusage[i->lastbucket]=i->vector[j]+32768;
       i->lastbucket++;
     }
+
+    /* Point the new node at the old head, then assign the new node as
+     * the new head.
+     */
     l->next=*hv;    
     *hv=l;
   }
+
+  /* Mark the index as initialized.
+   */
   i->sortbegin=0;
 }
 
-/* size *must* be less than i->maxsize */
+
+/* ===========================================================================
+ * sort_setup()
+ *
+ * This function initializes a previously allocated sort_info_t.  The
+ * sort_info_t is associated with a vector of samples of length
+ * (size), whose position begins at (*abspos) within the CD's stream
+ * of samples.  Only the range of samples between (sortlo, sorthi)
+ * will eventually be indexed for fast searching.  (sortlo, sorthi)
+ * are absolute sample positions.
+ *
+ * Note: size *must* be <= the size given to the preceding sort_alloc(),
+ * but no error checking is done here.
+ */
+
 void sort_setup(sort_info *i,int16_t *vector,long *abspos,
 		long size,long sortlo,long sorthi){
+  /* Reset the index if it has already been built.
+   */
   if(i->sortbegin!=-1)sort_unsortall(i);
 
   i->vector=vector;
   i->size=size;
   i->abspos=abspos;
 
+  /* Convert the absolute (sortlo, sorthi) to offsets within the vector.
+   * Note that the index will not be built until sort_getmatch() is called.
+   * Here we're simply hanging on to the range to index until then.
+   */
   i->lo=min(size,max(sortlo-*abspos,0));
   i->hi=max(0,min(sorthi-*abspos,size));
 }
 
+/* ===========================================================================
+ * sort_getmatch()
+ *
+ * This function returns a sort_link_t pointer which refers to the
+ * first sample equal to (value) in the vector.  It only searches for
+ * hits within (overlap) samples of (post), where (post) is an offset
+ * within the vector.  The caller can determine the position of the
+ * matched sample using ipos(sort_info *, sort_link *).
+ *
+ * This function returns NULL if no matches were found.
+ */
+
 sort_link *sort_getmatch(sort_info *i,long post,long overlap,int value){
   sort_link *ret;
 
+  /* If the vector hasn't been indexed yet, index it now.
+   */
   if(i->sortbegin==-1)sort_sort(i,i->lo,i->hi);
   /* Now we reuse lo and hi */
   
+  /* We'll only return samples within (overlap) samples of (post).
+   * Clamp the boundaries to search to the boundaries of the array,
+   * convert the signed sample to an unsigned offset, and store the
+   * state so that future calls to sort_nextmatch do the right thing.
+   *
+   * Reusing lo and hi this way is awful.
+   */
   post=max(0,min(i->size,post));
   i->val=value+32768;
   i->lo=max(0,post-overlap);       /* absolute position */
   i->hi=min(i->size,post+overlap); /* absolute position */
 
+  /* Walk through the linked list of samples with this value, until
+   * we find the first one within the bounds specified.  If there
+   * aren't any, return NULL.
+   */
   ret=i->head[i->val];
+
   while(ret){
+    /* ipos() calculates the offset (in terms of the original vector)
+     * of this hit.
+     */
+
     if(ipos(i,ret)<i->lo){
       ret=ret->next;
     }else{
@@ -105,10 +236,25 @@
   return(ret);
 }
 
+
+/* ===========================================================================
+ * sort_nextmatch()
+ *
+ * This function returns a sort_link_t pointer which refers to the next sample
+ * matching the criteria previously passed to sort_getmatch().  See
+ * sort_getmatch() for details.
+ *
+ * This function returns NULL if no further matches were found.
+ */
+
 sort_link *sort_nextmatch(sort_info *i,sort_link *prev){
   sort_link *ret=prev->next;
 
+  /* If there aren't any more hits, or we've passed the boundary requested
+   * of sort_getmatch(), we're done.
+   */
   if(!ret || ipos(i,ret)>=i->hi)return(NULL); 
+
   return(ret);
 }
 

Modified: trunk/cdparanoia/paranoia/isort.h
===================================================================
--- trunk/cdparanoia/paranoia/isort.h	2008-05-12 19:04:53 UTC (rev 14874)
+++ trunk/cdparanoia/paranoia/isort.h	2008-05-12 19:19:13 UTC (rev 14875)
@@ -31,18 +31,112 @@
 
 } sort_info;
 
+/*! ========================================================================
+ * sort_alloc()
+ *
+ * Allocates and initializes a new, empty sort_info object, which can
+ * be used to index up to (size) samples from a vector.
+ */
 extern sort_info *sort_alloc(long size);
+
+/*! ========================================================================
+ * sort_unsortall() (internal)
+ *
+ * This function resets the index for further use with a different
+ * vector or range, without the overhead of an unnecessary free/alloc.
+ */
 extern void sort_unsortall(sort_info *i);
+
+/*! ========================================================================
+ * sort_setup()
+ *
+ * This function initializes a previously allocated sort_info_t.  The
+ * sort_info_t is associated with a vector of samples of length
+ * (size), whose position begins at (*abspos) within the CD's stream
+ * of samples.  Only the range of samples between (sortlo, sorthi)
+ * will eventually be indexed for fast searching.  (sortlo, sorthi)
+ * are absolute sample positions.
+ *
+ * Note: size *must* be <= the size given to the preceding sort_alloc(),
+ * but no error checking is done here.
+ */
 extern void sort_setup(sort_info *i,int16_t *vector,long *abspos,long size,
 		       long sortlo, long sorthi);
+
+/* =========================================================================
+ * sort_free()
+ *
+ * Releases all memory consumed by a sort_info object.
+ */
 extern void sort_free(sort_info *i);
+
+/*! ========================================================================
+ * sort_getmatch()
+ *
+ * This function returns a sort_link_t pointer which refers to the
+ * first sample equal to (value) in the vector.  It only searches for
+ * hits within (overlap) samples of (post), where (post) is an offset
+ * within the vector.  The caller can determine the position of the
+ * matched sample using ipos(sort_info *, sort_link *).
+ *
+ * This function returns NULL if no matches were found.
+ */
 extern sort_link *sort_getmatch(sort_info *i,long post,long overlap,int value);
+
+/*! ========================================================================
+ * sort_nextmatch()
+ *
+ * This function returns a sort_link_t pointer which refers to the
+ * next sample matching the criteria previously passed to
+ * sort_getmatch().  See sort_getmatch() for details.
+ *
+ * This function returns NULL if no further matches were found.
+ */
 extern sort_link *sort_nextmatch(sort_info *i,sort_link *prev);
 
+/* ===========================================================================
+ * is()
+ *
+ * This macro returns the size of the vector indexed by the given sort_info_t.
+ */
 #define is(i) (i->size)
+
+/* ===========================================================================
+ * ib()
+ *
+ * This macro returns the absolute position of the first sample in the vector
+ * indexed by the given sort_info_t.
+ */
 #define ib(i) (*i->abspos)
+
+/* ===========================================================================
+ * ie()
+ *
+ * This macro returns the absolute position of the sample after the last
+ * sample in the vector indexed by the given sort_info_t.
+ */
 #define ie(i) (i->size+*i->abspos)
+
+/* ===========================================================================
+ * iv()
+ *
+ * This macro returns the vector indexed by the given sort_info_t.
+ */
 #define iv(i) (i->vector)
+
+/* ===========================================================================
+ * ipos()
+ *
+ * This macro returns the relative position (offset) within the indexed vector
+ * at which the given match was found.
+ *
+ * It uses a little-known and frightening aspect of C pointer arithmetic:
+ * subtracting a pointer is not an arithmetic subtraction, but rather the
+ * additive inverse.  In other words, since
+ *   q     = p + n returns a pointer to the nth object in p,
+ *   q - p = p + n - p, and
+ *   q - p = n, not the difference of the two addresses.
+ */
 #define ipos(i,l) (l-i->revindex)
 
 #endif

Modified: trunk/cdparanoia/paranoia/overlap.c
===================================================================
--- trunk/cdparanoia/paranoia/overlap.c	2008-05-12 19:04:53 UTC (rev 14874)
+++ trunk/cdparanoia/paranoia/overlap.c	2008-05-12 19:19:13 UTC (rev 14875)
@@ -89,6 +89,18 @@
 
 /**** Statistical and heuristic[al? :-] management ************************/
 
+/* ===========================================================================
+ * offset_adjust_settings (internal)
+ *
+ * This function is called by offset_add_value() every time 10 samples have
+ * been accumulated.  This function updates the internal statistics for
+ * paranoia (dynoverlap, dyndrift) that compensate for jitter and drift.
+ *
+ * (dynoverlap) influences how far stage 1 and stage 2 search for matching
+ * runs.  In low-jitter conditions, it will be very small (or even 0),
+ * narrowing our search.  In high-jitter conditions, it will be much larger,
+ * widening our search at the cost of speed.
+ */
 void offset_adjust_settings(cdrom_paranoia *p, void(*callback)(long,int)){
   if(p->stage2.offpoints>=10){
     /* drift: look at the average offset value.  If it's over one
@@ -164,17 +176,34 @@
   }
 }
 
+
+/* ===========================================================================
+ * offset_add_value (internal)
+ *
+ * This function adds the given jitter detected (value) to the statistics
+ * for the given stage (o).  It is called whenever jitter has been identified
+ * by stage 1 or 2.  After every 10 samples, we update the overall jitter-
+ * compensation settings (e.g. dynoverlap).  This allows us to narrow our
+ * search for matching runs (in both stages) in low-jitter conditions
+ * and also widen our search appropriately when there is jitter.
+ */
 void offset_add_value(cdrom_paranoia *p,offsets *o,long value,
 			     void(*callback)(long,int)){
   if(o->offpoints!=-1){
 
+    /* Track the average magnitude of jitter (in either direction) */
     o->offdiff+=abs(value);
     o->offpoints++;
     o->newpoints++;
+
+    /* Track the net value of the jitter (to track drift) */
     o->offaccum+=value;
+
+    /* Track the largest jitter we've encountered in each direction */
     if(value<o->offmin)o->offmin=value;
     if(value>o->offmax)o->offmax=value;
-    
+
+    /* After 10 samples, update dynoverlap, etc. */
     if(o->newpoints>=10)offset_adjust_settings(p,callback);
   }
 }

Modified: trunk/cdparanoia/paranoia/p_block.c
===================================================================
--- trunk/cdparanoia/paranoia/p_block.c	2008-05-12 19:04:53 UTC (rev 14874)
+++ trunk/cdparanoia/paranoia/p_block.c	2008-05-12 19:19:13 UTC (rev 14875)
@@ -282,6 +282,15 @@
 
 /**** Initialization *************************************************/
 
+/*!  Get the beginning and ending sector bounds given cursor position.
+
+  There are a couple of subtle differences between this and the
+  cdda_firsttrack_sector and cdda_lasttrack_sector. If the cursor is
+  at a sector later than cdda_firsttrack_sector, that sector will be
+  used. As for the difference between cdda_lasttrack_sector, if the CD
+  is mixed and there is a data track after the cursor but before the
+  last audio track, the end of the audio sector before that is used.
+*/
 void i_paranoia_firstlast(cdrom_paranoia *p){
   int i;
   cdrom_drive *d=p->d;

Modified: trunk/cdparanoia/paranoia/paranoia.c
===================================================================
--- trunk/cdparanoia/paranoia/paranoia.c	2008-05-12 19:04:53 UTC (rev 14874)
+++ trunk/cdparanoia/paranoia/paranoia.c	2008-05-12 19:19:13 UTC (rev 14875)
@@ -46,6 +46,29 @@
 
   **************************************************************/
 
+/* ===========================================================================
+ * Let's translate the above vivid metaphor into something a mere mortal
+ * can understand:
+ *
+ * Non-silent audio is "solid."  Silent audio is "wet" and fluid.  The reason
+ * to treat silence as fluid is that if there's a long enough span of
+ * silence, we can't reliably detect jitter or dropped samples within that
+ * span (since all silence looks alike).  Non-silent audio, on the other
+ * hand, is distinctive and can be reliably reassembled.
+ *
+ * So we treat long spans of silence specially.  We only consider an edge
+ * of a non-silent region ("continent" or "island") to be "wet" if it borders
+ * a long span of silence.  Short spans of silence are merely damp and can
+ * be reliably placed within a continent.
+ *
+ * We position ("anchor") the non-silent regions somewhat arbitrarily (since
+ * they may be jittered and we have no way to verify their exact position),
+ * and fill the intervening space with silence.
+ *
+ * See i_silence_match() for the gory details.
+ * ===========================================================================
+ */
+
 #include <stdlib.h>
 #include <unistd.h>
 #include <stdio.h>
@@ -105,6 +128,20 @@
 
 /**** matching and analysis code *****************************************/
 
+/* ===========================================================================
+ * i_paranoia_overlap() (internal)
+ *
+ * This function is called when buffA[offsetA] == buffB[offsetB].  This
+ * function searches backward and forward to see how many consecutive
+ * samples also match.
+ *
+ * This function is called by do_const_sync() when we're not doing any
+ * verification.  Its more complicated sibling is i_paranoia_overlap2.
+ *
+ * This function returns the number of consecutive matching samples.
+ * If (ret_begin) or (ret_end) are not NULL, it fills them with the
+ * offsets of the first and last matching samples in A.
+ */
 static inline long i_paranoia_overlap(int16_t *buffA,int16_t *buffB,
 			       long offsetA, long offsetB,
 			       long sizeA,long sizeB,
@@ -112,19 +149,39 @@
   long beginA=offsetA,endA=offsetA;
   long beginB=offsetB,endB=offsetB;
 
+  /* Scan backward to extend the matching run in that direction. */
   for(;beginA>=0 && beginB>=0;beginA--,beginB--)
     if(buffA[beginA]!=buffB[beginB])break;
   beginA++;
   beginB++;
   
+  /* Scan forward to extend the matching run in that direction. */
   for(;endA<sizeA && endB<sizeB;endA++,endB++)
     if(buffA[endA]!=buffB[endB])break;
   
+  /* Return the result of our search. */
   if(ret_begin)*ret_begin=beginA;
   if(ret_end)*ret_end=endA;
   return(endA-beginA);
 }
 
+
+/* ===========================================================================
+ * i_paranoia_overlap2() (internal)
+ *
+ * This function is called when buffA[offsetA] == buffB[offsetB].  This
+ * function searches backward and forward to see how many consecutive
+ * samples also match.
+ *
+ * This function is called by do_const_sync() when we're verifying the
+ * data coming off the CD.  Its less complicated sibling is
+ * i_paranoia_overlap, which is a good place to look to see the simplest
+ * outline of how this function works.
+ *
+ * This function returns the number of consecutive matching samples.
+ * If (ret_begin) or (ret_end) are not NULL, it fills them with the
+ * offsets of the first and last matching samples in A.
+ */
 static inline long i_paranoia_overlap2(int16_t *buffA,int16_t *buffB,
 				       char *flagsA,char *flagsB,
 				       long offsetA, long offsetB,
@@ -133,23 +190,32 @@
   long beginA=offsetA,endA=offsetA;
   long beginB=offsetB,endB=offsetB;
   
+  /* Scan backward to extend the matching run in that direction. */
   for(;beginA>=0 && beginB>=0;beginA--,beginB--){
     if(buffA[beginA]!=buffB[beginB])break;
+
     /* don't allow matching across matching sector boundaries */
-    /* don't allow matching through known missing data */
+    /* Stop if both samples were at the edges of a low-level read.
+     */
     if((flagsA[beginA]&flagsB[beginB]&FLAGS_EDGE)){
       beginA--;
       beginB--;
       break;
     }
+
+    /* don't allow matching through known missing data */
     if((flagsA[beginA]&FLAGS_UNREAD)|| (flagsB[beginB]&FLAGS_UNREAD))break;
   }
   beginA++;
   beginB++;
   
+  /* Scan forward to extend the matching run in that direction. */
   for(;endA<sizeA && endB<sizeB;endA++,endB++){
     if(buffA[endA]!=buffB[endB])break;
+
     /* don't allow matching across matching sector boundaries */
+    /* Stop if both samples were at the edges of a low-level read.
+     */
     if((flagsA[endA]&flagsB[endB]&FLAGS_EDGE) &&endA!=beginA){
       break;
     }
@@ -158,20 +224,29 @@
     if((flagsA[endA]&FLAGS_UNREAD)||(flagsB[endB]&FLAGS_UNREAD))break;
   }
 
+  /* Return the result of our search. */
   if(ret_begin)*ret_begin=beginA;
   if(ret_end)*ret_end=endA;
   return(endA-beginA);
 }
 
-/* Top level of the first stage matcher */
 
-/* We match each analysis point of new to the preexisting blocks
-recursively.  We can also optionally maintain a list of fragments of
-the preexisting block that didn't match anything, and match them back
-afterward. */
-
-#define OVERLAP_ADJ (MIN_WORDS_OVERLAP/2-1)
-
+/* ===========================================================================
+ * do_const_sync() (internal)
+ *
+ * This function is called when samples A[posA] == B[posB].  It tries to
+ * build a matching run from that point, looking forward and backward to
+ * see how many consecutive samples match.  Since the starting samples
+ * might only be coincidentally identical, we only consider the run to
+ * be a true match if it's longer than MIN_WORDS_SEARCH.
+ *
+ * This function returns the length of the run if a matching run was found,
+ * or 0 otherwise.  If a matching run was found, (begin) and (end) are set
+ * to the absolute positions of the beginning and ending samples of the
+ * run in A, and (offset) is set to the jitter between the c_blocks.
+ * (I.e., offset indicates the distance between what A considers sample N
+ * on the CD and what B considers sample N.)
+ */
 static inline long do_const_sync(c_block *A,
 				 sort_info *B,char *flagB,
 				 long posA,long posB,
@@ -179,6 +254,11 @@
   char *flagA=A->flags;
   long ret=0;
 
+  /* If we're doing any verification whatsoever, we have flags in stage
+   * 1, and will take them into account.  Otherwise (e.g. in stage 2),
+   * we just do the simple equality test for samples on both sides of
+   * the initial match.
+   */
   if(flagB==NULL)
     ret=i_paranoia_overlap(cv(A),iv(B),posA,posB,
 			   cs(A),is(B),begin,end);
@@ -187,8 +267,16 @@
       ret=i_paranoia_overlap2(cv(A),iv(B),flagA,flagB,posA,posB,cs(A),
 			      is(B),begin,end);
 	
+  /* Small matching runs could just be coincidental.  We only consider this
+   * a real match if it's long enough.
+   */
   if(ret>MIN_WORDS_SEARCH){
     *offset=+(posA+cb(A))-(posB+ib(B));
+
+    /* Note that try_sort_sync()'s swaps A & B when it calls this function,
+     * so while we adjust begin & end to be relative to A here, that means
+     * it's relative to B in try_sort_sync().
+     */
     *begin+=cb(A);
     *end+=cb(A);
     return(ret);
@@ -197,6 +285,30 @@
   return(0);
 }
 
+
+/* ===========================================================================
+ * try_sort_sync() (internal)
+ *
+ * Starting from the sample in B with the absolute position (post), look
+ * for a matching run in A.  This search will look in A for a first
+ * matching sample within (p->dynoverlap) samples around (post).  If it
+ * finds one, it will then determine how many consecutive samples match
+ * both A and B from that point, looking backwards and forwards.  If
+ * this search produces a matching run longer than MIN_WORDS_SEARCH, we
+ * consider it a match.
+ *
+ * When used by stage 1, the "post" is planted with respect to the old
+ * c_block being compare to the new c_block.  In stage 2, the "post" is
+ * planted with respect to the verified root.
+ *
+ * This function returns 1 if a match is found and 0 if not.  When a match
+ * is found, (begin) and (end) are set to the boundaries of the run, and
+ * (offset) is set to the difference in position of the run in A and B.
+ * (begin) and (end) are the absolute positions of the samples in
+ * B.  (offset) transforms A to B's frame of reference.  I.e., an offset of
+ * 2 would mean that A's absolute 3 is equivalent to B's 5.
+ */
+
 /* post is w.r.t. B.  in stage one, we post from old.  In stage 2 we
    post from root. Begin, end, offset count from B's frame of
    reference */
@@ -217,7 +329,19 @@
     {
       long zeropos=post-ib(A);
       if(zeropos>=0 && zeropos<is(A)){
+
+	/* Before we bother with the search for a matching samples,
+	 * we check the simple case.  If there's no jitter at all
+	 * (i.e. the absolute positions of A's and B's samples are
+	 * consistent), A's sample at (post) should be identical
+	 * to B's sample at the same position.
+	 */
 	if(cv(B)[post-cb(B)]==iv(A)[zeropos]){
+
+	  /* The first sample matched, now try to grow the matching run
+	   * in both directions.  We only consider it a match if more
+	   * than MIN_WORDS_SEARCH consecutive samples match.
+	   */
 	  if(do_const_sync(B,A,Aflags,
 			   post-cb(B),zeropos,
 			   begin,end,offset)){
@@ -232,25 +356,76 @@
   }else
     return(0);
   
+  /* If the samples with the same absolute position didn't match, it's
+   * either a bad sample, or the two c_blocks are jittered with respect
+   * to each other.  Now we search through A for samples that do have
+   * the same value as B's post.  The search looks from first to last
+   * occurrence witin (dynoverlap) samples of (post).
+   */
   ptr=sort_getmatch(A,post-ib(A),dynoverlap,cv(B)[post-cb(B)]);
   
   while(ptr){
     
+    /* We've found a matching sample, so try to grow the matching run in
+     * both directions.  If we find a long enough run (longer than
+     * MIN_WORDS_SEARCH), we've found a match.
+     */
     if(do_const_sync(B,A,Aflags,
 		     post-cb(B),ipos(A,ptr),
 		     begin,end,offset)){
       offset_add_value(p,&(p->stage1),*offset,callback);
       return(1);
     }
+
+    /* The matching sample was just a fluke -- there weren't enough adjacent
+     * samples that matched to consider a matching run.  So now we check
+     * for the next occurrence of that value in A.
+     */
     ptr=sort_nextmatch(A,ptr);
   }
   
+  /* We didn't find any matches. */
   *begin=-1;
   *end=-1;
   *offset=-1;
   return(0);
 }
 
+
+/* ===========================================================================
+ * STAGE 1 MATCHING
+ * ===========================================================================
+ */
+
+/* Top level of the first stage matcher */
+
+/* We match each analysis point of new to the preexisting blocks
+recursively.  We can also optionally maintain a list of fragments of
+the preexisting block that didn't match anything, and match them back
+afterward. */
+
+#define OVERLAP_ADJ (MIN_WORDS_OVERLAP/2-1)
+
+
+/* ===========================================================================
+ * stage1_matched() (internal)
+ *
+ * This function is called whenever stage 1 verification finds two identical
+ * runs of samples from different reads.  The runs must be more than
+ * MIN_WORDS_SEARCH samples long.  They may be jittered (i.e. their absolute
+ * positions on the CD may not match due to inaccurate seeking) with respect
+ * to each other, but they have been verified to have no dropped samples
+ * within them.
+ *
+ * This function provides feedback via the callback mechanism and marks the
+ * runs as verified.  The details of the marking are somehwat subtle and
+ * are described near the relevant code.
+ *
+ * Subsequent portions of the stage 1 code will build a verified fragment
+ * from this run.  The verified fragment will eventually be merged
+ * into the verified root (and its absolute position determined) in
+ * stage 2.
+ */
 static inline void stage1_matched(c_block *old,c_block *new,
 				 long matchbegin,long matchend,
 				 long matchoffset,void (*callback)(long,int)){
@@ -260,6 +435,10 @@
   long newadjbegin=matchbegin-matchoffset-cb(new);
   long newadjend=matchend-matchoffset-cb(new);
   
+
+  /* Provide feedback via the callback about the samples we've just
+   * verified.
+   */
   if(matchbegin-matchoffset<=cb(new) ||
      matchbegin<=cb(old) ||
      (new->flags[newadjbegin]&FLAGS_EDGE) ||
@@ -278,6 +457,51 @@
   }else
     if(callback)(*callback)(matchend,PARANOIA_CB_FIXUP_ATOM);
   
+
+  /* Mark verified samples as "verified," but trim the verified region
+   * by OVERLAP_ADJ samples on each side.  There are several significant
+   * implications of this trimming:
+   *
+   * 1) Why we trim at all:  We have to trim to distinguish between two
+   * adjacent verified runs and one long verified run.  We encounter this
+   * situation when samples have been dropped:
+   *
+   *   matched portion of read 1 ....)(.... matched portion of read 1
+   *       read 2 adjacent run  .....)(..... read 2 adjacent run
+   *                                 ||
+   *                      dropped samples in read 2
+   *
+   * So at this point, the fact that we have two adjacent runs means
+   * that we have not yet verified that the two runs really are adjacent.
+   * (In fact, just the opposite:  there are two runs because they were
+   * matched by separate runs, indicating that some samples didn't match
+   * across the length of read 2.)
+   *
+   * If we verify that they are actually adjacent (e.g. if the two runs
+   * are simply a result of matching runs from different reads, not from
+   * dropped samples), we will indeed mark them as one long merged run.
+   *
+   * 2) Why we trim by this amount: We want to ensure that when we
+   * verify the relationship between these two runs, we do so with
+   * an overlapping fragment at least OVERLAP samples long.  Following
+   * from the above example:
+   *
+   *                (..... matched portion of read 3 .....)
+   *       read 2 adjacent run  .....)(..... read 2 adjacent run
+   *
+   * Assuming there were no dropped samples between the adjacent runs,
+   * the matching portion of read 3 will need to be at least OVERLAP
+   * samples long to mark the two runs as one long verified run.
+   * If there were dropped samples, read 3 wouldn't match across the
+   * two runs, proving our caution worthwhile.
+   *
+   * 3) Why we partially discard the work we've done:  We don't.
+   * When subsequently creating verified fragments from this run,
+   * we compensate for this trimming.  Thus the verified fragment will
+   * contain the full length of verified samples.  Only the c_blocks
+   * will reflect this trimming.
+   */
+
   /* Mark the verification flags.  Don't mark the first or
      last OVERLAP/2 elements so that overlapping fragments
      have to overlap by OVERLAP to actually merge. We also
@@ -296,6 +520,30 @@
     
 }
 
+
+/* ===========================================================================
+ * i_iterate_stage1 (internal)
+ *
+ * This function is called by i_stage1() to compare newly read samples with
+ * previously read samples, searching for contiguous runs of identical
+ * samples.  Matching runs indicate that at least two reads of the CD
+ * returned identical data, with no dropped samples in that run.
+ * The runs may be jittered (i.e. their absolute positions on the CD may
+ * not be accurate due to inaccurate seeking) at this point.  Their
+ * positions will be determined in stage 2.
+ *
+ * This function compares the new c_block (which has been indexed in
+ * p->sortcache) to a previous c_block.  It is called for each previous
+ * c_block.  It searches for runs of identical samples longer than
+ * MIN_WORDS_SEARCH.  Samples in matched runs are marked as verified.
+ *
+ * Subsequent stage 1 code builds verified fragments from the runs of
+ * verified samples.  These fragments are merged into the verified root
+ * in stage 2.
+ *
+ * This function returns the number of distinct runs verified in the new
+ * c_block when compared against this old c_block.
+ */
 static long i_iterate_stage1(cdrom_paranoia *p,c_block *old,c_block *new,
 			     void(*callback)(long,int)){
 
@@ -316,8 +564,23 @@
   /* match return values are in terms of the new vector, not old */
 
   for(j=searchbegin;j<searchend;j+=23){
+
+    /* Skip past any samples verified in previous comparisons to
+     * other old c_blocks.  Also, obviously, don't bother verifying
+     * unread/unmatchable samples.
+     */
     if((new->flags[j-cb(new)]&(FLAGS_VERIFIED|FLAGS_UNREAD))==0){      
       tried++;
+
+      /* Starting from the sample in the old c_block with the absolute
+       * position j, look for a matching run in the new c_block.  This
+       * search will look a certain distance around j, and if successful
+       * will extend the matching run as far backward and forward as
+       * it can.
+       *
+       * The search will only return 1 if it finds a matching run long
+       * enough to be deemed significant.
+       */
       if(try_sort_sync(p,i,new->flags,old,j,&matchbegin,&matchend,&matchoffset,
 		       callback)==1){
 	
@@ -329,6 +592,11 @@
 	  long j=matchbegin-cb(old);
 	  long end=matchend-cb(old);
 	  for(;j<end;j++)if(cv(old)[j]!=0)break;
+
+	  /* Mark the matched samples in both c_blocks as verified.
+	   * In reality, not all the samples are marked.  See
+	   * stage1_matched() for details.
+	   */
 	  if(j<end){
 	    stage1_matched(old,new,matchbegin,matchend,matchoffset,callback);
 	  }else{
@@ -336,10 +604,13 @@
 	  }
 	}
 	ret++;
+
+	/* Skip past this verified run to look for more matches. */
 	if(matchend-1>j)j=matchend-1;
       }
     }
-  }
+  } /* end for */
+
 #ifdef NOISY 
   fprintf(stderr,"iterate_stage1: search area=%ld[%ld-%ld] tried=%ld matched=%ld spans=%ld\n",
 	  searchsize,searchbegin,searchend,tried,matched,ret);
@@ -348,6 +619,36 @@
   return(ret);
 }
 
+
+/* ===========================================================================
+ * i_stage1() (internal)
+ *
+ * Compare newly read samples against previously read samples, searching
+ * for contiguous runs of identical samples.  Matching runs indicate that
+ * at least two reads of the CD returned identical data, with no dropped
+ * samples in that run.  The runs may be jittered (i.e. their absolute
+ * positions on the CD may not be accurate due to inaccurate seeking) at
+ * this point.  Their positions will be determined in stage 2.
+ *
+ * This function compares a new c_block against all other c_blocks in memory,
+ * searching for sufficiently long runs of identical samples.  Since each
+ * c_block represents a separate call to read_c_block, this ensures that
+ * multiple reads have returned identical data.  (Additionally, read_c_block
+ * varies the reads so that multiple reads are unlikely to produce identical
+ * errors, so any matches between reads are considered verified.  See
+ * i_read_c_block for more details.)
+ *
+ * Each time we find such a  run (longer than MIN_WORDS_SEARCH), we mark
+ * the samples as "verified" in both c_blocks.  Runs of verified samples in
+ * the new c_block are promoted into verified fragments, which will later
+ * be merged into the verified root in stage 2.
+ *
+ * In reality, not all the verified samples are marked as "verified."
+ * See stage1_matched() for an explanation.
+ *
+ * This function returns the number of verified fragments created by the
+ * stage 1 matching.
+ */
 static long i_stage1(cdrom_paranoia *p,c_block *new,
 		     void(*callback)(long,int)){
 
@@ -356,9 +657,22 @@
   int ret=0;
   long begin=0,end;
   
+  /* We're going to be comparing the new c_block against the other
+   * c_blocks in memory.  Initialize the "sort cache" index to allow
+   * for fast searching through the new c_block.  (The index will
+   * actually be built the first time we search.)
+   */
   if(ptr)sort_setup(p->sortcache,cv(new),&cb(new),cs(new),
 		    cb(new),ce(new));
 
+  /* Iterate from oldest to newest c_block, comparing the new c_block
+   * to each, looking for a sufficiently long run of identical samples
+   * (longer than MIN_WORDS_SEARCH), which will be marked as "verified"
+   * in both c_blocks.
+   *
+   * Since the new c_block is already in the list (at the head), don't
+   * compare it against itself.
+   */
   while(ptr && ptr!=new){
 
     if(callback)(*callback)(cb(new),PARANOIA_CB_VERIFY);
@@ -369,6 +683,9 @@
 
   /* parse the verified areas of new into v_fragments */
   
+  /* Find each run of contiguous verified samples in the new c_block
+   * and create a verified fragment from each run.
+   */
   begin=0;
   while(begin<size){
     for(;begin<size;begin++)if(new->flags[begin]&FLAGS_VERIFIED)break;
@@ -377,6 +694,15 @@
     
     ret++;
 
+    /* We create a new verified fragment from the contiguous run
+     * of verified samples.
+     *
+     * We expand the "verified" range by OVERLAP_ADJ on each side
+     * to compensate for trimming done to the verified range by
+     * stage1_matched().  The samples were actually verified, and
+     * hence belong in the verified fragment.  See stage1_matched()
+     * for an explanation of the trimming.
+     */
     new_v_fragment(p,new,cb(new)+max(0,begin-OVERLAP_ADJ),
 		   cb(new)+min(size,end+OVERLAP_ADJ),
 		   (end+OVERLAP_ADJ>=size && new->lastsector));
@@ -384,19 +710,56 @@
     begin=end;
   }
   
+  /* Return the number of distinct verified fragments we found with
+   * stage 1 matching.
+   */
   return(ret);
 }
 
-/* reconcile v_fragments to root buffer.  Free if matched, fragment/fixup root
-   if necessary */
 
+/* ===========================================================================
+ * STAGE 2 MATCHING
+ * ===========================================================================
+ */
+
 typedef struct sync_result {
   long offset;
   long begin;
   long end;
 } sync_result;
 
-/* do *not* match using zero posts */
+/* Reconcile v_fragments to root buffer.  Free if matched, fragment/fixup root
+   if necessary.
+
+   Do *not* match using zero posts
+*/
+
+/* ===========================================================================
+ * i_iterate_stage2 (internal)
+ *
+ * This function searches for a sufficiently long run of identical samples
+ * between the passed verified fragment and the verified root.  The search
+ * is similar to that performed by i_iterate_stage1.  Of course, what we do
+ * as a result of a match is different.
+ *
+ * Our search is slightly different in that we refuse to match silence to
+ * silence.  All silence looks alike, and it would result in too many false
+ * positives here, so we handle silence separately.
+ *
+ * Also, because we're trying to determine whether this fragment as a whole
+ * overlaps with the root at all, we narrow our search (since it should match
+ * immediately or not at all).  This is in contrast to stage 1, where we
+ * search the entire vector looking for all possible matches.
+ *
+ * This function returns 0 if no match was found (including failure to find
+ * one due to silence), or 1 if we found a match.
+ *
+ * When a match is found, the sync_result_t is set to the boundaries of
+ * matching run (begin/end, in terms of the root) and how far out of sync
+ * the fragment is from the canonical root (offset).  Note that this offset
+ * is opposite in sign from the notion of offset used by try_sort_sync()
+ * and stage 1 generally.
+ */
 static long i_iterate_stage2(cdrom_paranoia *p,v_fragment *v,
 			     sync_result *r,void(*callback)(long,int)){
   root_block *root=&(p->root);
@@ -407,35 +770,110 @@
       fprintf(stderr,"Stage 2 search: fbv=%ld fev=%ld\n",fb(v),fe(v));
 #endif
 
+  /* Quickly check whether there could possibly be any overlap between
+   * the verified fragment and the root.  Our search will allow up to
+   * (p->dynoverlap) jitter between the two, so we expand the fragment
+   * search area by p->dynoverlap on both sides and see if that expanded
+   * area overlaps with the root.
+   *
+   * We could just as easily expand root's boundaries by p->dynoverlap
+   * instead and achieve the same result.
+   */
   if(min(fe(v)+p->dynoverlap,re(root))-
     max(fb(v)-p->dynoverlap,rb(root))<=0)return(0);
 
   if(callback)(*callback)(fb(v),PARANOIA_CB_VERIFY);
 
-  /* just a bit of v; determine the correct area */
+  /* We're going to try to match the fragment to the root while allowing
+   * for p->dynoverlap jitter, so we'll actually be looking at samples
+   * in the fragment whose position claims to be up to p->dynoverlap
+   * outside the boundaries of the root.  But, of course, don't extend
+   * past the edges of the fragment.
+   */
   fbv=max(fb(v),rb(root)-p->dynoverlap);
 
-  /* we want to avoid zeroes */
+  /* Skip past leading zeroes in the fragment, and bail if there's nothing
+   * but silence.  We handle silence later separately.
+   */
   while(fbv<fe(v) && fv(v)[fbv-fb(v)]==0)fbv++;
   if(fbv==fe(v))return(0);
+
+  /* This is basically the same idea as the initial calculation for fbv
+   * above.  Look at samples up to p->dynoverlap outside the boundaries
+   * of the root, but don't extend past the edges of the fragment.
+   *
+   * However, we also limit the search to no more than 256 samples.
+   * Unlike stage 1, we're not trying to find all possible matches within
+   * two runs -- rather, we're trying to see if the fragment as a whole
+   * overlaps with the root.  If we can't find a match within 256 samples,
+   * there's probably no match to be found (because this fragment doesn't
+   * overlap with the root).
+   */
   fev=min(min(fbv+256,re(root)+p->dynoverlap),fe(v));
   
   {
-    /* spread the search area a bit.  We post from root, so containment
-       must strictly adhere to root */
+    /* Because we'll allow for up to (p->dynoverlap) jitter between the
+     * fragment and the root, we expand the search area (fbv to fev) by
+     * p->dynoverlap on both sides.  But, because we're iterating through
+     * root, we need to constrain the search area not to extend beyond
+     * the root's boundaries.
+     */
     long searchend=min(fev+p->dynoverlap,re(root));
     long searchbegin=max(fbv-p->dynoverlap,rb(root));
     sort_info *i=p->sortcache;
     long j;
     
+    /* Initialize the "sort cache" index to allow for fast searching
+     * through the verified fragment between (fbv,fev).  (The index will
+     * actually be built the first time we search.)
+     */
     sort_setup(i,fv(v),&fb(v),fs(v),fbv,fev);
+
     for(j=searchbegin;j<searchend;j+=23){
+
+      /* Skip past silence in the root.  If there are just a few silent
+       * samples, the effect is minimal.  The real reason we need this is
+       * for large regions of silence.  All silence looks alike, so you
+       * could false-positive "match" two runs of silence that are either
+       * unrelated or ought to be jittered, and try_sort_sync can't
+       * accurately determine jitter (offset) from silence.
+       *
+       * Therefore, we want to post on a non-zero sample.  If there's
+       * nothing but silence left in the root, bail.  We don't want
+       * to match it here.
+       */
       while(j<searchend && rv(root)[j-rb(root)]==0)j++;
       if(j==searchend)break;
 
+      /* Starting from the (non-zero) sample in the root with the absolute
+       * position j, look for a matching run in the verified fragment.  This
+       * search will look a certain distance around j, and if successful
+       * will extend the matching run as far backward and forward as
+       * it can.
+       *
+       * The search will only return 1 if it finds a matching run long
+       * enough to be deemed significant.  Note that the search is limited
+       * by the boundaries given to sort_setup() above.
+       *
+       * Note also that flags aren't used in stage 2 (since neither verified
+       * fragments nor the root have them).
+       */
       if(try_sort_sync(p,i,NULL,rc(root),j,
 		       &matchbegin,&matchend,&offset,callback)){
 	
+	/* If we found a matching run, we return the results of our match.
+	 *
+	 * Note that we flip the sign of (offset) because try_sort_sync()
+	 * returns it in terms of the fragment (i.e. what we add
+	 * to the fragment's position to yield the corresponding position
+	 * in the root), but here we consider the root to be canonical,
+	 * and so our returned "offset" reflects how the fragment is offset
+	 * from the root.
+	 *
+	 * E.g.: If the fragment's sample 10 corresponds to root's 12,
+	 * try_sort_sync() would return 2.  But since root is canonical,
+	 * we say that the fragment is off by -2.
+	 */
 	r->begin=matchbegin;
 	r->end=matchend;
 	r->offset=-offset;
@@ -448,13 +886,33 @@
   return(0);
 }
 
-/* simple test for a root vector that ends in silence*/
+
+/* ===========================================================================
+ * i_silence_test() (internal)
+ *
+ * If the entire root is silent, or there's enough trailing silence
+ * to be significant (MIN_SILENCE_BOUNDARY samples), mark the beginning
+ * of the silence and "light" the silence flag.  This flag will remain lit
+ * until i_silence_match() appends some non-silent samples to the root.
+ *
+ * We do this because if there's a long enough span of silence, we can't
+ * reliably detect jitter or dropped samples within that span.  See
+ * i_silence_match() for details on how we recover from this situation.
+ */
 static void i_silence_test(root_block *root){
   int16_t *vec=rv(root);
   long end=re(root)-rb(root)-1;
   long j;
   
+  /* Look backward from the end of the root to find the first non-silent
+   * sample.
+   */
   for(j=end-1;j>=0;j--)if(vec[j]!=0)break;
+
+  /* If the entire root is silent, or there's enough trailing silence
+   * to be significant, mark the beginning of the silence and "light"
+   * the silence flag.
+   */
   if(j<0 || end-j>MIN_SILENCE_BOUNDARY){
     root->silenceflag=1;
     root->silencebegin=rb(root)+j+1;
@@ -463,9 +921,32 @@
   }
 }
 
-/* match into silence vectors at offset zero if at all possible.  This
-   also must be called with vectors in ascending begin order in case
-   there are nonzero islands */
+ 
+ /* ===========================================================================
+  * i_silence_match() (internal)
+  *
+  * This function is merges verified fragments into the verified root in cases
+  * where there is a problematic amount of silence (MIN_SILENCE_BOUNDARY
+  * samples) at the end of the root.
+  *
+  * We need a special approach because if there's a long enough span of
+  * silence, we can't reliably detect jitter or dropped samples within that
+  * span (since all silence looks alike).
+  *
+  * Only fragments that begin with MIN_SILENCE_BOUNDARY samples are eligible
+  * to be merged in this case.  Fragments that are too far beyond the edge
+  * of the root to possibly overlap are also disregarded.
+  *
+  * Our first approach is to assume that such fragments have no jitter (since
+  * we can't establish otherwise) and merge them.  However, if it's clear
+  * that there must be jitter (i.e. because non-silent samples overlap when
+  * we assume no jitter), we assume the fragment has the minimum possible
+  * jitter and then merge it.
+  *
+  * This function extends silence fairly aggressively, so it must be called
+  * with fragments in ascending order (beginning position) in case there are
+  * small non-silent regions within the silence.
+  */
 static long i_silence_match(root_block *root, v_fragment *v,
 			  void(*callback)(long,int)){
 
@@ -474,80 +955,203 @@
   long end=fs(v),begin;
   long j;
 
-  /* does this vector begin wet? */
+  /* See how much leading silence this fragment has.  If there are fewer than
+   * MIN_SILENCE_BOUNDARY leading silent samples, we don't do this special
+   * silence matching.
+   *
+   * This fragment could actually belong here, but we can't be sure unless
+   * it has enough silence on its leading edge.  This fragment will likely
+   * stick around until we do successfully extend the root, at which point
+   * it will be merged using the usual method.
+   */
   if(end<MIN_SILENCE_BOUNDARY)return(0);
   for(j=0;j<end;j++)if(vec[j]!=0)break;
   if(j<MIN_SILENCE_BOUNDARY)return(0);
+
+  /* Convert the offset of the first non-silent sample to an absolute
+   * position.  For the time being, we will assume that this position
+   * is accurate, with no jitter.
+   */
   j+=fb(v);
 
-  /* is the new silent section ahead of the end of the old by <
-     p->dynoverlap? */
+  /* If this fragment is ahead of the root, see if that could just be due
+   * to jitter (if it's within p->dynoverlap samples of the end of root).
+   */
   if(fb(v)>=re(root) && fb(v)-p->dynoverlap<re(root)){
-    /* extend the zeroed area of root */
+
+    /* This fragment is within jitter range of the root, so we extend the
+     * root's silence so that it overlaps with this fragment.  At this point
+     * we know that the fragment has at least MIN_SILENCE_BOUNDARY silent
+     * samples at the beginning, so we overlap by that amount.
+     */
     long addto=fb(v)+MIN_SILENCE_BOUNDARY-re(root);
     int16_t vec[addto];
     memset(vec,0,sizeof(vec));
     c_append(rc(root),vec,addto);
   }
 
-  /* do we have an 'effortless' overlap? */
+  /* Calculate the overlap of the root's trailing silence and the fragment's
+   * leading silence.  (begin,end) are the boundaries of that overlap.
+   */
   begin=max(fb(v),root->silencebegin);
   end=min(j,re(root));
   
+  /* If there is an overlap, we assume that both the root and the fragment
+   * are jitter-free (since there's no way for us to tell otherwise).
+   */
   if(begin<end){
 
-    /* don't use it unless it will extend... */
-
+    /* If the fragment will extend the root, then we append it to the root.
+     * Otherwise, no merging is necessary, as the fragment should already
+     * be contained within the root.
+     */
     if(fe(v)>re(root)){
       long voff=begin-fb(v);
       
+      /* Truncate the overlapping silence from the end of the root.
+       */
       c_remove(rc(root),begin-rb(root),-1);
+
+      /* Append the fragment to the root, starting from the point of overlap.
+       */
       c_append(rc(root),vec+voff,fs(v)-voff);
     }
+
+    /* Record the fact that we merged this fragment assuming zero jitter.
+     */
     offset_add_value(p,&p->stage2,0,callback);
 
   }else{
+
+    /* We weren't able to merge the fragment assuming zero jitter.
+     *
+     * Check whether the fragment's leading silence ends before the root's
+     * trailing silence begins.  If it does, we assume that the root is
+     * jittered forward.
+     */
     if(j<begin){
-      /* OK, we'll have to force it a bit as the root is jittered
-         forward */
+
+      /* We're going to append the non-silent samples of the fragment
+       * to the root where its silence begins.
+       */
+
+      /* Compute the amount of silence at the beginning of the fragment.
+       */
       long voff=j-fb(v);
 
-      /* don't use it unless it will extend... */
+      /* If attaching the non-silent tail of the fragment to the end
+       * of the non-silent portion of the root will extend the root,
+       * then we'll append the samples to the root.  Otherwise, no
+       * merging is necessary, as the fragment should already be contained
+       * within the root.
+       */
       if(begin+fs(v)-voff>re(root)){
+
+	/* Truncate the trailing silence from the root.
+	 */
 	c_remove(rc(root),root->silencebegin-rb(root),-1);
+
+	/* Append the non-silent tail of the fragment to the root.
+	 */
 	c_append(rc(root),vec+voff,fs(v)-voff);
       }
+
+      /* Record the fact that we merged this fragment assuming (end-begin)
+       * jitter.
+       */
       offset_add_value(p,&p->stage2,end-begin,callback);
+
     }else
+
+      /* We only get here if the fragment is past the end of the root,
+       * which means it must be farther than (dynoverlap) away, due to our
+       * root extension above.
+       */
+
+      /* We weren't able to merge this fragment into the root after all.
+       */
       return(0);
   }
 
-  /* test the new root vector for ending in silence */
+
+  /* We only get here if we merged the fragment into the root.  Update
+   * the root's silence flag.
+   *
+   * Note that this is the only place silenceflag is reset.  In other words,
+   * once i_silence_test() lights the silence flag, it can only be reset
+   * by i_silence_match().
+   */
   root->silenceflag=0;
+
+  /* Now see if the new, extended root ends in silence.
+   */
   i_silence_test(root);
 
+
+  /* Since we merged the fragment, we can free it now.  But first we propagate
+   * its lastsector flag.
+   */
   if(v->lastsector)root->lastsector=1;
   free_v_fragment(v);
   return(1);
 }
 
+
+/* ===========================================================================
+ * i_stage2_each (internal)
+ *
+ * This function (which is entirely too long) attempts to merge the passed
+ * verified fragment into the verified root.
+ *
+ * First this function looks for a run of identical samples between
+ * the root and the fragment.  If it finds a long enough run, it then
+ * checks for "rifts" (see below) and fixes the root and/or fragment as
+ * necessary.  Finally, if the fragment will extend the tail of the root,
+ * we merge the fragment and extend the root.
+ *
+ * Most of the ugliness in this function has to do with handling "rifts",
+ * which are points of disagreement between the root and the verified
+ * fragment.  This can happen when a drive consistently drops a few samples
+ * or stutters and repeats a few samples.  It has to be consistent enough
+ * to result in a verified fragment (i.e. it happens twice), but inconsistent
+ * enough (e.g. due to the jiggled reads) not to happen every time.
+ *
+ * This function returns 1 if the fragment was successfully merged into the
+ * root, and 0 if not.
+ */
 static long i_stage2_each(root_block *root, v_fragment *v,
 			  void(*callback)(long,int)){
 
   cdrom_paranoia *p=v->p;
   long dynoverlap=p->dynoverlap/2*2;
   
+  /* If this fragment has already been merged & freed, abort. */
   if(!v || !v->one)return(0);
 
+  /* If there's no verified root yet, abort. */
   if(!rv(root)){
     return(0);
   }else{
     sync_result r;
 
+    /* Search for a sufficiently long run of identical samples between
+     * the verified fragment and the verified root.  There's a little
+     * bit of subtlety in the search when silence is involved.
+     */
     if(i_iterate_stage2(p,v,&r,callback)){
 
+      /* Convert the results of the search to be relative to the root. */
       long begin=r.begin-rb(root);
       long end=r.end-rb(root);
+
+      /* Convert offset into a value that will transform a relative
+       * position in the root to the corresponding relative position in
+       * the fragment.  I.e., if offset = -2, then the sample at relative
+       * position 2 in the root is at relative position 0 in the fragment.
+       *
+       * While a bit opaque, this does reduce the number of calculations
+       * below.
+       */
       long offset=r.begin+r.offset-fb(v)-begin;
       long temp;
       c_block *l=NULL;
@@ -559,18 +1163,80 @@
       fprintf(stderr,"Stage 2 match\n");
 #endif
 
-      /* chase backward */
-      /* note that we don't extend back right now, only forward. */
+      /* Now that we've found a sufficiently long run of identical samples
+       * between the fragment and the root, we need to check for rifts.
+       *
+       * A "rift", as mentioned above, is a disagreement between the
+       * fragment and the root.  When there's a rift, the matching run
+       * found by i_iterate_stage2() will obviously stop where the root
+       * and the fragment disagree.
+       *
+       * So we detect rifts by checking whether the matching run extends
+       * to the ends of the fragment and root.  If the run does extend to
+       * the ends of the fragment and root, then all overlapping samples
+       * agreed, and there's no rift.  If, however, the matching run
+       * stops with samples left over in both the root and the fragment,
+       * that means the root and fragment disagreed at that point.
+       * Leftover samples at the beginning of the match indicate a
+       * leading rift, and leftover samples at the end of the match indicate
+       * a trailing rift.
+       *
+       * Once we detect a rift, we attempt to fix it, depending on the
+       * nature of the disagreement.  See i_analyze_rift_[rf] for details
+       * on how we determine what kind of rift it is.  See below for
+       * how we attempt to fix the rifts.
+       */
+
+      /* First, check for a leading rift, fix it if possible, and then
+       * extend the match forward until either we hit the limit of the
+       * overlapping samples, or until we encounter another leading rift.
+       * Keep doing this until we hit the beginning of the overlap.
+       *
+       * Note that while we do fix up leading rifts, we don't extend
+       * the root backward (earlier samples) -- only forward (later
+       * samples).
+       */
+
+      /* If the beginning of the match didn't reach the beginning of
+       * either the fragment or the root, we have a leading rift to be
+       * examined.
+       *
+       * Remember that (begin) is the offset into the root, and (begin+offset)
+       * is the equivalent offset into the fragment.  If neither one is at
+       * zero, then they both have samples before the match, and hence a
+       * rift.
+       */
       while((begin+offset>0 && begin>0)){
 	long matchA=0,matchB=0,matchC=0;
+
+	/* (begin) is the offset into the root of the first matching sample,
+	 * (beginL) is the offset into the fragment of the first matching
+	 * sample.  These samples are at the edge of the rift.
+	 */
 	long beginL=begin+offset;
 
+	/* The first time we encounter a leading rift, allocate a
+	 * scratch copy of the verified fragment which we'll use if
+	 * we need to fix up the fragment before merging it into
+	 * the root.
+	 */
 	if(l==NULL){
 	  int16_t *buff=malloc(fs(v)*sizeof(int16_t));
 	  l=c_alloc(buff,fb(v),fs(v));
 	  memcpy(buff,fv(v),fs(v)*sizeof(int16_t));
 	}
 
+	/* Starting at the first mismatching sample, see how far back the
+	 * rift goes, and determine what kind of rift it is.  Note that
+	 * we're searching through the fixed up copy of the fragment.
+	 *
+	 * matchA  > 0 if there are samples missing from the root
+	 * matchA  < 0 if there are duplicate samples (stuttering) in the root
+	 * matchB  > 0 if there are samples missing from the fragment
+	 * matchB  < 0 if there are duplicate samples in the fragment
+	 * matchC != 0 if there's a section of garbage, after which
+	 *             the fragment and root agree and are in sync
+	 */
 	i_analyze_rift_r(rv(root),cv(l),
 			 rs(root),cs(l),
 			 begin-1,beginL-1,
@@ -582,85 +1248,214 @@
 #endif		
 	
 	if(matchA){
-	  /* a problem with root */
+	  /* There's a problem with the root */
+
 	  if(matchA>0){
-	    /* dropped bytes; add back from v */
+	    /* There were (matchA) samples dropped from the root.  We'll add
+	     * them back from the fixed up fragment.
+	     */
 	    if(callback)(*callback)(begin+rb(root)-1,PARANOIA_CB_FIXUP_DROPPED);
 	    if(rb(root)+begin<p->root.returnedlimit)
 	      break;
 	    else{
+
+	      /* At the edge of the rift in the root, insert the missing
+	       * samples from the fixed up fragment.  They're the (matchA)
+	       * samples immediately preceding the edge of the rift in the
+	       * fragment.
+	       */
 	      c_insert(rc(root),begin,cv(l)+beginL-matchA,
 			 matchA);
+
+	      /* We just inserted (matchA) samples into the root, so update
+	       * our begin/end offsets accordingly.  Also adjust the
+	       * (offset) to compensate (since we use it to find samples in
+	       * the fragment, and the fragment hasn't changed).
+	       */
 	      offset-=matchA;
 	      begin+=matchA;
 	      end+=matchA;
 	    }
+
 	  }else{
-	    /* duplicate bytes; drop from root */
+	    /* There were (-matchA) duplicate samples (stuttering) in the
+	     * root.  We'll drop them.
+	     */
 	    if(callback)(*callback)(begin+rb(root)-1,PARANOIA_CB_FIXUP_DUPED);
 	    if(rb(root)+begin+matchA<p->root.returnedlimit) 
 	      break;
 	    else{
+
+	      /* Remove the (-matchA) samples immediately preceding the
+	       * edge of the rift in the root.
+	       */
 	      c_remove(rc(root),begin+matchA,-matchA);
+
+	      /* We just removed (-matchA) samples from the root, so update
+	       * our begin/end offsets accordingly.  Also adjust the offset
+	       * to compensate.  Remember that matchA < 0, so we're actually
+	       * subtracting from begin/end.
+	       */
 	      offset-=matchA;
 	      begin+=matchA;
 	      end+=matchA;
 	    }
 	  }
 	}else if(matchB){
-	  /* a problem with the fragment */
+	  /* There's a problem with the fragment */
+
 	  if(matchB>0){
-	    /* dropped bytes */
+	    /* There were (matchB) samples dropped from the fragment.  We'll
+	     * add them back from the root.
+	     */
 	    if(callback)(*callback)(begin+rb(root)-1,PARANOIA_CB_FIXUP_DROPPED);
+
+	    /* At the edge of the rift in the fragment, insert the missing
+	     * samples from the root.  They're the (matchB) samples
+	     * immediately preceding the edge of the rift in the root.
+	     * Note that we're fixing up the scratch copy of the fragment.
+	     */
 	    c_insert(l,beginL,rv(root)+begin-matchB,
 			 matchB);
+
+	    /* We just inserted (matchB) samples into the fixed up fragment,
+	     * so update (offset), since we use it to find samples in the
+	     * fragment based on the root's unchanged offsets.
+	     */
 	    offset+=matchB;
+
 	  }else{
-	    /* duplicate bytes */
+	    /* There were (-matchB) duplicate samples (stuttering) in the
+	     * fixed up fragment.  We'll drop them.
+	     */
 	    if(callback)(*callback)(begin+rb(root)-1,PARANOIA_CB_FIXUP_DUPED);
+
+	    /* Remove the (-matchB) samples immediately preceding the edge
+	     * of the rift in the fixed up fragment.
+	     */
 	    c_remove(l,beginL+matchB,-matchB);
+
+	    /* We just removed (-matchB) samples from the fixed up fragment,
+	     * so update (offset), since we use it to find samples in the
+	     * fragment based on the root's unchanged offsets.
+	     */
 	    offset+=matchB;
 	  }
+
 	}else if(matchC){
-	  /* Uhh... problem with both */
-	  
-	  /* Set 'disagree' flags in root */
+
+	  /* There are (matchC) samples that simply disagree between the
+	   * fragment and the root.  On the other side of the mismatch, the
+	   * fragment and root agree again.  We can't classify the mismatch
+	   * as either a stutter or dropped samples, and we have no way of
+	   * telling whether the fragment or the root is right.
+	   *
+	   * The original comment indicated that we set "disagree" flags
+	   * in the root, but it seems to be historical.
+	   */
+
 	  if(rb(root)+begin-matchC<p->root.returnedlimit)
 	    break;
+
+	  /* Overwrite the mismatching (matchC) samples in root with the
+	   * samples from the fixed up fragment.
+	   */
 	  c_overwrite(rc(root),begin-matchC,
 			cv(l)+beginL-matchC,matchC);
 	  
 	}else{
-	  /* do we have a mismatch due to silence beginning/end case? */
-	  /* in the 'chase back' case, we don't do anything. */
+      
+	  /* We may have had a mismatch because we ran into leading silence.
+	   *
+	   * Since we don't extend the root in that direction, we don't
+	   * do anything, just move on to trailing rifts.
+	   */
 
-	  /* Did not determine nature of difficulty... 
-	     report and bail */
+	  /* If the rift was too complex to fix (see i_analyze_rift_r),
+	   * we just stop and leave the leading edge where it is.
+	   */
 	    
 	  /*RRR(*callback)(post,PARANOIA_CB_XXX);*/
 	  break;
 	}
-	/* not the most efficient way, but it will do for now */
+
+	/* Recalculate the offset of the edge of the rift in the fixed
+	 * up fragment, in case it changed.
+	 */
 	beginL=begin+offset;
+
+	/* Now that we've fixed up the root or fragment as necessary, see
+	 * how far we can extend the matching run.  This function is
+	 * overkill, as it tries to extend the matching run in both
+	 * directions (and rematches what we already matched), but it works.
+	 */
 	i_paranoia_overlap(rv(root),cv(l),
 			   begin,beginL,
 			   rs(root),cs(l),
 			   &begin,&end);	
-      }
+
+      } /* end while (leading rift) */
       
-      /* chase forward */
+
+      /* Second, check for a trailing rift, fix it if possible, and then
+       * extend the match forward until either we hit the limit of the
+       * overlapping samples, or until we encounter another trailing rift.
+       * Keep doing this until we hit the end of the overlap.
+       */
+
+      /* If the end of the match didn't reach the end of either the fragment
+       * or the root, we have a trailing rift to be examined.
+       *
+       * Remember that (end) is the offset into the root, and (end+offset)
+       * is the equivalent offset into the fragment.  If neither one is
+       * at the end of the vector, then they both have samples after the
+       * match, and hence a rift.
+       *
+       * (temp) is the size of the (potentially fixed-up) fragment.  If
+       * there was a leading rift, (l) is the fixed up fragment, and
+       * (offset) is now relative to it.
+       */
       temp=l?cs(l):fs(v);
       while(end+offset<temp && end<rs(root)){
 	long matchA=0,matchB=0,matchC=0;
+
+	/* (begin) is the offset into the root of the first matching sample,
+	 * (beginL) is the offset into the fragment of the first matching
+	 * sample.  We know these samples match and will use these offsets
+	 * later when we try to extend the matching run.
+	 */
 	long beginL=begin+offset;
+
+	/* (end) is the offset into the root of the first mismatching sample
+	 * after the matching run, (endL) is the offset into the fragment of
+	 * the equivalent sample.  These samples are at the edge of the rift.
+	 */
 	long endL=end+offset;
 	
+	/* The first time we encounter a rift, allocate a scratch copy of
+	 * the verified fragment which we'll use if we need to fix up the
+	 * fragment before merging it into the root.
+	 *
+	 * Note that if there was a leading rift, we'll already have
+	 * this (potentially fixed-up) scratch copy allocated.
+	 */
 	if(l==NULL){
 	  int16_t *buff=malloc(fs(v)*sizeof(int16_t));
 	  l=c_alloc(buff,fb(v),fs(v));
 	  memcpy(buff,fv(v),fs(v)*sizeof(int16_t));
 	}
 
+	/* Starting at the first mismatching sample, see how far forward the
+	 * rift goes, and determine what kind of rift it is.  Note that we're
+	 * searching through the fixed up copy of the fragment.
+	 *
+	 * matchA  > 0 if there are samples missing from the root
+	 * matchA  < 0 if there are duplicate samples (stuttering) in the root
+	 * matchB  > 0 if there are samples missing from the fragment
+	 * matchB  < 0 if there are duplicate samples in the fragment
+	 * matchC != 0 if there's a section of garbage, after which
+	 *             the fragment and root agree and are in sync
+	 */
 	i_analyze_rift_f(rv(root),cv(l),
 			 rs(root),cs(l),
 			 end,endL,
@@ -672,79 +1467,191 @@
 #endif
 	
 	if(matchA){
-	  /* a problem with root */
+	  /* There's a problem with the root */
+
 	  if(matchA>0){
-	    /* dropped bytes; add back from v */
+	    /* There were (matchA) samples dropped from the root.  We'll add
+	     * them back from the fixed up fragment.
+	     */
 	    if(callback)(*callback)(end+rb(root),PARANOIA_CB_FIXUP_DROPPED);
 	    if(end+rb(root)<p->root.returnedlimit)
 	      break;
+
+	    /* At the edge of the rift in the root, insert the missing
+	     * samples from the fixed up fragment.  They're the (matchA)
+	     * samples immediately preceding the edge of the rift in the
+	     * fragment.
+	     */
 	    c_insert(rc(root),end,cv(l)+endL,matchA);
+
+	    /* Although we just inserted samples into the root, we did so
+	     * after (begin) and (end), so we needn't update those offsets.
+	     */
+
 	  }else{
-	    /* duplicate bytes; drop from root */
+	    /* There were (-matchA) duplicate samples (stuttering) in the
+	     * root.  We'll drop them.
+	     */
 	    if(callback)(*callback)(end+rb(root),PARANOIA_CB_FIXUP_DUPED);
 	    if(end+rb(root)<p->root.returnedlimit)
 	      break;
+
+	    /* Remove the (-matchA) samples immediately following the edge
+	     * of the rift in the root.
+	     */
 	    c_remove(rc(root),end,-matchA);
+
+	    /* Although we just removed samples from the root, we did so
+	     * after (begin) and (end), so we needn't update those offsets.
+	     */
+
 	  }
 	}else if(matchB){
-	  /* a problem with the fragment */
+	  /* There's a problem with the fragment */
+
 	  if(matchB>0){
-	    /* dropped bytes */
+	    /* There were (matchB) samples dropped from the fragment.  We'll
+	     * add them back from the root.
+	     */
 	    if(callback)(*callback)(end+rb(root),PARANOIA_CB_FIXUP_DROPPED);
+
+	    /* At the edge of the rift in the fragment, insert the missing
+	     * samples from the root.  They're the (matchB) samples
+	     * immediately following the dge of the rift in the root.
+	     * Note that we're fixing up the scratch copy of the fragment.
+	     */
 	    c_insert(l,endL,rv(root)+end,matchB);
+
+	    /* Although we just inserted samples into the fragment, we did so
+	     * after (begin) and (end), so (offset) hasn't changed either.
+	     */
+
 	  }else{
-	    /* duplicate bytes */
+	    /* There were (-matchB) duplicate samples (stuttering) in the
+	     * fixed up fragment.  We'll drop them.
+	     */
 	    if(callback)(*callback)(end+rb(root),PARANOIA_CB_FIXUP_DUPED);
+
+	    /* Remove the (-matchB) samples immediately following the edge
+	     * of the rift in the fixed up fragment.
+	     */
 	    c_remove(l,endL,-matchB);
+
+	    /* Although we just removed samples from the fragment, we did so
+	     * after (begin) and (end), so (offset) hasn't changed either.
+	     */
 	  }
 	}else if(matchC){
-	  /* Uhh... problem with both */
-	  
-	  /* Set 'disagree' flags in root */
+
+          /* There are (matchC) samples that simply disagree between the
+           * fragment and the root.  On the other side of the mismatch, the
+           * fragment and root agree again.  We can't classify the mismatch
+           * as either a stutter or dropped samples, and we have no way of
+           * telling whether the fragment or the root is right.
+           *
+           * The original comment indicated that we set "disagree" flags
+           * in the root, but it seems to be historical.
+           */
+
 	  if(end+rb(root)<p->root.returnedlimit)
 	    break;
+
+          /* Overwrite the mismatching (matchC) samples in root with the
+           * samples from the fixed up fragment.
+           */
 	  c_overwrite(rc(root),end,cv(l)+endL,matchC);
+
 	}else{
+
+	  /* We may have had a mismatch because we ran into trailing silence.
+	   */
+
+	  /* At this point we have a trailing rift.  We check whether
+	   * one of the vectors (fragment or root) has trailing silence.
+	   */
 	  analyze_rift_silence_f(rv(root),cv(l),
 				 rs(root),cs(l),
 				 end,endL,
 				 &matchA,&matchB);
 	  if(matchA){
-	    /* silence in root */
+
+	    /* The contents of the root's trailing rift are silence.  The
+	     * fragment's are not (otherwise there wouldn't be a rift).
+	     * We therefore assume that the root has garbage from this
+	     * point forward and truncate it.
+	     *
+	     * This will have the effect of eliminating the trailing
+	     * rift, causing the fragment's samples to be appended to
+	     * the root.
+	     */
 	    /* Can only do this if we haven't already returned data */
 	    if(end+rb(root)>=p->root.returnedlimit){
 	      c_remove(rc(root),end,-1);
 	    }
 
 	  }else if(matchB){
-	    /* silence in fragment; lose it */
-	    
+
+	    /* The contents of the fragment's trailing rift are silence.
+	     * The root's are not (otherwise there wouldn't be a rift).
+	     * We therefore assume that the fragment has garbage from this
+	     * point forward.
+	     *
+	     * We needn't actually truncate the fragment, because the root
+	     * has already been fixed up from this fragment as much as
+	     * possible, and the truncated fragment wouldn't extend the
+	     * root.  Therefore, we can consider this (truncated) fragment
+	     * to be already merged into the root.  So we dispose of it and
+	     * return a success.
+	     */
 	    if(l)i_cblock_destructor(l);
 	    free_v_fragment(v);
 	    return(1);
 
 	  }else{
-	    /* Could not determine nature of difficulty... 
-	       report and bail */
+
+	    /* If the rift was too complex to fix (see i_analyze_rift_f),
+	     * we just stop and leave the trailing edge where it is.
+	     */
 	    
 	    /*RRR(*callback)(post,PARANOIA_CB_XXX);*/
 	  }
 	  break;
 	}
-	/* not the most efficient way, but it will do for now */
+
+        /* Now that we've fixed up the root or fragment as necessary, see
+         * how far we can extend the matching run.  This function is
+         * overkill, as it tries to extend the matching run in both
+         * directions (and rematches what we already matched), but it works.
+         */
 	i_paranoia_overlap(rv(root),cv(l),
 			   begin,beginL,
 			   rs(root),cs(l),
 			   NULL,&end);
-      }
 
-      /* if this extends our range, let's glom */
+      } /* end while (trailing rift) */
+
+
+      /* Third and finally, if the overlapping verified fragment extends
+       * our range forward (later samples), we append ("glom") the new
+       * samples to the end of the root.
+       *
+       * Note that while we did fix up leading rifts, we don't extend
+       * the root backward (earlier samples) -- only forward (later
+       * samples).
+       *
+       * This is generally fine, since the verified root is supposed to
+       * slide from earlier samples to later samples across multiple calls
+       * to paranoia_read().
+       */
       {
 	long sizeA=rs(root);
 	long sizeB;
 	long vecbegin;
 	int16_t *vector;
 	  
+	/* If there were any rifts, we'll use the fixed up fragment (l),
+	 * otherwise, we use the original fragment (v).
+	 */
 	if(l){
 	  sizeB=cs(l);
 	  vector=cv(l);
@@ -755,6 +1662,11 @@
 	  vecbegin=fb(v);
 	}
 
+	/* Convert the fragment-relative offset (sizeB) into an offset
+	 * relative to the root (A), and see if the offset is past the
+	 * end of the root (> sizeA).  If it is, this fragment will extend
+	 * our root.
+	 */
 	if(sizeB-offset>sizeA || v->lastsector){	  
 	  if(v->lastsector){
 	    root->lastsector=1;
@@ -762,12 +1674,30 @@
 	  
 	  if(end<sizeA)c_remove(rc(root),end,-1);
 	  
+	  /* Extend the root with the samples from the end of the
+	   * (potentially fixed up) fragment.
+	   */
 	  if(sizeB-offset-end)c_append(rc(root),vector+end+offset,
 					 sizeB-offset-end);
 	  
+	  /* Any time we update the root we need to check whether it ends
+	   * with a large span of silence.
+	   */
 	  i_silence_test(root);
 
-	  /* add offset into dynoverlap stats */
+	  /* Add the offset into our stage 2 statistics.
+	   *
+	   * Note that we convert our peculiar offset (which is in terms of
+	   * the relative positions of samples within each vector) back into
+	   * the actual offset between what A considers sample N and what B
+	   * considers sample N.
+	   *
+	   * We do this at the end of rift handling because any original
+	   * offset returned by i_iterate_stage2() might have been due to
+	   * dropped or duplicated samples.  Once we've fixed up the root
+	   * and the fragment, we have an offset which more reliably
+	   * indicates jitter.
+	   */
 	  offset_add_value(p,&p->stage2,offset+vecbegin-rb(root),callback);
 	}
       }
@@ -775,18 +1705,35 @@
       free_v_fragment(v);
       return(1);
       
-    }else{
-      /* D'oh.  No match.  What to do with the fragment? */
+    }else{ /* !i_iterate_stage2(...) */
+
+      /* We were unable to merge this fragment into the root.
+       *
+       * Check whether the fragment should have overlapped with the root,
+       * even taking possible jitter into account.  (I.e., If the fragment
+       * ends so far before the end of the root that even (dynoverlap)
+       * samples of jitter couldn't push it beyond the end of the root,
+       * it should have overlapped.)
+       *
+       * It is, however, possible that we failed to match using the normal
+       * tests because we're dealing with silence, which we handle
+       * separately.
+       *
+       * If the fragment should have overlapped, and we're not dealing
+       * with the special silence case, we don't know what to make of
+       * this fragment, and we just discard it.
+       */
       if(fe(v)+dynoverlap<re(root) && !root->silenceflag){
 	/* It *should* have matched.  No good; free it. */
 	free_v_fragment(v);
       }
+
       /* otherwise, we likely want this for an upcoming match */
       /* we don't free the sort info (if it was collected) */
       return(0);
       
     }
-  }
+  } /* endif rv(root) */
 }
 
 static int i_init_root(root_block *root, v_fragment *v,long begin,
@@ -807,6 +1754,8 @@
       root->vector=c_alloc(buff,fb(v),fs(v));
     }    
 
+    /* Check whether the new root has a long span of trailing silence.
+     */
     i_silence_test(root);
 
     return(1);
@@ -818,6 +1767,32 @@
   return((*(v_fragment **)a)->begin-(*(v_fragment **)b)->begin);
 }
 
+
+/* ===========================================================================
+ * i_stage2 (internal)
+ *
+ * This function attempts to extend the verified root by merging verified
+ * fragments into it.  It keeps extending the tail end of the root until
+ * it runs out of matching fragments.  See i_stage2_each (and
+ * i_iterate_stage2) for details of fragment matching and merging.
+ *
+ * This function is called by paranoia_read_limited when the verified root
+ * doesn't contain sufficient data to satisfy the request for samples.
+ * If this function fails to extend the verified root far enough (having
+ * exhausted the currently available verified fragments), the caller
+ * will then read the device again to try and establish more verified
+ * fragments.
+ *
+ * We first try to merge all the fragments in ascending order using the
+ * standard method (i_stage2_each()), and then we try to merge the
+ * remaining fragments using silence matching (i_silence_match())
+ * if the root has a long span of trailing silence.  See the initial
+ * comments on silence and  i_silence_match() for an explanation of this
+ * distinction.
+ *
+ * This function returns the number of verified fragments successfully
+ * merged into the verified root.
+ */
 static int i_stage2(cdrom_paranoia *p,long beginword,long endword,
 			  void(*callback)(long,int)){
 
@@ -833,8 +1808,16 @@
      matching in the event that there are still audio vectors with
      content to be sunk before the silence */
 
+  /* This flag is not the silence flag.  Rather, it indicates whether
+   * we succeeded in adding a verified fragment to the verified root.
+   * In short, we keep adding fragments until we no longer find a
+   * match.
+   */
   while(flag){
-    /* loop through all the current fragments */
+
+    /* Convert the linked list of verified fragments into an array,
+     * to be sorted in order of beginning sample position
+     */
     v_fragment *first=v_first(p);
     long active=p->fragments->active,count=0;
     v_fragment *list[active];
@@ -845,26 +1828,58 @@
       first=next;
     }
 
+    /* Reset the flag so that if we don't match any fragments, we
+     * stop looping.  Then, proceed only if there are any fragments
+     * to match.
+     */
     flag=0;
     if(count){
-      /* sorted in ascending order of beginning */
+
+      /* Sort the array of verified fragments in order of beginning
+       * sample position.
+       */
       qsort(list,active,sizeof(v_fragment *),&vsort);
       
-      /* we try a nonzero based match even if in silent mode in
-	 the case that there are still cached vectors to sink
-	 behind continent->ocean boundary */
+      /* We don't check for the silence flag yet, because even if the
+       * verified root ends in silence (and thus the silence flag is set),
+       * there may be a non-silent region at the beginning of the verified
+       * root, into which we can merge the verified fragments.
+       */
 
+      /* Iterate through the verified fragments, starting at the fragment
+       * with the lowest beginning sample position.
+       */
       for(count=0;count<active;count++){
 	first=list[count];
+
+	/* Make sure this fragment hasn't already been merged (and
+	 * thus freed). */
 	if(first->one){
+
+	  /* If we don't have a verified root yet, just promote the first
+	   * fragment (with lowest beginning sample) to be the verified
+	   * root.
+	   */
 	  if(rv(root)==NULL){
 	    if(i_init_root(&(p->root),first,beginword,callback)){
 	      free_v_fragment(first);
+
+	      /* Consider this a merged fragment, so set the flag
+	       * to keep looping.
+	       */
 	      flag=1;
 	      ret++;
 	    }
 	  }else{
+
+	    /* Try to merge this fragment with the verified root,
+	     * extending the tail of the root.
+	     */
 	    if(i_stage2_each(root,first,callback)){
+
+	      /* If we successfully merged the fragment, set the flag
+	       * to keep looping.
+	       */
 	      ret++;
 	      flag=1;
 	    }
@@ -872,22 +1887,50 @@
 	}
       }
 
-      /* silence handling */
+      /* If the verified root ends in a long span of silence, iterate
+       * through the remaining unmerged fragments to see if they can be
+       * merged using our special silence matching.
+       */
       if(!flag && p->root.silenceflag){
 	for(count=0;count<active;count++){
 	  first=list[count];
+
+	  /* Make sure this fragment hasn't already been merged (and
+	   * thus freed). */
 	  if(first->one){
 	    if(rv(root)!=NULL){
+
+	      /* Try to merge the fragment into the root.  This will only
+	       * succeed if the fragment overlaps and begins with sufficient
+	       * silence to be a presumed match.
+	       *
+	       * Note that the fragments must be passed to i_silence_match()
+	       * in ascending order, as they are here.
+	       */
 	      if(i_silence_match(root,first,callback)){
+
+		/* If we successfully merged the fragment, set the flag
+		 * to keep looping.
+		 */
 		ret++;
 		flag=1;
 	      }
 	    }
 	  }
-	}
+	} /* end for */
       }
-    }
-  }
+    } /* end if(count) */
+
+    /* If we were able to extend the verified root at all during this pass
+     * through the loop, loop again to see if we can merge any remaining
+     * fragments with the extended root.
+     */
+
+  } /* end while */
+
+  /* Return the number of fragments we successfully merged into the
+   * verified root.
+   */
   return(ret);
 }
 
@@ -1011,6 +2054,7 @@
       c_append(rc(root),temp,CD_FRAMESIZE_RAW);
       free(temp);
     }
+
     root->returnedlimit=re(root);
   }
 }    
@@ -1062,7 +2106,40 @@
   return(ret);
 }
 
-/* returns last block read, -1 on error */
+
+
+/* ===========================================================================
+ * read_c_block() (internal)
+ *
+ * This funtion reads many (p->readahead) sectors, encompassing at least
+ * the requested words.
+ *
+ * It returns a c_block which encapsulates these sectors' data and sector
+ * number.  The sectors come come from multiple low-level read requests.
+ *
+ * This function reads many sectors in order to exhaust any caching on the
+ * drive itself, as caching would simply return the same incorrect data
+ * over and over.  Paranoia depends on truly re-reading portions of the
+ * disc to make sure the reads are accurate and correct any inaccuracies.
+ *
+ * Which precise sectors are read varies ("jiggles") between calls to
+ * read_c_block, to prevent consistent errors across multiple reads
+ * from being misinterpreted as correct data.
+ *
+ * The size of each low-level read is determined by the underlying driver
+ * (p->d->nsectors), which allows the driver to specify how many sectors
+ * can be read in a single request.  Historically, the Linux kernel could
+ * only read 8 sectors at a time, with likely dropped samples between each
+ * read request.  Other operating systems may have different limitations.
+ *
+ * This function is called by paranoia_read_limited(), which breaks the
+ * c_block of read data into runs of samples that are likely to be
+ * contiguous, verifies them and stores them in verified fragments, and
+ * eventually merges the fragments into the verified root.
+ *
+ * This function returns the last c_block read or NULL on error.
+ */
+
 c_block *i_read_c_block(cdrom_paranoia *p,long beginword,long endword,
 		     void(*callback)(long,int)){
 
@@ -1084,6 +2161,13 @@
   long dynoverlap=(p->dynoverlap+CD_FRAMEWORDS-1)/CD_FRAMEWORDS; 
   long anyflag=0;
 
+
+  /* Calculate the first sector to read.  This calculation takes
+   * into account the need to jitter the starting point of the read
+   * to reveal consistent errors as well as the low reliability of
+   * the edge words of a read.
+   */
+
   /* What is the first sector to read?  want some pre-buffer if
      we're not at the extreme beginning of the disc */
   
@@ -1116,6 +2200,10 @@
   
   readat+=driftcomp;
   
+  /* Create a new, empty c_block and add it to the head of the
+   * list of c_blocks in memory.  It will be empty until the end of
+   * this subroutine.
+   */
   if(p->enable&(PARANOIA_MODE_OVERLAP|PARANOIA_MODE_VERIFY)){
     flags=calloc(totaltoread*CD_FRAMEWORDS,1);
     new=new_c_block(p);
@@ -1130,12 +2218,27 @@
   sofar=0;
   firstread=-1;
   
+  /* Issue each of the low-level reads until we've read enough sectors
+   * to exhaust the drive's cache.
+   *
+   * p->readahead   = total number of sectors to read
+   * p->d->nsectors = number of sectors to read per request
+   *
+   * The driver determines this latter number, which is the maximum
+   * number of sectors the kernel can reliably read per request.  In
+   * old Linux kernels, there was a hard limit of 8 sectors per read.
+   * While this limit has since been removed, certain motherboards
+   * can't handle DMA requests larger than 64K.  And other operating
+   * systems may have similar limitations.  So the method of splitting
+   * up reads is still useful.
+   */
+
   /* actual read loop */
 
   while(sofar<totaltoread){
-    long secread=sectatonce;
-    long adjread=readat;
-    long thisread;
+    long secread=sectatonce;  /* number of sectors to read this request */
+    long adjread=readat;      /* first sector to read for this request */
+    long thisread;            /* how many sectors were read this request */
 
     /* don't under/overflow the audio session */
     if(adjread<p->current_firstsector){
@@ -1150,6 +2253,15 @@
     if(secread>0){
       
       if(firstread<0)firstread=adjread;
+
+      /* Issue the low-level read to the driver.
+       */
+
+      /* If the low-level read returned too few sectors, pad the result
+       * with null data and mark it as invalid (FLAGS_UNREAD).  We pad
+       * because we're going to be appending further reads to the current
+       * c_block.
+       */
       if((thisread=cdda_read(p->d,buffer+sofar*CD_FRAMEWORDS,adjread,
 			    secread))<secread){
 
@@ -1166,6 +2278,20 @@
       }
       if(thisread!=0)anyflag=1;
       
+
+      /* Because samples are likely to be dropped between read requests,
+       * mark the samples near the the boundaries of the read requests
+       * as suspicious (FLAGS_EDGE).  This means that any span of samples
+       * against which these adjacent read requests are compared must
+       * overlap beyond the edges and into the more trustworthy data.
+       * Such overlapping spans are accordingly at least MIN_WORDS_OVERLAP
+       * words long (and naturally longer if any samples were dropped
+       * between the read requests).
+       *
+       *          (EEEEE...overlapping span...EEEEE)
+       * (read 1 ...........EEEEE)   (EEEEE...... read 2 ......EEEEE) ...
+       *         dropped samples --^
+       */
       if(flags && sofar!=0){
 	/* Don't verify across overlaps that are too close to one
            another */
@@ -1174,6 +2300,10 @@
 	  flags[sofar*CD_FRAMEWORDS+i]|=FLAGS_EDGE;
       }
 
+
+      /* Move the read cursor ahead by the number of sectors we attempted
+       * to read.
+       */
       p->lastread=adjread+secread;
       
       if(adjread+secread-1==p->current_lastsector)
@@ -1183,13 +2313,24 @@
       
       sofar+=secread;
       readat=adjread+secread; 
-    }else
+    }else /* secread <= 0 */
       if(readat<p->current_firstsector)
 	readat+=sectatonce; /* due to being before the readable area */
       else
 	break; /* due to being past the readable area */
-  }
+    
+    
+    /* Keep issuing read requests until we've read enough sectors to
+     * exhaust the drive's cache.
+     */
+    
+  } /* end while */
 
+
+  /* If we managed to read any sectors at all (anyflag), fill in the
+   * previously allocated c_block with the read data.  Otherwise, free
+   * our buffers, dispose of the c_block, and return NULL.
+   */
   if(anyflag){
     new->vector=buffer;
     new->begin=firstread*CD_FRAMEWORDS-p->dyndrift;
@@ -1204,9 +2345,17 @@
   return(new);
 }
 
-/* The returned buffer is *not* to be freed by the caller.  It will
-   persist only until the next call to paranoia_read() for this p */
 
+/** ==========================================================================
+ * paranoia_read(), paranoia_read_limited()
+ *
+ * These functions "read" the next sector of audio data and returns
+ * a pointer to a full sector of verified samples (2352 bytes).
+ *
+ * The returned buffer is *not* to be freed by the caller.  It will
+ *   persist only until the next call to paranoia_read() for this p 
+*/
+
 int16_t *paranoia_read(cdrom_paranoia *p, void(*callback)(long,int)){
   return paranoia_read_limited(p,callback,20);
 }
@@ -1224,6 +2373,16 @@
 
   if(beginword>p->root.returnedlimit)p->root.returnedlimit=beginword;
   lastend=re(root);
+
+
+  /* Since paranoia reads and verifies chunks of data at a time
+   * (which it needs to counteract dropped samples and inaccurate
+   * seeking), the requested samples may already be in memory,
+   * in the verified "root".
+   *
+   * The root is where paranoia stores samples that have been
+   * verified and whose position has been accurately determined.
+   */
   
   /* First, is the sector we want already in the root? */
   while(rv(root)==NULL ||
@@ -1233,14 +2392,37 @@
 	re(root)<endword){
     
     /* Nope; we need to build or extend the root verified range */
+
+    /* We may have already read the necessary samples and placed
+     * them into verified fragments, but not yet merged them into
+     * the verified root.  We'll check that before we actually
+     * try to read data from the drive.
+     */
     
     if(p->enable&(PARANOIA_MODE_VERIFY|PARANOIA_MODE_OVERLAP)){
+
+      /* We need to make sure our memory consumption doesn't grow
+       * to the size of the whole CD.  But at the same time, we
+       * need to hang onto some of the verified data (even perhaps
+       * data that's already been returned by paranoia_read()) in
+       * order to verify and accurately position future samples.
+       *
+       * Therefore, we free some of the verified data that we
+       * no longer need.
+       */
       i_paranoia_trim(p,beginword,endword);
       recover_cache(p);
+
       if(rb(root)!=-1 && p->root.lastsector)
 	i_end_case(p,endword+(MAX_SECTOR_OVERLAP*CD_FRAMEWORDS),
 			callback);
       else
+
+	/* Merge as many verified fragments into the verified root
+	 * as we need to satisfy the pending request.  We may
+	 * not have all the fragments we need, in which case we'll
+	 * read data from the CD further below.
+	 */
 	i_stage2(p,beginword,
 		      endword+(MAX_SECTOR_OVERLAP*CD_FRAMEWORDS),
 		      callback);
@@ -1248,6 +2430,9 @@
       i_end_case(p,endword+(MAX_SECTOR_OVERLAP*CD_FRAMEWORDS),
 		 callback); /* only trips if we're already done */
     
+    /* If we were able to fill the verified root with data already
+     * in memory, we don't need to read any more data from the drive.
+     */
     if(!(rb(root)==-1 || rb(root)>beginword || 
 	 re(root)<endword+(MAX_SECTOR_OVERLAP*CD_FRAMEWORDS))) 
       break;
@@ -1255,13 +2440,35 @@
     /* Hmm, need more.  Read another block */
 
     {    
+      /* Read many sectors, encompassing at least the requested words.
+       *
+       * The returned c_block encapsulates these sectors' data and
+       * sector number.  The sectors come come from multiple low-level
+       * read requests, and words which were near the boundaries of
+       * those read requests are marked with FLAGS_EDGE.
+       */
       c_block *new=i_read_c_block(p,beginword,endword,callback);
       
       if(new){
 	if(p->enable&(PARANOIA_MODE_OVERLAP|PARANOIA_MODE_VERIFY)){
       
+	  /* If we need to verify these samples, send them to
+	   * stage 1 verification, which will add verified samples
+	   * to the set of verified fragments.  Verified fragments
+	   * will be merged into the verified root during stage 2
+	   * overlap analysis.
+	   */
 	  if(p->enable&PARANOIA_MODE_VERIFY)
 	    i_stage1(p,new,callback);
+
+	  /* If we're only doing overlapping reads (no stage 1
+	   * verification), consider each low-level read in the
+	   * c_block to be a verified fragment.  We exclude the
+	   * edges from these fragments to enforce the requirement
+	   * that we overlap the reads by the minimum amount.
+	   * These fragments will be merged into the verified
+	   * root during stage 2 overlap analysis.
+	   */
 	  else{
 	    /* just make v_fragments from the boundary information. */
 	    long begin=0,end=0;
@@ -1281,6 +2488,11 @@
 	  
 	}else{
 
+	  /* If we're not doing any overlapping reads or verification
+	   * of data, skip over the stage 1 and stage 2 verification and
+	   * promote this c_block directly to the current "verified" root.
+	   */
+
 	  if(p->root.vector)i_cblock_destructor(p->root.vector);
 	  free_elem(new->e,0);
 	  p->root.vector=new;
@@ -1323,9 +2535,19 @@
 	}
       }
     }
-  }
+
+    /* Having read data from the drive and placed it into verified
+     * fragments, we now loop back to try to extend the root with
+     * the newly loaded data.  Alternatively, if the root already
+     * contains the needed data, we'll just fall through.
+     */
+
+  } /* end while */
   p->cursor++;
 
+  /* Return a pointer into the verified root.  Thus, the caller
+   * must NOT free the returned pointer!
+   */
   return(rv(root)+(beginword-rb(root)));
 }
 



More information about the commits mailing list