[vorbis-dev] new mdct

Takehiro Tominaga tominaga at isoternet.org
Sat Aug 26 01:56:23 PDT 2000



>>>>> "T" == Takehiro Tominaga <tominaga at isoternet.org> writes:

    T> and the output is not identical. I think this is becuase of
    T> floating point rounding problem, but there may be a bug.

Sorry. That's my silly bug....
I made a new one. check this out.

Basic strategy of optimization is

1 loop integration.
  "step1/step2" in old mdct code and "step4,5,6,7/step8" is now merged.

2 memory access order optimization
  because almost all the today's computer is optimized for positive
diirection sequencial read/write.

And, I have some question.

you can easyly find we can "more optimize" the mdct_backward by
calculating (window[i]*B[2*i]) in the initilization routine.
mdct_forward is vice verce. but this may make a appling
pre-window time information (a feature extension?) impossible.

in mapping0.c,

   mdct_backward(...);
  /* now apply the decoded pre-window time information */
  /* NOT IMPLEMENTED */

  /* window the data */
  pcm[] = pcm[]*window[];

So i have a question: what kind of "pre-window time information"
operation do you planing ?
--- 
Takehiro TOMINAGA // may the source be with you!

diff -uBb vorbis/lib/mapping0.c vorbis.mdct/lib/mapping0.c
--- vorbis/lib/mapping0.c	Tue Aug 15 18:09:43 2000
+++ vorbis.mdct/lib/mapping0.c	Thu Aug 24 02:55:32 2000
@@ -349,26 +349,16 @@
   for(i=0;i<vi->channels;i++){
     double *pcm=vb->pcm[i];
     _analysis_output("out",seq+i,pcm,n/2,0,1);
-    mdct_backward(vd->transform[vb->W][0],pcm,pcm);
-  }
+    mdct_backward(vd->transform[vb->W][0],pcm, window);
 
   /* now apply the decoded pre-window time information */
   /* NOT IMPLEMENTED */
   
-  /* window the data */
-  for(i=0;i<vi->channels;i++){
-    double *pcm=vb->pcm[i];
-    if(nonzero[i])
-      for(j=0;j<n;j++)
-	pcm[j]*=window[j];
-    else
-      for(j=0;j<n;j++)
-	pcm[j]=0.;
     _analysis_output("final",seq++,pcm,n,0,0);
-  }
             
   /* now apply the decoded post-window time information */
   /* NOT IMPLEMENTED */
+  }
 
   /* all done! */
   return(0);
diff -uBb vorbis/lib/mdct.c vorbis.mdct/lib/mdct.c
--- vorbis/lib/mdct.c	Wed Aug 23 23:07:38 2000
+++ vorbis.mdct/lib/mdct.c	Fri Aug 25 02:39:41 2000
@@ -49,13 +49,10 @@
   double *trig=malloc(sizeof(double)*(n+n/4));
   double *AE=trig;
   double *AO=trig+1;
-  double *BE=AE+n/2;
-  double *BO=BE+1;
-  double *CE=BE+n/2;
-  double *CO=CE+1;
+  double *B=AE+n/2;
   
   int i;
-  int log2n=lookup->log2n=rint(log(n)/log(2));
+  int log2n=lookup->log2n=rint(log(n)/log(2))-1;
   lookup->n=n;
   lookup->trig=trig;
   lookup->bitrev=bitrev;
@@ -63,21 +60,30 @@
   /* trig lookups... */
 
   for(i=0;i<n/4;i++){
-    AE[i*2]=cos((M_PI/n)*(4*i));
-    AO[i*2]=-sin((M_PI/n)*(4*i));
-    BE[i*2]=cos((M_PI/(2*n))*(2*i+1));
-    BO[i*2]=sin((M_PI/(2*n))*(2*i+1));
+    AE[i*2]= cos((M_PI/n)*(4*(n/4-i-1)));
+    AO[i*2]=-sin((M_PI/n)*(4*(n/4-i-1)));
   }
+
   for(i=0;i<n/8;i++){
-    CE[i*2]=cos((M_PI/n)*(4*i+2));
-    CO[i*2]=-sin((M_PI/n)*(4*i+2));
+    int j;
+
+    *B++= cos((M_PI/n)*(4*i+2));
+    *B++=-sin((M_PI/n)*(4*i+2));
+
+    j = i;
+    *B++=-cos((M_PI/(2*n))*(2*j+1))*0.5;
+    *B++= tan((M_PI/(2*n))*(2*j+1));
+
+    j = n/4 - i - 1;
+    *B++=-cos((M_PI/(2*n))*(2*j+1))*0.5;
+    *B++= tan((M_PI/(2*n))*(2*j+1));
   }
 
   /* bitreverse lookup... */
 
   {
-    int mask=(1<<(log2n-1))-1,i,j;
-    int msb=1<<(log2n-2);
+    int mask=(1<<log2n)-1,i,j;
+    int msb=1<<(log2n-1);
     for(i=0;i<n/8;i++){
       int acc=0;
       for(j=0;msb>>j;j++)
@@ -96,112 +102,47 @@
   }
 }
 
-static double *_mdct_kernel(double *x, double *w,
-			    int n, int n2, int n4, int n8,
-			    mdct_lookup *init){
+static void *_mdct_kernel(double *w,
+			  int n,int n2,
+			  mdct_lookup *init)
+{
   int i;
-  /* step 2 */
-
-  {
-    double *xA=x+n4;
-    double *xB=x;
-    double *w2=w+n4;
-    double *A=init->trig+n2;
-
-    for(i=0;i<n4;){
-      double x0=*xA - *xB;
-      double x1;
-      w2[i]=    *xA++ + *xB++;
-
-
-      x1=       *xA - *xB;
-      A-=4;
-
-      w[i++]=   x0 * A[0] + x1 * A[1];
-      w[i]=     x1 * A[0] - x0 * A[1];
-
-      w2[i++]=  *xA++ + *xB++;
-
-    }
-  }
 
   /* step 3 */
-
-  {
-    int r,s;
-    for(i=0;i<init->log2n-3;i++){
-      int k0=n>>(i+2);
-      int k1=1<<(i+3);
-      int wbase=n2-2;
-      double *A=init->trig;
-      double *temp;
-
-      for(r=0;r<(k0>>2);r++){
-        int w1=wbase;
-	int w2=w1-(k0>>1);
-	double AEv= A[0],wA;
-	double AOv= A[1],wB;
-	wbase-=2;
-
-	k0++;
-	for(s=0;s<(2<<i);s++){
-	  wB     =w[w1]   -w[w2];
-	  x[w1]  =w[w1]   +w[w2];
-
-	  wA     =w[++w1] -w[++w2];
-	  x[w1]  =w[w1]   +w[w2];
-
-	  x[w2]  =wA*AEv  - wB*AOv;
-	  x[w2-1]=wB*AEv  + wA*AOv;
-
-	  w1-=k0;
-	  w2-=k0;
-	}
-	k0--;
-
-	A+=k1;
-      }
-
-      temp=w;
-      w=x;
-      x=temp;
-    }
-  }
-
-  /* step 4, 5, 6, 7 */
-  {
-    double *C=init->trig+n;
-    int *bit=init->bitrev;
-    double *x1=x;
-    double *x2=x+n2-1;
-    for(i=0;i<n8;i++){
-      int t1=*bit++;
-      int t2=*bit++;
-
-      double wA=w[t1]-w[t2+1];
-      double wB=w[t1-1]+w[t2];
-      double wC=w[t1]+w[t2+1];
-      double wD=w[t1-1]-w[t2];
-
-      double wACE=wA* *C;
-      double wBCE=wB* *C++;
-      double wACO=wA* *C;
-      double wBCO=wB* *C++;
-      
-      *x1++=( wC+wACO+wBCE)*.5;
-      *x2--=(-wD+wBCO-wACE)*.5;
-      *x1++=( wD+wBCO-wACE)*.5; 
-      *x2--=( wC-wACO-wBCE)*.5;
-    }
-  }
-  return(x);
+  i = 2;
+  do {
+    int k0=n2>>i;
+    int r = -k0;
+
+    do {
+      double *A=init->trig+n2 + ((r+k0)<<i);
+      double AEv= A[-2],AOv= A[-1];
+      int s;
+      r-=2;
+      s=n2+r;
+
+      do {
+	double wB,wA;
+	wB = w[s+k0]   - w[s];
+	wA = w[s+k0+1] - w[s+1];
+
+	w[s+k0]   = w[s+k0]   + w[s];
+	w[s+k0+1] = w[s+k0+1] + w[s+1];
+
+	w[s  ] = wB*AEv  + wA*AOv;
+	w[s+1] = wA*AEv  - wB*AOv;
+	s-=k0*2;
+      } while (s>r);
+    } while (r>-k0*2);
+    i++;
+  } while (i<init->log2n);
 }
 
-void mdct_forward(mdct_lookup *init, double *in, double *out){
+void mdct_forward(mdct_lookup *init, double *in, double *out)
+{
   int n=init->n;
   double *x=alloca(sizeof(double)*(n/2));
   double *w=alloca(sizeof(double)*(n/2));
-  double *xx;
   int n2=n>>1;
   int n4=n>>2;
   int n8=n>>3;
@@ -212,118 +153,199 @@
     double tempA,tempB;
     int in1=n2+n4-4;
     int in2=in1+5;
-    double *A=init->trig+n2;
-
-    i=0;
+    double *A=init->trig;
     
     for(i=0;i<n8;i+=2){
-      A-=2;
       tempA= in[in1+2] + in[in2];
       tempB= in[in1] + in[in2+2];       
       in1 -=4;in2 +=4;
       x[i]=   tempB*A[1] + tempA*A[0];
       x[i+1]= tempB*A[0] - tempA*A[1];
+      A+=2;
     }
 
     in2=1;
 
     for(;i<n2-n8;i+=2){
-      A-=2;
       tempA= in[in1+2] - in[in2];
       tempB= in[in1] - in[in2+2];       
       in1 -=4;in2 +=4;
       x[i]=   tempB*A[1] + tempA*A[0];
       x[i+1]= tempB*A[0] - tempA*A[1];
+      A+=2;
     }
     
     in1=n-4;
 
     for(;i<n2;i+=2){
-      A-=2;
       tempA= -in[in1+2] - in[in2];
       tempB= -in[in1] - in[in2+2];       
       in1 -=4;in2 +=4;
       x[i]=   tempB*A[1] + tempA*A[0];
       x[i+1]= tempB*A[0] - tempA*A[1];
+      A+=2;
     }
   }
 
-  xx=_mdct_kernel(x,w,n,n2,n4,n8,init);
+  {
+    double *xA=x+n4;
+    double *xB=x;
+    double *w2=w+n4;
+    double *A=init->trig;
+
+    for(i=0;i<n4;){
+      double x0=*xA - *xB;
+      double x1;
+      w2[i]=    *xA++ + *xB++;
+
+
+      x1=       *xA - *xB;
+
+      w[i++]=   x0 * A[2] + x1 * A[3];
+      w[i]=     x1 * A[2] - x0 * A[3];
 
-  /* step 8 */
+      w2[i++]=  *xA++ + *xB++;
+      A+=4;
+
+    }
+  }
 
+  _mdct_kernel(w,n,n2,init);
+  /* step 4, 5, 6, 7 */
   {
+    int *bit=init->bitrev;
     double *B=init->trig+n2;
-    double *out2=out+n2;
-    double scale=4./n;
-    for(i=0;i<n4;i++){
-      out[i]   =(xx[0]*B[0]+xx[1]*B[1])*scale;
-      *(--out2)=(xx[0]*B[1]-xx[1]*B[0])*scale;
+    double scale=-4./n;
+    i = -n8;
+    do {
+      int t1=*bit++;
+      int t2=*bit++;
+
+      double wA=w[t1]-w[t2+1];
+      double wB=w[t1-1]+w[t2];
+      double wC=w[t1]+w[t2+1];
+      double wD=w[t1-1]-w[t2];
 
-      xx+=2;
-      B+=2;
+      double wACE=wA* *B;
+      double wBCE=wB* *B++;
+      double wACO=wA* *B;
+      double wBCO=wB* *B++;
+
+      {
+	double w0 = wC+wACO+wBCE;
+	double w1 = wD+wBCO-wACE;
+
+	out[n8+i]     = (w0 + w1*B[1])*scale*B[0];
+	out[n2-n8-i-1]= (w0*B[1] - w1)*scale*B[0];
     }
+      {
+	double w0 = wC-wACO-wBCE;
+	double w1 =-wD+wBCO-wACE;
+
+	out[n8-i-1] = (w0 + w1*B[3])*scale*B[2];
+	out[n2-n8+i]= (w0*B[3] - w1)*scale*B[2];
+      }
+      B+=4;
+      i++;
+    } while (i<0);
   }
 }
 
-void mdct_backward(mdct_lookup *init, double *in, double *out){
+void mdct_backward(mdct_lookup *init, double *inout, double *window)
+{
   int n=init->n;
-  double *x=alloca(sizeof(double)*(n/2));
   double *w=alloca(sizeof(double)*(n/2));
-  double *xx;
   int n2=n>>1;
   int n4=n>>2;
-  int n8=n>>3;
-  int i;
 
-  /* rotate + step 1 */
+  /* rotate + step 1,2 */
   {
-    double *inO=in+1;
-    double  *xO= x;
-    double  *A=init->trig+n2;
+    const double  *A=init->trig;
+    double *in = inout;
 
-    for(i=0;i<n8;i++){
-      A-=2;
-      *xO++=-*(inO+2)*A[1] - *inO*A[0];
-      *xO++= *inO*A[1] - *(inO+2)*A[0];
-      inO+=4;
-    }
+    int i = 0;
+    do {
+      double x0, x1, x2;
+      x2 = in[3+i*4]*A[i+1   ] + in[1+i*4]*A[i];
+      x0 = in[n2-4 ]*A[n4+i+1] + in[n2-2 ]*A[n4+i];
 
-    inO=in+n2-4;
+      w[n4+i] = x0 - x2;
+      x0      = x0 + x2;
 
-    for(i=0;i<n8;i++){
-      A-=2;
-      *xO++=*inO*A[1] + *(inO+2)*A[0];
-      *xO++=*inO*A[0] - *(inO+2)*A[1];
-      inO-=4;
-    }
+      x2 = in[1+i*4]*A[i+1]  - in[3+i*4]*A[i];
+      x1 = in[n2-4 ]*A[n4+i] - in[n2-2 ]*A[n4+i+1];
+
+      w[n4+i+1] = x1 + x2;
+      x1        = x1 - x2;
+
+      w[i  ] = x0 * A[i*2+0] + x1 * A[i*2+1];
+      w[i+1] = x1 * A[i*2+0] - x0 * A[i*2+1];
 
+      in -= 4;
+      i += 2;
+    } while (i<n4);
   }
 
-  xx=_mdct_kernel(x,w,n,n2,n4,n8,init);
+  _mdct_kernel(w,n,n2,init);
 
-  /* step 8 */
+  /* step 4, 5, 6, 7, 8 */
+  {
+    int *bit=init->bitrev;
+    const double *B=init->trig+n2;
+    int i = -n4/2;
+    inout += n4-i;
+    window += n4-i;
+
+    do {
+      double wC;
+      double wD;
+      double wACE;
+      double wBCE;
+      double wACO;
+      double wBCO;
 
   {
-    double *B=init->trig+n2;
-    int o1=n4,o2=o1-1;
-    int o3=n4+n2,o4=o3-1;
+	double wA;
+	double wB;
+	int t1=*bit++;
+	int t2=*bit++;
+	wA = w[t1]-w[t2+1];
+	wB = w[t1-1]+w[t2];
+	wC = w[t1]+w[t2+1];
+	wD = w[t1-1]-w[t2];
+	wACE = wA * *B;
+	wBCE = wB * *B++;
+	wACO = wA * *B;
+	wBCO = wB * *B++;
+      }
     
-    for(i=0;i<n4;i++){
-      double temp1= (*xx * B[1] - *(xx+1) * B[0]);
-      double temp2=-(*xx * B[0] + *(xx+1) * B[1]);
-    
-      out[o1]=-temp1;
-      out[o2]= temp1;
-      out[o3]= temp2;
-      out[o4]= temp2;
-
-      o1++;
-      o2--;
-      o3++;
-      o4--;
-      xx+=2;
-      B+=2;
+      {
+	double w0 = wC+wACO+wBCE;
+	double w1 = wD+wBCO-wACE;
+
+	double wA = (w0*B[1] - w1) * B[0];
+	double wB = (w0 + B[1]*w1) * B[0];
+
+	inout[i]         = wA * window[i];
+	inout[n4-1-i] = wB * window[n4-1-i];
+	inout[-1-n4-i  ] =-wA * window[-1-n4-i];
+	inout[n2+i]      = wB * window[n2+i];
     }
+
+      {
+	double w0  = wC-wACO-wBCE;
+	double w1  =-wD+wBCO-wACE;
+
+	double wA = (w0*B[3] - w1) * B[2];
+	double wB = (w0 + B[3]*w1) * B[2];
+
+	inout[-i-1]   = wA * window[-i-1];
+	inout[n4+i]   = wB * window[n4+i];
+	inout[-n4+i]  =-wA * window[-n4+i];
+	inout[n2-i-1] = wB * window[n2-i-1];
+      }
+      B+=4;
+      i++;
+    } while (i<0);
   }
 }

--- >8 ----
List archives:  http://www.xiph.org/archives/
Ogg project homepage: http://www.xiph.org/ogg/



More information about the Vorbis-dev mailing list