[vorbis-dev] new mdct
Takehiro Tominaga
tominaga at isoternet.org
Sat Aug 26 01:56:23 PDT 2000
>>>>> "T" == Takehiro Tominaga <tominaga at isoternet.org> writes:
T> and the output is not identical. I think this is becuase of
T> floating point rounding problem, but there may be a bug.
Sorry. That's my silly bug....
I made a new one. check this out.
Basic strategy of optimization is
1 loop integration.
"step1/step2" in old mdct code and "step4,5,6,7/step8" is now merged.
2 memory access order optimization
because almost all the today's computer is optimized for positive
diirection sequencial read/write.
And, I have some question.
you can easyly find we can "more optimize" the mdct_backward by
calculating (window[i]*B[2*i]) in the initilization routine.
mdct_forward is vice verce. but this may make a appling
pre-window time information (a feature extension?) impossible.
in mapping0.c,
mdct_backward(...);
/* now apply the decoded pre-window time information */
/* NOT IMPLEMENTED */
/* window the data */
pcm[] = pcm[]*window[];
So i have a question: what kind of "pre-window time information"
operation do you planing ?
---
Takehiro TOMINAGA // may the source be with you!
diff -uBb vorbis/lib/mapping0.c vorbis.mdct/lib/mapping0.c
--- vorbis/lib/mapping0.c Tue Aug 15 18:09:43 2000
+++ vorbis.mdct/lib/mapping0.c Thu Aug 24 02:55:32 2000
@@ -349,26 +349,16 @@
for(i=0;i<vi->channels;i++){
double *pcm=vb->pcm[i];
_analysis_output("out",seq+i,pcm,n/2,0,1);
- mdct_backward(vd->transform[vb->W][0],pcm,pcm);
- }
+ mdct_backward(vd->transform[vb->W][0],pcm, window);
/* now apply the decoded pre-window time information */
/* NOT IMPLEMENTED */
- /* window the data */
- for(i=0;i<vi->channels;i++){
- double *pcm=vb->pcm[i];
- if(nonzero[i])
- for(j=0;j<n;j++)
- pcm[j]*=window[j];
- else
- for(j=0;j<n;j++)
- pcm[j]=0.;
_analysis_output("final",seq++,pcm,n,0,0);
- }
/* now apply the decoded post-window time information */
/* NOT IMPLEMENTED */
+ }
/* all done! */
return(0);
diff -uBb vorbis/lib/mdct.c vorbis.mdct/lib/mdct.c
--- vorbis/lib/mdct.c Wed Aug 23 23:07:38 2000
+++ vorbis.mdct/lib/mdct.c Fri Aug 25 02:39:41 2000
@@ -49,13 +49,10 @@
double *trig=malloc(sizeof(double)*(n+n/4));
double *AE=trig;
double *AO=trig+1;
- double *BE=AE+n/2;
- double *BO=BE+1;
- double *CE=BE+n/2;
- double *CO=CE+1;
+ double *B=AE+n/2;
int i;
- int log2n=lookup->log2n=rint(log(n)/log(2));
+ int log2n=lookup->log2n=rint(log(n)/log(2))-1;
lookup->n=n;
lookup->trig=trig;
lookup->bitrev=bitrev;
@@ -63,21 +60,30 @@
/* trig lookups... */
for(i=0;i<n/4;i++){
- AE[i*2]=cos((M_PI/n)*(4*i));
- AO[i*2]=-sin((M_PI/n)*(4*i));
- BE[i*2]=cos((M_PI/(2*n))*(2*i+1));
- BO[i*2]=sin((M_PI/(2*n))*(2*i+1));
+ AE[i*2]= cos((M_PI/n)*(4*(n/4-i-1)));
+ AO[i*2]=-sin((M_PI/n)*(4*(n/4-i-1)));
}
+
for(i=0;i<n/8;i++){
- CE[i*2]=cos((M_PI/n)*(4*i+2));
- CO[i*2]=-sin((M_PI/n)*(4*i+2));
+ int j;
+
+ *B++= cos((M_PI/n)*(4*i+2));
+ *B++=-sin((M_PI/n)*(4*i+2));
+
+ j = i;
+ *B++=-cos((M_PI/(2*n))*(2*j+1))*0.5;
+ *B++= tan((M_PI/(2*n))*(2*j+1));
+
+ j = n/4 - i - 1;
+ *B++=-cos((M_PI/(2*n))*(2*j+1))*0.5;
+ *B++= tan((M_PI/(2*n))*(2*j+1));
}
/* bitreverse lookup... */
{
- int mask=(1<<(log2n-1))-1,i,j;
- int msb=1<<(log2n-2);
+ int mask=(1<<log2n)-1,i,j;
+ int msb=1<<(log2n-1);
for(i=0;i<n/8;i++){
int acc=0;
for(j=0;msb>>j;j++)
@@ -96,112 +102,47 @@
}
}
-static double *_mdct_kernel(double *x, double *w,
- int n, int n2, int n4, int n8,
- mdct_lookup *init){
+static void *_mdct_kernel(double *w,
+ int n,int n2,
+ mdct_lookup *init)
+{
int i;
- /* step 2 */
-
- {
- double *xA=x+n4;
- double *xB=x;
- double *w2=w+n4;
- double *A=init->trig+n2;
-
- for(i=0;i<n4;){
- double x0=*xA - *xB;
- double x1;
- w2[i]= *xA++ + *xB++;
-
-
- x1= *xA - *xB;
- A-=4;
-
- w[i++]= x0 * A[0] + x1 * A[1];
- w[i]= x1 * A[0] - x0 * A[1];
-
- w2[i++]= *xA++ + *xB++;
-
- }
- }
/* step 3 */
-
- {
- int r,s;
- for(i=0;i<init->log2n-3;i++){
- int k0=n>>(i+2);
- int k1=1<<(i+3);
- int wbase=n2-2;
- double *A=init->trig;
- double *temp;
-
- for(r=0;r<(k0>>2);r++){
- int w1=wbase;
- int w2=w1-(k0>>1);
- double AEv= A[0],wA;
- double AOv= A[1],wB;
- wbase-=2;
-
- k0++;
- for(s=0;s<(2<<i);s++){
- wB =w[w1] -w[w2];
- x[w1] =w[w1] +w[w2];
-
- wA =w[++w1] -w[++w2];
- x[w1] =w[w1] +w[w2];
-
- x[w2] =wA*AEv - wB*AOv;
- x[w2-1]=wB*AEv + wA*AOv;
-
- w1-=k0;
- w2-=k0;
- }
- k0--;
-
- A+=k1;
- }
-
- temp=w;
- w=x;
- x=temp;
- }
- }
-
- /* step 4, 5, 6, 7 */
- {
- double *C=init->trig+n;
- int *bit=init->bitrev;
- double *x1=x;
- double *x2=x+n2-1;
- for(i=0;i<n8;i++){
- int t1=*bit++;
- int t2=*bit++;
-
- double wA=w[t1]-w[t2+1];
- double wB=w[t1-1]+w[t2];
- double wC=w[t1]+w[t2+1];
- double wD=w[t1-1]-w[t2];
-
- double wACE=wA* *C;
- double wBCE=wB* *C++;
- double wACO=wA* *C;
- double wBCO=wB* *C++;
-
- *x1++=( wC+wACO+wBCE)*.5;
- *x2--=(-wD+wBCO-wACE)*.5;
- *x1++=( wD+wBCO-wACE)*.5;
- *x2--=( wC-wACO-wBCE)*.5;
- }
- }
- return(x);
+ i = 2;
+ do {
+ int k0=n2>>i;
+ int r = -k0;
+
+ do {
+ double *A=init->trig+n2 + ((r+k0)<<i);
+ double AEv= A[-2],AOv= A[-1];
+ int s;
+ r-=2;
+ s=n2+r;
+
+ do {
+ double wB,wA;
+ wB = w[s+k0] - w[s];
+ wA = w[s+k0+1] - w[s+1];
+
+ w[s+k0] = w[s+k0] + w[s];
+ w[s+k0+1] = w[s+k0+1] + w[s+1];
+
+ w[s ] = wB*AEv + wA*AOv;
+ w[s+1] = wA*AEv - wB*AOv;
+ s-=k0*2;
+ } while (s>r);
+ } while (r>-k0*2);
+ i++;
+ } while (i<init->log2n);
}
-void mdct_forward(mdct_lookup *init, double *in, double *out){
+void mdct_forward(mdct_lookup *init, double *in, double *out)
+{
int n=init->n;
double *x=alloca(sizeof(double)*(n/2));
double *w=alloca(sizeof(double)*(n/2));
- double *xx;
int n2=n>>1;
int n4=n>>2;
int n8=n>>3;
@@ -212,118 +153,199 @@
double tempA,tempB;
int in1=n2+n4-4;
int in2=in1+5;
- double *A=init->trig+n2;
-
- i=0;
+ double *A=init->trig;
for(i=0;i<n8;i+=2){
- A-=2;
tempA= in[in1+2] + in[in2];
tempB= in[in1] + in[in2+2];
in1 -=4;in2 +=4;
x[i]= tempB*A[1] + tempA*A[0];
x[i+1]= tempB*A[0] - tempA*A[1];
+ A+=2;
}
in2=1;
for(;i<n2-n8;i+=2){
- A-=2;
tempA= in[in1+2] - in[in2];
tempB= in[in1] - in[in2+2];
in1 -=4;in2 +=4;
x[i]= tempB*A[1] + tempA*A[0];
x[i+1]= tempB*A[0] - tempA*A[1];
+ A+=2;
}
in1=n-4;
for(;i<n2;i+=2){
- A-=2;
tempA= -in[in1+2] - in[in2];
tempB= -in[in1] - in[in2+2];
in1 -=4;in2 +=4;
x[i]= tempB*A[1] + tempA*A[0];
x[i+1]= tempB*A[0] - tempA*A[1];
+ A+=2;
}
}
- xx=_mdct_kernel(x,w,n,n2,n4,n8,init);
+ {
+ double *xA=x+n4;
+ double *xB=x;
+ double *w2=w+n4;
+ double *A=init->trig;
+
+ for(i=0;i<n4;){
+ double x0=*xA - *xB;
+ double x1;
+ w2[i]= *xA++ + *xB++;
+
+
+ x1= *xA - *xB;
+
+ w[i++]= x0 * A[2] + x1 * A[3];
+ w[i]= x1 * A[2] - x0 * A[3];
- /* step 8 */
+ w2[i++]= *xA++ + *xB++;
+ A+=4;
+
+ }
+ }
+ _mdct_kernel(w,n,n2,init);
+ /* step 4, 5, 6, 7 */
{
+ int *bit=init->bitrev;
double *B=init->trig+n2;
- double *out2=out+n2;
- double scale=4./n;
- for(i=0;i<n4;i++){
- out[i] =(xx[0]*B[0]+xx[1]*B[1])*scale;
- *(--out2)=(xx[0]*B[1]-xx[1]*B[0])*scale;
+ double scale=-4./n;
+ i = -n8;
+ do {
+ int t1=*bit++;
+ int t2=*bit++;
+
+ double wA=w[t1]-w[t2+1];
+ double wB=w[t1-1]+w[t2];
+ double wC=w[t1]+w[t2+1];
+ double wD=w[t1-1]-w[t2];
- xx+=2;
- B+=2;
+ double wACE=wA* *B;
+ double wBCE=wB* *B++;
+ double wACO=wA* *B;
+ double wBCO=wB* *B++;
+
+ {
+ double w0 = wC+wACO+wBCE;
+ double w1 = wD+wBCO-wACE;
+
+ out[n8+i] = (w0 + w1*B[1])*scale*B[0];
+ out[n2-n8-i-1]= (w0*B[1] - w1)*scale*B[0];
}
+ {
+ double w0 = wC-wACO-wBCE;
+ double w1 =-wD+wBCO-wACE;
+
+ out[n8-i-1] = (w0 + w1*B[3])*scale*B[2];
+ out[n2-n8+i]= (w0*B[3] - w1)*scale*B[2];
+ }
+ B+=4;
+ i++;
+ } while (i<0);
}
}
-void mdct_backward(mdct_lookup *init, double *in, double *out){
+void mdct_backward(mdct_lookup *init, double *inout, double *window)
+{
int n=init->n;
- double *x=alloca(sizeof(double)*(n/2));
double *w=alloca(sizeof(double)*(n/2));
- double *xx;
int n2=n>>1;
int n4=n>>2;
- int n8=n>>3;
- int i;
- /* rotate + step 1 */
+ /* rotate + step 1,2 */
{
- double *inO=in+1;
- double *xO= x;
- double *A=init->trig+n2;
+ const double *A=init->trig;
+ double *in = inout;
- for(i=0;i<n8;i++){
- A-=2;
- *xO++=-*(inO+2)*A[1] - *inO*A[0];
- *xO++= *inO*A[1] - *(inO+2)*A[0];
- inO+=4;
- }
+ int i = 0;
+ do {
+ double x0, x1, x2;
+ x2 = in[3+i*4]*A[i+1 ] + in[1+i*4]*A[i];
+ x0 = in[n2-4 ]*A[n4+i+1] + in[n2-2 ]*A[n4+i];
- inO=in+n2-4;
+ w[n4+i] = x0 - x2;
+ x0 = x0 + x2;
- for(i=0;i<n8;i++){
- A-=2;
- *xO++=*inO*A[1] + *(inO+2)*A[0];
- *xO++=*inO*A[0] - *(inO+2)*A[1];
- inO-=4;
- }
+ x2 = in[1+i*4]*A[i+1] - in[3+i*4]*A[i];
+ x1 = in[n2-4 ]*A[n4+i] - in[n2-2 ]*A[n4+i+1];
+
+ w[n4+i+1] = x1 + x2;
+ x1 = x1 - x2;
+
+ w[i ] = x0 * A[i*2+0] + x1 * A[i*2+1];
+ w[i+1] = x1 * A[i*2+0] - x0 * A[i*2+1];
+ in -= 4;
+ i += 2;
+ } while (i<n4);
}
- xx=_mdct_kernel(x,w,n,n2,n4,n8,init);
+ _mdct_kernel(w,n,n2,init);
- /* step 8 */
+ /* step 4, 5, 6, 7, 8 */
+ {
+ int *bit=init->bitrev;
+ const double *B=init->trig+n2;
+ int i = -n4/2;
+ inout += n4-i;
+ window += n4-i;
+
+ do {
+ double wC;
+ double wD;
+ double wACE;
+ double wBCE;
+ double wACO;
+ double wBCO;
{
- double *B=init->trig+n2;
- int o1=n4,o2=o1-1;
- int o3=n4+n2,o4=o3-1;
+ double wA;
+ double wB;
+ int t1=*bit++;
+ int t2=*bit++;
+ wA = w[t1]-w[t2+1];
+ wB = w[t1-1]+w[t2];
+ wC = w[t1]+w[t2+1];
+ wD = w[t1-1]-w[t2];
+ wACE = wA * *B;
+ wBCE = wB * *B++;
+ wACO = wA * *B;
+ wBCO = wB * *B++;
+ }
- for(i=0;i<n4;i++){
- double temp1= (*xx * B[1] - *(xx+1) * B[0]);
- double temp2=-(*xx * B[0] + *(xx+1) * B[1]);
-
- out[o1]=-temp1;
- out[o2]= temp1;
- out[o3]= temp2;
- out[o4]= temp2;
-
- o1++;
- o2--;
- o3++;
- o4--;
- xx+=2;
- B+=2;
+ {
+ double w0 = wC+wACO+wBCE;
+ double w1 = wD+wBCO-wACE;
+
+ double wA = (w0*B[1] - w1) * B[0];
+ double wB = (w0 + B[1]*w1) * B[0];
+
+ inout[i] = wA * window[i];
+ inout[n4-1-i] = wB * window[n4-1-i];
+ inout[-1-n4-i ] =-wA * window[-1-n4-i];
+ inout[n2+i] = wB * window[n2+i];
}
+
+ {
+ double w0 = wC-wACO-wBCE;
+ double w1 =-wD+wBCO-wACE;
+
+ double wA = (w0*B[3] - w1) * B[2];
+ double wB = (w0 + B[3]*w1) * B[2];
+
+ inout[-i-1] = wA * window[-i-1];
+ inout[n4+i] = wB * window[n4+i];
+ inout[-n4+i] =-wA * window[-n4+i];
+ inout[n2-i-1] = wB * window[n2-i-1];
+ }
+ B+=4;
+ i++;
+ } while (i<0);
}
}
--- >8 ----
List archives: http://www.xiph.org/archives/
Ogg project homepage: http://www.xiph.org/ogg/
More information about the Vorbis-dev
mailing list