[xiph-commits] r12725 - trunk/speex/libspeex

jm at svn.xiph.org jm at svn.xiph.org
Sun Mar 11 05:49:44 PDT 2007


Author: jm
Date: 2007-03-11 05:49:41 -0700 (Sun, 11 Mar 2007)
New Revision: 12725

Modified:
   trunk/speex/libspeex/kiss_fft.c
Log:
Reducing the number of butterfly function calls.


Modified: trunk/speex/libspeex/kiss_fft.c
===================================================================
--- trunk/speex/libspeex/kiss_fft.c	2007-03-11 05:17:56 UTC (rev 12724)
+++ trunk/speex/libspeex/kiss_fft.c	2007-03-11 12:49:41 UTC (rev 12725)
@@ -28,107 +28,138 @@
         kiss_fft_cpx * Fout,
         const size_t fstride,
         const kiss_fft_cfg st,
-        int m
+        int m,
+        int N,
+        int mm
         )
 {
     kiss_fft_cpx * Fout2;
-    kiss_fft_cpx * tw1 = st->twiddles;
+    kiss_fft_cpx * tw1;
     kiss_fft_cpx t;
-    Fout2 = Fout + m;
     if (!st->inverse) {
-       int i;
+       int i,j;
        kiss_fft_cpx *x=Fout;
-       for (i=0;i<2*m;i++)
+       for (i=0;i<2*m*N;i++)
        {
           x[i].r = SHR16(x[i].r,1);
           x[i].i = SHR16(x[i].i,1);
        }
+       kiss_fft_cpx * Fout_beg = Fout;
+       for (i=0;i<N;i++)
+       {
+          Fout = Fout_beg + i*mm;
+          Fout2 = Fout + m;
+          tw1 = st->twiddles;
+          for(j=0;j<m;j++)
+          {
+             C_MUL (t,  *Fout2 , *tw1);
+             tw1 += fstride;
+             C_SUB( *Fout2 ,  *Fout , t );
+             C_ADDTO( *Fout ,  t );
+             ++Fout2;
+             ++Fout;
+          }
+       }
+    } else {
+       int i,j;
+       kiss_fft_cpx * Fout_beg = Fout;
+       for (i=0;i<N;i++)
+       {
+          Fout = Fout_beg + i*mm;
+          Fout2 = Fout + m;
+          tw1 = st->twiddles;
+          for(j=0;j<m;j++)
+          {
+             C_MUL (t,  *Fout2 , *tw1);
+             tw1 += fstride;
+             C_SUB( *Fout2 ,  *Fout , t );
+             C_ADDTO( *Fout ,  t );
+             ++Fout2;
+             ++Fout;
+          }
+       }
     }
-
-    do{
-        C_MUL (t,  *Fout2 , *tw1);
-        tw1 += fstride;
-        C_SUB( *Fout2 ,  *Fout , t );
-        C_ADDTO( *Fout ,  t );
-        ++Fout2;
-        ++Fout;
-    }while (--m);
 }
 
 static void kf_bfly4(
         kiss_fft_cpx * Fout,
         const size_t fstride,
         const kiss_fft_cfg st,
-        const size_t m
+        const size_t m,
+        int N,
+        int mm
         )
 {
     kiss_fft_cpx *tw1,*tw2,*tw3;
     kiss_fft_cpx scratch[6];
-    size_t k=m;
     const size_t m2=2*m;
     const size_t m3=3*m;
+    int i, j;
 
-    tw3 = tw2 = tw1 = st->twiddles;
-
-    /*if (!st->inverse) {
-       int i;
-       kiss_fft_cpx *x=Fout;
-       for (i=0;i<4*m;i++)
+    if (st->inverse)
+    {
+       kiss_fft_cpx * Fout_beg = Fout;
+       for (i=0;i<N;i++)
        {
-          x[i].r = PSHR16(x[i].r,2);
-          x[i].i = PSHR16(x[i].i,2);
+          Fout = Fout_beg + i*mm;
+          tw3 = tw2 = tw1 = st->twiddles;
+          for (j=0;j<m;j++)
+          {
+             C_MUL(scratch[0],Fout[m] , *tw1 );
+             C_MUL(scratch[1],Fout[m2] , *tw2 );
+             C_MUL(scratch[2],Fout[m3] , *tw3 );
+             
+             C_SUB( scratch[5] , *Fout, scratch[1] );
+             C_ADDTO(*Fout, scratch[1]);
+             C_ADD( scratch[3] , scratch[0] , scratch[2] );
+             C_SUB( scratch[4] , scratch[0] , scratch[2] );
+             C_SUB( Fout[m2], *Fout, scratch[3] );
+             tw1 += fstride;
+             tw2 += fstride*2;
+             tw3 += fstride*3;
+             C_ADDTO( *Fout , scratch[3] );
+             
+             Fout[m].r = scratch[5].r - scratch[4].i;
+             Fout[m].i = scratch[5].i + scratch[4].r;
+             Fout[m3].r = scratch[5].r + scratch[4].i;
+             Fout[m3].i = scratch[5].i - scratch[4].r;
+             ++Fout;
+          }
        }
-    }*/
-    if (st->inverse)
-    {
-       do {
-          C_MUL(scratch[0],Fout[m] , *tw1 );
-          C_MUL(scratch[1],Fout[m2] , *tw2 );
-          C_MUL(scratch[2],Fout[m3] , *tw3 );
-          
-          C_SUB( scratch[5] , *Fout, scratch[1] );
-          C_ADDTO(*Fout, scratch[1]);
-          C_ADD( scratch[3] , scratch[0] , scratch[2] );
-          C_SUB( scratch[4] , scratch[0] , scratch[2] );
-          C_SUB( Fout[m2], *Fout, scratch[3] );
-          tw1 += fstride;
-          tw2 += fstride*2;
-          tw3 += fstride*3;
-          C_ADDTO( *Fout , scratch[3] );
-
-          Fout[m].r = scratch[5].r - scratch[4].i;
-          Fout[m].i = scratch[5].i + scratch[4].r;
-          Fout[m3].r = scratch[5].r + scratch[4].i;
-          Fout[m3].i = scratch[5].i - scratch[4].r;
-          ++Fout;
-       } while(--k);
     } else
     {
-       do {
-          C_MUL4(scratch[0],Fout[m] , *tw1 );
-          C_MUL4(scratch[1],Fout[m2] , *tw2 );
-          C_MUL4(scratch[2],Fout[m3] , *tw3 );
-          
-          Fout->r = PSHR16(Fout->r, 2);
-          Fout->i = PSHR16(Fout->i, 2);
-          C_SUB( scratch[5] , *Fout, scratch[1] );
-          C_ADDTO(*Fout, scratch[1]);
-          C_ADD( scratch[3] , scratch[0] , scratch[2] );
-          C_SUB( scratch[4] , scratch[0] , scratch[2] );
-          Fout[m2].r = PSHR16(Fout[m2].r, 2);
-          Fout[m2].i = PSHR16(Fout[m2].i, 2);
-          C_SUB( Fout[m2], *Fout, scratch[3] );
-          tw1 += fstride;
-          tw2 += fstride*2;
-          tw3 += fstride*3;
-          C_ADDTO( *Fout , scratch[3] );
-          
-          Fout[m].r = scratch[5].r + scratch[4].i;
-          Fout[m].i = scratch[5].i - scratch[4].r;
-          Fout[m3].r = scratch[5].r - scratch[4].i;
-          Fout[m3].i = scratch[5].i + scratch[4].r;
-          ++Fout;
-       }while(--k);
+       kiss_fft_cpx * Fout_beg = Fout;
+       for (i=0;i<N;i++)
+       {
+          Fout = Fout_beg + i*mm;
+          tw3 = tw2 = tw1 = st->twiddles;
+          for (j=0;j<m;j++)
+          {
+             C_MUL4(scratch[0],Fout[m] , *tw1 );
+             C_MUL4(scratch[1],Fout[m2] , *tw2 );
+             C_MUL4(scratch[2],Fout[m3] , *tw3 );
+             
+             Fout->r = PSHR16(Fout->r, 2);
+             Fout->i = PSHR16(Fout->i, 2);
+             C_SUB( scratch[5] , *Fout, scratch[1] );
+             C_ADDTO(*Fout, scratch[1]);
+             C_ADD( scratch[3] , scratch[0] , scratch[2] );
+             C_SUB( scratch[4] , scratch[0] , scratch[2] );
+             Fout[m2].r = PSHR16(Fout[m2].r, 2);
+             Fout[m2].i = PSHR16(Fout[m2].i, 2);
+             C_SUB( Fout[m2], *Fout, scratch[3] );
+             tw1 += fstride;
+             tw2 += fstride*2;
+             tw3 += fstride*3;
+             C_ADDTO( *Fout , scratch[3] );
+             
+             Fout[m].r = scratch[5].r + scratch[4].i;
+             Fout[m].i = scratch[5].i - scratch[4].r;
+             Fout[m3].r = scratch[5].r - scratch[4].i;
+             Fout[m3].i = scratch[5].i + scratch[4].r;
+             ++Fout;
+          }
+       }
     }
 }
 
@@ -284,6 +315,40 @@
         }
     }
 }
+#include <stdio.h>
+               
+static
+void kf_shuffle(
+         kiss_fft_cpx * Fout,
+         const kiss_fft_cpx * f,
+         const size_t fstride,
+         int in_stride,
+         int * factors,
+         const kiss_fft_cfg st
+            )
+{
+   const int p=*factors++; /* the radix  */
+   const int m=*factors++; /* stage's fft length/p */
+   
+    /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
+   if (m==1)
+   {
+      int j;
+      for (j=0;j<p;j++)
+      {
+         Fout[j] = *f;
+         f += fstride*in_stride;
+      }
+   } else {
+      int j;
+      for (j=0;j<p;j++)
+      {
+         kf_shuffle( Fout , f, fstride*p, in_stride, factors,st);
+         f += fstride*in_stride;
+         Fout += m;
+      }
+   }
+}
 
 static
 void kf_work(
@@ -292,24 +357,34 @@
         const size_t fstride,
         int in_stride,
         int * factors,
-        const kiss_fft_cfg st
+        const kiss_fft_cfg st,
+        int N,
+        int s2,
+        int m2
         )
 {
+   int i;
     kiss_fft_cpx * Fout_beg=Fout;
     const int p=*factors++; /* the radix  */
     const int m=*factors++; /* stage's fft length/p */
-    const kiss_fft_cpx * Fout_end = Fout + p*m;
-
-    if (m==1) {
-        do{
-            *Fout = *f;
-            f += fstride*in_stride;
-        }while(++Fout != Fout_end );
-    }else{
-        do{
-            kf_work( Fout , f, fstride*p, in_stride, factors,st);
-            f += fstride*in_stride;
-        }while( (Fout += m) != Fout_end );
+#if 0
+    /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
+    if (m==1)
+    {
+    /*   int j;
+       for (j=0;j<p;j++)
+       {
+          Fout[j] = *f;
+          f += fstride*in_stride;
+       }*/
+    } else {
+       int j;
+       for (j=0;j<p;j++)
+       {
+          kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m);
+          f += fstride*in_stride;
+          Fout += m;
+       }
     }
 
     Fout=Fout_beg;
@@ -321,6 +396,36 @@
         case 5: kf_bfly5(Fout,fstride,st,m); break; 
         default: kf_bfly_generic(Fout,fstride,st,m,p); break;
     }
+#else
+    /*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/
+    if (m==1) 
+    {
+       /*for (i=0;i<N;i++)
+       {
+          int j;
+          Fout = Fout_beg+i*m2;
+          const kiss_fft_cpx * f2 = f+i*s2;
+          for (j=0;j<p;j++)
+          {
+             *Fout++ = *f2;
+             f2 += fstride*in_stride;
+          }
+       }*/
+    }else{
+       kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m);
+    }
+
+    
+       
+       
+       switch (p) {
+          case 2: kf_bfly2(Fout,fstride,st,m, N, m2); break;
+          case 3: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly3(Fout,fstride,st,m);} break; 
+          case 4: kf_bfly4(Fout,fstride,st,m, N, m2); break;
+          case 5: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly5(Fout,fstride,st,m);} break; 
+          default: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly_generic(Fout,fstride,st,m,p);} break;
+    }    
+#endif
 }
 
 /*  facbuf is populated by p1,m1,p2,m2, ...
@@ -405,7 +510,8 @@
        kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
        speex_move(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);*/
     } else {
-       kf_work( fout, fin, 1,in_stride, st->factors,st );
+       kf_shuffle( fout, fin, 1,in_stride, st->factors,st);
+       kf_work( fout, fin, 1,in_stride, st->factors,st, 1, in_stride, 1);
     }
 }
 



More information about the commits mailing list