[xiph-commits] r13769 - trunk/ghost/libghost
jm at svn.xiph.org
jm at svn.xiph.org
Fri Sep 7 03:05:38 PDT 2007
Author: jm
Date: 2007-09-07 03:05:37 -0700 (Fri, 07 Sep 2007)
New Revision: 13769
Modified:
trunk/ghost/libghost/ceft.c
Log:
Added a bunch of comments on the CEFT code
Modified: trunk/ghost/libghost/ceft.c
===================================================================
--- trunk/ghost/libghost/ceft.c 2007-09-06 23:52:39 UTC (rev 13768)
+++ trunk/ghost/libghost/ceft.c 2007-09-07 10:05:37 UTC (rev 13769)
@@ -26,30 +26,28 @@
#include "fftwrap.h"
+/* Number of bands to consider, excluding the DC */
#define NBANDS 15
+
+/* Start frequency of each band. The two extra elements are for the end of the last band (+1) and the end of the array itself */
int qbank[] = {1, 2, 4, 6, 8, 12, 16, 20, 24, 28, 36, 44, 52, 68, 84, 116, 128};
-//int qpulses[] = {2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
-//int qpulses[] = {2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0};
-//int qpulses[] = {2, 3, 2, 2, 3, 2, 2, 2, 1, 2, 1, 0, 0, 0, 0};
-//int qpulses[] = {3, 4, 3, 2, 3, 2, 2, 2, 1, 2, 1, 0, 0, 0, 0};
+/* Number of pulses in each band. The number of bits for each band with a non-zero
+ number of pulses is equal to (1 + nb_pulses * log2 (2 * width_of_band) ) */
//32 kbps
-int qpulses[] = {3, 4, 4, 3, 3, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0};
+int qpulses[] = {3, 4, 4, 3, 3, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0}; //85 bits
//44 kbps
-//int qpulses[] = {4, 7, 6, 4, 4, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0};
+//int qpulses[] = {4, 7, 6, 4, 4, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0}; //134 bits
-//int qpulses[] = {5, 5, 5, 5, 5, 5, 2, 2, 1, 2, 1, 0, 0, 0, 0};
-//int qpulses[] = {5, 5, 2, 2, 3, 2, 5, 5, 5, 5, 5, 5, 0, 0, 0};
-
-//int qpulses[] = {4, 4, 3, 3, 3, 3, 3, 2, 2, 3, 2, 2, 0, 0, 0};
-//int qpulses[] = {5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5};
-
-
+/* Number of bands only for the pitch prediction */
#define PBANDS 5
+/* Start frequency of each band */
int pbank[] = {1, 4, 8, 12, 20, 44};
+/* Algebraic pulse-based quantiser. The signal x is replaced by the sum of the pitch
+ and a combination of pulses such that its norm is still equal to 1 */
void alg_quant(float *x, int N, int K, float *p)
{
float y[N];
@@ -107,6 +105,9 @@
}
+/* Improved algebraic pulse-based quantiser. The signal x is replaced by the sum of the pitch
+ and a combination of pulses such that its norm is still equal to 1. The only difference with
+ the quantiser above is that the search is more complete. */
void alg_quant2(float *x, int N, int K, float *p)
{
int L = 5;
@@ -215,6 +216,7 @@
}
+/* Just replace the band with noise of unit energy */
void noise_quant(float *x, int N, int K, float *p)
{
int i;
@@ -231,6 +233,7 @@
}
}
+/* Compute the energy in each of the bands */
void compute_bank(float *X, float *bank)
{
int i;
@@ -243,11 +246,11 @@
bank[i] += X[j*2-1]*X[j*2-1];
bank[i] += X[j*2]*X[j*2];
}
- //bank[i] = sqrt(.5*bank[i]/(qbank[i+1]-qbank[i]));
bank[i] = sqrt(bank[i]);
}
}
+/* Normalise each band such that the energy is one. */
void normalise_bank(float *X, float *bank)
{
int i;
@@ -265,6 +268,7 @@
X[i] = 0;
}
+/* De-normalise the energy to produce the synthesis from the unit-energy bands */
void denormalise_bank(float *X, float *bank)
{
int i;
@@ -282,6 +286,7 @@
X[255] = 0;
}
+/* L2-norm of a vector */
float norm2(float *x, int len)
{
float E=0;
@@ -291,6 +296,7 @@
return E;
}
+/* Really crappy DFT for test purposes. May be harmful to young children */
void crappy_fft(float *X, int len, int R, int dir)
{
int i, j;
@@ -317,6 +323,7 @@
X[i] = out[i]/sqrt(len/2);
}
+/* Really crappy real-value DFT for test purposes. May be harmful to young children */
void crappy_rfft(float *X, int len, int R, int dir)
{
int N=len*2;
@@ -358,6 +365,7 @@
printf ("\n");*/
}
+/* Applies a series of rotations so that pulses are spread like a two-sided exponential */
void exp_rotation(float *X, int len, float theta, int dir)
{
int i;
@@ -424,6 +432,7 @@
}
}
+/* Apply a rotation to all bands to spread the effect of pulses */
void random_rotation(float *X, int R, int dir)
{
int i;
@@ -443,7 +452,7 @@
//printf ("\n");
}
-
+/* Quantise the normalised signal in each band */
void quant_bank(float *X, float *P, float centre)
{
int i;
@@ -466,6 +475,7 @@
X[255] = 0;
}
+/* Compute the best gain for each "pitch band" */
void compute_pitch_gain(float *X, float *P, float *gains, float *bank)
{
int i;
@@ -511,6 +521,7 @@
}*/
}
+/* Apply the (quantised) gain to each "pitch band" */
void pitch_quant_bank(float *X, float *P, float *gains)
{
int i;
@@ -528,7 +539,8 @@
P[i] = 0;
}
-
+/* Scales the pulse-codebook entry in each band such that unit-energy is conserved when
+ adding the pitch */
void pitch_renormalise_bank(float *X, float *P)
{
int i;
@@ -597,7 +609,7 @@
}
-
+/* Main CEFT encoder function */
void ceft_encode(CEFTState *st, float *in, float *out, float *pitch, float *window)
{
//float bark[BARK_BANDS];
@@ -611,18 +623,23 @@
float gains[PBANDS];
float mask[NBANDS];
static float obank[NBANDS+1];
+
+ /* FFT of windowed input signal */
spx_fft_float(st->frame_fft, in, X);
- /* Bands for the input signal */
+ /* Compute energy for each band in the input signal */
compute_bank(X, bank);
+ /* Apply a window and FFT to the (already-delayed) pitch signal */
for (i=0;i<st->length;i++)
p[i] = pitch[i]*window[i];
spx_fft_float(st->frame_fft, p, Xp);
- /* Bands for the pitch signal */
+ /* Compute energy for each band in the pitch signal */
compute_bank(Xp, pitch_bank);
+
+ /* Enable to dump data for training purposes */
#if 0
if (rand()%10 ==0 && fabs(X[0]) > 2 && (fabs(X[0]) > 10 || rand() % 5 == 0))
{
@@ -673,14 +690,16 @@
//printf ("%f\n", Sxw/Sw);
#endif
-
+ /* Normalise each band to have unit energy */
normalise_bank(X, bank);
+ /* All the following is for quantisation of the energy in each band */
float in_bank[NBANDS];
float qbank[NBANDS];
static float last_err[NBANDS];
static float last_bank[NBANDS];
+ /* Apply energy predictor */
for (i=0;i<NBANDS;i++)
{
in_bank[i] = 20*log10(bank[i]+1) + .0*last_err[i+1] - .9*last_bank[i];
@@ -688,6 +707,7 @@
for (i=0;i<NBANDS;i++)
qbank[i] = in_bank[i];
+ /* Vector-quantise the prediction error */
quantise_bands(in_bank, qbank, NBANDS);
#if 0
@@ -721,6 +741,7 @@
for (i=0;i<NBANDS;i++)
last_err[i] = qbank[i]-in_bank[i];
+ /* Undo the prediction */
for (i=0;i<NBANDS;i++)
{
qbank[i] += .9*last_bank[i];
@@ -733,6 +754,7 @@
for (i=0;i<NBANDS;i++)
last_bank[i] = qbank[i];
+ /* This is for quantisation of the DC. That is done independently from everything else */
{
float sign;
int id;
@@ -754,6 +776,7 @@
//printf ("%f\n", X[0]);
}
+ /* Normalise the pitch signal to have unit energy. */
normalise_bank(Xp, pitch_bank);
/*
@@ -764,19 +787,22 @@
printf ("\n");
*/
+ /* Apply spreading on input signal and pitch spectrum */
random_rotation(X, 10, -1);
random_rotation(Xp, 10, -1);
compute_pitch_gain(X, Xp, gains, bank);
quantise_pitch(gains, PBANDS);
pitch_quant_bank(X, Xp, gains);
-
+
+ /* Subtract the pitch prediction from the signal to encode */
for (i=1;i<st->length;i++)
X[i] -= Xp[i];
//Quantise input
quant_bank(X, Xp, centre);
+ /* Undo the pulse spreading */
random_rotation(X, 10, 1);
//pitch_renormalise_bank(X, Xp);
More information about the commits
mailing list