[xiph-cvs] cvs commit: speex/doc manual.lyx sampledec.c sampleenc.c

Wed Feb 26 11:51:18 PST 2003

jm          03/02/26 14:51:18

  Modified:    doc      manual.lyx sampledec.c sampleenc.c
  Log:
  comments in samples

Revision  Changes    Path
1.53      +65 -44    speex/doc/manual.lyx

Index: manual.lyx
===================================================================
RCS file: /usr/local/cvsroot/speex/doc/manual.lyx,v
retrieving revision 1.52
retrieving revision 1.53
diff -u -r1.52 -r1.53

--- manual.lyx	26 Feb 2003 18:15:15 -0000	1.52
+++ manual.lyx	26 Feb 2003 19:51:18 -0000	1.53
@@ -1,5 +1,5 @@
-#LyX 1.2 created this file. For more info see http://www.lyx.org/
-\lyxformat 220
+#LyX 1.3 created this file. For more info see http://www.lyx.org/
+\lyxformat 221
 \textclass article
 \language english
 \inputencoding auto
@@ -891,42 +891,42 @@
 \layout Description
 
 SPEEX_SET_MODE*
-\begin_inset Formula $\dagger $
+\begin_inset Formula $\dagger$
 \end_inset 
 
 
 \layout Description
 
 SPEEX_GET_MODE*
-\begin_inset Formula $\dagger $
+\begin_inset Formula $\dagger$
 \end_inset 
 
 
 \layout Description
 
 SPEEX_SET_LOW_MODE*
-\begin_inset Formula $\dagger $
+\begin_inset Formula $\dagger$
 \end_inset 
 
 
 \layout Description
 
 SPEEX_GET_LOW_MODE*
-\begin_inset Formula $\dagger $
+\begin_inset Formula $\dagger$
 \end_inset 
 
 
 \layout Description
 
 SPEEX_SET_HIGH_MODE*
-\begin_inset Formula $\dagger $
+\begin_inset Formula $\dagger$
 \end_inset 
 
 
 \layout Description
 
 SPEEX_GET_HIGH_MODE*
-\begin_inset Formula $\dagger $
+\begin_inset Formula $\dagger$
 \end_inset 
 
 
@@ -1015,7 +1015,7 @@
 \layout Description
 
 
-\begin_inset Formula $\dagger $
+\begin_inset Formula $\dagger$
 \end_inset 
 
  normally only used internally
@@ -2287,7 +2287,7 @@
 
 
 \begin_inset Formula \[
-y[n]=\sum _{i=1}^{N}a_{i}x[n-i]\]
+y[n]=\sum_{i=1}^{N}a_{i}x[n-i]\]
 
 \end_inset 
 
@@ -2302,7 +2302,7 @@
 .
  The prediction error is thus given by:
 \begin_inset Formula \[
-e[n]=x[n]-y[n]=x[n]-\sum _{i=1}^{N}a_{i}x[n-i]\]
+e[n]=x[n]-y[n]=x[n]-\sum_{i=1}^{N}a_{i}x[n-i]\]
 
 \end_inset 
 
@@ -2316,7 +2316,7 @@
 
  which minimize the quadratic error function:
 \begin_inset Formula \[
-E=\sum _{n=0}^{L-1}\left[e[n]\right]^{2}=\sum _{n=0}^{L-1}\left[x[n]-\sum _{i=1}^{N}a_{i}x[n-i]\right]^{2}\]
+E=\sum_{n=0}^{L-1}\left[e[n]\right]^{2}=\sum_{n=0}^{L-1}\left[x[n]-\sum_{i=1}^{N}a_{i}x[n-i]\right]^{2}\]
 
 \end_inset 
 
@@ -2326,7 +2326,7 @@
 
  equal to zero:
 \begin_inset Formula \[
-\frac{\partial E}{\partial a_{i}}=\frac{\partial }{\partial a_{i}}\sum _{n=0}^{L-1}\left[x[n]-\sum _{i=1}^{N}a_{i}x[n-i]\right]^{2}=0\]
+\frac{\partial E}{\partial a_{i}}=\frac{\partial}{\partial a_{i}}\sum_{n=0}^{L-1}\left[x[n]-\sum_{i=1}^{N}a_{i}x[n-i]\right]^{2}=0\]
 
 \end_inset 
 
@@ -2360,7 +2360,7 @@
 
 
 \begin_inset Formula \[
-R(m)=\sum _{i=0}^{N-1}x[i]x[i-m]\]
+R(m)=\sum_{i=0}^{N-1}x[i]x[i-m]\]
 
 \end_inset 
 
@@ -2374,22 +2374,20 @@
  filter, we have:
 \begin_inset Formula \[
 \mathbf{R}=\left[\begin{array}{cccc}
- R(0) & R(1) & \cdots  & R(N-1)\\
- R(1) & R(0) & \cdots  & R(N-2)\\
- \vdots  & \vdots  & \ddots  & \vdots \\
- R(N-1) & R(N-2) & \cdots  & R(0)\end{array}
-\right]\]
+R(0) & R(1) & \cdots & R(N-1)\\
+R(1) & R(0) & \cdots & R(N-2)\\
+\vdots & \vdots & \ddots & \vdots\\
+R(N-1) & R(N-2) & \cdots & R(0)\end{array}\right]\]
 
 \end_inset 
 
 
 \begin_inset Formula \[
 \mathbf{r}=\left[\begin{array}{c}
- R(1)\\
- R(2)\\
- \vdots \\
- R(N)\end{array}
-\right]\]
+R(1)\\
+R(2)\\
+\vdots\\
+R(N)\end{array}\right]\]
 
 \end_inset 
 
@@ -2443,7 +2441,7 @@
 The linear prediction model represents each speech sample as linear combination
  of past samples, plus an error signal called the excitation (or residual).
 \begin_inset Formula \[
-x[n]=\sum _{i=1}^{N}a_{i}x[n-i]+e[n]\]
+x[n]=\sum_{i=1}^{N}a_{i}x[n-i]+e[n]\]
 
 \end_inset 
 
@@ -2475,7 +2473,7 @@
 
 
 \begin_inset Formula \[
-A(z)=1-\sum _{i=1}^{N}a_{i}z^{-i}\]
+A(z)=1-\sum_{i=1}^{N}a_{i}z^{-i}\]
 
 \end_inset 
 
@@ -2549,7 +2547,7 @@
 \end_inset 
 
  is the pitch period, 
-\begin_inset Formula $\beta $
+\begin_inset Formula $\beta$
 \end_inset 
 
  is the pitch gain and 
@@ -2648,7 +2646,7 @@
 .
  That's why instead of minimizing the simple quadratic error
 \begin_inset Formula \[
-E=\sum _{n}\left(x[n]-\overline{x}[n]\right)^{2}\]
+E=\sum_{n}\left(x[n]-\overline{x}[n]\right)^{2}\]
 
 \end_inset 
 
@@ -2672,7 +2670,7 @@
 
 
 \begin_inset Formula \begin{equation}
-W(z)=\frac{A\left(\frac{z}{\gamma _{1}}\right)}{A\left(\frac{z}{\gamma _{2}}\right)}\label{eq:weighting_filter}\end{equation}
+W(z)=\frac{A\left(\frac{z}{\gamma_{1}}\right)}{A\left(\frac{z}{\gamma_{2}}\right)}\label{eq:weighting_filter}\end{equation}
 
 \end_inset 
 
@@ -2680,14 +2678,14 @@
 \layout Standard
 
 with control parameters 
-\begin_inset Formula $\gamma _{1}>\gamma _{2}$
+\begin_inset Formula $\gamma_{1}>\gamma_{2}$
 \end_inset 
 
 .
  If the noise is white in the perceptually weighted domain, then in the
  signal domain its spectral shape will be of the form
 \begin_inset Formula \[
-A_{noise}(z)=\frac{1}{W(z)}=\frac{A\left(\frac{z}{\gamma _{2}}\right)}{A\left(\frac{z}{\gamma _{1}}\right)}\]
+A_{noise}(z)=\frac{1}{W(z)}=\frac{A\left(\frac{z}{\gamma_{2}}\right)}{A\left(\frac{z}{\gamma_{1}}\right)}\]
 
 \end_inset 
 
@@ -2707,7 +2705,7 @@
 \end_inset 
 
 -plane, the filter 
-\begin_inset Formula $A(z/\gamma )$
+\begin_inset Formula $A(z/\gamma)$
 \end_inset 
 
  filter will have its poles at 
@@ -2735,12 +2733,12 @@
 \layout Standard
 
 This section looks at how Speex works for narrowband (
-\begin_inset Formula $8\: \mathrm{kHz}$
+\begin_inset Formula $8\:\mathrm{kHz}$
 \end_inset 
 
  sampling rate) operation.
  The frame size for this mode is 
-\begin_inset Formula $20\: \mathrm{ms}$
+\begin_inset Formula $20\:\mathrm{ms}$
 \end_inset 
 
 , corresponding to 160 samples.
@@ -2808,11 +2806,11 @@
 \end_inset 
 
  with 
-\begin_inset Formula $\gamma _{1}=0.9$
+\begin_inset Formula $\gamma_{1}=0.9$
 \end_inset 
 
  and 
-\begin_inset Formula $\gamma _{2}=0.6$
+\begin_inset Formula $\gamma_{2}=0.6$
 \end_inset 
 
 .
@@ -2838,7 +2836,7 @@
 
  is obtained by the past of the excitation by:
 \begin_inset Formula \[
-p[n]=\beta _{0}e[n-T-1]+\beta _{1}e[n-T]+\beta _{2}e[n-T+1]\]
+p[n]=\beta_{0}e[n-T-1]+\beta_{1}e[n-T]+\beta_{2}e[n-T+1]\]
 
 \end_inset 
 
@@ -2850,7 +2848,7 @@
 \end_inset 
 
  is the pitch period and the 
-\begin_inset Formula $\beta _{i}$
+\begin_inset Formula $\beta_{i}$
 \end_inset 
 
  are the prediction (filter) taps.
@@ -2875,7 +2873,7 @@
 \end_inset 
 
  range and the 
-\begin_inset Formula $\beta _{i}$
+\begin_inset Formula $\beta_{i}$
 \end_inset 
 
  coefficients are vector-quantized using 7 bits (15 kbps narrowband and
@@ -5458,18 +5456,18 @@
 
 One of the cause could be scaling of the input speech.
  Speex expects signals to have a 
-\begin_inset Formula $\pm 2^{15}$
+\begin_inset Formula $\pm2^{15}$
 \end_inset 
 
  (signed short) dynamic range.
  If the dynamic range of your signals is too small (e.g.
  
-\begin_inset Formula $\pm 1.0$
+\begin_inset Formula $\pm1.0$
 \end_inset 
 
 ), you will suffer important quantization noise.
  A good target is to have a dynamic range around 
-\begin_inset Formula $\pm 8000$
+\begin_inset Formula $\pm8000$
 \end_inset 
 
  which is large enough, but small enough to make sure there's no clipping
@@ -5486,7 +5484,7 @@
  for more than one audio stream (channel), which produces strange effects
  with the filter memories.
  If the input speech has an amplitude close to 
-\begin_inset Formula $\pm 2^{15}$
+\begin_inset Formula $\pm2^{15}$
 \end_inset 
 
 , it is possible that at decoding, the amplitude be a bit higher than that,
@@ -5575,13 +5573,29 @@
 
 This section shows sample code for encoding and decoding speech using the
  Speex API.
+ The commands can be used to encode and decode a file by calling:
+\family typewriter 
+
+\newline 
+% sampleenc in_file.sw | sampledec out_file.sw
+\family default 
+
+\newline 
+where both files are raw (no header) files encoded at 16 bits per sample
+ (in the machine natural endianness).
 \layout Subsection
 
 sampleenc.c
 \layout Standard
 
+sampleenc takes a raw 16 bits/sample file, encodes it and outputs a Speex
+ stream to stdout.
+ Note that the packing used is NOT compatible with that of speexenc/speexdec.
+\layout Standard
+
 
 \begin_inset Include \verbatiminput{sampleenc.c}
+preview false
 
 \end_inset 
 
@@ -5591,8 +5605,14 @@
 sampledec.c
 \layout Standard
 
+sampledec reads a Speex stream from stdin, decodes it and outputs it to
+ a raw 16 bits/sample file.
+ Note that the packing used is NOT compatible with that of speexenc/speexdec.
+\layout Standard
+
 
 \begin_inset Include \verbatiminput{sampledec.c}
+preview false
 
 \end_inset 
 
@@ -5608,7 +5628,8 @@
 \layout Standard
 
 
-\begin_inset Include \verbatiminput{draft-herlein-speex-rtp-profile-07.txt}
+\begin_inset Include \verbatiminput{draft-herlein-speex-rtp-profile-00.txt}
+preview false
 
 \end_inset 
 

1.2       +18 -3     speex/doc/sampledec.c

Index: sampledec.c
===================================================================
RCS file: /usr/local/cvsroot/speex/doc/sampledec.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- sampledec.c	17 Feb 2003 05:02:03 -0000	1.1
+++ sampledec.c	26 Feb 2003 19:51:18 -0000	1.2
@@ -1,49 +1,64 @@
 #include <speex.h>
 #include <stdio.h>
-#include <stdlib.h>
 
+/*The frame size is hardcoded for this sample code but it doesn't have to be*/
 #define FRAME_SIZE 160
 int main(int argc, char **argv)
 {
    char *outFile;
    FILE *fout;
+   /*Holds the audio that will be written to file (16 bits per sample)*/
    short out[FRAME_SIZE];
+   /*Speex handles samples as floats, so we need an array of floats*/
    float output[FRAME_SIZE];
    char cbits[200];
    int nbBytes;
+   /*Holds the state of the decoder*/
    void *state;
+   /*Holds bits so they can be read and written to by the Speex routines*/
    SpeexBits bits;
    int i, tmp;
 
-
+   /*Create a new decoder state in narrowband mode*/
    state = speex_decoder_init(&speex_nb_mode);
 
+   /*Set the perceptual enhancement on*/
    tmp=1;
    speex_decoder_ctl(state, SPEEX_SET_ENH, &tmp);
 
    outFile = argv[1];
    fout = fopen(outFile, "w");
 
+   /*Initialization of the structure that holds the bits*/
    speex_bits_init(&bits);
    while (1)
    {
+      /*Read the size encoded by sampleenc, this part will likely be 
+        different in your application*/
       fread(&nbBytes, sizeof(int), 1, stdin);
       fprintf (stderr, "nbBytes: %d\n", nbBytes);
       if (feof(stdin))
          break;
-
+      
+      /*Read the "packet" encoded by sampleenc*/
       fread(cbits, 1, nbBytes, stdin);
+      /*Copy the data into the bit-stream struct*/
       speex_bits_read_from(&bits, cbits, nbBytes);
 
+      /*Decode the data*/
       speex_decode(state, &bits, output);
 
+      /*Copy from float to short (16 bits) for output*/
       for (i=0;i<FRAME_SIZE;i++)
          out[i]=output[i];
 
+      /*Write the decoded audio to file*/
       fwrite(out, sizeof(short), FRAME_SIZE, fout);
    }
    
+   /*Destroy the decoder state*/
    speex_encoder_destroy(state);
+   /*Destroy the bit-stream struct*/
    speex_bits_destroy(&bits);
    fclose(fout);
    return 0;

1.2       +17 -3     speex/doc/sampleenc.c

Index: sampleenc.c
===================================================================
RCS file: /usr/local/cvsroot/speex/doc/sampleenc.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- sampleenc.c	17 Feb 2003 05:02:03 -0000	1.1
+++ sampleenc.c	26 Feb 2003 19:51:18 -0000	1.2
@@ -1,7 +1,7 @@
 #include <speex.h>
 #include <stdio.h>
-#include <stdlib.h>
 
+/*The frame size is hardcoded for this sample code but it doesn't have to be*/
 #define FRAME_SIZE 160
 int main(int argc, char **argv)
 {
@@ -11,39 +11,53 @@
    float input[FRAME_SIZE];
    char cbits[200];
    int nbBytes;
+   /*Holds the state of the encoder*/
    void *state;
+   /*Holds bits so they can be read and written to by the Speex routines*/
    SpeexBits bits;
    int i, tmp;
 
-
+   /*Create a new encoder state in narrowband mode*/
    state = speex_encoder_init(&speex_nb_mode);
 
+   /*Set the quality to 8 (15 kbps)*/
    tmp=8;
    speex_encoder_ctl(state, SPEEX_SET_QUALITY, &tmp);
 
    inFile = argv[1];
    fin = fopen(inFile, "r");
 
+   /*Initialization of the structure that holds the bits*/
    speex_bits_init(&bits);
    while (1)
    {
+      /*Read a 16 bits/sample audio frame*/
       fread(in, sizeof(short), FRAME_SIZE, fin);
       if (feof(fin))
          break;
+      /*Copy the 16 bits values to float so Speex can work on them*/
       for (i=0;i<FRAME_SIZE;i++)
          input[i]=in[i];
+
+      /*Flush all the bits in the struct so we can encode a new frame*/
       speex_bits_reset(&bits);
 
+      /*Encode the frame*/
       speex_encode(state, input, &bits);
+      /*Copy the bits to an array of char that can be written*/
       nbBytes = speex_bits_write(&bits, cbits, 200);
 
+      /*Write the size of the frame first. This is what sampledec expects but
+       it's likely to be different in your own application*/
       fwrite(&nbBytes, sizeof(int), 1, stdout);
+      /*Write the compressed data*/
       fwrite(cbits, 1, nbBytes, stdout);
-      speex_bits_rewind(&bits);
       
    }
    
+   /*Destroy the encoder state*/
    speex_encoder_destroy(state);
+   /*Destroy the bit-packing struct*/
    speex_bits_destroy(&bits);
    fclose(fin);
    return 0;

--- >8 ----
List archives:  http://www.xiph.org/archives/
Ogg project homepage: http://www.xiph.org/ogg/
To unsubscribe from this list, send a message to 'cvs-request at xiph.org'
containing only the word 'unsubscribe' in the body.  No subject is needed.
Unsubscribe messages sent to the list will be ignored/filtered.