[Speex-dev] Speex (in ios) really poor quality (and robotic) sound

Morgan Segalis morgan at hsware.net
Sun Sep 30 10:45:19 PDT 2012


Hi everyone,

I'm trying to encode and decode audio with Speex. When I bypass the codec, the audio is loud and clear, but when I run the audio through an encode/decode round trip to test the quality, I get really poor audio quality and a robotic sound.

Here's my init audio method : 

    #define AUDIO_QUALITY 10  // Value handed to SPEEX_SET_QUALITY in -initAudio.

    /**
     * Sets up the Speex codec state and the RemoteIO audio unit, then starts audio I/O.
     *
     * Steps, in order:
     *   1. Initialise the Speex bit buffers and the narrowband (speex_nb_mode)
     *      encoder/decoder; set the encoder quality to AUDIO_QUALITY and turn the
     *      decoder enhancement option on.
     *   2. Initialise the audio session, request a 0.02 s hardware I/O buffer,
     *      select the play-and-record category and register the route-change listener.
     *   3. Create the RemoteIO unit, enable input and output, and apply an
     *      8 kHz / mono / 16-bit signed integer PCM format to both buses.
     *   4. Install recordingCallback / playingCallback and allocate tempBuffer
     *      (one FRAME_SIZE frame of 16-bit samples).
     *   5. Activate the session, initialise and start the unit.
     *
     * Any failing Core Audio call throws through XThrowIfError; the exception is
     * caught and logged here, so the method never propagates an error to its caller.
     */
    - (void) initAudio {
        try {
            // --- Speex codec state ---
            speex_bits_init(&bits_in);
            speex_bits_init(&bits_out);
            enc_state = speex_encoder_init(&speex_nb_mode);
            dec_state = speex_decoder_init(&speex_nb_mode);

            int quality = AUDIO_QUALITY;
            speex_encoder_ctl(enc_state, SPEEX_SET_QUALITY, &quality);
            int enhancement = 1;
            speex_decoder_ctl(dec_state, SPEEX_SET_ENH, &enhancement);

            OSStatus status;

            // --- Audio session ---
            XThrowIfError(AudioSessionInitialize(NULL, NULL, rioInterruptionListener, self), "couldn't initialize audio session");

            // 0.02 s at 8000 Hz is 160 samples, i.e. one FRAME_SIZE frame.
            // NOTE(review): this is only a *preferred* duration; the buffer size actually
            // delivered to the callbacks may differ -- confirm with the size logged in
            // -processAudio:.
            float aBufferLength = 0.02; // In seconds
            status = AudioSessionSetProperty(kAudioSessionProperty_PreferredHardwareIOBufferDuration,
                                             sizeof(aBufferLength), &aBufferLength);
            XThrowIfError(status, "couldn't set preferred hardware I/O buffer duration");

            UInt32 audioCategory = kAudioSessionCategory_PlayAndRecord;
            XThrowIfError(AudioSessionSetProperty(kAudioSessionProperty_AudioCategory, sizeof(audioCategory), &audioCategory), "couldn't set audio category");
            XThrowIfError(AudioSessionAddPropertyListener(kAudioSessionProperty_AudioRouteChange, propListener, self), "couldn't set property listener");

            // --- RemoteIO audio unit ---
            AudioComponentDescription desc;
            desc.componentType = kAudioUnitType_Output;
            desc.componentSubType = kAudioUnitSubType_RemoteIO;
            desc.componentFlags = 0;
            desc.componentFlagsMask = 0;
            desc.componentManufacturer = kAudioUnitManufacturer_Apple;

            AudioComponent inputComponent = AudioComponentFindNext(NULL, &desc);

            status = AudioComponentInstanceNew(inputComponent, &rioUnit);
            XThrowIfError(status, "couldn't create RemoteIO audio unit instance");

            // Enable IO for recording
            UInt32 flag = 1;
            status = AudioUnitSetProperty(rioUnit,
                                          kAudioOutputUnitProperty_EnableIO,
                                          kAudioUnitScope_Input,
                                          kInputBus,
                                          &flag,
                                          sizeof(flag));
            XThrowIfError(status, "couldn't enable IO for recording");

            // Enable IO for playback
            status = AudioUnitSetProperty(rioUnit,
                                          kAudioOutputUnitProperty_EnableIO,
                                          kAudioUnitScope_Output,
                                          kOutputBus,
                                          &flag,
                                          sizeof(flag));
            XThrowIfError(status, "couldn't enable IO for playback");

            // Describe format: 8 kHz, mono, packed 16-bit signed integer PCM.
            // Zero-initialise first so fields that are not set explicitly below
            // (e.g. mReserved) do not carry stack garbage into AudioUnitSetProperty.
            AudioStreamBasicDescription audioFormat = {0};
            audioFormat.mSampleRate         = 8000.00;
            audioFormat.mFormatID           = kAudioFormatLinearPCM;
            audioFormat.mFormatFlags        =   kAudioFormatFlagIsSignedInteger |
                                                kAudioFormatFlagsNativeEndian |
                                                kAudioFormatFlagIsPacked;
            audioFormat.mFramesPerPacket    = 1;
            audioFormat.mChannelsPerFrame   = 1;
            audioFormat.mBitsPerChannel     = 16;
            audioFormat.mBytesPerPacket     = 2;
            audioFormat.mBytesPerFrame      = 2;

            // Apply format to the data coming out of the input bus...
            status = AudioUnitSetProperty(rioUnit,
                                          kAudioUnitProperty_StreamFormat,
                                          kAudioUnitScope_Output,
                                          kInputBus,
                                          &audioFormat,
                                          sizeof(audioFormat));
            XThrowIfError(status, "couldn't set stream format on input bus");

            // ...and to the data going into the output bus.
            status = AudioUnitSetProperty(rioUnit,
                                          kAudioUnitProperty_StreamFormat,
                                          kAudioUnitScope_Input,
                                          kOutputBus,
                                          &audioFormat,
                                          sizeof(audioFormat));
            XThrowIfError(status, "couldn't set stream format on output bus");

            // Set input callback
            AURenderCallbackStruct callbackStruct;
            callbackStruct.inputProc = recordingCallback;
            callbackStruct.inputProcRefCon = self;
            status = AudioUnitSetProperty(rioUnit,
                                          kAudioOutputUnitProperty_SetInputCallback,
                                          kAudioUnitScope_Global,
                                          kInputBus,
                                          &callbackStruct,
                                          sizeof(callbackStruct));
            XThrowIfError(status, "couldn't set input callback");

            // Set output callback
            callbackStruct.inputProc = playingCallback;
            callbackStruct.inputProcRefCon = self;
            status = AudioUnitSetProperty(rioUnit,
                                          kAudioUnitProperty_SetRenderCallback,
                                          kAudioUnitScope_Global,
                                          kOutputBus,
                                          &callbackStruct,
                                          sizeof(callbackStruct));
            XThrowIfError(status, "couldn't set render callback");

            // Disable buffer allocation for the recorder (optional - do this if we want to pass in our own).
            // Because the step is optional a failure is reported but does not abort the setup.
            flag = 0;
            status = AudioUnitSetProperty(rioUnit,
                                          kAudioUnitProperty_ShouldAllocateBuffer,
                                          kAudioUnitScope_Output,
                                          kInputBus,
                                          &flag,
                                          sizeof(flag));
            if (status != noErr) {
                NSLog(@"couldn't disable input buffer allocation (status %d)", (int)status);
            }

            // Allocate our own buffer: 1 channel, 16 bits per sample, thus 2 bytes per frame,
            // sized to hold exactly one FRAME_SIZE frame.
            tempBuffer.mNumberChannels = 1;
            tempBuffer.mDataByteSize = FRAME_SIZE * 2;
            tempBuffer.mData = malloc( FRAME_SIZE * 2 );
            if (tempBuffer.mData == NULL) {
                // Without the buffer there is nowhere to put processed audio; do not start I/O.
                tempBuffer.mDataByteSize = 0;
                NSLog(@"couldn't allocate temp audio buffer");
                return;
            }

            XThrowIfError(AudioSessionSetActive(true), "couldn't set audio session active\n");

            // Initialise
            status = AudioUnitInitialize(rioUnit);
            XThrowIfError(status, "couldn't initialize RemoteIO audio unit");

            status = AudioOutputUnitStart(rioUnit);
            XThrowIfError(status, "couldn't start RemoteIO audio unit");
        }
        catch (CAXException &e) {
            // Report which operation failed and with which OSStatus instead of dropping it.
            NSLog(@"CAXException... %s (error %d)", e.mOperation, (int)e.mError);
        }
        catch (...) {
            fprintf(stderr, "An unknown error occurred\n");
        }
    }

My Speex encode and decode functions:


    #define FRAME_SIZE 160       // Samples per codec frame (16-bit each, so FRAME_SIZE * 2 bytes of PCM).
    #define COMP_FRAME_SIZE 62   // Capacity in bytes of the buffer that receives one encoded frame.
                                 // NOTE(review): presumably the encoded size of a narrowband frame at
                                 // AUDIO_QUALITY 10 -- confirm against the "Encoded size" log output.

    /**
     * Encodes one frame of FRAME_SIZE 16-bit samples with the shared Speex encoder.
     *
     * @param buffer       PCM samples to encode. Only the first FRAME_SIZE samples are used.
     * @param inSize       Size of buffer in bytes. When it is smaller than one frame the
     *                     missing samples are encoded as silence instead of reading past
     *                     the end of buffer.
     * @param encodedSize  Out: number of bytes written to the returned buffer (0 on failure).
     * @return A malloc'd buffer of COMP_FRAME_SIZE bytes holding the encoded frame; the caller
     *         must free() it. NULL if an argument is NULL or the allocation fails.
     */
    char* encodeSpeex(spx_int16_t *buffer, UInt32 inSize, int *encodedSize) {
        if (encodedSize == NULL) {
            return NULL;
        }
        *encodedSize = 0;
        if (buffer == NULL) {
            return NULL;
        }

        // The encoder always consumes a full frame, so pad short input with zeros.
        spx_int16_t paddedFrame[FRAME_SIZE];
        spx_int16_t *input = buffer;
        if (inSize < sizeof(paddedFrame)) {
            memset(paddedFrame, 0, sizeof(paddedFrame));
            memcpy(paddedFrame, buffer, inSize);
            input = paddedFrame;
        }

        char *outputBuffer = (char *)malloc(COMP_FRAME_SIZE);
        if (outputBuffer == NULL) {
            return NULL;
        }

        speex_bits_reset(&bits_in);
        speex_encode_int(enc_state, input, &bits_in);
        // The write limit must be the real capacity of outputBuffer; the previous limit of
        // FRAME_SIZE * 2 allowed writes far beyond the COMP_FRAME_SIZE bytes allocated.
        *encodedSize = speex_bits_write(&bits_in, outputBuffer, COMP_FRAME_SIZE);
        return outputBuffer;
    }
    
    /**
     * Decodes one Speex frame into FRAME_SIZE 16-bit samples using the shared decoder.
     *
     * @param buffer       Encoded bytes of a single frame.
     * @param encodedSize  Number of valid bytes in buffer.
     * @param decodedSize  Unused; the output is always FRAME_SIZE samples. Kept so existing
     *                     callers keep compiling.
     * @return A calloc'd buffer of FRAME_SIZE samples that the caller must free(). The buffer
     *         contains silence (zeros) when the input is NULL/empty or the decoder reports
     *         an error. NULL only if the allocation itself fails.
     */
    short* decodeSpeex(char* buffer, int encodedSize, int decodedSize) {
        (void)decodedSize;

        short *pcm = (short *)calloc(FRAME_SIZE, sizeof(short));
        if (pcm == NULL) {
            return NULL;
        }
        if (buffer == NULL || encodedSize <= 0) {
            return pcm;  // nothing to decode: hand back silence
        }

        // The length argument is the number of encoded bytes in buffer. The previous
        // expression (encodedSize * FRAME_SIZE * *2) did not compile, and scaled by
        // FRAME_SIZE * 2 it would have made Speex read far beyond the end of buffer.
        speex_bits_read_from(&bits_out, buffer, encodedSize);

        if (speex_decode_int(dec_state, &bits_out, pcm) != 0) {
            // Non-zero means the decoder could not produce a frame; do not play partial output.
            memset(pcm, 0, FRAME_SIZE * sizeof(short));
        }
        return pcm;
    }

And finally, the function that calls the Speex encode and decode functions and copies the result into the buffer that will be played back by the playback callback:

    /**
     * Runs one frame of recorded audio through a Speex encode/decode round trip and
     * stores the decoded samples in tempBuffer.
     *
     * Only the first buffer of bufferList is read, and only its first FRAME_SIZE
     * samples are processed.
     *
     * NOTE(review): if the recorded buffer holds more (or fewer) than FRAME_SIZE samples,
     * the remaining samples are never encoded, which would leave gaps in the decoded
     * stream -- check the "Origin size" log (FRAME_SIZE * 2 bytes expected) and add a FIFO
     * that feeds the codec exactly FRAME_SIZE samples at a time if the sizes differ.
     *
     * @param bufferList  Recorded audio; mBuffers[0] is expected to hold 16-bit samples.
     */
    - (void) processAudio: (AudioBufferList*) bufferList
    {
        if (bufferList == NULL || bufferList->mNumberBuffers == 0) {
            return;
        }
        AudioBuffer sourceBuffer = bufferList->mBuffers[0];

        // UInt32 is not unsigned long on every target, so cast to match %lu.
        NSLog(@"Origin size: %lu", (unsigned long)sourceBuffer.mDataByteSize);

        const UInt32 frameBytes = FRAME_SIZE * 2;
        if (sourceBuffer.mData == NULL || sourceBuffer.mDataByteSize < frameBytes) {
            // The encoder reads a full frame; a shorter buffer would be read past its end.
            NSLog(@"processAudio: source buffer holds less than one frame, skipping");
            return;
        }
        if (tempBuffer.mData == NULL || tempBuffer.mDataByteSize < frameBytes) {
            NSLog(@"processAudio: temp buffer is missing or too small, skipping");
            return;
        }

        int size = 0;
        char *encodedAudio = encodeSpeex((spx_int16_t*) sourceBuffer.mData, sourceBuffer.mDataByteSize, &size);
        NSLog(@"Encoded size: %i", size);
        if (encodedAudio == NULL) {
            return;
        }

        short* decodedAudio = decodeSpeex(encodedAudio, size, sourceBuffer.mDataByteSize);
        free(encodedAudio);
        if (decodedAudio == NULL) {
            return;
        }

        memcpy(tempBuffer.mData, decodedAudio, frameBytes);
        free(decodedAudio);
    }

Does anyone have any idea why I get such poor quality? According to the Speex sample on the website, it should not sound like that...

Thank you for your help,

Morgan





More information about the Speex-dev mailing list