Index: libspeex/resample.c =================================================================== --- libspeex/resample.c (revision 14810) +++ libspeex/resample.c (working copy) @@ -74,6 +74,7 @@ #include "os_support.h" #endif /* OUTSIDE_SPEEX */ +#include "stack_alloc.h" #include #ifndef M_PI @@ -86,10 +87,6 @@ #define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x)))) #endif -/*#define float double*/ -#define FILTER_SIZE 64 -#define OVERSAMPLE 8 - #define IMAX(a,b) ((a) > (b) ? (a) : (b)) #define IMIN(a,b) ((a) < (b) ? (a) : (b)) @@ -97,6 +94,20 @@ #define NULL 0 #endif +#ifdef _USE_SSE +#include "resample_sse.h" +#endif + +/* Numer of elements to allocate on the stack */ +#ifdef VAR_ARRAYS +#define FIXED_STACK_ALLOC 16384 +#else +#define FIXED_STACK_ALLOC 1024 +#endif + +/* If the stack-space won't be large enough due to downsampling at more than 48->8 at Q>7, how much extra space should we permanently allocate instead? */ +#define COMFORT_BUFFER 160 + typedef int (*resampler_basic_func)(SpeexResamplerState *, spx_uint32_t , const spx_word16_t *, spx_uint32_t *, spx_word16_t *, spx_uint32_t *); struct SpeexResamplerState_ { @@ -317,47 +328,47 @@ static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) { - int N = st->filt_len; + const int N = st->filt_len; int out_sample = 0; - spx_word16_t *mem; int last_sample = st->last_sample[channel_index]; spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index]; - mem = st->mem + channel_index * st->mem_alloc_size; - while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) - { + const spx_word16_t *sinc_table = st->sinc_table; + const int out_stride = st->out_stride; + const int int_advance = st->int_advance; + const int frac_advance = st->frac_advance; + const spx_uint32_t den_rate = st->den_rate; + spx_word32_t sum; int j; - spx_word32_t sum=0; - /* We already have all the filter coefficients pre-computed in the table */ - const spx_word16_t *ptr; - /* Do the memory part */ - for (j=0;last_sample-N+1+j < 0;j++) + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) { - sum += MULT16_16(mem[last_sample+j],st->sinc_table[samp_frac_num*st->filt_len+j]); - } + const spx_word16_t *sinc = & sinc_table[samp_frac_num*N]; + const spx_word16_t *iptr = & in[last_sample]; - /* Do the new part */ - if (in != NULL) - { - ptr = in+st->in_stride*(last_sample-N+1+j); - for (;jsinc_table[samp_frac_num*st->filt_len+j]); - ptr += st->in_stride; +#ifndef OVERRIDE_INNER_PRODUCT_SINGLE + float accum[4] = {0,0,0,0}; + + for(j=0;jout_stride; - out_sample++; - last_sample += st->int_advance; - samp_frac_num += st->frac_advance; - if (samp_frac_num >= st->den_rate) + out[out_stride * out_sample++] = PSHR32(sum, 15); + last_sample += int_advance; + samp_frac_num += frac_advance; + if (samp_frac_num >= den_rate) { - samp_frac_num -= st->den_rate; + samp_frac_num -= den_rate; last_sample++; } } + st->last_sample[channel_index] = last_sample; st->samp_frac_num[channel_index] = samp_frac_num; return out_sample; @@ -368,47 +379,47 @@ /* This is the same as the previous function, except with a double-precision accumulator */ static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) { - int N = st->filt_len; + const int N = st->filt_len; int out_sample = 0; - spx_word16_t *mem; int last_sample = st->last_sample[channel_index]; spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index]; - mem = st->mem + channel_index * st->mem_alloc_size; - while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) - { + const spx_word16_t *sinc_table = st->sinc_table; + const int out_stride = st->out_stride; + const int int_advance = st->int_advance; + const int frac_advance = st->frac_advance; + const spx_uint32_t den_rate = st->den_rate; + double sum; int j; - double sum=0; - /* We already have all the filter coefficients pre-computed in the table */ - const spx_word16_t *ptr; - /* Do the memory part */ - for (j=0;last_sample-N+1+j < 0;j++) + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) { - sum += MULT16_16(mem[last_sample+j],(double)st->sinc_table[samp_frac_num*st->filt_len+j]); - } + const spx_word16_t *sinc = & sinc_table[samp_frac_num*N]; + const spx_word16_t *iptr = & in[last_sample]; - /* Do the new part */ - if (in != NULL) - { - ptr = in+st->in_stride*(last_sample-N+1+j); - for (;jsinc_table[samp_frac_num*st->filt_len+j]); - ptr += st->in_stride; +#ifndef OVERRIDE_INNER_PRODUCT_DOUBLE + double accum[4] = {0,0,0,0}; + + for(j=0;jout_stride; - out_sample++; - last_sample += st->int_advance; - samp_frac_num += st->frac_advance; - if (samp_frac_num >= st->den_rate) + out[out_stride * out_sample++] = PSHR32(sum, 15); + last_sample += int_advance; + samp_frac_num += frac_advance; + if (samp_frac_num >= den_rate) { - samp_frac_num -= st->den_rate; + samp_frac_num -= den_rate; last_sample++; } } + st->last_sample[channel_index] = last_sample; st->samp_frac_num[channel_index] = samp_frac_num; return out_sample; @@ -417,69 +428,61 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) { - int N = st->filt_len; + const int N = st->filt_len; int out_sample = 0; - spx_word16_t *mem; int last_sample = st->last_sample[channel_index]; spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index]; - mem = st->mem + channel_index * st->mem_alloc_size; + const spx_word16_t *sinc_table = st->sinc_table; + const int out_stride = st->out_stride; + const int int_advance = st->int_advance; + const int frac_advance = st->frac_advance; + const spx_uint32_t den_rate = st->den_rate; + int j; + spx_word16_t frac; + spx_word32_t sum; + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) { - int j; - spx_word32_t sum=0; + const spx_word16_t *sinc = & sinc_table[samp_frac_num*N]; + const spx_word16_t *iptr = & in[last_sample]; - /* We need to interpolate the sinc filter */ - spx_word32_t accum[4] = {0.f,0.f, 0.f, 0.f}; - spx_word16_t interp[4]; - const spx_word16_t *ptr; - int offset; - spx_word16_t frac; - offset = samp_frac_num*st->oversample/st->den_rate; + const int offset = samp_frac_num*st->oversample/st->den_rate; #ifdef FIXED_POINT - frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate); + const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate); #else - frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate; + const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate; #endif - /* This code is written like this to make it easy to optimise with SIMD. - For most DSPs, it would be best to split the loops in two because most DSPs - have only two accumulators */ - for (j=0;last_sample-N+1+j < 0;j++) - { - spx_word16_t curr_mem = mem[last_sample+j]; - accum[0] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset-2]); - accum[1] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset-1]); - accum[2] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset]); - accum[3] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset+1]); - } + spx_word16_t interp[4]; - if (in != NULL) - { - ptr = in+st->in_stride*(last_sample-N+1+j); - /* Do the new part */ - for (;jin_stride; + +#ifndef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE + spx_word32_t accum[4] = {0,0,0,0}; + + for(j=0;jsinc_table[4+(j+1)*st->oversample-offset-2]); accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); } - } + cubic_coef(frac, interp); sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]); +#else + cubic_coef(frac, interp); + sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); +#endif - *out = PSHR32(sum,15); - out += st->out_stride; - out_sample++; - last_sample += st->int_advance; - samp_frac_num += st->frac_advance; - if (samp_frac_num >= st->den_rate) + out[out_stride * out_sample++] = PSHR32(sum,15); + last_sample += int_advance; + samp_frac_num += frac_advance; + if (samp_frac_num >= den_rate) { - samp_frac_num -= st->den_rate; + samp_frac_num -= den_rate; last_sample++; } } + st->last_sample[channel_index] = last_sample; st->samp_frac_num[channel_index] = samp_frac_num; return out_sample; @@ -490,63 +493,61 @@ /* This is the same as the previous function, except with a double-precision accumulator */ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) { - int N = st->filt_len; + const int N = st->filt_len; int out_sample = 0; - spx_word16_t *mem; int last_sample = st->last_sample[channel_index]; spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index]; - mem = st->mem + channel_index * st->mem_alloc_size; - while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) - { + const spx_word16_t *sinc_table = st->sinc_table; + const int out_stride = st->out_stride; + const int int_advance = st->int_advance; + const int frac_advance = st->frac_advance; + const spx_uint32_t den_rate = st->den_rate; int j; - spx_word32_t sum=0; + spx_word16_t frac; + spx_word32_t sum; - /* We need to interpolate the sinc filter */ - double accum[4] = {0.f,0.f, 0.f, 0.f}; - float interp[4]; - const spx_word16_t *ptr; - float alpha = ((float)samp_frac_num)/st->den_rate; - int offset = samp_frac_num*st->oversample/st->den_rate; - float frac = alpha*st->oversample - offset; - /* This code is written like this to make it easy to optimise with SIMD. - For most DSPs, it would be best to split the loops in two because most DSPs - have only two accumulators */ - for (j=0;last_sample-N+1+j < 0;j++) + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) { - double curr_mem = mem[last_sample+j]; - accum[0] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset-2]); - accum[1] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset-1]); - accum[2] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset]); - accum[3] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset+1]); - } - if (in != NULL) - { - ptr = in+st->in_stride*(last_sample-N+1+j); - /* Do the new part */ - for (;jin_stride; + const spx_word16_t *sinc = & sinc_table[samp_frac_num*N]; + const spx_word16_t *iptr = & in[last_sample]; + + const int offset = samp_frac_num*st->oversample/st->den_rate; +#ifdef FIXED_POINT + const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate); +#else + const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate; +#endif + spx_word16_t interp[4]; + + +#ifndef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE + double accum[4] = {0,0,0,0}; + + for(j=0;jsinc_table[4+(j+1)*st->oversample-offset-2]); accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); } - } + cubic_coef(frac, interp); - sum = interp[0]*accum[0] + interp[1]*accum[1] + interp[2]*accum[2] + interp[3]*accum[3]; + sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]); +#else + cubic_coef(frac, interp); + sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); +#endif - *out = PSHR32(sum,15); - out += st->out_stride; - out_sample++; - last_sample += st->int_advance; - samp_frac_num += st->frac_advance; - if (samp_frac_num >= st->den_rate) + out[out_stride * out_sample++] = PSHR32(sum,15); + last_sample += int_advance; + samp_frac_num += frac_advance; + if (samp_frac_num >= den_rate) { - samp_frac_num -= st->den_rate; + samp_frac_num -= den_rate; last_sample++; } } + st->last_sample[channel_index] = last_sample; st->samp_frac_num[channel_index] = samp_frac_num; return out_sample; @@ -556,6 +557,7 @@ static void update_filter(SpeexResamplerState *st) { spx_uint32_t old_length; + spx_uint32_t comfort_length = 0; old_length = st->filt_len; st->oversample = quality_map[st->quality].oversample; @@ -584,6 +586,10 @@ st->cutoff = quality_map[st->quality].upsample_bandwidth; } + /* Will we run out of stackspace at runtime? */ + if ((st->filt_len + COMFORT_BUFFER) > FIXED_STACK_ALLOC) + comfort_length = COMFORT_BUFFER; + /* Choose the resampling type that requires the least amount of memory */ if (st->den_rate <= st->oversample) { @@ -643,18 +649,18 @@ if (!st->mem) { spx_uint32_t i; - st->mem = (spx_word16_t*)speex_alloc(st->nb_channels*(st->filt_len-1) * sizeof(spx_word16_t)); - for (i=0;inb_channels*(st->filt_len-1);i++) + st->mem_alloc_size = st->filt_len-1 + comfort_length; + st->mem = (spx_word16_t*)speex_alloc(st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t)); + for (i=0;inb_channels*st->mem_alloc_size;i++) st->mem[i] = 0; - st->mem_alloc_size = st->filt_len-1; /*speex_warning("init filter");*/ } else if (!st->started) { spx_uint32_t i; - st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*(st->filt_len-1) * sizeof(spx_word16_t)); - for (i=0;inb_channels*(st->filt_len-1);i++) + st->mem_alloc_size = st->filt_len-1 + comfort_length; + st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t)); + for (i=0;inb_channels*st->mem_alloc_size;i++) st->mem[i] = 0; - st->mem_alloc_size = st->filt_len-1; /*speex_warning("reinit filter");*/ } else if (st->filt_len > old_length) { @@ -662,10 +668,10 @@ /* Increase the filter length */ /*speex_warning("increase filter size");*/ int old_alloc_size = st->mem_alloc_size; - if (st->filt_len-1 > st->mem_alloc_size) + if ((st->filt_len-1 + comfort_length) > st->mem_alloc_size) { - st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*(st->filt_len-1) * sizeof(spx_word16_t)); - st->mem_alloc_size = st->filt_len-1; + st->mem_alloc_size = st->filt_len-1 + comfort_length; + st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t)); } for (i=st->nb_channels-1;i>=0;i--) { @@ -789,213 +795,204 @@ speex_free(st); } +/* + * Here 'in' should already be deinterleaved and have st->filt_len empty elements before the data. + */ - -static int speex_resampler_process_native(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) +static int speex_resampler_process_native(SpeexResamplerState *st, spx_uint32_t channel_index, spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) { int j=0; - int N = st->filt_len; + const int N = st->filt_len; int out_sample = 0; - spx_word16_t *mem; - spx_uint32_t tmp_out_len = 0; - mem = st->mem + channel_index * st->mem_alloc_size; + spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size; st->started = 1; - /* Handle the case where we have samples left from a reduction in filter length */ - if (st->magic_samples[channel_index]) - { - int istride_save; - spx_uint32_t tmp_in_len; - spx_uint32_t tmp_magic; + /* Prepend the memory */ + if (in != mem) + for(j=0;jin_stride; - tmp_in_len = st->magic_samples[channel_index]; - tmp_out_len = *out_len; - /* magic_samples needs to be set to zero to avoid infinite recursion */ - tmp_magic = st->magic_samples[channel_index]; - st->magic_samples[channel_index] = 0; - st->in_stride = 1; - speex_resampler_process_native(st, channel_index, mem+N-1, &tmp_in_len, out, &tmp_out_len); - st->in_stride = istride_save; - /*speex_warning_int("extra samples:", tmp_out_len);*/ - /* If we couldn't process all "magic" input samples, save the rest for next time */ - if (tmp_in_len < tmp_magic) - { - spx_uint32_t i; - st->magic_samples[channel_index] = tmp_magic-tmp_in_len; - for (i=0;imagic_samples[channel_index];i++) - mem[N-1+i]=mem[N-1+i+tmp_in_len]; - } - out += tmp_out_len*st->out_stride; - *out_len -= tmp_out_len; - } - /* Call the right resampler through the function ptr */ out_sample = st->resampler_ptr(st, channel_index, in, in_len, out, out_len); if (st->last_sample[channel_index] < (spx_int32_t)*in_len) *in_len = st->last_sample[channel_index]; - *out_len = out_sample+tmp_out_len; + *out_len = out_sample; st->last_sample[channel_index] -= *in_len; - for (j=0;jin_stride*(j+*in_len-N+1)]; - } else { - for (;jmagic_samples[channel_index]; + spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size; + const int N = st->filt_len; -#ifdef FIXED_POINT -EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len) + speex_resampler_process_native(st, channel_index, mem, &tmp_in_len, *out, &out_len); + + st->magic_samples[channel_index] -= tmp_in_len; + + /* If we couldn't process all "magic" input samples, save the rest for next time */ + if (st->magic_samples[channel_index]) { spx_uint32_t i; - int istride_save, ostride_save; -#ifdef VAR_ARRAYS - spx_word16_t x[*in_len]; - spx_word16_t y[*out_len]; - /*VARDECL(spx_word16_t *x); - VARDECL(spx_word16_t *y); - ALLOC(x, *in_len, spx_word16_t); - ALLOC(y, *out_len, spx_word16_t);*/ - istride_save = st->in_stride; - ostride_save = st->out_stride; - if (in != NULL) - { - for (i=0;i<*in_len;i++) - x[i] = WORD2INT(in[i*st->in_stride]); - st->in_stride = st->out_stride = 1; - speex_resampler_process_native(st, channel_index, x, in_len, y, out_len); - } else { - st->in_stride = st->out_stride = 1; - speex_resampler_process_native(st, channel_index, NULL, in_len, y, out_len); + for (i=0;imagic_samples[channel_index];i++) + mem[N-1+i]=mem[N-1+i+tmp_in_len]; } - st->in_stride = istride_save; - st->out_stride = ostride_save; - for (i=0;i<*out_len;i++) - out[i*st->out_stride] = y[i]; -#else - spx_word16_t x[FIXED_STACK_ALLOC]; - spx_word16_t y[FIXED_STACK_ALLOC]; - spx_uint32_t ilen=*in_len, olen=*out_len; - istride_save = st->in_stride; - ostride_save = st->out_stride; - while (ilen && olen) - { - spx_uint32_t ichunk, ochunk; - ichunk = ilen; - ochunk = olen; - if (ichunk>FIXED_STACK_ALLOC) - ichunk=FIXED_STACK_ALLOC; - if (ochunk>FIXED_STACK_ALLOC) - ochunk=FIXED_STACK_ALLOC; - if (in != NULL) - { - for (i=0;iin_stride]); - st->in_stride = st->out_stride = 1; - speex_resampler_process_native(st, channel_index, x, &ichunk, y, &ochunk); + *out += out_len*st->out_stride; + return out_len; +} + +static int speex_resampler_process_native_buffer(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len, spx_word16_t *x, spx_uint32_t xlen) { + int j; + spx_uint32_t ilen = *in_len; + spx_uint32_t olen = *out_len; + + if (st->magic_samples[channel_index]) + olen -= speex_resampler_magic(st, channel_index, &out, olen); + if (! st->magic_samples[channel_index]) { + while (ilen && olen) { + spx_uint32_t ichunk = (ilen > (xlen - (st->filt_len - 1))) ? (xlen - (st->filt_len - 1)) : ilen; + spx_uint32_t ochunk = olen; + + if (in) { + for(j=0;jfilt_len-1]=in[j*st->in_stride]; } else { - st->in_stride = st->out_stride = 1; - speex_resampler_process_native(st, channel_index, NULL, &ichunk, y, &ochunk); + for(j=0;jfilt_len-1]=0; } - st->in_stride = istride_save; - st->out_stride = ostride_save; - for (i=0;iout_stride] = y[i]; - out += ochunk; - in += ichunk; + speex_resampler_process_native(st, channel_index, x, &ichunk, out, &ochunk); ilen -= ichunk; olen -= ochunk; + out += ochunk * st->out_stride; + if (in) + in += ichunk * st->in_stride; } + } *in_len -= ilen; *out_len -= olen; -#endif return RESAMPLER_ERR_SUCCESS; } + +#ifdef FIXED_POINT EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len) -{ - return speex_resampler_process_native(st, channel_index, in, in_len, out, out_len); -} #else EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len) +#endif { - return speex_resampler_process_native(st, channel_index, in, in_len, out, out_len); + if ((st->filt_len - 1 + COMFORT_BUFFER) > FIXED_STACK_ALLOC) { + return speex_resampler_process_native_buffer(st, channel_index, in, in_len, out, out_len, st->mem + channel_index * st->mem_alloc_size, st->mem_alloc_size); + } else { +#ifdef VAR_ARRAYS + const unsigned int totlen = st->filt_len + *in_len; + const unsigned int xlen = (totlen < FIXED_STACK_ALLOC) ? totlen : FIXED_STACK_ALLOC; + VARDECL(spx_word16_t *xstack); + ALLOC(xstack, xlen, spx_word16_t); +#else + const unsigned int xlen = FIXED_STACK_ALLOC; + spx_word16_t xstack[FIXED_STACK_ALLOC]; +#endif + return speex_resampler_process_native_buffer(st, channel_index, in, in_len, out, out_len, xstack, xlen); } -EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len) +} + +#ifdef FIXED_POINT +static int speex_resampler_process_nonnative_buffer(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len, spx_word16_t *x, spx_uint32_t xlen) +#else +static int speex_resampler_process_nonnative_buffer(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len, spx_word16_t *x, spx_uint32_t xlen) +#endif { - spx_uint32_t i; - int istride_save, ostride_save; + int j; + const int istride_save = st->in_stride; + const int ostride_save = st->out_stride; + spx_uint32_t ilen = *in_len; + spx_uint32_t olen = *out_len; #ifdef VAR_ARRAYS - spx_word16_t x[*in_len]; - spx_word16_t y[*out_len]; - /*VARDECL(spx_word16_t *x); - VARDECL(spx_word16_t *y); - ALLOC(x, *in_len, spx_word16_t); - ALLOC(y, *out_len, spx_word16_t);*/ - istride_save = st->in_stride; - ostride_save = st->out_stride; - if (in != NULL) - { - for (i=0;i<*in_len;i++) - x[i] = in[i*st->in_stride]; - st->in_stride = st->out_stride = 1; - speex_resampler_process_native(st, channel_index, x, in_len, y, out_len); - } else { - st->in_stride = st->out_stride = 1; - speex_resampler_process_native(st, channel_index, NULL, in_len, y, out_len); + const unsigned int ylen = (olen < FIXED_STACK_ALLOC) ? olen : FIXED_STACK_ALLOC; + VARDECL(spx_word16_t *ystack); + ALLOC(ystack, ylen, spx_word16_t); +#else + const unsigned int ylen = FIXED_STACK_ALLOC; + spx_word16_t ystack[FIXED_STACK_ALLOC]; +#endif + + st->out_stride = 1; + + while (ilen && olen) { + spx_word16_t *y = ystack; + spx_uint32_t ichunk = (ilen > (xlen - (st->filt_len - 1))) ? (xlen - (st->filt_len - 1)) : ilen; + spx_uint32_t ochunk = (olen > ylen) ? ylen : olen; + spx_uint32_t omagic = 0; + + if (st->magic_samples[channel_index]) { + omagic = speex_resampler_magic(st, channel_index, &y, ochunk); + ochunk -= omagic; + olen -= omagic; } - st->in_stride = istride_save; - st->out_stride = ostride_save; - for (i=0;i<*out_len;i++) - out[i*st->out_stride] = WORD2INT(y[i]); + if (! st->magic_samples[channel_index]) { + if (in) { + for(j=0;jfilt_len-1]=WORD2INT(in[j*istride_save]); #else - spx_word16_t x[FIXED_STACK_ALLOC]; - spx_word16_t y[FIXED_STACK_ALLOC]; - spx_uint32_t ilen=*in_len, olen=*out_len; - istride_save = st->in_stride; - ostride_save = st->out_stride; - while (ilen && olen) - { - spx_uint32_t ichunk, ochunk; - ichunk = ilen; - ochunk = olen; - if (ichunk>FIXED_STACK_ALLOC) - ichunk=FIXED_STACK_ALLOC; - if (ochunk>FIXED_STACK_ALLOC) - ochunk=FIXED_STACK_ALLOC; - if (in != NULL) - { - for (i=0;iin_stride]; - st->in_stride = st->out_stride = 1; + x[j+st->filt_len-1]=in[j*istride_save]; +#endif + } else { + for(j=0;jfilt_len-1]=0; + } + speex_resampler_process_native(st, channel_index, x, &ichunk, y, &ochunk); } else { - st->in_stride = st->out_stride = 1; - speex_resampler_process_native(st, channel_index, NULL, &ichunk, y, &ochunk); + ichunk = 0; + ochunk = 0; } - st->in_stride = istride_save; - st->out_stride = ostride_save; - for (i=0;iout_stride] = WORD2INT(y[i]); - out += ochunk; - in += ichunk; + + for (j=0;jout_stride = ostride_save; *in_len -= ilen; *out_len -= olen; -#endif + return RESAMPLER_ERR_SUCCESS; } + +#ifdef FIXED_POINT +EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len) +#else +EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len) #endif +{ + if ((st->filt_len - 1 + COMFORT_BUFFER) > FIXED_STACK_ALLOC) { + return speex_resampler_process_nonnative_buffer(st, channel_index, in, in_len, out, out_len, st->mem + channel_index * st->mem_alloc_size, st->mem_alloc_size); + } else { +#ifdef VAR_ARRAYS + const unsigned int totlen = st->filt_len + *in_len; + const unsigned int xlen = (totlen < FIXED_STACK_ALLOC) ? totlen : FIXED_STACK_ALLOC; + VARDECL(spx_word16_t *xstack); + ALLOC(xstack, xlen, spx_word16_t); +#else + const unsigned int xlen = FIXED_STACK_ALLOC; + spx_word16_t xstack[FIXED_STACK_ALLOC]; +#endif + return speex_resampler_process_nonnative_buffer(st, channel_index, in, in_len, out, out_len, xstack, xlen); + } +} EXPORT int speex_resampler_process_interleaved_float(SpeexResamplerState *st, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len) {