Out of curiosity, what compiler(s) got it wrong?<br><br>
<div class="gmail_quote">On Mon, Oct 26, 2009 at 9:41 AM, Thorvald Natvig <span dir="ltr">&lt;<a href="mailto:thorvald@natvig.com">thorvald@natvig.com</a>&gt;</span> wrote:<br>
<blockquote style="BORDER-LEFT: #ccc 1px solid; MARGIN: 0px 0px 0px 0.8ex; PADDING-LEFT: 1ex" class="gmail_quote">From: Thorvald Natvig &lt;<a href="mailto:slicer@users.sourceforge.net">slicer@users.sourceforge.net</a>&gt;<br>
<br>Some optimizing compilers miscompile the current SSE optimizations when<br>full optimizations are enabled. By using output value pointer instead of<br>a return value, we can bypass this misbehaviour.<br>---<br> libspeex/resample.c | 8 ++++----<br>
libspeex/resample_sse.h | 24 ++++++++----------------<br> 2 files changed, 12 insertions(+), 20 deletions(-)<br><br>diff --git a/libspeex/resample.c b/libspeex/resample.c<br>index 7b5a308..8131380 100644<br>--- a/libspeex/resample.c<br>
+++ b/libspeex/resample.c<br>@@ -361,7 +361,7 @@ static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t c<br> sum = accum[0] + accum[1] + accum[2] + accum[3];<br> */<br> #else<br>- sum = inner_product_single(sinc, iptr, N);<br>
+ inner_product_single(&sum, sinc, iptr, N);<br> #endif<br><br> out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 15), 32767);<br>@@ -412,7 +412,7 @@ static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t c<br>
}<br> sum = accum[0] + accum[1] + accum[2] + accum[3];<br> #else<br>- sum = inner_product_double(sinc, iptr, N);<br>+ inner_product_double(&sum, sinc, iptr, N);<br> #endif<br><br> out[out_stride * out_sample++] = PSHR32(sum, 15);<br>
@@ -472,7 +472,7 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3<br> sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));<br>
#else<br> cubic_coef(frac, interp);<br>- sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);<br>+ interpolate_product_single(&sum, iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);<br>
#endif<br><br> out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 14), 32767);<br>@@ -534,7 +534,7 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3<br> sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);<br>
#else<br> cubic_coef(frac, interp);<br>- sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);<br>+ interpolate_product_double(&sum, iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);<br>
#endif<br><br> out[out_stride * out_sample++] = PSHR32(sum,15);<br>diff --git a/libspeex/resample_sse.h b/libspeex/resample_sse.h<br>index 64be8a1..86ff35e 100644<br>--- a/libspeex/resample_sse.h<br>+++ b/libspeex/resample_sse.h<br>
@@ -37,10 +37,9 @@<br> #include &lt;xmmintrin.h&gt;<br><br> #define OVERRIDE_INNER_PRODUCT_SINGLE<br>-static inline float inner_product_single(const float *a, const float *b, unsigned int len)<br>+static inline void inner_product_single(float *ret, const float *a, const float *b, unsigned int len)<br>
{<br> int i;<br>- float ret;<br> __m128 sum = _mm_setzero_ps();<br> for (i=0;i&lt;len;i+=8)<br> {<br>@@ -49,14 +48,12 @@ static inline float inner_product_single(const float *a, const float *b, unsigne<br> }<br>
sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));<br> sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));<br>- _mm_store_ss(&ret, sum);<br>- return ret;<br>+ _mm_store_ss(ret, sum);<br> }<br><br> #define OVERRIDE_INTERPOLATE_PRODUCT_SINGLE<br>
-static inline float interpolate_product_single(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {<br>+static inline void interpolate_product_single(float *ret, const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {<br>
int i;<br>- float ret;<br> __m128 sum = _mm_setzero_ps();<br> __m128 f = _mm_loadu_ps(frac);<br> for(i=0;i&lt;len;i+=2)<br>@@ -67,18 +64,16 @@ static inline float interpolate_product_single(const float *a, const float *b, u<br>
sum = _mm_mul_ps(f, sum);<br> sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));<br> sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));<br>- _mm_store_ss(&ret, sum);<br>- return ret;<br>+ _mm_store_ss(ret, sum);<br>
}<br><br> #ifdef _USE_SSE2<br> #include &lt;emmintrin.h&gt;<br> #define OVERRIDE_INNER_PRODUCT_DOUBLE<br><br>-static inline double inner_product_double(const float *a, const float *b, unsigned int len)<br>+static inline void inner_product_double(double *ret, const float *a, const float *b, unsigned int len)<br>
{<br> int i;<br>- double ret;<br> __m128d sum = _mm_setzero_pd();<br> __m128 t;<br> for (i=0;i&lt;len;i+=8)<br>@@ -92,14 +87,12 @@ static inline double inner_product_double(const float *a, const float *b, unsign<br>
sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));<br> }<br> sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));<br>- _mm_store_sd(&ret, sum);<br>- return ret;<br>+ _mm_store_sd(ret, sum);<br> }<br>
<br> #define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE<br>-static inline double interpolate_product_double(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {<br>+static inline void interpolate_product_double(double *ret, const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {<br>
int i;<br>- double ret;<br> __m128d sum;<br> __m128d sum1 = _mm_setzero_pd();<br> __m128d sum2 = _mm_setzero_pd();<br>@@ -121,8 +114,7 @@ static inline double interpolate_product_double(const float *a, const float *b,<br>
sum2 = _mm_mul_pd(f2, sum2);<br> sum = _mm_add_pd(sum1, sum2);<br> sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));<br>- _mm_store_sd(&ret, sum);<br>- return ret;<br>+ _mm_store_sd(ret, sum);<br> }<br><br> #endif<br>
<font color="#888888">--<br>1.6.4.msysgit.0.19.gd78f4<br><br>_______________________________________________<br>Speex-dev mailing list<br><a href="mailto:Speex-dev@xiph.org">Speex-dev@xiph.org</a><br><a href="http://lists.xiph.org/mailman/listinfo/speex-dev" target="_blank">http://lists.xiph.org/mailman/listinfo/speex-dev</a><br>
</font></blockquote></div><br>