[Vorbis-dev] [RFC PATCH v1 1/3] lib/simd: Introduce vectorization framework for libvorbis
Viswanath Puttagunta
viswanath.puttagunta at linaro.org
Wed Sep 10 12:15:07 PDT 2014
Many CPUs have vectorization support that can greatly
increase the performance of certain signal processing functions.
Introduce a generic vectorization framework into libvorbis,
together with an initial ARM NEON implementation.
Signed-off-by: Viswanath Puttagunta <viswanath.puttagunta at linaro.org>
---
configure.ac | 5 +++++
lib/Makefile.am | 6 +++---
lib/simd/Makefile.am | 9 +++++++++
lib/simd/neon_simd.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
lib/simd/no_simd.c | 28 ++++++++++++++++++++++++++++
lib/simd/simd.h | 29 +++++++++++++++++++++++++++++
6 files changed, 121 insertions(+), 3 deletions(-)
create mode 100644 lib/simd/Makefile.am
create mode 100644 lib/simd/neon_simd.c
create mode 100644 lib/simd/no_simd.c
create mode 100644 lib/simd/simd.h
diff --git a/configure.ac b/configure.ac
index ee2db99..28b4aaa 100644
--- a/configure.ac
+++ b/configure.ac
@@ -93,6 +93,10 @@ AC_ARG_ENABLE(examples,
AM_CONDITIONAL(BUILD_EXAMPLES, [test "x$enable_examples" = xyes])
+AC_ARG_ENABLE([arm-neon],
+ [enables arm_neon],[arm_neon=${enableval}],[arm_neon=no])
+AM_CONDITIONAL([ARM_NEON], [test x$arm_neon = xyes])
+
dnl --------------------------------------------------
dnl Set build flags based on environment
dnl --------------------------------------------------
@@ -275,6 +279,7 @@ AC_CONFIG_FILES([
Makefile
m4/Makefile
lib/Makefile
+lib/simd/Makefile
lib/modes/Makefile
lib/books/Makefile
lib/books/coupled/Makefile
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 50f7ea4..0727398 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -1,8 +1,8 @@
## Process this file with automake to produce Makefile.in
-SUBDIRS = modes books
+SUBDIRS = simd modes books
-INCLUDES = -I$(top_srcdir)/include @OGG_CFLAGS@
+INCLUDES = -I$(top_srcdir)/include @OGG_CFLAGS@ -I$(top_srcdir)/lib/simd
lib_LTLIBRARIES = libvorbis.la libvorbisfile.la libvorbisenc.la
@@ -16,7 +16,7 @@ libvorbis_la_SOURCES = mdct.c smallft.c block.c envelope.c window.c lsp.c \
registry.h scales.h window.h lookup.h lookup_data.h\
codec_internal.h backends.h bitrate.h
libvorbis_la_LDFLAGS = -no-undefined -version-info @V_LIB_CURRENT@:@V_LIB_REVISION@:@V_LIB_AGE@
-libvorbis_la_LIBADD = @VORBIS_LIBS@ @OGG_LIBS@
+libvorbis_la_LIBADD = @VORBIS_LIBS@ @OGG_LIBS@ ./simd/libsimd.la
libvorbisfile_la_SOURCES = vorbisfile.c
libvorbisfile_la_LDFLAGS = -no-undefined -version-info @VF_LIB_CURRENT@:@VF_LIB_REVISION@:@VF_LIB_AGE@
diff --git a/lib/simd/Makefile.am b/lib/simd/Makefile.am
new file mode 100644
index 0000000..7225431
--- /dev/null
+++ b/lib/simd/Makefile.am
@@ -0,0 +1,9 @@
+INCLUDES = -I$(top_srcdir)/include -I$(top_srcdir)/lib @OGG_CFLAGS@
+noinst_LTLIBRARIES = libsimd.la
+## -mfpu/-O3 are compiler options, so they go in _CFLAGS rather than _CPPFLAGS
+if ARM_NEON
+libsimd_la_CFLAGS = -mfpu=neon-vfpv4 -O3
+libsimd_la_SOURCES = simd.h neon_simd.c
+else
+libsimd_la_SOURCES = simd.h no_simd.c
+endif
diff --git a/lib/simd/neon_simd.c b/lib/simd/neon_simd.c
new file mode 100644
index 0000000..381d704
--- /dev/null
+++ b/lib/simd/neon_simd.c
@@ -0,0 +1,47 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 *
+ * by the Xiph.Org Foundation http://www.xiph.org/ *
+ * *
+ ********************************************************************/
+
+/* Optimized functions using ARM NEON */
+
+#include <stdint.h>
+#include <arm_neon.h>
+#include "simd.h"
+
+/* wave_operation: NEON implementation of the pseudo-code
+ * for(i = 0; i < n; i++)
+ * a[i] = a[i]*b[n-i-1] + b[i]*c[i]
+ * Assumes n is multiple of 4; a is overwritten in place, 4 floats per pass
+ */
+void wave_operation(float *a, float *b, float *c, int32_t n) {
+ float32x4_t result, tmpa, tmpb, tmpc, tmpd;
+ float *ai, *bi, *ci, *di; /* ai, ci, di walk a, c, b forwards; bi walks b backwards */
+ float32x2_t vec64l, vec64h; /* low / high 64-bit halves of the b block being reversed */
+
+ for (ai = a, bi = b+n-4, ci=c, di=b; \
+ ai < (a+n); ai += 4, bi-=4, ci+=4, di+=4) {
+ tmpa = vld1q_f32(ai); /* a[i..i+3] */
+ tmpb = vld1q_f32(bi); /* b[n-i-4..n-i-1], still in ascending order */
+ __builtin_prefetch(ai); /* NOTE(review): hints the block just loaded, not a later one - confirm intent */
+ vec64l = vget_low_f32(tmpb);
+ vec64h = vget_high_f32(tmpb);
+ tmpb = vcombine_f32(vec64h, vec64l); /* swap the two 64-bit halves */
+ tmpb = vrev64q_f32(tmpb); /* swap within each half: lanes are now b[n-i-1]..b[n-i-4] */
+ tmpc = vld1q_f32(ci); /* c[i..i+3] */
+ tmpd = vld1q_f32(di); /* b[i..i+3] */
+ __builtin_prefetch(ci); /* NOTE(review): same as above, ci was already loaded */
+
+ result = vmulq_f32(tmpa, tmpb); /* a[i]*b[n-i-1] */
+ result = vmlaq_f32(result, tmpc, tmpd); /* ... + c[i]*b[i] */
+
+ vst1q_f32(ai, result); /* store over a[i..i+3] */
+ }
+}
diff --git a/lib/simd/no_simd.c b/lib/simd/no_simd.c
new file mode 100644
index 0000000..e8efacb
--- /dev/null
+++ b/lib/simd/no_simd.c
@@ -0,0 +1,28 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 *
+ * by the Xiph.Org Foundation http://www.xiph.org/ *
+ * *
+ ********************************************************************/
+
+/* Implementation when CPU Vectorization is not available */
+
+#include <stdint.h>
+#include "simd.h"
+#include "os.h"
+
+/* wave_operation: Scalar fallback; implements pseudo-code (a updated in place)
+ * for(i = 0; i < n; i++)
+ * a[i] = a[i]*b[n-i-1] + b[i]*c[i]
+ */
+void wave_operation(float *a, float *b, float *c, int32_t n) {
+ int32_t i;
+
+ for (i = 0; i < n; i++)
+ a[i] = a[i]*b[n-i-1] + b[i]*c[i]; /* b is read from both ends: b[n-i-1] and b[i] */
+}
diff --git a/lib/simd/simd.h b/lib/simd/simd.h
new file mode 100644
index 0000000..8565434
--- /dev/null
+++ b/lib/simd/simd.h
@@ -0,0 +1,29 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 *
+ * by the Xiph.Org Foundation http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function: Declarations for functions that can be optimized with
+ CPU vectorization if available
+ ********************************************************************/
+
+#ifndef SIMD_H
+#define SIMD_H
+
+#include "os.h"
+
+/* wave_operation: Must implement pseudo-code (a is modified in place)
+ * for(i = 0; i < n; i++)
+ * a[i] = a[i]*b[n-i-1] + b[i]*c[i]
+ * n must be multiple of 4 (the NEON version steps 4 floats at a time)
+ */
+void wave_operation(float *a, float *b, float *c, int32_t n);
+
+#endif
--
1.7.9.5
More information about the Vorbis-dev
mailing list