[Flac-dev] Altivec, automake
Brady Patterson
brady at spaceship.com
Sun Jul 25 16:51:02 PDT 2004
Here's what I listed in that email. Merging doesn't appear to be necessary. If
you have any build problems, let me know.
Note that my detection code is Darwin-specific. It's a BSD call (sysctl()), so
a change to the platform-detection macros should enable it to work on other
BSDs. However, I don't know what that change would be, and I couldn't determine
any safe way to do the check on Linux, nor do I have any way to test anything
other than OS X. The AltiVec code itself should work on any platform with AltiVec.
Regarding performance, command-line decoding will see a modest improvement, but
it's really bottlenecked by the MD5 checking (and I don't have a clue as to how
to optimize that). Real-time decoding should be improved substantially -- I've
been meaning to test that and will get back to you with the results.
--
Brady Patterson (brady at spaceship.com)
RLRR LRLL RLLR LRRL RRLR LLRL
On Sun, 25 Jul 2004, Josh Coalson wrote:
> PS, Brady, do you still have these patches? you don't have to
> make them current, just send them as-is, I can merge them.
-------------- next part --------------
Index: configure.in
===================================================================
RCS file: /cvsroot/flac/flac/configure.in,v
retrieving revision 1.82
diff -c -r1.82 configure.in
*** configure.in 19 May 2003 23:59:49 -0000 1.82
--- configure.in 25 Jul 2004 23:13:35 -0000
***************
*** 208,213 ****
--- 208,225 ----
AC_DEFINE(FLAC__USE_3DNOW)
fi
+ AC_ARG_ENABLE(altivec,
+ [ --disable-altivec Disable Altivec optimizations],
+ [case "${enableval}" in
+ yes) use_altivec=true ;;
+ no) use_altivec=false ;;
+ *) AC_MSG_ERROR(bad value ${enableval} for --enable-altivec) ;;
+ esac],[use_altivec=true])
+ AM_CONDITIONAL(FLaC__USE_ALTIVEC, test x$use_altivec = xtrue)
+ if test x$use_altivec = xtrue ; then
+ AC_DEFINE(FLAC__USE_ALTIVEC)
+ fi
+
AC_ARG_ENABLE(local-xmms-plugin,
[ --enable-local-xmms-plugin Install XMMS plugin to ~/.xmms/Plugins instead of system location],
[case "${enableval}" in
***************
*** 380,385 ****
--- 392,398 ----
AH_TEMPLATE(FLAC__NO_ASM, [define to disable use of assembly code])
AH_TEMPLATE(FLAC__SSE_OS, [define if your operating system supports SSE instructions])
AH_TEMPLATE(FLAC__USE_3DNOW, [define to enable use of 3Dnow! instructions])
+ AH_TEMPLATE(FLAC__USE_ALTIVEC, [define to enable use of Altivec instructions])
AH_TEMPLATE(ID3LIB_MAJOR, [define to major version number of id3lib])
AH_TEMPLATE(ID3LIB_MINOR, [define to minor version number of id3lib])
AH_TEMPLATE(ID3LIB_PATCH, [define to patch level of id3lib])
***************
*** 389,394 ****
--- 402,408 ----
src/Makefile \
src/libFLAC/Makefile \
src/libFLAC/ia32/Makefile \
+ src/libFLAC/ppc/Makefile \
src/libFLAC/include/Makefile \
src/libFLAC/include/private/Makefile \
src/libFLAC/include/protected/Makefile \
-------------- next part --------------
Index: cpu.c
===================================================================
RCS file: /cvsroot/flac/flac/src/libFLAC/cpu.c,v
retrieving revision 1.14
diff -c -r1.14 cpu.c
*** cpu.c 31 Jan 2003 23:34:57 -0000 1.14
--- cpu.c 25 Jul 2004 23:16:52 -0000
***************
*** 37,42 ****
--- 37,50 ----
#include <config.h>
#endif
+ #if defined FLAC__CPU_PPC
+ #if !defined FLAC__NO_ASM
+ #if defined __APPLE__ && defined __MACH__
+ #include <sys/sysctl.h>
+ #endif /* __APPLE__ && __MACH__ */
+ #endif /* FLAC__NO_ASM */
+ #endif /* FLAC__CPU_PPC */
+
const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV = 0x00008000;
const unsigned FLAC__CPUINFO_IA32_CPUID_MMX = 0x00800000;
const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR = 0x01000000;
***************
*** 78,83 ****
--- 86,115 ----
#else
info->use_asm = false;
#endif
+ #elif defined FLAC__CPU_PPC
+ info->type = FLAC__CPUINFO_TYPE_PPC;
+ #if !defined FLAC__NO_ASM
+ info->use_asm = true;
+ #ifdef FLAC__USE_ALTIVEC
+ #if defined __APPLE__ && defined __MACH__
+ {
+ int selectors[2] = { CTL_HW, HW_VECTORUNIT };
+ int result = 0;
+ size_t length = sizeof(result);
+ int error = sysctl(selectors, 2, &result, &length, 0, 0);
+
+ info->data.ppc.altivec = error==0 ? result!=0 : 0;
+ }
+ #else /* __APPLE__ && __MACH__ */
+ /* don't know of any other thread-safe way to check */
+ info->data.ppc.altivec = 0;
+ #endif /* __APPLE__ && __MACH__ */
+ #else /* FLAC__USE_ALTIVEC */
+ info->data.ppc.altivec = 0;
+ #endif /* FLAC__USE_ALTIVEC */
+ #else /* FLAC__NO_ASM */
+ info->use_asm = false;
+ #endif /* FLAC__NO_ASM */
#else
info->type = FLAC__CPUINFO_TYPE_UNKNOWN;
info->use_asm = false;
-------------- next part --------------
Index: stream_decoder.c
===================================================================
RCS file: /cvsroot/flac/flac/src/libFLAC/stream_decoder.c,v
retrieving revision 1.87
diff -c -r1.87 stream_decoder.c
*** stream_decoder.c 20 May 2003 00:01:50 -0000 1.87
--- stream_decoder.c 25 Jul 2004 23:17:39 -0000
***************
*** 101,110 ****
void (*local_lpc_restore_signal)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
void (*local_lpc_restore_signal_64bit)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
void (*local_lpc_restore_signal_16bit)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
void *client_data;
FLAC__BitBuffer *input;
FLAC__int32 *output[FLAC__MAX_CHANNELS];
! FLAC__int32 *residual[FLAC__MAX_CHANNELS];
FLAC__EntropyCodingMethod_PartitionedRiceContents partitioned_rice_contents[FLAC__MAX_CHANNELS];
unsigned output_capacity, output_channels;
FLAC__uint32 last_frame_number;
--- 101,111 ----
void (*local_lpc_restore_signal)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
void (*local_lpc_restore_signal_64bit)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
void (*local_lpc_restore_signal_16bit)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
+ void (*local_lpc_restore_signal_16bit_order8)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
void *client_data;
FLAC__BitBuffer *input;
FLAC__int32 *output[FLAC__MAX_CHANNELS];
! FLAC__int32 *residual[FLAC__MAX_CHANNELS]; /* must add 15 and mask low 4 bits before using */
FLAC__EntropyCodingMethod_PartitionedRiceContents partitioned_rice_contents[FLAC__MAX_CHANNELS];
unsigned output_capacity, output_channels;
FLAC__uint32 last_frame_number;
***************
*** 281,286 ****
--- 282,288 ----
decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal;
decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide;
decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal;
+ decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal;
/* now override with asm where appropriate */
#ifndef FLAC__NO_ASM
if(decoder->private_->cpuinfo.use_asm) {
***************
*** 290,301 ****
--- 292,311 ----
if(decoder->private_->cpuinfo.data.ia32.mmx) {
decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32;
decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32_mmx;
+ decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_asm_ia32_mmx;
}
else {
decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32;
decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32;
+ decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_asm_ia32;
}
#endif
+ #elif defined FLAC__CPU_PPC
+ FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_PPC);
+ if(decoder->private_->cpuinfo.data.ppc.altivec) {
+ decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ppc_altivec_16;
+ decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8;
+ }
#endif
}
#endif
***************
*** 748,754 ****
memset(tmp, 0, sizeof(FLAC__int32)*4);
decoder->private_->output[i] = tmp + 4;
! tmp = (FLAC__int32*)malloc(sizeof(FLAC__int32)*size);
if(tmp == 0) {
decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
return false;
--- 758,766 ----
memset(tmp, 0, sizeof(FLAC__int32)*4);
decoder->private_->output[i] = tmp + 4;
! /* need quadword alignment for vector optimizations: */
! /* allocate extra 15 bytes; then must add 15 and mask low 4 bits before using */
! tmp = (FLAC__int32*)malloc(sizeof(FLAC__int32)*size+15U);
if(tmp == 0) {
decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
return false;
***************
*** 1809,1818 ****
FLAC__int32 i32;
FLAC__uint32 u32;
unsigned u;
decoder->private_->frame.subframes[channel].type = FLAC__SUBFRAME_TYPE_FIXED;
! subframe->residual = decoder->private_->residual[channel];
subframe->order = order;
/* read warm-up samples */
--- 1821,1831 ----
FLAC__int32 i32;
FLAC__uint32 u32;
unsigned u;
+ FLAC__int32 *residual = (FLAC__int32 *)((long)decoder->private_->residual[channel]+15U & ~0xf);
decoder->private_->frame.subframes[channel].type = FLAC__SUBFRAME_TYPE_FIXED;
! subframe->residual = residual;
subframe->order = order;
/* read warm-up samples */
***************
*** 1841,1847 ****
/* read residual */
switch(subframe->entropy_coding_method.type) {
case FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE:
! if(!read_residual_partitioned_rice_(decoder, order, subframe->entropy_coding_method.data.partitioned_rice.order, &decoder->private_->partitioned_rice_contents[channel], decoder->private_->residual[channel]))
return false;
break;
default:
--- 1854,1860 ----
/* read residual */
switch(subframe->entropy_coding_method.type) {
case FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE:
! if(!read_residual_partitioned_rice_(decoder, order, subframe->entropy_coding_method.data.partitioned_rice.order, &decoder->private_->partitioned_rice_contents[channel], residual))
return false;
break;
default:
***************
*** 1850,1856 ****
/* decode the subframe */
memcpy(decoder->private_->output[channel], subframe->warmup, sizeof(FLAC__int32) * order);
! FLAC__fixed_restore_signal(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, order, decoder->private_->output[channel]+order);
return true;
}
--- 1863,1869 ----
/* decode the subframe */
memcpy(decoder->private_->output[channel], subframe->warmup, sizeof(FLAC__int32) * order);
! FLAC__fixed_restore_signal(residual, decoder->private_->frame.header.blocksize-order, order, decoder->private_->output[channel]+order);
return true;
}
***************
*** 1861,1870 ****
FLAC__int32 i32;
FLAC__uint32 u32;
unsigned u;
decoder->private_->frame.subframes[channel].type = FLAC__SUBFRAME_TYPE_LPC;
! subframe->residual = decoder->private_->residual[channel];
subframe->order = order;
/* read warm-up samples */
--- 1874,1884 ----
FLAC__int32 i32;
FLAC__uint32 u32;
unsigned u;
+ FLAC__int32 *residual = (FLAC__int32 *)((long)decoder->private_->residual[channel]+15U & ~0xf);
decoder->private_->frame.subframes[channel].type = FLAC__SUBFRAME_TYPE_LPC;
! subframe->residual = residual;
subframe->order = order;
/* read warm-up samples */
***************
*** 1915,1921 ****
/* read residual */
switch(subframe->entropy_coding_method.type) {
case FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE:
! if(!read_residual_partitioned_rice_(decoder, order, subframe->entropy_coding_method.data.partitioned_rice.order, &decoder->private_->partitioned_rice_contents[channel], decoder->private_->residual[channel]))
return false;
break;
default:
--- 1929,1935 ----
/* read residual */
switch(subframe->entropy_coding_method.type) {
case FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE:
! if(!read_residual_partitioned_rice_(decoder, order, subframe->entropy_coding_method.data.partitioned_rice.order, &decoder->private_->partitioned_rice_contents[channel], residual))
return false;
break;
default:
***************
*** 1925,1936 ****
/* decode the subframe */
memcpy(decoder->private_->output[channel], subframe->warmup, sizeof(FLAC__int32) * order);
if(bps + subframe->qlp_coeff_precision + FLAC__bitmath_ilog2(order) <= 32)
! if(bps <= 16 && subframe->qlp_coeff_precision <= 16)
! decoder->private_->local_lpc_restore_signal_16bit(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
else
! decoder->private_->local_lpc_restore_signal(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
else
! decoder->private_->local_lpc_restore_signal_64bit(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
return true;
}
--- 1939,1954 ----
/* decode the subframe */
memcpy(decoder->private_->output[channel], subframe->warmup, sizeof(FLAC__int32) * order);
if(bps + subframe->qlp_coeff_precision + FLAC__bitmath_ilog2(order) <= 32)
! if(bps <= 16 && subframe->qlp_coeff_precision <= 16) {
! if(order <= 8)
! decoder->private_->local_lpc_restore_signal_16bit_order8(residual, decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
! else
! decoder->private_->local_lpc_restore_signal_16bit(residual, decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
! }
else
! decoder->private_->local_lpc_restore_signal(residual, decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
else
! decoder->private_->local_lpc_restore_signal_64bit(residual, decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
return true;
}
***************
*** 1938,1945 ****
FLAC__bool read_subframe_verbatim_(FLAC__StreamDecoder *decoder, unsigned channel, unsigned bps)
{
FLAC__Subframe_Verbatim *subframe = &decoder->private_->frame.subframes[channel].data.verbatim;
! FLAC__int32 x, *residual = decoder->private_->residual[channel];
unsigned i;
decoder->private_->frame.subframes[channel].type = FLAC__SUBFRAME_TYPE_VERBATIM;
--- 1956,1966 ----
FLAC__bool read_subframe_verbatim_(FLAC__StreamDecoder *decoder, unsigned channel, unsigned bps)
{
FLAC__Subframe_Verbatim *subframe = &decoder->private_->frame.subframes[channel].data.verbatim;
! FLAC__int32 x;
unsigned i;
+ FLAC__int32 *residual = (FLAC__int32 *)((long)decoder->private_->residual[channel]+15U & ~0xf);
+
+ FLAC__ASSERT((((long)residual) & 0xf) == 0);
decoder->private_->frame.subframes[channel].type = FLAC__SUBFRAME_TYPE_VERBATIM;
***************
*** 1965,1970 ****
--- 1986,1993 ----
const unsigned partitions = 1u << partition_order;
const unsigned partition_samples = partition_order > 0? decoder->private_->frame.header.blocksize >> partition_order : decoder->private_->frame.header.blocksize - predictor_order;
+ FLAC__ASSERT((((long)residual) & 0xf) == 0);
+
if(!FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(partitioned_rice_contents, max(6, partition_order))) {
decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
return false;
-------------- next part --------------
Index: cpu.h
===================================================================
RCS file: /cvsroot/flac/flac/src/libFLAC/include/private/cpu.h,v
retrieving revision 1.11
diff -c -r1.11 cpu.h
*** cpu.h 31 Jan 2003 23:34:58 -0000 1.11
--- cpu.h 25 Jul 2004 23:15:40 -0000
***************
*** 40,45 ****
--- 40,46 ----
typedef enum {
FLAC__CPUINFO_TYPE_IA32,
+ FLAC__CPUINFO_TYPE_PPC,
FLAC__CPUINFO_TYPE_UNKNOWN
} FLAC__CPUInfo_Type;
***************
*** 54,59 ****
--- 55,64 ----
FLAC__bool extmmx;
} FLAC__CPUInfo_IA32;
+ typedef struct {
+ FLAC__bool altivec;
+ } FLAC__CPUInfo_PPC;
+
extern const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV;
extern const unsigned FLAC__CPUINFO_IA32_CPUID_MMX;
extern const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR;
***************
*** 69,74 ****
--- 74,80 ----
FLAC__CPUInfo_Type type;
union {
FLAC__CPUInfo_IA32 ia32;
+ FLAC__CPUInfo_PPC ppc;
} data;
} FLAC__CPUInfo;
More information about the Flac-dev mailing list