[xiph-commits] r17749 - in trunk/theora/lib: . arm
tterribe at svn.xiph.org
tterribe at svn.xiph.org
Sun Dec 12 22:27:49 PST 2010
Author: tterribe
Date: 2010-12-12 22:27:49 -0800 (Sun, 12 Dec 2010)
New Revision: 17749
Added:
trunk/theora/lib/arm/armenc.c
trunk/theora/lib/arm/armenc.h
trunk/theora/lib/arm/armenquant.s
Modified:
trunk/theora/lib/Makefile.am
trunk/theora/lib/encint.h
Log:
Add a NEON version of oc_enc_quantize().
Modified: trunk/theora/lib/Makefile.am
===================================================================
--- trunk/theora/lib/Makefile.am 2010-12-12 09:40:50 UTC (rev 17748)
+++ trunk/theora/lib/Makefile.am 2010-12-13 06:27:49 UTC (rev 17749)
@@ -3,14 +3,16 @@
EXTRA_DIST = \
encoder_disabled.c \
+ arm/arm2gnu.pl \
+ arm/armopts.s.in \
arm/armcpu.c \
arm/armbits.h \
arm/armbits.s \
arm/armfrag.s \
arm/armidct.s \
arm/armint.h \
- arm/armopts.s.in \
- arm/arm2gnu.pl \
+ arm/armenc.h \
+ arm/armenquant.s \
c64x/c64xint.h \
c64x/c64xdec.h \
x86/mmxfrag.c \
@@ -61,7 +63,9 @@
encoder_shared_x86_64_sources =
-encoder_uniq_arm_sources =
+encoder_uniq_arm_sources = \
+ armenquant-gnu.S \
+ arm/armenc.c
if CPU_arm
BUILT_SOURCES = \
@@ -268,12 +272,14 @@
armfrag-gnu.S \
armidct-gnu.S \
armloop-gnu.S \
- armopts-gnu.S
+ armopts-gnu.S \
+ armenquant-gnu.S
# automake doesn't do dependency tracking for asm files, that I can tell
armfrag-gnu.S: armopts-gnu.S
armidct-gnu.S: armopts-gnu.S
armloop-gnu.S: armopts-gnu.S
+armenquant-gnu.S: armopts-gnu.S
# convert ARM asm to GNU as format
%-gnu.S: $(srcdir)/arm/%.s
Copied: trunk/theora/lib/arm/armenc.c (from rev 17748, trunk/theora/lib/arm/armstate.c)
===================================================================
--- trunk/theora/lib/arm/armenc.c (rev 0)
+++ trunk/theora/lib/arm/armenc.c 2010-12-13 06:27:49 UTC (rev 17749)
@@ -0,0 +1,54 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010 *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function:
+ last mod: $Id: x86state.c 17344 2010-07-21 01:42:18Z tterribe $
+
+ ********************************************************************/
+#include "armenc.h"
+
+#if defined(OC_ARM_ASM)
+/*Installs ARM encoder routines over the C initializer, keyed on CPU flags.*/
+void oc_enc_accel_init_arm(oc_enc_ctx *_enc){
+ ogg_uint32_t cpu_flags;
+ cpu_flags=_enc->state.cpu_flags;
+ oc_enc_accel_init_c(_enc); /*Run the C initializer first; override below.*/
+# if defined(OC_ENC_USE_VTABLE)
+ /*TODO: Add ARMv4 functions here.*/
+# endif
+# if defined(OC_ARM_ASM_EDSP)
+ if(cpu_flags&OC_CPU_ARM_EDSP){
+# if defined(OC_ENC_USE_VTABLE) /*Encoder vtable guard, matching the one above.*/
+ /*TODO: Add EDSP functions here.*/
+# endif
+ }
+# if defined(OC_ARM_ASM_MEDIA)
+ if(cpu_flags&OC_CPU_ARM_MEDIA){
+# if defined(OC_ENC_USE_VTABLE)
+ /*TODO: Add Media functions here.*/
+# endif
+ }
+# if defined(OC_ARM_ASM_NEON)
+ if(cpu_flags&OC_CPU_ARM_NEON){
+# if defined(OC_ENC_USE_VTABLE)
+ _enc->opt_vtable.enquant_table_init=oc_enc_enquant_table_init_neon;
+ _enc->opt_vtable.enquant_table_fixup=oc_enc_enquant_table_fixup_neon;
+ _enc->opt_vtable.quantize=oc_enc_quantize_neon;
+# endif /*The size and alignment below are set outside the vtable guard.*/
+ _enc->opt_data.enquant_table_size=128*sizeof(ogg_uint16_t);
+ _enc->opt_data.enquant_table_alignment=16;
+ }
+# endif /*OC_ARM_ASM_NEON*/
+# endif /*OC_ARM_ASM_MEDIA*/
+# endif /*OC_ARM_ASM_EDSP*/
+}
+#endif
Copied: trunk/theora/lib/arm/armenc.h (from rev 17748, trunk/theora/lib/arm/armint.h)
===================================================================
--- trunk/theora/lib/arm/armenc.h (rev 0)
+++ trunk/theora/lib/arm/armenc.h 2010-12-13 06:27:49 UTC (rev 17749)
@@ -0,0 +1,44 @@
+/********************************************************************
+ * *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+ * *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010 *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ * *
+ ********************************************************************
+
+ function:
+ last mod: $Id: x86int.h 17344 2010-07-21 01:42:18Z tterribe $
+
+ ********************************************************************/
+#if !defined(_arm_armenc_H)
+# define _arm_armenc_H (1)
+# include "armint.h"
+/*With ARM asm enabled, pick the ARM init routine before encint.h is included.*/
+# if defined(OC_ARM_ASM)
+# define oc_enc_accel_init oc_enc_accel_init_arm
+# define OC_ENC_USE_VTABLE (1)
+# endif
+
+# include "../encint.h"
+
+# if defined(OC_ARM_ASM)
+void oc_enc_accel_init_arm(oc_enc_ctx *_enc);
+/*NEON routines: declared only when EDSP, MEDIA and NEON asm are all enabled.*/
+# if defined(OC_ARM_ASM_EDSP)
+# if defined(OC_ARM_ASM_MEDIA)
+# if defined(OC_ARM_ASM_NEON)
+void oc_enc_enquant_table_init_neon(void *_enquant,
+ const ogg_uint16_t _dequant[64]);
+void oc_enc_enquant_table_fixup_neon(void *_enquant[3][3][2],int _nqis);
+int oc_enc_quantize_neon(ogg_int16_t _qdct[64],const ogg_int16_t _dct[64],
+ const ogg_uint16_t _dequant[64],const void *_enquant);
+# endif /*OC_ARM_ASM_NEON*/
+# endif /*OC_ARM_ASM_MEDIA*/
+# endif /*OC_ARM_ASM_EDSP*/
+# endif /*OC_ARM_ASM*/
+
+#endif
Added: trunk/theora/lib/arm/armenquant.s
===================================================================
--- trunk/theora/lib/arm/armenquant.s (rev 0)
+++ trunk/theora/lib/arm/armenquant.s 2010-12-13 06:27:49 UTC (rev 17749)
@@ -0,0 +1,164 @@
+;********************************************************************
+;* *
+;* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
+;* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
+;* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+;* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
+;* *
+;* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
+;* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+;* *
+;********************************************************************
+;
+; function:
+; last mod: $Id: mmxstate.c 17247 2010-05-28 05:35:32Z tterribe $
+;
+;********************************************************************
+
+ AREA |.text|, CODE, READONLY
+
+ GET armopts.s
+
+ [ OC_ARM_ASM_NEON
+ EXPORT oc_enc_enquant_table_init_neon
+ EXPORT oc_enc_enquant_table_fixup_neon
+ EXPORT oc_enc_quantize_neon
+; Builds the table with the C routine, then reorders it in place for NEON use.
+oc_enc_enquant_table_init_neon PROC
+ ; r0 = void *_enquant
+ ; r1 = const ogg_uint16_t _dequant[64]
+ STMFD r13!,{r0,r14} ; Save _enquant and the return address.
+ ; Initialize the table using the C routine.
+ BLX oc_enc_enquant_table_init_c ; NOTE(review): no IMPORT for this symbol is visible here; confirm for armasm.
+ LDR r0, [r13],#4 ; Pop the saved _enquant pointer.
+ MOV r1, #2 ; Two passes, each covering D16-D31 (128 bytes).
+ ; Now partially de-interleave it, so that the first row is all
+ ; multipliers, the second row is all shift factors, etc.
+ ; Also, negate the shifts for use by VSHL.
+oeeti_neon_lp
+ SUBS r1, r1, #1
+ VLDMIA r0, {D16-D31} ; No writeback here: the store below advances r0.
+ VUZP.16 Q8, Q9 ; Q8=multipliers, Q9=shifts.
+ VNEG.S16 Q9, Q9
+ VUZP.16 Q10,Q11
+ VNEG.S16 Q11,Q11
+ VUZP.16 Q12,Q13
+ VNEG.S16 Q13,Q13
+ VUZP.16 Q14,Q15
+ VNEG.S16 Q15,Q15
+ VSTMIA r0!,{D16-D31} ; Write back over the same 128 bytes and advance.
+ BNE oeeti_neon_lp ; Tests the flags set by the SUBS above.
+ LDR PC, [r13],#4 ; Return by popping the saved r14 into PC.
+ ENDP
+; Copies the first multiplier/shift pair of _enquant[i][j][k] to [i][j+1][k].
+oc_enc_enquant_table_fixup_neon PROC
+ ; r0 = void *_enquant[3][3][2]
+ ; r1 = int _nqis
+ STR r14, [r13,#-4]! ; r14 is reused as the inner loop counter.
+oeetf_neon_lp1
+ SUBS r1, r1, #1 ; The outer loop body runs _nqis-1 times.
+ BEQ oeetf_neon_end1
+ MOV r14,#3 ; One inner pass per entry of the first dimension.
+oeetf_neon_lp2
+ LDR r2, [r0] ; Source table [i][j][0].
+ SUBS r14,r14,#1
+ LDRH r3, [r2] ; First multiplier.
+ LDRH r12,[r2,#16] ; First shift: the shift row follows 8 16-bit multipliers.
+ LDR r2, [r0,#8] ; Destination table [i][j+1][0].
+ STRH r3, [r2]
+ STRH r12,[r2,#16]
+ LDR r2, [r0,#4] ; Source table [i][j][1].
+ LDRH r3, [r2]
+ LDRH r12,[r2,#16]
+ LDR r2, [r0,#12] ; Destination table [i][j+1][1].
+ ADD r0, r0, #24 ; Advance to [i+1][j][0] (3*2 4-byte pointers).
+ STRH r3, [r2]
+ STRH r12,[r2,#16]
+ BNE oeetf_neon_lp2 ; Tests the flags set by SUBS r14 above.
+ SUB r0, r0, #64 ; 3*24-64=8: back to i=0, one step forward in j.
+ B oeetf_neon_lp1
+oeetf_neon_end1
+ LDR PC, [r13],#4 ; Return by popping the saved r14 into PC.
+ ENDP
+; Quantizes 64 coefficients; r0 returns the last non-zero index, or -1 if none.
+oc_enc_quantize_neon PROC
+ ; r0 = ogg_int16_t _qdct[64]
+ ; r1 = const ogg_int16_t _dct[64]
+ ; r2 = const ogg_uint16_t _dequant[64]
+ ; r3 = const void *_enquant
+ STMFD r13!,{r4,r5,r14}
+ ; The loop counter goes in the high half of r14 (-4: four passes of two rows).
+ MOV r14,#0xFFFCFFFF
+oeq_neon_lp
+ ; Load the next two rows of the data and the quant matrices.
+ VLD1.64 {D16,D17,D18,D19},[r1@128]!
+ VLD1.64 {D20,D21,D22,D23},[r2@128]!
+ ; Add in the signed rounding bias from the quantizers.
+ ; Note that the VHADD relies on the fact that the quantizers are all
+ ; even (they're in fact multiples of four) in order to round correctly
+ ; on the entries being negated.
+ VSHR.S16 Q0, Q8, #15
+ VSHR.S16 Q1, Q9, #15
+ VLD1.64 {D24,D25,D26,D27},[r3@128]!
+ VHADD.S16 Q10,Q0, Q10
+ VHADD.S16 Q11,Q1, Q11
+ VLD1.64 {D28,D29,D30,D31},[r3@128]!
+ ADDS r14,r14,#1<<16 ; Sets the flags tested by the BLT at the loop end.
+ VEOR.S16 Q10,Q0, Q10
+ VEOR.S16 Q11,Q1, Q11
+ VADD.S16 Q8, Q8, Q10
+ VADD.S16 Q9, Q9, Q11
+ ; Perform the actual division and save the result.
+ VQDMULH.S16 Q12,Q8, Q12
+ VQDMULH.S16 Q14,Q9, Q14
+ VADD.S16 Q8, Q8, Q8
+ VADD.S16 Q9, Q9, Q9
+ VADD.S16 Q8, Q8, Q12
+ VADD.S16 Q9, Q9, Q14
+ VSHL.S16 Q8, Q13
+ VSHL.S16 Q9, Q15
+ VSUB.S16 Q8, Q8, Q0
+ VSUB.S16 Q9, Q9, Q1
+ VST1.64 {D16,D17,D18,D19},[r0@128]!
+ ; Now pull out a bitfield marking the non-zero coefficients.
+ ; Sadly, NEON has no PMOVMSKB; emulating it requires 7 instructions.
+ VQMOVN.S16 D16,Q8
+ VQMOVN.S16 D17,Q9
+ VCEQ.S8 Q8, #0
+ VNEG.S8 Q8, Q8 ; D16=.......3.......2.......1.......0
+ ; .......7.......6.......5.......4
+ ; D17=.......B.......A.......9.......8
+ ; .......F.......E.......D.......C
+ VZIP.8 D16,D17 ; D16=.......9.......1.......8.......0
+ ; .......B.......3.......A.......2
+ ; D17=.......D.......5.......C.......4
+ ; .......F.......7.......E.......6
+ VSHL.U8 D17,D17,#4 ; D17=...D.......5.......C.......4....
+ ; ...F.......7.......E.......6....
+ VORR D16,D16,D17 ; D16=...D...9...5...1...C...8...4...0
+ ; ...F...B...7...3...E...A...6...2
+ ; Shift over the bitfields from previous iterations and
+ ; finish compacting the bitfield from the last iteration.
+ ORR r4, r5, LSL #2 ; r4 =.F.D.B.9.7.5.3.1.E.C.A.8.6.4.2.0
+ ORR r4, r4, LSR #15 ; r4 =.F.D.B.9.7.5.3.1FEDCBA9876543210
+ PKHTB r14,r14,r12,ASR #16 ; r14=i|A
+ PKHBT r12,r4, r12,LSL #16 ; r12=B|C
+ VMOV r4, r5, D16 ; Consumed by the ORRs of the next pass (or after the loop).
+ BLT oeq_neon_lp
+ ; Start with the low half while the NEON register transfers.
+ PKHBT r0, r14,r12 ; r0 =B|A
+ MVN r0, r0 ; Bits were set for zero coefficients; invert them.
+ CLZ r0, r0
+ RSB r0, r0, #31 ; Highest set bit of the low word, or -1 if it is empty.
+ ; Stall 8-10 more cycles waiting for the last transfer.
+ ORR r4, r5, LSL #2 ; r4 =.F.D.B.9.7.5.3.1.E.C.A.8.6.4.2.0
+ ORR r4, r4, LSR #15 ; r4 =.F.D.B.9.7.5.3.1FEDCBA9876543210
+ PKHBT r1, r12,r4, LSL #16 ; r1 = D|C
+ MVNS r1, r1 ; Z is set when the high word has no non-zero entry.
+ CLZNE r1, r1
+ RSBNE r0, r1, #63 ; Otherwise the high word overrides the result.
+ LDMFD r13!,{r4,r5,PC}
+ ENDP
+ ]
+
+ END
Modified: trunk/theora/lib/encint.h
===================================================================
--- trunk/theora/lib/encint.h 2010-12-12 09:40:50 UTC (rev 17748)
+++ trunk/theora/lib/encint.h 2010-12-13 06:27:49 UTC (rev 17749)
@@ -51,6 +51,9 @@
# include "x86/x86enc.h"
# endif
# endif
+# if defined(OC_ARM_ASM)
+# include "arm/armenc.h"
+# endif
# if !defined(oc_enc_accel_init)
# define oc_enc_accel_init oc_enc_accel_init_c
More information about the commits
mailing list