[xiph-commits] r17751 - trunk/theora/lib/arm
tterribe at svn.xiph.org
tterribe at svn.xiph.org
Mon Dec 13 06:34:27 PST 2010
Author: tterribe
Date: 2010-12-13 06:34:27 -0800 (Mon, 13 Dec 2010)
New Revision: 17751
Modified:
trunk/theora/lib/arm/armenquant.s
Log:
Formatting clean-up for oc_enc_quantize_neon().
Also knock another instruction off the PMOVMSKB emulation.
Modified: trunk/theora/lib/arm/armenquant.s
===================================================================
--- trunk/theora/lib/arm/armenquant.s 2010-12-13 07:03:07 UTC (rev 17750)
+++ trunk/theora/lib/arm/armenquant.s 2010-12-13 14:34:27 UTC (rev 17751)
@@ -54,7 +54,7 @@
oc_enc_enquant_table_fixup_neon PROC
; r0 = void *_enquant[3][3][2]
; r1 = int _nqis
- STR r14, [r13,#-4]!
+ STR r14,[r13,#-4]!
oeetf_neon_lp1
SUBS r1, r1, #1
BEQ oeetf_neon_end1
@@ -87,7 +87,7 @@
; r2 = const ogg_int16_t _dequant[64]
; r3 = const void *_enquant
STMFD r13!,{r4,r5,r14}
- ; The loop counter goes in the high half of r14
+ ; The loop counter goes in the high half of r14.
MOV r14,#0xFFFCFFFF
oeq_neon_lp
; Load the next two rows of the data and the quant matrices.
@@ -121,10 +121,10 @@
VSUB.S16 Q9, Q9, Q1
VST1.64 {D16,D17,D18,D19},[r0@128]!
; Now pull out a bitfield marking the non-zero coefficients.
- ; Sadly, NEON has no PMOVMSKB; emulating it requires 7 instructions.
VQMOVN.S16 D16,Q8
VQMOVN.S16 D17,Q9
VCEQ.S8 Q8, #0
+ ; Sadly, NEON has no PMOVMSKB; emulating it requires 6 instructions.
VNEG.S8 Q8, Q8 ; D16=.......3.......2.......1.......0
; .......7.......6.......5.......4
; D17=.......B.......A.......9.......8
@@ -133,14 +133,12 @@
; .......B.......3.......A.......2
; D17=.......D.......5.......C.......4
; .......F.......7.......E.......6
- VSHL.U8 D17,D17,#4 ; D17=...D.......5.......C.......4....
- ; ...F.......7.......E.......6....
- VORR D16,D16,D17 ; D16=...D...9...5...1...C...8...4...0
+ VSLI.8 D16,D17,#4 ; D16=...D...9...5...1...C...8...4...0
; ...F...B...7...3...E...A...6...2
; Shift over the bitfields from previous iterations and
; finish compacting the bitfield from the last iteration.
- ORR r4, r5, LSL #2 ; r4 =.F.D.B.9.7.5.3.1.E.C.A.8.6.4.2.0
- ORR r4, r4, LSR #15 ; r4 =.F.D.B.9.7.5.3.1FEDCBA9876543210
+ ORR r4, r4, r5, LSL #2 ; r4 =.F.D.B.9.7.5.3.1.E.C.A.8.6.4.2.0
+ ORR r4, r4, r4, LSR #15 ; r4 =.F.D.B.9.7.5.3.1FEDCBA9876543210
PKHTB r14,r14,r12,ASR #16 ; r14=i|A
PKHBT r12,r4, r12,LSL #16 ; r12=B|C
VMOV r4, r5, D16
@@ -151,8 +149,8 @@
CLZNE r0, r0
RSBNE r0, r0, #31
; Stall 8-10 more cycles waiting for the last transfer.
- ORR r4, r5, LSL #2 ; r4 =.F.D.B.9.7.5.3.1.E.C.A.8.6.4.2.0
- ORR r4, r4, LSR #15 ; r4 =.F.D.B.9.7.5.3.1FEDCBA9876543210
+ ORR r4, r4, r5, LSL #2 ; r4 =.F.D.B.9.7.5.3.1.E.C.A.8.6.4.2.0
+ ORR r4, r4, r4, LSR #15 ; r4 =.F.D.B.9.7.5.3.1FEDCBA9876543210
PKHBT r1, r12,r4, LSL #16 ; r1 = D|C
MVNS r1, r1
CLZNE r1, r1
More information about the commits mailing list