src/util/arm-algo.S (view raw)
1# Copyright (c) 2013-2014 Jeffrey Pfau
2#
3# This Source Code Form is subject to the terms of the Mozilla Public
4# License, v. 2.0. If a copy of the MPL was not distributed with this
5# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# r0: Destination
# r1: Source
# r2: Number of words to copy as halfwords
# Stores the low halfword of every source word to consecutive halfwords of
# the destination. A count of zero copies nothing. Nothing is returned;
# r0-r3 and the flags may be overwritten, r4-r10 are saved and restored.
.global _to16Bit
_to16Bit:
    push {r4-r10}
    mov r8, r0                      @ r8  = destination cursor
    mov r9, r1                      @ r9  = source cursor
    mov r10, r2                     @ r10 = words remaining
    @ Copy one word at a time until the remaining count is a multiple of 8.
.Lto16_single:
    tst r10, #7
    beq .Lto16_bulk
    ldr r0, [r9], #4
    strh r0, [r8], #2               @ strh keeps only the low 16 bits
    sub r10, r10, #1
    b .Lto16_single
.Lto16_bulk:
    @ The remaining count is now a multiple of 8 and may be zero (original
    @ count of zero, or fewer than 8 words in total). The block loop below
    @ decrements before it tests, so it must not be entered with zero or
    @ the counter wraps and the copy runs past both buffers.
    cmp r10, #0
    beq .Lto16_done
.Lto16_block:
    ldmia r9!, {r0-r7}              @ fetch 8 source words
    strh r0, [r8], #2
    strh r1, [r8], #2
    strh r2, [r8], #2
    strh r3, [r8], #2
    strh r4, [r8], #2
    strh r5, [r8], #2
    strh r6, [r8], #2
    strh r7, [r8], #2
    subs r10, r10, #8
    bne .Lto16_block
.Lto16_done:
    pop {r4-r10}
    bx lr
36
37#ifdef __ARM_NEON
# r0: Destination
# r1: Source
# r2: Width
# r3: Height
# Scales an image of 16-bit elements by two in both directions: each
# source element is written twice horizontally, and each source row is
# written to two consecutive destination rows. Source rows are read back
# to back; destination rows are Width * 4 bytes apart. Elements are
# processed in groups of four, so Width is expected to be a multiple of 4.
# Nothing is written when Width is below 4 or Height is zero.
# Overwrites r0-r3, q0, q1 and the flags; r4-r5 are saved and restored.
.global _neon2x
_neon2x:
    push {r4-r5}
    @ Both loops below decrement before testing, so a zero group count or
    @ a zero row count would wrap around and write far past the output.
    cmp r2, #4
    blo .Lneon2x_done
    cmp r3, #0
    beq .Lneon2x_done
    lsl r4, r2, #2                  @ r4 = bytes per destination row
.Lneon2x_row:
    mov r2, r4, lsr #4              @ r2 = groups of four elements per row
    add r5, r0, r4                  @ r5 = second destination row of the pair
.Lneon2x_group:
    vld2.32 {d0[], d1[]}, [r1]!     @ d0 = e0 e1 e0 e1, d1 = e2 e3 e2 e3
    vmov d2, d0
    vmov d3, d1
    vzip.16 d0, d2                  @ d0 = e0 e0 e1 e1
    vzip.16 d1, d3                  @ d1 = e2 e2 e3 e3
    vst1.32 {q0}, [r0]!
    vst1.32 {q0}, [r5]!
    subs r2, r2, #1
    bne .Lneon2x_group
    subs r3, r3, #1
    mov r0, r5                      @ r5 ended just past the second row; flags kept
    bne .Lneon2x_row
.Lneon2x_done:
    pop {r4-r5}
    bx lr
64
# r0: Destination
# r1: Source
# r2: Width
# r3: Height
# Scales an image of 16-bit elements by four in both directions: each
# source element is written four times horizontally, and each source row
# is written to four consecutive destination rows. Source rows are read
# back to back; destination rows are Width * 8 bytes apart. Elements are
# processed in groups of four, so Width is expected to be a multiple of 4.
# Nothing is written when Width is below 4 or Height is zero.
# Overwrites r0-r3, q0, q1 and the flags; r4-r7 are saved and restored.
.global _neon4x
_neon4x:
    push {r4-r7}
    @ Both loops below decrement before testing, so a zero group count or
    @ a zero row count would wrap around and write far past the output.
    cmp r2, #4
    blo .Lneon4x_done
    cmp r3, #0
    beq .Lneon4x_done
    lsl r4, r2, #3                  @ r4 = bytes per destination row
.Lneon4x_row:
    mov r2, r4, lsr #5              @ r2 = groups of four elements per row
    add r5, r0, r4                  @ r5, r6, r7 = rows 2, 3 and 4 of this band
    add r6, r5, r4
    add r7, r6, r4
.Lneon4x_group:
    @ Load four elements; element k is replicated across every lane of dk.
    vld4.16 {d0[], d1[], d2[], d3[]}, [r1]!
    vst1.16 {d0}, [r0]!
    vst1.16 {d0}, [r5]!
    vst1.16 {d0}, [r6]!
    vst1.16 {d0}, [r7]!
    vst1.16 {d1}, [r0]!
    vst1.16 {d1}, [r5]!
    vst1.16 {d1}, [r6]!
    vst1.16 {d1}, [r7]!
    vst1.16 {d2}, [r0]!
    vst1.16 {d2}, [r5]!
    vst1.16 {d2}, [r6]!
    vst1.16 {d2}, [r7]!
    vst1.16 {d3}, [r0]!
    vst1.16 {d3}, [r5]!
    vst1.16 {d3}, [r6]!
    vst1.16 {d3}, [r7]!
    subs r2, r2, #1
    bne .Lneon4x_group
    subs r3, r3, #1
    mov r0, r7                      @ r7 ended just past the fourth row; flags kept
    bne .Lneon4x_row
.Lneon4x_done:
    pop {r4-r7}
    bx lr
99#endif