src/util/arm-algo.S (view raw)
@ _to16Bit: narrow an array of 32-bit words to 16-bit halfwords by storing
@ the low 16 bits of every source word.
@
@ r0: Destination (written 2 bytes per element)
@ r1: Source (read 4 bytes per element)
@ r2: Number of words to copy as halfwords (0 is valid and copies nothing)
@
@ May clobber r0-r3 and the flags. r4-r10 are saved and restored.
.global _to16Bit
_to16Bit:
        push    {r4-r10}
        mov     r8, r0                  @ r8  = destination cursor
        mov     r9, r1                  @ r9  = source cursor
        mov     r10, r2                 @ r10 = words remaining

.Lto16Bit_head:
        @ Copy one word at a time until the remaining count is a multiple of 8.
        tst     r10, #7
        beq     .Lto16Bit_bulk_check
        ldr     r0, [r9], #4
        strh    r0, [r8], #2            @ strh keeps only the low 16 bits
        sub     r10, #1
        b       .Lto16Bit_head

.Lto16Bit_bulk_check:
        @ The remaining count is a multiple of 8 here and can be zero (the
        @ request was shorter than 8 words, or empty). The bulk loop below
        @ always copies before it tests, so it must not be entered with zero
        @ remaining: the counter would wrap and both buffers would be overrun.
        cmp     r10, #0
        beq     .Lto16Bit_done

.Lto16Bit_bulk:
        @ Invariant: r10 is a non-zero multiple of 8.
        ldmia   r9!, {r0-r7}            @ fetch 8 source words at once
        strh    r0, [r8], #2
        strh    r1, [r8], #2
        strh    r2, [r8], #2
        strh    r3, [r8], #2
        strh    r4, [r8], #2
        strh    r5, [r8], #2
        strh    r6, [r8], #2
        strh    r7, [r8], #2
        subs    r10, #8
        bne     .Lto16Bit_bulk

.Lto16Bit_done:
        pop     {r4-r10}
        bx      lr
31
32#ifdef __ARM_NEON
@ _neon2x: 2x upscale of 16-bit elements. Every source element is written
@ twice horizontally, and every output row is written to two consecutive
@ destination rows.
@
@ r0: Destination (2 * Height rows, each Width * 4 bytes, stored contiguously)
@ r1: Source (Height rows of Width 16-bit elements, stored contiguously)
@ r2: Width, in source elements per row
@ r3: Height, in source rows
@
@ The row loop works in groups of 4 source elements. A width that is not a
@ multiple of 4 is not handled: the remainder of each row is not consumed,
@ so later rows no longer line up. If Height is 0 or Width is below 4 the
@ function returns without touching memory.
@
@ Clobbers r0-r3, d0-d3 and the flags. r4-r5 are saved and restored.
.global _neon2x
_neon2x:
        push    {r4-r5}
        lsl     r4, r2, #2              @ r4 = destination row size in bytes

        @ Both loops below test their counter only after the first pass, so a
        @ zero count would wrap around instead of ending the loop.
        cmp     r3, #0                  @ no rows requested
        beq     .Lneon2x_done
        cmp     r4, #16                 @ fewer than one 4-element group per row
        blo     .Lneon2x_done

.Lneon2x_row:
        mov     r2, r4, lsr #4          @ r2 = 4-element groups in this row
        add     r5, r0, r4              @ r5 = second output row of the pair

.Lneon2x_group:
        @ Load two 32-bit words (four 16-bit elements a b c d) and replicate
        @ each word across its register: d0 = a b a b, d1 = c d c d.
        vld2.32 {d0[], d1[]}, [r1]!
        vmov    d2, d0
        vmov    d3, d1
        vzip.16 d0, d2                  @ d0 = a a b b
        vzip.16 d1, d3                  @ d1 = c c d d
        vst1.32 {q0}, [r0]!             @ upper row of the pair
        vst1.32 {q0}, [r5]!             @ lower row of the pair
        subs    r2, #1
        bne     .Lneon2x_group

        subs    r3, #1
        mov     r0, r5                  @ r5 now points past the lower row
        bne     .Lneon2x_row            @ flags still come from the subs on r3

.Lneon2x_done:
        pop     {r4-r5}
        bx      lr
59
@ _neon4x: 4x upscale of 16-bit elements. Every source element is written
@ four times horizontally, and every output row is written to four
@ consecutive destination rows.
@
@ r0: Destination (4 * Height rows, each Width * 8 bytes, stored contiguously)
@ r1: Source (Height rows of Width 16-bit elements, stored contiguously)
@ r2: Width, in source elements per row
@ r3: Height, in source rows
@
@ The row loop works in groups of 4 source elements. A width that is not a
@ multiple of 4 is not handled: the remainder of each row is not consumed,
@ so later rows no longer line up. If Height is 0 or Width is below 4 the
@ function returns without touching memory.
@
@ Clobbers r0-r3, d0-d3 and the flags. r4-r7 are saved and restored.
.global _neon4x
_neon4x:
        push    {r4-r7}
        lsl     r4, r2, #3              @ r4 = destination row size in bytes

        @ Both loops below test their counter only after the first pass, so a
        @ zero count would wrap around instead of ending the loop.
        cmp     r3, #0                  @ no rows requested
        beq     .Lneon4x_done
        cmp     r4, #32                 @ fewer than one 4-element group per row
        blo     .Lneon4x_done

.Lneon4x_row:
        mov     r2, r4, lsr #5          @ r2 = 4-element groups in this row
        add     r5, r0, r4              @ r5 = output row 1
        add     r6, r5, r4              @ r6 = output row 2
        add     r7, r6, r4              @ r7 = output row 3

.Lneon4x_group:
        @ Load four 16-bit elements a b c d and fill one register with each:
        @ d0 = a a a a, d1 = b b b b, d2 = c c c c, d3 = d d d d.
        vld4.16 {d0[], d1[], d2[], d3[]}, [r1]!
        vst1.16 {d0}, [r0]!
        vst1.16 {d0}, [r5]!
        vst1.16 {d0}, [r6]!
        vst1.16 {d0}, [r7]!
        vst1.16 {d1}, [r0]!
        vst1.16 {d1}, [r5]!
        vst1.16 {d1}, [r6]!
        vst1.16 {d1}, [r7]!
        vst1.16 {d2}, [r0]!
        vst1.16 {d2}, [r5]!
        vst1.16 {d2}, [r6]!
        vst1.16 {d2}, [r7]!
        vst1.16 {d3}, [r0]!
        vst1.16 {d3}, [r5]!
        vst1.16 {d3}, [r6]!
        vst1.16 {d3}, [r7]!
        subs    r2, #1
        bne     .Lneon4x_group

        subs    r3, #1
        mov     r0, r7                  @ r7 now points past output row 3
        bne     .Lneon4x_row            @ flags still come from the subs on r3

.Lneon4x_done:
        pop     {r4-r7}
        bx      lr
94#endif