/* strlen.S -- forked from genesi/imx-libc-neon */
/*
* Copyright (c) 2010 0xlab - http://0xlab.org/
* Copyright (c) 2008 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/cpu-features.h>
.text
.global strlen
.type strlen, %function
.align 4
/* size_t strlen(const char *S)
* entry: r0 -> string
* exit: r0 = len
*/
strlen:
.fnstart
len .req r0
data .req r3
addr .req r1
PLD(r0, #0)
/* Word-align address */
bic addr, r0, #3
/* Get adjustment for start ... */
ands len, r0, #3
neg len, len
/* First word of data */
ldr data, [addr], #4
/* Ensure bytes preceeding start ... */
add ip, len, #4
mov ip, ip, asl #3
mvn r2, #0
/* ... are masked out */
it ne
#ifdef __ARMEB__
orrne data, data, r2, lsl ip
#else
orrne data, data, r2, lsr ip
#endif
/* Magic const 0x01010101 */
#if __ARM_ARCH__ >= 7
movw ip, #0x101
#else
mov ip, #0x1
orr ip, ip, ip, lsl #8
#endif
orr ip, ip, ip, lsl #16
/* This is the main loop. We subtract one from each byte in
the word: the sign bit changes iff the byte was zero or
0x80 -- we eliminate the latter case by anding the result
with the 1-s complement of the data. */
1:
/* test (data - 0x01010101) */
sub r2, data, ip
/* ... & ~data */
bic r2, r2, data
/* ... & 0x80808080 == 0? */
ands r2, r2, ip, lsl #7
#if __ARM_ARCH__ >= 7
/* yes, get more data... */
itt eq
ldreq data, [addr], #4
/* and 4 more bytes */
addeq len, len, #4
/* If we have PLD, then unroll the loop a bit. */
PLD(addr, #8)
/* test (data - 0x01010101) */
ittt eq
subeq r2, data, ip
/* ... & ~data */
biceq r2, r2, data
/* ... & 0x80808080 == 0? */
andeqs r2, r2, ip, lsl #7
#endif
itt eq
/* yes, get more data... */
ldreq data, [addr], #4
/* and 4 more bytes */
addeq len, len, #4
beq 1b
#ifdef __ARMEB__
tst data, #0xff000000
itttt ne
addne len, len, #1
tstne data, #0xff0000
addne len, len, #1
tstne data, #0xff00
it ne
addne len, len, #1
#else
#if __ARM_ARCH__ >= 5
/* R2 is the residual sign bits from the above test. All we
need to do now is establish the position of the first zero
byte... */
/* Little-endian is harder, we need the number of trailing
zeros / 8 */
#if __ARM_ARCH__ >= 7
rbit r2, r2
clz r2, r2
# else
rsb r1, r2, #0
and r2, r2, r1
clz r2, r2
rsb r2, r2, #31
# endif
add len, len, r2, lsr #3
# else /* No CLZ instruction */
tst data, #0xff
itttt ne
addne len, len, #1
tstne data, #0xff00
addne len, len, #1
tstne data, #0xff0000
it ne
addne len, len, #1
# endif
#endif
bx lr
.fnend