Commit

memcpy

lukileczo committed Feb 27, 2024
1 parent 8e7daf8 commit 012e135
Showing 3 changed files with 362 additions and 33 deletions.
2 changes: 1 addition & 1 deletion arch/armv7a/Makefile
@@ -5,5 +5,5 @@
# Author: Pawel Pisarczyk
#

-OBJS += $(addprefix $(PREFIX_O)arch/armv7a/, syscalls.o jmp.o signal.o string.o reboot.o)
+OBJS += $(addprefix $(PREFIX_O)arch/armv7a/, syscalls.o jmp.o signal.o string.o reboot.o memcpy.o)
CRT0_OBJS += $(addprefix $(PREFIX_O)arch/armv7a/, crt0.o)
361 changes: 361 additions & 0 deletions arch/armv7a/memcpy.S
@@ -0,0 +1,361 @@
#define DST r0
#define SRC r1
#define LEN r8
#define DST_RET r12

.arm
.syntax unified

.text
.align 4

/* void *memcpy(void *dst, const void *src, size_t l) */
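/* Strategy: copies under 64 bytes go straight to a word+byte tail.
 * Larger copies dispatch on whether src and dst are mutually 8-byte
 * alignable (same offset mod 8): if so, the head is copied up to an
 * 8-byte boundary and the bulk moves through unrolled ldrd/strd loops
 * (64-byte blocks, or 128-byte blocks with extra registers for 512+
 * bytes); otherwise dst is byte-aligned to 64 bytes and the bulk uses
 * (possibly unaligned) ldr with aligned strd. r8 (LEN) is callee-saved
 * per the AAPCS, hence the save/restore; r12 (DST_RET) is a scratch
 * register used to preserve the return value. */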
.globl memcpy
.type memcpy, %function
memcpy:
str r8, [sp, #-8]!
mov r8, r2
cmp LEN, #64
mov DST_RET, DST /* preserve return value */

bhs .LblkCopy

/* fewer than 64 bytes - copy as if the block were unaligned */

.Ltail63Unaligned:
/* unaligned copy, 0-63 bytes */
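/* word accesses here may be unaligned - this relies on ARMv7
 * unaligned ldr/str support (SCTLR.A clear, the usual configuration) */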

/* r3 = LEN / 4 */
movs r3, LEN, lsr #2
beq .Ltail63Un0

.Ltail63Un4:
ldr r2, [SRC], #4
str r2, [DST], #4
subs r3, #1
bne .Ltail63Un4

.Ltail63Un0:
/* LEN = LEN % 4 */
ands LEN, #3
beq .Ltail63UnRet

/* 1 <= LEN <= 3 */
1:
subs LEN, #1
ldrb r2, [SRC], #1
strb r2, [DST], #1
bne 1b

.Ltail63UnRet:
ldr r8, [sp], #8
mov r0, DST_RET
bx lr


.LblkCopyUnaligned64:
/* src/dst not mutually aligned, at least 64 bytes to copy */

/* align dst to 64 bytes */
add r2, DST, #63
bic r2, #63

/* r2 = distance to 64-byte alignment */
subs r2, DST
beq 5f /* dst already aligned */

sub LEN, r2

1:
ldrb r3, [SRC], #1
strb r3, [DST], #1
subs r2, #1
bne 1b

cmp LEN, #64
blo .Ltail63Unaligned

5:
/* bias LEN by 64 so the subs/bhs below exits once fewer than 64 remain */
sub LEN, #64

.p2align 5
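/* 32-byte-align the hot loop entry; gas pads the fallthrough with nops */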
.LblkCopyUnaligned:
/* copy 64 bytes per iteration; dst is 64-byte aligned, src may be unaligned */
ldr r2, [SRC], #4
ldr r3, [SRC], #4
strd r2, r3, [DST], #8

ldr r2, [SRC], #4
ldr r3, [SRC], #4
strd r2, r3, [DST], #8

ldr r2, [SRC], #4
ldr r3, [SRC], #4
strd r2, r3, [DST], #8

ldr r2, [SRC], #4
ldr r3, [SRC], #4
strd r2, r3, [DST], #8

ldr r2, [SRC], #4
ldr r3, [SRC], #4
strd r2, r3, [DST], #8

ldr r2, [SRC], #4
ldr r3, [SRC], #4
strd r2, r3, [DST], #8

ldr r2, [SRC], #4
ldr r3, [SRC], #4
strd r2, r3, [DST], #8

ldr r2, [SRC], #4
ldr r3, [SRC], #4
strd r2, r3, [DST], #8

subs LEN, #64
bhs .LblkCopyUnaligned

bne 2f

/* LEN = 0 */
ldr r8, [sp], #8
mov r0, DST_RET
bx lr

2:
/* LEN < 0 */
and LEN, #63 /* make LEN positive again */
b .Ltail63Unaligned

.LblkCopy:
/* copy at least 64 bytes */

/* check src/dst alignment */
and r3, SRC, #7
and r2, DST, #7
cmp r3, r2
bne .LblkCopyUnaligned64

/* src/dst share the same offset mod 8 - they can be co-aligned */

/* handle leading misalignment: copy 4 - (SRC & 3) bytes, i.e. 1-3 */
ands r3, SRC, #3
beq .LblkAligned4

rsb r3, #4
sub LEN, r3

3:
ldrb r2, [SRC], #1
strb r2, [DST], #1
tst SRC, #3
bne 3b

.LblkAligned4:
ands r3, SRC, #7

/* src/dst now 4-byte aligned; copy one word if needed to reach 8 bytes */
ldrne r2, [SRC], #4
strne r2, [DST], #4
subne LEN, LEN, #4

/* src/dst aligned to 8 bytes */
cmp LEN, #64
blo .Ltail63Aligned

/* check if at least 512 bytes remain */
cmp LEN, #512
bhs .LblkCopy512Aligned

pld [SRC]
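/* pld is a prefetch hint: it starts pulling upcoming source cache
 * lines in ahead of the ldrd/strd stream */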

sub SRC, #8
sub DST, #8

.Ltail127Aligned:
/* we may land here after long copy (64 <= LEN <= 127) */
sub LEN, #64

.p2align 5
.LblkCopy64:
/* copy 64-512 bytes in 64-byte chunks */
pld [SRC, #40]

ldrd r2, r3, [SRC, #8]
strd r2, r3, [DST, #8]
ldrd r2, r3, [SRC, #16]
strd r2, r3, [DST, #16]
ldrd r2, r3, [SRC, #24]
strd r2, r3, [DST, #24]
ldrd r2, r3, [SRC, #32]
strd r2, r3, [DST, #32]

pld [SRC, #72]

ldrd r2, r3, [SRC, #40]
strd r2, r3, [DST, #40]
ldrd r2, r3, [SRC, #48]
strd r2, r3, [DST, #48]
ldrd r2, r3, [SRC, #56]
strd r2, r3, [DST, #56]
ldrd r2, r3, [SRC, #64]!
strd r2, r3, [DST, #64]!
subs LEN, #64
bhs .LblkCopy64

bne 2f

/* LEN = 0 */
ldr r8, [sp], #8
mov r0, DST_RET
bx lr

2:
/* LEN < 0 */
and LEN, #63 /* make LEN positive again */

add SRC, #8
add DST, #8

.Ltail63Aligned:
/* copy the tail, 0-63 bytes
* src/dst are 8-byte aligned
*/

/* r3 = LEN / 8 (movs sets Z for the beq below) */
movs r3, LEN, lsr #3
/* LEN = LEN % 8 (plain and, no flag update, so Z still reflects r3) */
and LEN, #7
beq .Ltail63Al0

push {r4, r5}

.Ltail63Al8:
ldrd r4, r5, [SRC], #8
strd r4, r5, [DST], #8
subs r3, #1
bne .Ltail63Al8

pop {r4, r5}

.Ltail63Al0:
/* copied all full 8-byte chunks, now copy what's left */
cmp LEN, #0
beq .Ltail63AlRet

/* 1 <= LEN <= 7 */
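/* branch-free: conditionally move a word, a halfword, then a byte */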
ands r3, LEN, #4
ldrne r2, [SRC], #4
strne r2, [DST], #4

ands r3, LEN, #2
ldrhne r2, [SRC], #2
strhne r2, [DST], #2

ands r3, LEN, #1
ldrbne r2, [SRC], #1
strbne r2, [DST], #1

.Ltail63AlRet:
ldr r8, [sp], #8
mov r0, DST_RET
bx lr


.LblkCopy512Aligned:
/* Copy more than 512 bytes, src/dst are 8-byte aligned */
stmfd sp!, {r4-r7, r10, r11}
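/* extra callee-saved registers give each ldrd/strd pair its own
 * registers, so consecutive transfers don't serialize on r2/r3 */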

/* Align ld/st to 64 bytes: r11 = bytes to the next 64-byte boundary */
ands r11, SRC, #63
rsbne r11, #64

sub SRC, #8
sub DST, #8
sub LEN, LEN, #128

pld [SRC, #8]

beq 4f

/* copy leading misalignment */
3:
ldrd r2, r3, [SRC, #8]!
strd r2, r3, [DST, #8]!
subs r11, #8
sub LEN, #8
bne 3b

pld [SRC, #8]

.p2align 5
4:
/* ld/st are cache line aligned */
pld [SRC, #40]

ldrd r2, r3, [SRC, #8]
strd r2, r3, [DST, #8]
ldrd r4, r5, [SRC, #16]
strd r4, r5, [DST, #16]
ldrd r6, r7, [SRC, #24]
strd r6, r7, [DST, #24]
ldrd r10, r11, [SRC, #32]
strd r10, r11, [DST, #32]

pld [SRC, #72]

ldrd r2, r3, [SRC, #40]
strd r2, r3, [DST, #40]
ldrd r4, r5, [SRC, #48]
strd r4, r5, [DST, #48]
ldrd r6, r7, [SRC, #56]
strd r6, r7, [DST, #56]
ldrd r10, r11, [SRC, #64]
strd r10, r11, [DST, #64]

pld [SRC, #104]

ldrd r2, r3, [SRC, #72]
strd r2, r3, [DST, #72]
ldrd r4, r5, [SRC, #80]
strd r4, r5, [DST, #80]
ldrd r6, r7, [SRC, #88]
strd r6, r7, [DST, #88]
ldrd r10, r11, [SRC, #96]
strd r10, r11, [DST, #96]

pld [SRC, #136]

ldrd r2, r3, [SRC, #104]
strd r2, r3, [DST, #104]
ldrd r4, r5, [SRC, #112]
strd r4, r5, [DST, #112]
ldrd r6, r7, [SRC, #120]
strd r6, r7, [DST, #120]
ldrd r10, r11, [SRC, #128]!
strd r10, r11, [DST, #128]!

subs LEN, LEN, #128
bhs 4b

ldmfd sp!, {r4-r7, r10, r11}

bne 6f

/* LEN = 0 */
ldr r8, [sp], #8
mov r0, DST_RET
bx lr

6:
/* LEN < 0 */
and LEN, #127 /* make LEN positive again */

cmp LEN, #63
bhi .Ltail127Aligned

add SRC, #8
add DST, #8
b .Ltail63Aligned

.size memcpy, .-memcpy
.ltorg
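
As a reading aid (not part of the commit), the control flow above corresponds roughly to the C sketch below. The hypothetical memcpy_sketch collapses the hand-unrolled 64- and 128-byte ldrd/strd blocks and the pld prefetches into a plain 8-byte loop:

#include <stddef.h>
#include <stdint.h>

/* memcpy_sketch: hypothetical illustration of the assembly's strategy */
void *memcpy_sketch(void *dst, const void *src, size_t len)
{
	unsigned char *d = dst;
	const unsigned char *s = src;

	/* bulk path: taken only when src and dst can reach an 8-byte
	 * boundary together, mirroring the asm's SRC & 7 vs DST & 7 test */
	if (len >= 64 && (((uintptr_t)s ^ (uintptr_t)d) & 7) == 0) {
		/* head: byte-copy up to 7 bytes to reach 8-byte alignment */
		while (((uintptr_t)s & 7) != 0) {
			*d++ = *s++;
			len--;
		}
		/* bulk: doubleword copies; the asm unrolls this into 64-byte
		 * (and, for 512+ bytes, 128-byte) ldrd/strd blocks with pld */
		while (len >= 8) {
			*(uint64_t *)(void *)d = *(const uint64_t *)(const void *)s;
			d += 8;
			s += 8;
			len -= 8;
		}
	}

	/* short copies and tails (0-63 bytes); the asm also uses word
	 * accesses here, relying on ARMv7 unaligned load/store support */
	while (len != 0) {
		*d++ = *s++;
		len--;
	}

	return dst;
}

The mutual-alignment test ((s ^ d) & 7) mirrors the asm's comparison of SRC & 7 against DST & 7: doubleword copies only pay off when both pointers can reach an 8-byte boundary together; otherwise the asm falls back to unaligned loads paired with aligned stores.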