-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
arch/armv7a: add optimized memcpy implementation
JIRA: RTOS-789
- Loading branch information
Showing
3 changed files
with
382 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,381 @@ | ||
/* | ||
* Phoenix-RTOS | ||
* | ||
* libphoenix | ||
* | ||
* memcpy | ||
* | ||
* Copyright 2024 Phoenix Systems | ||
* Author: Lukasz Leczkowski | ||
* | ||
* This file is part of Phoenix-RTOS. | ||
* | ||
* %LICENSE% | ||
*/ | ||
|
||
/*
 * Register roles used throughout this file:
 *   DST     (r0)  - running destination pointer
 *   SRC     (r1)  - running source pointer
 *   LEN     (r8)  - number of bytes still to copy; r8 is spilled on entry
 *                   and reloaded in .Lreturn
 *   DST_RET (r12) - copy of the original dst, handed back in r0
 *   r2, r3        - scratch
 */
#define DST     r0
#define SRC     r1
#define LEN     r8
#define DST_RET r12


.arm
.syntax unified


.text
.align 4


/*
 * void *memcpy(void *dst, const void *src, size_t l)
 *
 * In:   r0 = dst, r1 = src, r2 = l
 * Out:  r0 = dst (original value)
 * Uses: r2, r3, r12 and flags as scratch; r8 always, r4-r7, r10, r11 on some
 *       paths - each of these is saved on the stack and restored before return.
 *
 * Strategy:
 *   l < 64                        - word loop + byte loop (.Ltail63Unaligned)
 *   l >= 64, (src ^ dst) & 7 != 0 - dst is brought to a 64-byte boundary, then
 *                                   64-byte blocks with ldr + strd
 *   l >= 64, (src ^ dst) & 7 == 0 - both pointers are brought to an 8-byte
 *                                   boundary, then ldrd/strd in 64-byte blocks
 *                                   (128-byte blocks when 512 bytes or more remain)
 *
 * NOTE(review): the word loops issue ldr/str on addresses that may not be
 * word aligned, so this presumably relies on unaligned access being enabled
 * on the target - confirm.
 */
.globl memcpy
.type memcpy, %function
memcpy:
	str r8, [sp, #-8]!          /* spill r8 into an 8-byte stack slot */
	mov LEN, r2
	cmp LEN, #64
	mov DST_RET, DST            /* preserve return value */

	bhs .LblkCopy

	/* less than 64 bytes - always copy as if block was unaligned */

.Ltail63Unaligned:
	/* unaligned copy, 0-63 bytes */

	/* r3 = LEN / 4 (number of whole words) */
	movs r3, LEN, lsr #2
	beq .Ltail63Un0

.Ltail63Un4:
	ldr r2, [SRC], #4
	str r2, [DST], #4
	subs r3, #1
	bne .Ltail63Un4

.Ltail63Un0:
	/* LEN = LEN % 4 */
	ands LEN, #3
	beq .Lreturn

	/* 1 <= LEN <= 3 */
1:
	ldrb r2, [SRC], #1
	strb r2, [DST], #1
	subs LEN, #1
	bne 1b

.Lreturn:
	/* common exit: only the r8 spill slot is left on the stack here */
	ldr r8, [sp], #8
	mov r0, DST_RET
	bx lr


.LblkCopyUnaligned64:
	/* src/dst not mutually aligned, 64 bytes or more to copy */

	/* r2 = dst rounded up to a 64-byte boundary */
	add r2, DST, #63
	bic r2, #63

	/* r2 = distance to 64-byte alignment (0-63) */
	subs r2, DST
	beq 6f                      /* dst already aligned */

	str r4, [sp, #-8]!          /* r4 is needed as a third scratch register */

	sub LEN, r2                 /* LEN >= 64 > r2, so this cannot underflow */

	/* the same logic as in .Ltail63Unaligned, driven by r2 instead of LEN */

	/* r3 = r2 / 4 */
	movs r3, r2, lsr #2
	beq 3f

2:
	/* whole words of the lead-in */
	ldr r4, [SRC], #4
	str r4, [DST], #4
	subs r3, #1
	bne 2b

3:
	/* r2 = r2 % 4 */
	ands r2, #3
	beq 5f

4:
	/* remaining 1-3 bytes of the lead-in */
	ldrb r3, [SRC], #1
	strb r3, [DST], #1
	subs r2, #1
	bne 4b

5:
	ldr r4, [sp], #8

	/* dst is now 64-byte aligned; fewer than 64 bytes may be left */
	cmp LEN, #64
	blo .Ltail63Unaligned

6:
	/* LEN is kept biased by -64 inside the loop so that a borrow ends it */
	sub LEN, #64

	/* pointers are biased by -8 so the last pair can use pre-index writeback */
	sub DST, #8
	sub SRC, #8

	.p2align 5
.LblkCopyUnaligned:
	/* copy block of 64 bytes: two word loads (src alignment unknown), one strd */
	ldr r2, [SRC, #8]
	ldr r3, [SRC, #12]
	strd r2, r3, [DST, #8]

	ldr r2, [SRC, #16]
	ldr r3, [SRC, #20]
	strd r2, r3, [DST, #16]

	ldr r2, [SRC, #24]
	ldr r3, [SRC, #28]
	strd r2, r3, [DST, #24]

	ldr r2, [SRC, #32]
	ldr r3, [SRC, #36]
	strd r2, r3, [DST, #32]

	ldr r2, [SRC, #40]
	ldr r3, [SRC, #44]
	strd r2, r3, [DST, #40]

	ldr r2, [SRC, #48]
	ldr r3, [SRC, #52]
	strd r2, r3, [DST, #48]

	ldr r2, [SRC, #56]
	ldr r3, [SRC, #60]
	strd r2, r3, [DST, #56]

	ldr r2, [SRC, #64]!         /* SRC += 64 */
	ldr r3, [SRC, #4]
	strd r2, r3, [DST, #64]!    /* DST += 64 */

	subs LEN, #64
	bhs .LblkCopyUnaligned

	/* remove the -8 pointer bias */
	add SRC, #8
	add DST, #8

	ands LEN, #63               /* make LEN positive again */
	beq .Lreturn                /* LEN = 0 */

	b .Ltail63Unaligned


.LblkCopy:
	/* copy 64 bytes or more */

	/* check src/dst alignment */
	and r3, SRC, #7
	and r2, DST, #7
	cmp r3, r2
	bne .LblkCopyUnaligned64

	/* src and dst share the same offset within an 8-byte unit */

	/* handle leading misalignment, 1-3 bytes */
	ands r3, #3
	beq .LblkAligned4

	/* r3 = number of bytes up to the next 4-byte boundary */
	rsb r3, #4
	sub LEN, r3

7:
	ldrb r2, [SRC], #1
	strb r2, [DST], #1
	tst SRC, #3
	bne 7b

.LblkAligned4:
	/* src/dst are 4-byte aligned; Z is clear if they are not yet 8-byte aligned */
	ands r3, SRC, #7

	/* copy one word to reach 8-byte alignment */
	ldrne r2, [SRC], #4
	strne r2, [DST], #4
	subne LEN, LEN, #4

	/* src/dst aligned to 8 bytes */
	cmp LEN, #64
	blo .Ltail63Aligned

	/* check if copy is 512 bytes or bigger */
	cmp LEN, #512
	bhs .LblkCopy512Aligned

	pld [SRC]

	/* pointers are biased by -8 so the last pair can use pre-index writeback */
	sub SRC, #8
	sub DST, #8

.Ltail127Aligned:
	/* we may land here after long copy (64 <= LEN <= 127);
	 * SRC/DST are already biased by -8 on that path as well
	 */
	sub LEN, #64                /* LEN is biased by -64 inside the loop */

	.p2align 5
.LblkCopy64:
	/* copy 64-511 bytes in 64-byte chunks */
	pld [SRC, #40]

	ldrd r2, r3, [SRC, #8]
	strd r2, r3, [DST, #8]
	ldrd r2, r3, [SRC, #16]
	strd r2, r3, [DST, #16]
	ldrd r2, r3, [SRC, #24]
	strd r2, r3, [DST, #24]
	ldrd r2, r3, [SRC, #32]
	strd r2, r3, [DST, #32]

	pld [SRC, #72]

	ldrd r2, r3, [SRC, #40]
	strd r2, r3, [DST, #40]
	ldrd r2, r3, [SRC, #48]
	strd r2, r3, [DST, #48]
	ldrd r2, r3, [SRC, #56]
	strd r2, r3, [DST, #56]
	ldrd r2, r3, [SRC, #64]!    /* SRC += 64 */
	strd r2, r3, [DST, #64]!    /* DST += 64 */
	subs LEN, #64
	bhs .LblkCopy64

	ands LEN, #63               /* make LEN positive again */
	beq .Lreturn                /* LEN = 0 */

	/* remove the -8 pointer bias */
	add SRC, #8
	add DST, #8

.Ltail63Aligned:
	/* copy the tail, 0-63 bytes
	 * src/dst are 8-byte aligned
	 */

	/* r3 = LEN / 8; the flags set here are the ones tested by beq below */
	movs r3, LEN, lsr #3
	/* LEN = LEN % 8 (flags intentionally left untouched) */
	and LEN, #7
	beq .Ltail63Al0

	push {r4, r5}

.Ltail63Al8:
	ldrd r4, r5, [SRC], #8
	strd r4, r5, [DST], #8
	subs r3, #1
	bne .Ltail63Al8

	pop {r4, r5}

.Ltail63Al0:
	/* copied all 8 byte aligned memory, now copy what's left */
	cmp LEN, #0
	beq .Lreturn

	/* 1 <= LEN <= 7: one word, one halfword, one byte as selected by LEN bits */
	ands r3, LEN, #4
	ldrne r2, [SRC], #4
	strne r2, [DST], #4

	ands r3, LEN, #2
	ldrhne r2, [SRC], #2
	strhne r2, [DST], #2

	ands r3, LEN, #1
	ldrbne r2, [SRC], #1
	strbne r2, [DST], #1

	b .Lreturn


.LblkCopy512Aligned:
	/* Copy 512 bytes or more, src/dst are 8-byte aligned */
	stmfd sp!, {r4-r7, r10, r11}

	/* r11 = offset of src within its 64-byte unit (a multiple of 8);
	 * the flags set here are consumed by the beq below
	 */
	ands r11, SRC, #63

	/* pointers are biased by -8 so the last pair can use pre-index writeback */
	sub SRC, #8
	sub DST, #8
	pld [SRC, #8]

	sub LEN, LEN, #128          /* LEN is biased by -128 inside the main loop */

	beq 9f                      /* src already 64-byte aligned */

	/* r11 = bytes missing to the next 64-byte boundary of src (8-56).
	 * Copying the offset itself instead would leave src at 2 * offset mod 64.
	 */
	rsb r11, r11, #64

	/* copy leading misalignment, 8 bytes per iteration */
8:
	ldrd r2, r3, [SRC, #8]!
	strd r2, r3, [DST, #8]!
	subs r11, #8
	sub LEN, #8                 /* must not touch the flags tested by bne */
	bne 8b

	pld [SRC, #8]

	.p2align 5
9:
	/* loads are 64-byte aligned from here on; copy 128 bytes per iteration */
	pld [SRC, #40]

	ldrd r2, r3, [SRC, #8]
	strd r2, r3, [DST, #8]
	ldrd r4, r5, [SRC, #16]
	strd r4, r5, [DST, #16]
	ldrd r6, r7, [SRC, #24]
	strd r6, r7, [DST, #24]
	ldrd r10, r11, [SRC, #32]
	strd r10, r11, [DST, #32]

	pld [SRC, #72]

	ldrd r2, r3, [SRC, #40]
	strd r2, r3, [DST, #40]
	ldrd r4, r5, [SRC, #48]
	strd r4, r5, [DST, #48]
	ldrd r6, r7, [SRC, #56]
	strd r6, r7, [DST, #56]
	ldrd r10, r11, [SRC, #64]
	strd r10, r11, [DST, #64]

	pld [SRC, #104]

	ldrd r2, r3, [SRC, #72]
	strd r2, r3, [DST, #72]
	ldrd r4, r5, [SRC, #80]
	strd r4, r5, [DST, #80]
	ldrd r6, r7, [SRC, #88]
	strd r6, r7, [DST, #88]
	ldrd r10, r11, [SRC, #96]
	strd r10, r11, [DST, #96]

	pld [SRC, #136]

	ldrd r2, r3, [SRC, #104]
	strd r2, r3, [DST, #104]
	ldrd r4, r5, [SRC, #112]
	strd r4, r5, [DST, #112]
	ldrd r6, r7, [SRC, #120]
	strd r6, r7, [DST, #120]
	ldrd r10, r11, [SRC, #128]! /* SRC += 128 */
	strd r10, r11, [DST, #128]! /* DST += 128 */

	subs LEN, LEN, #128
	bhs 9b

	ldmfd sp!, {r4-r7, r10, r11}

	/* ands (not and): the beq below needs Z to reflect the masked LEN */
	ands LEN, #127              /* make LEN positive again */
	beq .Lreturn                /* LEN = 0 */

	/* 64-127 bytes left: one more 64-byte round, pointers still biased by -8 */
	cmp LEN, #63
	bhi .Ltail127Aligned

	/* remove the -8 pointer bias */
	add SRC, #8
	add DST, #8
	b .Ltail63Aligned


.size memcpy, .-memcpy
.ltorg
Oops, something went wrong.