From 2ea93a1c2b4688b977a7d01f91aa600a666ec218 Mon Sep 17 00:00:00 2001 From: Will C Date: Wed, 4 Sep 2024 18:12:07 -0400 Subject: [PATCH] Match mem_funcs from cam's work --- asm/MSL/mem_funcs.s | 222 ------------------------------------------ configure.py | 2 +- obj_files.mk | 2 +- src/MSL/mem_funcs.c | 230 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 232 insertions(+), 224 deletions(-) delete mode 100644 asm/MSL/mem_funcs.s diff --git a/asm/MSL/mem_funcs.s b/asm/MSL/mem_funcs.s deleted file mode 100644 index f40c977e79..0000000000 --- a/asm/MSL/mem_funcs.s +++ /dev/null @@ -1,222 +0,0 @@ -.include "macros.inc" - -.section .text # 0x80005940 - 0x803B7240 - -.global __copy_longs_rev_unaligned -__copy_longs_rev_unaligned: -/* 80323A14 003205F4 7D 83 2A 14 */ add r12, r3, r5 -/* 80323A18 003205F8 55 80 07 BF */ clrlwi. r0, r12, 0x1e -/* 80323A1C 003205FC 7D 64 2A 14 */ add r11, r4, r5 -/* 80323A20 00320600 7C 03 03 78 */ mr r3, r0 -/* 80323A24 00320604 41 82 00 18 */ beq .L_80323A3C -/* 80323A28 00320608 7C A3 28 50 */ subf r5, r3, r5 -.L_80323A2C: -/* 80323A2C 0032060C 8C 0B FF FF */ lbzu r0, -1(r11) -/* 80323A30 00320610 34 63 FF FF */ addic. r3, r3, -1 -/* 80323A34 00320614 9C 0C FF FF */ stbu r0, -1(r12) -/* 80323A38 00320618 40 82 FF F4 */ bne .L_80323A2C -.L_80323A3C: -/* 80323A3C 0032061C 55 64 1E F8 */ rlwinm r4, r11, 3, 0x1b, 0x1c -/* 80323A40 00320620 55 69 07 BE */ clrlwi r9, r11, 0x1e -/* 80323A44 00320624 21 44 00 20 */ subfic r10, r4, 0x20 -/* 80323A48 00320628 20 09 00 04 */ subfic r0, r9, 4 -/* 80323A4C 0032062C 7D 6B 02 14 */ add r11, r11, r0 -/* 80323A50 00320630 84 EB FF FC */ lwzu r7, -4(r11) -/* 80323A54 00320634 54 A6 E8 FE */ srwi r6, r5, 3 -.L_80323A58: -/* 80323A58 00320638 81 0B FF FC */ lwz r8, -4(r11) -/* 80323A5C 0032063C 7C E0 54 30 */ srw r0, r7, r10 -/* 80323A60 00320640 34 C6 FF FF */ addic. 
r6, r6, -1 -/* 80323A64 00320644 7D 03 20 30 */ slw r3, r8, r4 -/* 80323A68 00320648 7C 60 03 78 */ or r0, r3, r0 -/* 80323A6C 0032064C 90 0C FF FC */ stw r0, -4(r12) -/* 80323A70 00320650 7D 00 54 30 */ srw r0, r8, r10 -/* 80323A74 00320654 84 EB FF F8 */ lwzu r7, -8(r11) -/* 80323A78 00320658 7C E3 20 30 */ slw r3, r7, r4 -/* 80323A7C 0032065C 7C 60 03 78 */ or r0, r3, r0 -/* 80323A80 00320660 94 0C FF F8 */ stwu r0, -8(r12) -/* 80323A84 00320664 40 82 FF D4 */ bne .L_80323A58 -/* 80323A88 00320668 54 A0 07 7B */ rlwinm. r0, r5, 0, 0x1d, 0x1d -/* 80323A8C 0032066C 41 82 00 18 */ beq .L_80323AA4 -/* 80323A90 00320670 84 6B FF FC */ lwzu r3, -4(r11) -/* 80323A94 00320674 7C E0 54 30 */ srw r0, r7, r10 -/* 80323A98 00320678 7C 63 20 30 */ slw r3, r3, r4 -/* 80323A9C 0032067C 7C 60 03 78 */ or r0, r3, r0 -/* 80323AA0 00320680 94 0C FF FC */ stwu r0, -4(r12) -.L_80323AA4: -/* 80323AA4 00320684 54 A5 07 BF */ clrlwi. r5, r5, 0x1e -/* 80323AA8 00320688 4D 82 00 20 */ beqlr -/* 80323AAC 0032068C 7D 6B 4A 14 */ add r11, r11, r9 -.L_80323AB0: -/* 80323AB0 00320690 8C 0B FF FF */ lbzu r0, -1(r11) -/* 80323AB4 00320694 34 A5 FF FF */ addic. r5, r5, -1 -/* 80323AB8 00320698 9C 0C FF FF */ stbu r0, -1(r12) -/* 80323ABC 0032069C 40 82 FF F4 */ bne .L_80323AB0 -/* 80323AC0 003206A0 4E 80 00 20 */ blr - -.global __copy_longs_unaligned -__copy_longs_unaligned: -/* 80323AC4 003206A4 7C 03 00 D0 */ neg r0, r3 -/* 80323AC8 003206A8 54 06 07 BF */ clrlwi. r6, r0, 0x1e -/* 80323ACC 003206AC 38 84 FF FF */ addi r4, r4, -1 -/* 80323AD0 003206B0 38 63 FF FF */ addi r3, r3, -1 -/* 80323AD4 003206B4 41 82 00 18 */ beq .L_80323AEC -/* 80323AD8 003206B8 7C A6 28 50 */ subf r5, r6, r5 -.L_80323ADC: -/* 80323ADC 003206BC 8C 04 00 01 */ lbzu r0, 1(r4) -/* 80323AE0 003206C0 34 C6 FF FF */ addic. 
r6, r6, -1 -/* 80323AE4 003206C4 9C 03 00 01 */ stbu r0, 1(r3) -/* 80323AE8 003206C8 40 82 FF F4 */ bne .L_80323ADC -.L_80323AEC: -/* 80323AEC 003206CC 38 04 00 01 */ addi r0, r4, 1 -/* 80323AF0 003206D0 54 0B 07 BE */ clrlwi r11, r0, 0x1e -/* 80323AF4 003206D4 7C 8B 20 50 */ subf r4, r11, r4 -/* 80323AF8 003206D8 39 04 FF FD */ addi r8, r4, -3 -/* 80323AFC 003206DC 85 28 00 04 */ lwzu r9, 4(r8) -/* 80323B00 003206E0 54 04 1E F8 */ rlwinm r4, r0, 3, 0x1b, 0x1c -/* 80323B04 003206E4 21 84 00 20 */ subfic r12, r4, 0x20 -/* 80323B08 003206E8 38 C3 FF FD */ addi r6, r3, -3 -/* 80323B0C 003206EC 54 A7 E8 FE */ srwi r7, r5, 3 -.L_80323B10: -/* 80323B10 003206F0 81 48 00 04 */ lwz r10, 4(r8) -/* 80323B14 003206F4 7D 23 20 30 */ slw r3, r9, r4 -/* 80323B18 003206F8 34 E7 FF FF */ addic. r7, r7, -1 -/* 80323B1C 003206FC 7D 40 64 30 */ srw r0, r10, r12 -/* 80323B20 00320700 7C 60 03 78 */ or r0, r3, r0 -/* 80323B24 00320704 90 06 00 04 */ stw r0, 4(r6) -/* 80323B28 00320708 7D 43 20 30 */ slw r3, r10, r4 -/* 80323B2C 0032070C 85 28 00 08 */ lwzu r9, 8(r8) -/* 80323B30 00320710 7D 20 64 30 */ srw r0, r9, r12 -/* 80323B34 00320714 7C 60 03 78 */ or r0, r3, r0 -/* 80323B38 00320718 94 06 00 08 */ stwu r0, 8(r6) -/* 80323B3C 0032071C 40 82 FF D4 */ bne .L_80323B10 -/* 80323B40 00320720 54 A0 07 7B */ rlwinm. r0, r5, 0, 0x1d, 0x1d -/* 80323B44 00320724 41 82 00 18 */ beq .L_80323B5C -/* 80323B48 00320728 84 08 00 04 */ lwzu r0, 4(r8) -/* 80323B4C 0032072C 7D 23 20 30 */ slw r3, r9, r4 -/* 80323B50 00320730 7C 00 64 30 */ srw r0, r0, r12 -/* 80323B54 00320734 7C 60 03 78 */ or r0, r3, r0 -/* 80323B58 00320738 94 06 00 04 */ stwu r0, 4(r6) -.L_80323B5C: -/* 80323B5C 0032073C 54 A5 07 BF */ clrlwi. 
r5, r5, 0x1e -/* 80323B60 00320740 38 88 00 03 */ addi r4, r8, 3 -/* 80323B64 00320744 38 66 00 03 */ addi r3, r6, 3 -/* 80323B68 00320748 4D 82 00 20 */ beqlr -/* 80323B6C 0032074C 20 0B 00 04 */ subfic r0, r11, 4 -/* 80323B70 00320750 7C 80 20 50 */ subf r4, r0, r4 -.L_80323B74: -/* 80323B74 00320754 8C 04 00 01 */ lbzu r0, 1(r4) -/* 80323B78 00320758 34 A5 FF FF */ addic. r5, r5, -1 -/* 80323B7C 0032075C 9C 03 00 01 */ stbu r0, 1(r3) -/* 80323B80 00320760 40 82 FF F4 */ bne .L_80323B74 -/* 80323B84 00320764 4E 80 00 20 */ blr - -.global __copy_longs_rev_aligned -__copy_longs_rev_aligned: -/* 80323B88 00320768 7C C3 2A 14 */ add r6, r3, r5 -/* 80323B8C 0032076C 54 C0 07 BF */ clrlwi. r0, r6, 0x1e -/* 80323B90 00320770 7C 84 2A 14 */ add r4, r4, r5 -/* 80323B94 00320774 7C 03 03 78 */ mr r3, r0 -/* 80323B98 00320778 41 82 00 18 */ beq .L_80323BB0 -/* 80323B9C 0032077C 7C A3 28 50 */ subf r5, r3, r5 -.L_80323BA0: -/* 80323BA0 00320780 8C 04 FF FF */ lbzu r0, -1(r4) -/* 80323BA4 00320784 34 63 FF FF */ addic. r3, r3, -1 -/* 80323BA8 00320788 9C 06 FF FF */ stbu r0, -1(r6) -/* 80323BAC 0032078C 40 82 FF F4 */ bne .L_80323BA0 -.L_80323BB0: -/* 80323BB0 00320790 54 A3 D9 7F */ rlwinm. r3, r5, 0x1b, 5, 0x1f -/* 80323BB4 00320794 41 82 00 4C */ beq .L_80323C00 -.L_80323BB8: -/* 80323BB8 00320798 80 04 FF FC */ lwz r0, -4(r4) -/* 80323BBC 0032079C 34 63 FF FF */ addic. 
r3, r3, -1 -/* 80323BC0 003207A0 90 06 FF FC */ stw r0, -4(r6) -/* 80323BC4 003207A4 80 04 FF F8 */ lwz r0, -8(r4) -/* 80323BC8 003207A8 90 06 FF F8 */ stw r0, -8(r6) -/* 80323BCC 003207AC 80 04 FF F4 */ lwz r0, -0xc(r4) -/* 80323BD0 003207B0 90 06 FF F4 */ stw r0, -0xc(r6) -/* 80323BD4 003207B4 80 04 FF F0 */ lwz r0, -0x10(r4) -/* 80323BD8 003207B8 90 06 FF F0 */ stw r0, -0x10(r6) -/* 80323BDC 003207BC 80 04 FF EC */ lwz r0, -0x14(r4) -/* 80323BE0 003207C0 90 06 FF EC */ stw r0, -0x14(r6) -/* 80323BE4 003207C4 80 04 FF E8 */ lwz r0, -0x18(r4) -/* 80323BE8 003207C8 90 06 FF E8 */ stw r0, -0x18(r6) -/* 80323BEC 003207CC 80 04 FF E4 */ lwz r0, -0x1c(r4) -/* 80323BF0 003207D0 90 06 FF E4 */ stw r0, -0x1c(r6) -/* 80323BF4 003207D4 84 04 FF E0 */ lwzu r0, -0x20(r4) -/* 80323BF8 003207D8 94 06 FF E0 */ stwu r0, -0x20(r6) -/* 80323BFC 003207DC 40 82 FF BC */ bne .L_80323BB8 -.L_80323C00: -/* 80323C00 003207E0 54 A3 F7 7F */ rlwinm. r3, r5, 0x1e, 0x1d, 0x1f -/* 80323C04 003207E4 41 82 00 14 */ beq .L_80323C18 -.L_80323C08: -/* 80323C08 003207E8 84 04 FF FC */ lwzu r0, -4(r4) -/* 80323C0C 003207EC 34 63 FF FF */ addic. r3, r3, -1 -/* 80323C10 003207F0 94 06 FF FC */ stwu r0, -4(r6) -/* 80323C14 003207F4 40 82 FF F4 */ bne .L_80323C08 -.L_80323C18: -/* 80323C18 003207F8 54 A5 07 BF */ clrlwi. r5, r5, 0x1e -/* 80323C1C 003207FC 4D 82 00 20 */ beqlr -.L_80323C20: -/* 80323C20 00320800 8C 04 FF FF */ lbzu r0, -1(r4) -/* 80323C24 00320804 34 A5 FF FF */ addic. r5, r5, -1 -/* 80323C28 00320808 9C 06 FF FF */ stbu r0, -1(r6) -/* 80323C2C 0032080C 40 82 FF F4 */ bne .L_80323C20 -/* 80323C30 00320810 4E 80 00 20 */ blr - -.global __copy_longs_aligned -__copy_longs_aligned: -/* 80323C34 00320814 7C 03 00 D0 */ neg r0, r3 -/* 80323C38 00320818 54 06 07 BF */ clrlwi. 
r6, r0, 0x1e -/* 80323C3C 0032081C 38 E4 FF FF */ addi r7, r4, -1 -/* 80323C40 00320820 38 63 FF FF */ addi r3, r3, -1 -/* 80323C44 00320824 41 82 00 18 */ beq .L_80323C5C -/* 80323C48 00320828 7C A6 28 50 */ subf r5, r6, r5 -.L_80323C4C: -/* 80323C4C 0032082C 8C 07 00 01 */ lbzu r0, 1(r7) -/* 80323C50 00320830 34 C6 FF FF */ addic. r6, r6, -1 -/* 80323C54 00320834 9C 03 00 01 */ stbu r0, 1(r3) -/* 80323C58 00320838 40 82 FF F4 */ bne .L_80323C4C -.L_80323C5C: -/* 80323C5C 0032083C 54 A4 D9 7F */ rlwinm. r4, r5, 0x1b, 5, 0x1f -/* 80323C60 00320840 38 C7 FF FD */ addi r6, r7, -3 -/* 80323C64 00320844 38 63 FF FD */ addi r3, r3, -3 -/* 80323C68 00320848 41 82 00 4C */ beq .L_80323CB4 -.L_80323C6C: -/* 80323C6C 0032084C 80 06 00 04 */ lwz r0, 4(r6) -/* 80323C70 00320850 34 84 FF FF */ addic. r4, r4, -1 -/* 80323C74 00320854 90 03 00 04 */ stw r0, 4(r3) -/* 80323C78 00320858 80 06 00 08 */ lwz r0, 8(r6) -/* 80323C7C 0032085C 90 03 00 08 */ stw r0, 8(r3) -/* 80323C80 00320860 80 06 00 0C */ lwz r0, 0xc(r6) -/* 80323C84 00320864 90 03 00 0C */ stw r0, 0xc(r3) -/* 80323C88 00320868 80 06 00 10 */ lwz r0, 0x10(r6) -/* 80323C8C 0032086C 90 03 00 10 */ stw r0, 0x10(r3) -/* 80323C90 00320870 80 06 00 14 */ lwz r0, 0x14(r6) -/* 80323C94 00320874 90 03 00 14 */ stw r0, 0x14(r3) -/* 80323C98 00320878 80 06 00 18 */ lwz r0, 0x18(r6) -/* 80323C9C 0032087C 90 03 00 18 */ stw r0, 0x18(r3) -/* 80323CA0 00320880 80 06 00 1C */ lwz r0, 0x1c(r6) -/* 80323CA4 00320884 90 03 00 1C */ stw r0, 0x1c(r3) -/* 80323CA8 00320888 84 06 00 20 */ lwzu r0, 0x20(r6) -/* 80323CAC 0032088C 94 03 00 20 */ stwu r0, 0x20(r3) -/* 80323CB0 00320890 40 82 FF BC */ bne .L_80323C6C -.L_80323CB4: -/* 80323CB4 00320894 54 A4 F7 7F */ rlwinm. r4, r5, 0x1e, 0x1d, 0x1f -/* 80323CB8 00320898 41 82 00 14 */ beq .L_80323CCC -.L_80323CBC: -/* 80323CBC 0032089C 84 06 00 04 */ lwzu r0, 4(r6) -/* 80323CC0 003208A0 34 84 FF FF */ addic. 
r4, r4, -1 -/* 80323CC4 003208A4 94 03 00 04 */ stwu r0, 4(r3) -/* 80323CC8 003208A8 40 82 FF F4 */ bne .L_80323CBC -.L_80323CCC: -/* 80323CCC 003208AC 54 A5 07 BF */ clrlwi. r5, r5, 0x1e -/* 80323CD0 003208B0 38 86 00 03 */ addi r4, r6, 3 -/* 80323CD4 003208B4 38 63 00 03 */ addi r3, r3, 3 -/* 80323CD8 003208B8 4D 82 00 20 */ beqlr -.L_80323CDC: -/* 80323CDC 003208BC 8C 04 00 01 */ lbzu r0, 1(r4) -/* 80323CE0 003208C0 34 A5 FF FF */ addic. r5, r5, -1 -/* 80323CE4 003208C4 9C 03 00 01 */ stbu r0, 1(r3) -/* 80323CE8 003208C8 40 82 FF F4 */ bne .L_80323CDC -/* 80323CEC 003208CC 4E 80 00 20 */ blr diff --git a/configure.py b/configure.py index 8f3869c900..f3089cf66d 100755 --- a/configure.py +++ b/configure.py @@ -1176,7 +1176,7 @@ def RuntimeLib(lib_name: str, objects: Objects) -> LibDict: Object(Matching, "MSL/ctype.c"), Object(NonMatching, "MSL/direct_io.c"), Object(Matching, "MSL/cstring.c"), - Object(NonMatching, "MSL/mem_funcs.c"), + Object(Matching, "MSL/mem_funcs.c"), Object(NonMatching, "MSL/printf.c"), Object(Matching, "MSL/rand.c"), Object(Matching, "MSL/string.c"), diff --git a/obj_files.mk b/obj_files.mk index 9521cd1404..576136190c 100644 --- a/obj_files.mk +++ b/obj_files.mk @@ -785,7 +785,7 @@ TEXT_O_FILES +=\ $(BUILD_DIR)/src/MSL/ctype.c.o\ $(BUILD_DIR)/asm/MSL/direct_io.s.o\ $(BUILD_DIR)/src/MSL/cstring.c.o\ - $(BUILD_DIR)/asm/MSL/mem_funcs.s.o\ + $(BUILD_DIR)/src/MSL/mem_funcs.c.o\ $(BUILD_DIR)/asm/MSL/printf.s.o\ $(BUILD_DIR)/src/MSL/rand.c.o\ $(BUILD_DIR)/src/MSL/string.c.o\ diff --git a/src/MSL/mem_funcs.c b/src/MSL/mem_funcs.c index 8b13789179..128367f90c 100644 --- a/src/MSL/mem_funcs.c +++ b/src/MSL/mem_funcs.c @@ -1 +1,231 @@ +#include "mem_funcs.h" +/* Low-level copy helpers. Each takes (dst, src, n) and copies n bytes, either forward or backward (the rev variants). n is never range-checked in this file; presumably callers only pass sufficiently large counts - confirm against the callers. */ +#pragma ANSI_strict off +#pragma defer_codegen on +/* cps and cpd view src and dst as byte pointers; lps and lpd view them as unsigned long pointers. The functions assign to and step these cast expressions directly, which standard C does not permit; presumably the ANSI_strict off pragma above is what allows it - confirm against the compiler documentation. deref_auto_inc(p) advances p by one element and then dereferences it, which is why pointers are biased back by one element before each forward loop. */ +#define cps ((unsigned char*) src) +#define cpd ((unsigned char*) dst) +#define lps ((unsigned long*) src) +#define lpd ((unsigned long*) dst) +#define deref_auto_inc(p) *++(p) +/* Forward copy of n bytes from src to dst: single bytes until dst reaches a 4-byte boundary, then groups of eight words (32 bytes), then single words, then the trailing bytes. Words are loaded straight from src after the byte prologue, so this presumably expects src and dst to share the same alignment - confirm against the caller. */ +void __copy_longs_aligned(void* dst, const void* src, size_t 
n) +{ + unsigned long i; + + i = (-(unsigned long) dst) & 3; /* bytes needed to bring dst up to a 4-byte boundary */ + + cps = ((unsigned char*) src) - 1; + cpd = ((unsigned char*) dst) - 1; /* both biased by one byte because deref_auto_inc pre-increments */ + + if (i) { + n -= i; /* no check that n >= i */ + + do { + deref_auto_inc(cpd) = deref_auto_inc(cps); + } while (--i); + } + + src = ((unsigned long*) (cps + 1)) - 1; + dst = ((unsigned long*) (cpd + 1)) - 1; /* drop the byte bias, then bias by one word for the word loops */ + + i = n >> 5; /* number of 32-byte groups */ + + if (i) { + do { + deref_auto_inc(lpd) = deref_auto_inc(lps); + deref_auto_inc(lpd) = deref_auto_inc(lps); + deref_auto_inc(lpd) = deref_auto_inc(lps); + deref_auto_inc(lpd) = deref_auto_inc(lps); + deref_auto_inc(lpd) = deref_auto_inc(lps); + deref_auto_inc(lpd) = deref_auto_inc(lps); + deref_auto_inc(lpd) = deref_auto_inc(lps); + deref_auto_inc(lpd) = deref_auto_inc(lps); + } while (--i); + } + + i = (n & 31) >> 2; /* whole words left after the 32-byte groups */ + + if (i) { + do { + deref_auto_inc(lpd) = deref_auto_inc(lps); + } while (--i); + } + + cps = ((unsigned char*) (lps + 1)) - 1; + cpd = ((unsigned char*) (lpd + 1)) - 1; /* back to byte pointers, again biased by one byte */ + + n &= 3; /* trailing bytes */ + + if (n) { + do { + deref_auto_inc(cpd) = deref_auto_inc(cps); + } while (--n); + } + + return; +} +/* Backward copy of n bytes: starts at src + n and dst + n and walks down. Single bytes are copied until the dst cursor is on a 4-byte boundary, then groups of eight words, then single words, then the remaining bytes. Presumably expects src and dst to share the same alignment - confirm against the caller. */ +void __copy_longs_rev_aligned(void* dst, const void* src, size_t n) +{ + unsigned long i; + + cps = ((unsigned char*) src) + n; + cpd = ((unsigned char*) dst) + n; /* one past the last byte; the loops pre-decrement */ + + i = ((unsigned long) cpd) & 3; /* bytes above the highest 4-byte boundary inside dst */ + + if (i) { + n -= i; /* no check that n >= i */ + + do { + *--cpd = *--cps; + } while (--i); + } + + i = n >> 5; /* number of 32-byte groups */ + + if (i) { + do { + *--lpd = *--lps; + *--lpd = *--lps; + *--lpd = *--lps; + *--lpd = *--lps; + *--lpd = *--lps; + *--lpd = *--lps; + *--lpd = *--lps; + *--lpd = *--lps; + } while (--i); + } + + i = (n & 31) >> 2; /* whole words left after the 32-byte groups */ + + if (i) { + do { + *--lpd = *--lps; + } while (--i); + } + + n &= 3; /* remaining bytes at the low end */ + + if (n) { + do { + *--cpd = *--cps; + } while (--n); + } + + return; +} +/* Forward copy of n bytes that only issues word loads from word-aligned source addresses: after dst is byte-aligned, the source pointer is rounded down to a word boundary and each stored word is merged from two neighbouring source words with shifts. Going by the name, this is meant for a src whose alignment differs from dst - confirm against the caller. */ +void __copy_longs_unaligned(void* dst, const void* src, size_t n) +{ + unsigned long i, v1, v2; + unsigned int src_offset, left_shift, right_shift; + + i = (-(unsigned long) dst) & 3; /* bytes needed to bring dst up to a 4-byte boundary */ + + cps = ((unsigned char*) src) - 1; + cpd = ((unsigned char*) dst) - 1; /* both biased by one byte because deref_auto_inc pre-increments */ + + if (i) { + n -= i; /* no check that n >= i */ + + do { + 
deref_auto_inc(cpd) = deref_auto_inc(cps); + } while (--i); + } + + src_offset = ((unsigned int) (cps + 1)) & 3; /* offset of the next source byte within its word */ + + left_shift = src_offset << 3; + right_shift = 32 - left_shift; /* NOTE(review): this is 32 when src_offset is 0, and shifting a 32-bit value by 32 is undefined in C - presumably never called with a word-aligned source here; confirm */ + + cps -= src_offset; /* round the source pointer down to a word boundary */ + + lps = ((unsigned long*) (cps + 1)) - 1; + lpd = ((unsigned long*) (cpd + 1)) - 1; /* drop the byte bias, then bias by one word */ + + i = n >> 3; /* iterations of the two-word loop below */ + + v1 = deref_auto_inc(lps); /* prime with the first source word */ + + do { + v2 = deref_auto_inc(lps); + deref_auto_inc(lpd) = (v1 << left_shift) | (v2 >> right_shift); + v1 = deref_auto_inc(lps); + deref_auto_inc(lpd) = (v2 << left_shift) | (v1 >> right_shift); + } while (--i); /* NOTE(review): no i == 0 guard, the body always runs at least once - presumably callers guarantee at least 8 bytes remain; confirm */ + + if (n & 4) { /* one more whole word to store */ + v2 = deref_auto_inc(lps); + deref_auto_inc(lpd) = (v1 << left_shift) | (v2 >> right_shift); + } + + cps = ((unsigned char*) (lps + 1)) - 1; + cpd = ((unsigned char*) (lpd + 1)) - 1; /* back to byte pointers, again biased by one byte */ + + n &= 3; /* trailing bytes */ + + if (n) { + cps -= 4 - src_offset; /* undo the word read-ahead so the byte loop resumes inside the last word loaded */ + do { + deref_auto_inc(cpd) = deref_auto_inc(cps); + } while (--n); + } + + return; +} +/* Backward counterpart of __copy_longs_unaligned: starts at src + n and dst + n, copies single bytes until the dst cursor is on a 4-byte boundary, then moves the source pointer up to a word boundary and stores words merged from two neighbouring source words while walking down. */ +void __copy_longs_rev_unaligned(void* dst, const void* src, size_t n) +{ + unsigned long i, v1, v2; + unsigned int src_offset, left_shift, right_shift; + + cps = ((unsigned char*) src) + n; + cpd = ((unsigned char*) dst) + n; /* one past the last byte; the loops pre-decrement */ + + i = ((unsigned long) cpd) & 3; /* bytes above the highest 4-byte boundary inside dst */ + + if (i) { + n -= i; /* no check that n >= i */ + + do { + *--cpd = *--cps; + } while (--i); + } + + src_offset = ((unsigned int) cps) & 3; /* offset of the source cursor within its word */ + + left_shift = src_offset << 3; + right_shift = 32 - left_shift; /* NOTE(review): this is 32 when src_offset is 0, and shifting a 32-bit value by 32 is undefined in C - presumably never called with a word-aligned source here; confirm */ + + cps += 4 - src_offset; /* move up to the word boundary above the cursor (a full word when src_offset is 0) */ + + i = n >> 3; /* iterations of the two-word loop below */ + + v1 = *--lps; /* prime with the highest source word */ + + do { + v2 = *--lps; + *--lpd = (v2 << left_shift) | (v1 >> right_shift); + v1 = *--lps; + *--lpd = (v1 << left_shift) | (v2 >> right_shift); + } while (--i); /* NOTE(review): no i == 0 guard, the body always runs at least once - presumably callers guarantee at least 8 bytes remain; confirm */ + + if (n & 4) { /* one more whole word to store */ + v2 = *--lps; + *--lpd = (v2 << left_shift) | (v1 >> right_shift); + } + + n &= 3; /* remaining bytes at the low end */ + + if (n) { + cps += src_offset; /* undo the word read-ahead so the byte loop resumes inside the last word loaded */ + do { + *--cpd = *--cps; + } while (--n); + } + + return; +}