From bb1514815d960ce87a7fcb4f4e451a70a01494f6 Mon Sep 17 00:00:00 2001
From: xicilion
Date: Wed, 24 Jan 2024 05:41:00 +0800
Subject: [PATCH] exlib, feat: add loong64 context support.

---
Review notes (this area is commentary, not part of the commit):

 * setcontext.S: $fs0-$fs7 are reloaded from FP_REG_OFFSET(0)..(7), the
   slots that getcontext.S and swapcontext.S store them to. __fpregs has
   8 entries, so the earlier indices 24..31 addressed memory past the
   array.
 * makecontext.S: the negative-argc guard uses the signed `blt`. The
   unsigned `bltu $a2, $zero, ...` compares against zero and is never
   taken.
 * Both fixes keep every line count, so the diffstat and hunk headers
   below are unchanged. The `index` lines of the two edited files still
   name the blob ids of the unedited contents.
 * NOTE(review): in this copy the `From:` address and the operands of
   the bare `#include` lines in ctx/bits.h (two) and defs.h (one) are
   missing. Restore them from the original commit before applying;
   they are reproduced here exactly as found.

 exlib/src/ctx/common/common-defs.h   |   1 +
 exlib/src/ctx/loong64/ctx/bits.h     |  18 +++++
 exlib/src/ctx/loong64/defs.h         |  94 +++++++++++++++++++++++
 exlib/src/ctx/loong64/getcontext.S   |  43 +++++++++++
 exlib/src/ctx/loong64/makecontext.S  |  96 ++++++++++++++++++++++++
 exlib/src/ctx/loong64/setcontext.S   |  52 +++++++++++++
 exlib/src/ctx/loong64/startcontext.S |  21 ++++++
 exlib/src/ctx/loong64/swapcontext.S  | 108 +++++++++++++++++++++++++++
 8 files changed, 433 insertions(+)
 create mode 100644 exlib/src/ctx/loong64/ctx/bits.h
 create mode 100644 exlib/src/ctx/loong64/defs.h
 create mode 100644 exlib/src/ctx/loong64/getcontext.S
 create mode 100644 exlib/src/ctx/loong64/makecontext.S
 create mode 100644 exlib/src/ctx/loong64/setcontext.S
 create mode 100644 exlib/src/ctx/loong64/startcontext.S
 create mode 100644 exlib/src/ctx/loong64/swapcontext.S

diff --git a/exlib/src/ctx/common/common-defs.h b/exlib/src/ctx/common/common-defs.h
index f52a235fa..b98184bf2 100644
--- a/exlib/src/ctx/common/common-defs.h
+++ b/exlib/src/ctx/common/common-defs.h
@@ -56,3 +56,4 @@
 #endif
 
 #define REG_OFFSET(__reg) (MCONTEXT_GREGS + ((__reg)*REG_SZ))
+#define FP_REG_OFFSET(__reg) (MCONTEXT_FPREGS + ((__reg)*FP_REG_SZ))
diff --git a/exlib/src/ctx/loong64/ctx/bits.h b/exlib/src/ctx/loong64/ctx/bits.h
new file mode 100644
index 000000000..367fe65ee
--- /dev/null
+++ b/exlib/src/ctx/loong64/ctx/bits.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include
+#include
+
+typedef struct {
+    unsigned long long __pc;
+    unsigned long long __gregs[32];
+    unsigned int __flags;
+    unsigned long long __fpregs[8] __attribute__((__aligned__(16)));
+} ctx_mcontext_t;
+
+typedef struct ctx_ucontext {
+    unsigned long uc_flags;
+    struct ctx_ucontext* uc_link;
+    stack_t uc_stack;
+    ctx_mcontext_t uc_mcontext;
+} ctx_ucontext_t;
diff --git a/exlib/src/ctx/loong64/defs.h b/exlib/src/ctx/loong64/defs.h
new file mode 100644
index 000000000..5954b108f
--- /dev/null
+++ b/exlib/src/ctx/loong64/defs.h
@@ -0,0 +1,94 @@
+#ifndef __ARCH_LOONGARCH64_DEFS_H
+#define __ARCH_LOONGARCH64_DEFS_H
+
+#define REG_SZ (8)
+#define FP_REG_SZ (8)
+
+#define REG_R0 (0)
+#define REG_R1 (1)
+#define REG_R2 (2)
+#define REG_R3 (3)
+#define REG_R4 (4)
+#define REG_R5 (5)
+#define REG_R6 (6)
+#define REG_R7 (7)
+#define REG_R8 (8)
+#define REG_R9 (9)
+#define REG_R10 (10)
+#define REG_R11 (11)
+#define REG_R12 (12)
+#define REG_R13 (13)
+#define REG_R14 (14)
+#define REG_R15 (15)
+#define REG_R16 (16)
+#define REG_R17 (17)
+#define REG_R18 (18)
+#define REG_R19 (19)
+#define REG_R20 (20)
+#define REG_R21 (21)
+#define REG_R22 (22)
+#define REG_R23 (23)
+#define REG_R24 (24)
+#define REG_R25 (25)
+#define REG_R26 (26)
+#define REG_R27 (27)
+#define REG_R28 (28)
+#define REG_R29 (29)
+#define REG_R30 (30)
+#define REG_R31 (31)
+
+/* $a0 is $4 , also $v0, same as $5, $a1 and $v1*/
+#define REG_A0 (4)
+
+/* stack pointer is actually $3 */
+#define REG_SP (3)
+
+/* frame pointer is actually $22 */
+#define REG_FP (22)
+
+/* offset to __gregs in ucontext_t */
+#define MCONTEXT_GREGS (48)
+
+/* offset to __fpregs in ucontext_t */
+#define MCONTEXT_FPREGS (312)
+
+/* offset to PC in ucontext_t */
+#define MCONTEXT_PC (40)
+
+/* offset to uc_link in ucontext_t */
+#define UCONTEXT_UC_LINK (8)
+
+/* offset to uc_stack.ss_sp in ucontext_t */
+#define UCONTEXT_STACK_PTR (16)
+
+/* offset to uc_stack.ss_size in ucontext_t */
+#define UCONTEXT_STACK_SIZE (32)
+
+/* offset to uc_sigmask in ucontext_t */
+#define UCONTEXT_SIGMASK (40)
+
+/* Stack alignment, from Kernel source */
+#define ALSZ 15
+#define ALMASK ~15
+#define FRAMESZ (((LOCALSZ * REG_SZ) + ALSZ) & ALMASK)
+
+#define PUSH_FRAME(__proc) \
+	addi.d	$sp, $sp, -FRAMESZ;
+
+#define POP_FRAME(__proc) \
+	addi.d	$sp, $sp, FRAMESZ;
+
+#define _NSIG8 (4)
+
+#define SIG_NOP 0 /* 0 is unused to catch errors */
+#define SIG_BLOCK 1 /* Block signals. */
+#define SIG_UNBLOCK 2 /* Unblock signals. */
+#define SIG_SETMASK 3 /* Set the set of blocked signals. */
+
+#define __NR_rt_sigprocmask 135
+
+#define SYS_ify(syscall_name) __NR_##syscall_name
+
+#include
+
+#endif
diff --git a/exlib/src/ctx/loong64/getcontext.S b/exlib/src/ctx/loong64/getcontext.S
new file mode 100644
index 000000000..3e5601c25
--- /dev/null
+++ b/exlib/src/ctx/loong64/getcontext.S
@@ -0,0 +1,43 @@
+#define LOCALSZ (1)
+
+#include "defs.h"
+
+ALIAS(getcontext, ctx_getcontext)
+
+FUNC(ctx_getcontext)
+	/* copy $sp, $fp to temporary registers so we don't clobber them */
+	move	$a2, $sp
+	move	$a3, $fp
+
+	PUSH_FRAME(ctx_getcontext)
+
+	/* set registers */
+	st.d	$s0, $a0, REG_OFFSET(23)
+	st.d	$s1, $a0, REG_OFFSET(24)
+	st.d	$s2, $a0, REG_OFFSET(25)
+	st.d	$s3, $a0, REG_OFFSET(26)
+	st.d	$s4, $a0, REG_OFFSET(27)
+	st.d	$s5, $a0, REG_OFFSET(28)
+	st.d	$s6, $a0, REG_OFFSET(29)
+	st.d	$s7, $a0, REG_OFFSET(30)
+	st.d	$s8, $a0, REG_OFFSET(31)
+
+	st.d	$a2, $a0, REG_OFFSET(3)
+	st.d	$a3, $a0, REG_OFFSET(22)
+	st.d	$ra, $a0, REG_OFFSET(1)
+
+	st.d	$ra, $a0, (MCONTEXT_PC)
+
+	fst.d	$fs0, $a0, FP_REG_OFFSET(0)
+	fst.d	$fs1, $a0, FP_REG_OFFSET(1)
+	fst.d	$fs2, $a0, FP_REG_OFFSET(2)
+	fst.d	$fs3, $a0, FP_REG_OFFSET(3)
+	fst.d	$fs4, $a0, FP_REG_OFFSET(4)
+	fst.d	$fs5, $a0, FP_REG_OFFSET(5)
+	fst.d	$fs6, $a0, FP_REG_OFFSET(6)
+	fst.d	$fs7, $a0, FP_REG_OFFSET(7)
+
+	POP_FRAME(ctx_getcontext)
+
+	jr	$ra
+END(ctx_getcontext)
diff --git a/exlib/src/ctx/loong64/makecontext.S b/exlib/src/ctx/loong64/makecontext.S
new file mode 100644
index 000000000..5602af14a
--- /dev/null
+++ b/exlib/src/ctx/loong64/makecontext.S
@@ -0,0 +1,96 @@
+#include "defs.h"
+
+#define LOCALSZ (6)
+
+#define A3_OFF (FRAMESZ - (5 * REG_SZ))
+#define A4_OFF (FRAMESZ - (4 * REG_SZ))
+#define A5_OFF (FRAMESZ - (3 * REG_SZ))
+#define A6_OFF (FRAMESZ - (2 * REG_SZ))
+#define A7_OFF (FRAMESZ - (1 * REG_SZ))
+
+ALIAS(makecontext, ctx_makecontext)
+
+FUNC(ctx_makecontext)
+	PUSH_FRAME(ctx_makecontext)
+
+	move	$t5, $a0
+	move	$t4, $a1
+
+	/* store $a3 through $a7 to the stack frame. */
+	st.d	$a3, $sp, A3_OFF
+	st.d	$a4, $sp, A4_OFF
+	st.d	$a5, $sp, A5_OFF
+	st.d	$a6, $sp, A6_OFF
+	st.d	$a7, $sp, A7_OFF
+
+	/* set $zero in the mcontext to 1. */
+	addi.d	$v0, $zero, 1
+	st.d	$v0, $t5, REG_OFFSET(0)
+
+	/* ensure the stack is aligned on a quad-word boundary. */
+	ld.d	$t0, $t5, UCONTEXT_STACK_PTR
+	ld.d	$t2, $t5, UCONTEXT_STACK_SIZE
+	/* the third argument(from zero), that's the first argument of func() */
+	addi.d	$t1, $sp, A3_OFF
+	add.d	$t0, $t0, $t2
+
+	addi.d	$t7, $zero, ALMASK
+	and	$t0, $t0, $t7
+
+	/* number of args */
+	beq	$a2, $zero, no_more_arguments
+	blt	$a2, $zero, no_more_arguments
+
+	/* store register arguments. */
+	addi.d	$t2, $t5, MCONTEXT_GREGS + (4 * REG_SZ)
+	move	$t3, $zero
+
+store_register_arg:
+	addi.d	$t3, $t3, 1
+	ld.d	$v1, $t1, 0
+	addi.d	$t1, $t1, REG_SZ
+	st.d	$v1, $t2, 0
+	addi.d	$t2, $t2, REG_SZ
+	addi.d	$t6, $zero, 8
+	bltu	$t3, $t6, store_register_arg
+	bgeu	$t3, $a2, no_more_arguments
+
+	/* make room for stack arguments. */
+	sub.d	$t2, $a2, $t3
+
+	addi.d	$t6, $zero, 3
+	sll.d	$t2, $t2, $t6
+
+	sub.d	$t0, $t0, $t2
+
+	addi.d	$t6, $zero, ALMASK
+	and	$t0, $t0, $t6
+
+	/* store stack arguments. */
+	move	$t2, $t0
+
+store_stack_arg:
+	addi.d	$t3, $t3, 1
+	ld.d	$v1, $t1, 0
+	addi.d	$t1, $t1, REG_SZ
+	st.d	$v1, $t2, 0
+	addi.d	$t2, $t2, REG_SZ
+	bltu	$t3, $a2, store_stack_arg
+
+no_more_arguments:
+	/* trampoline setup. */
+	la.got	$t8, ctx_trampoline
+
+	ld.d	$v1, $t5, UCONTEXT_UC_LINK
+	st.d	$v1, $t5, REG_OFFSET(23)
+
+	st.d	$t0, $t5, REG_OFFSET(3)
+
+	st.d	$t8, $t5, REG_OFFSET(1)
+
+	st.d	$t4, $t5, MCONTEXT_PC
+
+	POP_FRAME(ctx_makecontext)
+
+	jr	$ra
+END(ctx_makecontext)
diff --git a/exlib/src/ctx/loong64/setcontext.S b/exlib/src/ctx/loong64/setcontext.S
new file mode 100644
index 000000000..16f65b25f
--- /dev/null
+++ b/exlib/src/ctx/loong64/setcontext.S
@@ -0,0 +1,52 @@
+#define LOCALSZ (1)
+
+#include "defs.h"
+
+ALIAS(setcontext, ctx_setcontext)
+
+FUNC(ctx_setcontext)
+	PUSH_FRAME(ctx_setcontext)
+
+	/* move the context to $v0, in LA, $v0 = $a0 = $4 */
+	move	$t5, $a0
+
+	fld.d	$fs0, $t5, FP_REG_OFFSET(0)
+	fld.d	$fs1, $t5, FP_REG_OFFSET(1)
+	fld.d	$fs2, $t5, FP_REG_OFFSET(2)
+	fld.d	$fs3, $t5, FP_REG_OFFSET(3)
+	fld.d	$fs4, $t5, FP_REG_OFFSET(4)
+	fld.d	$fs5, $t5, FP_REG_OFFSET(5)
+	fld.d	$fs6, $t5, FP_REG_OFFSET(6)
+	fld.d	$fs7, $t5, FP_REG_OFFSET(7)
+
+	/* load the registers */
+	ld.d	$a0, $t5, REG_OFFSET(4)
+	ld.d	$a1, $t5, REG_OFFSET(5)
+	ld.d	$a2, $t5, REG_OFFSET(6)
+	ld.d	$a3, $t5, REG_OFFSET(7)
+	ld.d	$a4, $t5, REG_OFFSET(8)
+	ld.d	$a5, $t5, REG_OFFSET(9)
+	ld.d	$a6, $t5, REG_OFFSET(10)
+	ld.d	$a7, $t5, REG_OFFSET(11)
+
+	ld.d	$s0, $t5, REG_OFFSET(23)
+	ld.d	$s1, $t5, REG_OFFSET(24)
+	ld.d	$s2, $t5, REG_OFFSET(25)
+	ld.d	$s3, $t5, REG_OFFSET(26)
+	ld.d	$s4, $t5, REG_OFFSET(27)
+	ld.d	$s5, $t5, REG_OFFSET(28)
+	ld.d	$s6, $t5, REG_OFFSET(29)
+	ld.d	$s7, $t5, REG_OFFSET(30)
+	ld.d	$s8, $t5, REG_OFFSET(31)
+
+	ld.d	$sp, $t5, REG_OFFSET(3)
+	ld.d	$fp, $t5, REG_OFFSET(22)
+	ld.d	$ra, $t5, REG_OFFSET(1)
+
+	ld.d	$t8, $t5, (MCONTEXT_PC)
+
+	jr	$t8
+	move	$v0, $zero
+
+	POP_FRAME(ctx_setcontext)
+END(ctx_setcontext)
diff --git a/exlib/src/ctx/loong64/startcontext.S b/exlib/src/ctx/loong64/startcontext.S
new file mode 100644
index 000000000..aa4fe6e6c
--- /dev/null
+++ b/exlib/src/ctx/loong64/startcontext.S
@@ -0,0 +1,21 @@
+#define LOCALSZ (4)
+
+#include "defs.h"
+
+FUNC(ctx_trampoline)
+
+	/* call setcontext */
+	move	$a0, $s0
+	/* we receive our initial ucontext in $s0, so if $s0 is nil, bail */
+	beqz	$s0, no_linked_context
+
+	la.got	$t8, ctx_setcontext
+
+	jr	$t8
+
+no_linked_context:
+	move	$a0, $zero
+	la.global	$t8, exit
+	jr	$t8
+
+END(ctx_trampoline)
diff --git a/exlib/src/ctx/loong64/swapcontext.S b/exlib/src/ctx/loong64/swapcontext.S
new file mode 100644
index 000000000..8ebbf6f98
--- /dev/null
+++ b/exlib/src/ctx/loong64/swapcontext.S
@@ -0,0 +1,108 @@
+#define LOCALSZ (4)
+
+#include "defs.h"
+
+#define A1_OFFSET (FRAMESZ - (1 * REG_SZ))
+
+ALIAS(swapcontext, ctx_swapcontext)
+
+FUNC(ctx_swapcontext)
+	/* copy $sp, $fp to temporary registers so we don't clobber them */
+	move	$a3, $sp
+	move	$a4, $fp
+
+	move	$t5, $a0
+
+	PUSH_FRAME(ctx_swapcontext)
+
+	/* set registers */
+	st.d	$a0, $t5, REG_OFFSET(4)
+	st.d	$a1, $t5, REG_OFFSET(5)
+	st.d	$a2, $t5, REG_OFFSET(6)
+	st.d	$a3, $t5, REG_OFFSET(7)
+	st.d	$a4, $t5, REG_OFFSET(8)
+	st.d	$a5, $t5, REG_OFFSET(9)
+	st.d	$a6, $t5, REG_OFFSET(10)
+	st.d	$a7, $t5, REG_OFFSET(11)
+
+	st.d	$x, $t5, REG_OFFSET(21)
+
+	st.d	$s0, $t5, REG_OFFSET(23)
+	st.d	$s1, $t5, REG_OFFSET(24)
+	st.d	$s2, $t5, REG_OFFSET(25)
+	st.d	$s3, $t5, REG_OFFSET(26)
+	st.d	$s4, $t5, REG_OFFSET(27)
+	st.d	$s5, $t5, REG_OFFSET(28)
+	st.d	$s6, $t5, REG_OFFSET(29)
+	st.d	$s7, $t5, REG_OFFSET(30)
+	st.d	$s8, $t5, REG_OFFSET(31)
+
+	st.d	$a3, $t5, REG_OFFSET(3)
+	st.d	$a4, $t5, REG_OFFSET(22)
+	st.d	$ra, $t5, REG_OFFSET(1)
+
+	st.d	$ra, $t5, (MCONTEXT_PC)
+
+	fst.d	$fs0, $t5, FP_REG_OFFSET(0)
+	fst.d	$fs1, $t5, FP_REG_OFFSET(1)
+	fst.d	$fs2, $t5, FP_REG_OFFSET(2)
+	fst.d	$fs3, $t5, FP_REG_OFFSET(3)
+	fst.d	$fs4, $t5, FP_REG_OFFSET(4)
+	fst.d	$fs5, $t5, FP_REG_OFFSET(5)
+	fst.d	$fs6, $t5, FP_REG_OFFSET(6)
+	fst.d	$fs7, $t5, FP_REG_OFFSET(7)
+
+	/* copy new context address in $a1 to stack */
+	st.d	$a1, $sp, A1_OFFSET
+
+	/* load new context address into $v0 */
+	ld.d	$t4, $sp, A1_OFFSET
+
+	fld.d	$fs0, $t4, FP_REG_OFFSET(0)
+	fld.d	$fs1, $t4, FP_REG_OFFSET(1)
+	fld.d	$fs2, $t4, FP_REG_OFFSET(2)
+	fld.d	$fs3, $t4, FP_REG_OFFSET(3)
+	fld.d	$fs4, $t4, FP_REG_OFFSET(4)
+	fld.d	$fs5, $t4, FP_REG_OFFSET(5)
+	fld.d	$fs6, $t4, FP_REG_OFFSET(6)
+	fld.d	$fs7, $t4, FP_REG_OFFSET(7)
+
+	/* load the registers */
+	ld.d	$a0, $t4, REG_OFFSET(4)
+	ld.d	$a1, $t4, REG_OFFSET(5)
+	ld.d	$a2, $t4, REG_OFFSET(6)
+	ld.d	$a3, $t4, REG_OFFSET(7)
+	ld.d	$a4, $t4, REG_OFFSET(8)
+	ld.d	$a5, $t4, REG_OFFSET(9)
+	ld.d	$a6, $t4, REG_OFFSET(10)
+	ld.d	$a7, $t4, REG_OFFSET(11)
+
+	ld.d	$x, $t4, REG_OFFSET(21)
+
+	ld.d	$s0, $t4, REG_OFFSET(23)
+	ld.d	$s1, $t4, REG_OFFSET(24)
+	ld.d	$s2, $t4, REG_OFFSET(25)
+	ld.d	$s3, $t4, REG_OFFSET(26)
+	ld.d	$s4, $t4, REG_OFFSET(27)
+	ld.d	$s5, $t4, REG_OFFSET(28)
+	ld.d	$s6, $t4, REG_OFFSET(29)
+	ld.d	$s7, $t4, REG_OFFSET(30)
+	ld.d	$s8, $t4, REG_OFFSET(31)
+
+	ld.d	$sp, $t4, REG_OFFSET(3)
+	ld.d	$fp, $t4, REG_OFFSET(22)
+	ld.d	$ra, $t4, REG_OFFSET(1)
+
+	ld.d	$t8, $t4, (MCONTEXT_PC)
+
+	jr	$t8
+	move	$v0, $zero
+
+fail:
+	la.global	$t8, exit
+
+	POP_FRAME(ctx_swapcontext)
+
+	jirl	$ra, $t8, 0
+	move	$v0, $zero
+END(ctx_swapcontext)