From 978b1ec4ce8d354b02427091c55af7f388347c8b Mon Sep 17 00:00:00 2001 From: Giacomo Debidda Date: Fri, 19 Jul 2024 17:45:43 +0200 Subject: [PATCH] Allow to query RISC-V ELF binaries (#23) * add support for RISC-V 64 ELF binaries * add support for RISC-V 32-bit ELF binaries * add hello world example for RISC-V 32 and 64 bit * Add TEST support for flake and simplify if conditions --------- Co-authored-by: Farid Zakaria --- examples/hello-riscv/README.md | 57 ++++++++++++++++++++++++++++++++++ examples/hello-riscv/hello.s | 41 ++++++++++++++++++++++++ flake.nix | 2 ++ sqlelf/elf.py | 15 +++++++-- 4 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 examples/hello-riscv/README.md create mode 100644 examples/hello-riscv/hello.s diff --git a/examples/hello-riscv/README.md b/examples/hello-riscv/README.md new file mode 100644 index 0000000..cf5109e --- /dev/null +++ b/examples/hello-riscv/README.md @@ -0,0 +1,57 @@ +# Hello World in RISC-V + +This example shows how to assemble, link and query a simple Hello World program for RISC-V. + +If you want to run this example, you will need a cross-compiling toolchain like the [RISC-V GNU Compiler Toolchain](https://github.com/riscv-collab/riscv-gnu-toolchain) and [QEMU](https://www.qemu.org/). 
+ +## 32-bit + +Assemble `hello.s` into an object file for the RISC-V 32-bit base integer instruction set (`-march rv32i`), little-endian (`-mlittle-endian`), with an ABI that follows the convention where `int`, `long` and `pointer` types are all 32-bit, with debug symbols included in the object file (`-g`): + +```sh +riscv64-elf-as -march rv32i -mabi ilp32 -mlittle-endian -o hello.o hello.s -g +``` + +Link the object file into a RISC-V 32-bit little-endian executable (`-m elf32lriscv`), with the symbol `_start` as its entry point: + +```sh +riscv64-elf-ld -e _start -m elf32lriscv -o exe --verbose hello.o +``` + +Execute the RISC-V ELF in QEMU: + +```sh +qemu-riscv32 exe +``` + +Double check the disassembly: + +```sh +riscv64-elf-objdump --disassemble exe +``` + +## 64-bit + +Assemble `hello.s` into an object file for the RISC-V 64-bit base integer instruction set (`-march rv64i`), little-endian (`-mlittle-endian`), with an ABI that follows the convention where `long` and `pointer` types are all 64-bit, with debug symbols included in the object file (`-g`): + +```sh +riscv64-elf-as -march rv64i -mabi lp64 -mlittle-endian -o hello.o hello.s -g +``` + +Link the object file into a RISC-V 64-bit little-endian executable (`-m elf64lriscv`), with the symbol `_start` as its entry point: + +```sh +riscv64-elf-ld -e _start -m elf64lriscv -o exe --verbose hello.o +``` + +Execute the RISC-V ELF in QEMU: + +```sh +qemu-riscv64 exe +``` + +Double check the disassembly: + +```sh +riscv64-elf-objdump --disassemble exe +``` diff --git a/examples/hello-riscv/hello.s b/examples/hello-riscv/hello.s new file mode 100644 index 0000000..b0c98c5 --- /dev/null +++ b/examples/hello-riscv/hello.s @@ -0,0 +1,41 @@ +.section .text +.globl _start +.equ STDOUT, 1 # File descriptor 1 is standard output (stdout) +.equ WRITE, 64 # Linux write syscall +.equ EXIT, 93 # Linux exit syscall +.equ EXIT_CODE_SUCCESS, 0 + +_start: + # In C, a list of parameters is passed to the kernel in a certain 
sequence. + # For the write system call, the parameters are structured as follows: + # ssize_t write(int fd, const void *buf, size_t count) + # The three parameters passed are: + # 1. a file descriptor (e.g. 1 for stdout) + # 2. a pointer to a character buffer (i.e. a string) + # 3. the number of characters in that string to be written. + li a0, STDOUT + la a1, buf_begin + # Load a byte from memory, zero-pad it (to a 64-bit value in RV64), and store + # the unsigned value in the destination register a2. + lbu a2, buf_size + + # Store the system call number in register a7. + li a7, WRITE + # Switch to RISC-V supervisor mode (the Linux kernel runs in this mode) and + # make a request using the value stored in a7 as the system call number. + ecall + + li a0, EXIT_CODE_SUCCESS + li a7, EXIT + ecall + +# The .rodata section of an ELF binary contains constant values. The .rodata +# section is marked as read-only, so these values cannot change at runtime. +.section .rodata + +buf_begin: + .string "Hello World!\n" +buf_size: + # Current address (the .) minus address of buf_begin = length of buffer. + # We store the result in an 8-bit value using the .byte directive. 
+ .byte .-buf_begin diff --git a/flake.nix b/flake.nix index 199848c..900962d 100644 --- a/flake.nix +++ b/flake.nix @@ -29,6 +29,8 @@ with nixpkgsFor.${system}; { default = mkShellNoCC { venvDir = "./.venv"; + # needed for tests + TEST_BINARY = "${coreutils}/bin/ls"; packages = [ python3Packages.pip # This execute some shell code to initialize a venv in $venvDir before diff --git a/sqlelf/elf.py b/sqlelf/elf.py index 44404bc..1d0f51e 100644 --- a/sqlelf/elf.py +++ b/sqlelf/elf.py @@ -225,14 +225,25 @@ def instructions_generator() -> Iterator[dict[str, Any]]: def mode(binary: lief_ext.Binary) -> int: - if binary.header.identity_class == lief.ELF.ELF_CLASS.CLASS64: - return cast(int, capstone.CS_MODE_64) + machine_type = binary.header.machine_type + identity_class = binary.header.identity_class + if machine_type == lief.ELF.ARCH.RISCV: + if identity_class == lief.ELF.ELF_CLASS.CLASS32: + return cast(int, capstone.CS_MODE_RISCV32) + if machine_type == lief.ELF.ARCH.RISCV: + if identity_class == lief.ELF.ELF_CLASS.CLASS64: + return cast(int, capstone.CS_MODE_RISCV64) + if machine_type == lief.ELF.ARCH.x86_64: + if identity_class == lief.ELF.ELF_CLASS.CLASS64: + return cast(int, capstone.CS_MODE_64) raise RuntimeError(f"Unknown mode for {binary.path}") def arch(binary: lief_ext.Binary) -> int: if binary.header.machine_type == lief.ELF.ARCH.x86_64: return cast(int, capstone.CS_ARCH_X86) + elif binary.header.machine_type == lief.ELF.ARCH.RISCV: + return cast(int, capstone.CS_ARCH_RISCV) raise RuntimeError(f"Unknown machine type for {binary.path}")