From 4943880c084c4de25e8e7341a13ded65849c0ec1 Mon Sep 17 00:00:00 2001 From: Peter Kasting Date: Tue, 23 Aug 2022 15:40:54 +0000 Subject: [PATCH 1/2] Fix yet more signedness conversion warnings. I'd do the _nsec fields too, but I can't find any official type alias for them. Reviewed-on: https://chromium-review.googlesource.com/c/linux-syscall-support/+/3851022 git-subtree-dir: third_party/lss git-subtree-split: ce877209e11aa69dcfffbd53ef90ea1d07136521 --- .gitignore | 3 + DIR_METADATA | 12 + LICENSE | 28 + OWNERS | 2 + README.md | 138 + codereview.settings | 5 + linux_syscall_support.h | 5334 +++++++++++++++++++++++++++++++++++++++ tests/.gitignore | 4 + tests/Makefile | 147 ++ tests/README.md | 52 + tests/fallocate.c | 69 + tests/getitimer.c | 84 + tests/getrandom.c | 59 + tests/lstat.c | 97 + tests/setitimer.c | 90 + tests/sigaction.c | 54 + tests/sigtimedwait.c | 58 + tests/stat.c | 67 + tests/test_skel.h | 89 + tests/unlink.c | 48 + 20 files changed, 6440 insertions(+) create mode 100644 .gitignore create mode 100644 DIR_METADATA create mode 100644 LICENSE create mode 100644 OWNERS create mode 100644 README.md create mode 100644 codereview.settings create mode 100644 linux_syscall_support.h create mode 100644 tests/.gitignore create mode 100644 tests/Makefile create mode 100644 tests/README.md create mode 100644 tests/fallocate.c create mode 100644 tests/getitimer.c create mode 100644 tests/getrandom.c create mode 100644 tests/lstat.c create mode 100644 tests/setitimer.c create mode 100644 tests/sigaction.c create mode 100644 tests/sigtimedwait.c create mode 100644 tests/stat.c create mode 100644 tests/test_skel.h create mode 100644 tests/unlink.c diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000000..4032eb05cb9c --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*.o + +core diff --git a/DIR_METADATA b/DIR_METADATA new file mode 100644 index 000000000000..eccfd35fab8f --- /dev/null +++ b/DIR_METADATA @@ -0,0 +1,12 @@ +# Metadata information for this directory. +# +# For more information on DIR_METADATA files, see: +# https://source.chromium.org/chromium/infra/infra/+/HEAD:go/src/infra/tools/dirmd/README.md +# +# For the schema of this file, see Metadata message: +# https://source.chromium.org/chromium/infra/infra/+/HEAD:go/src/infra/tools/dirmd/proto/dir_metadata.proto + +os: LINUX +monorail { + project: "linux-syscall-support" +} diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000000..58ab3fba9d4c --- /dev/null +++ b/LICENSE @@ -0,0 +1,28 @@ +Copyright (c) 2005-2011, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/OWNERS b/OWNERS new file mode 100644 index 000000000000..29b02fa1143e --- /dev/null +++ b/OWNERS @@ -0,0 +1,2 @@ +mseaborn@chromium.org +vapier@chromium.org diff --git a/README.md b/README.md new file mode 100644 index 000000000000..e25abdd218fe --- /dev/null +++ b/README.md @@ -0,0 +1,138 @@ +# Linux Syscall Support (LSS) + +Every so often, projects need to directly embed Linux system calls instead of +calling the implementations in the system runtime library. + +This project provides a header file that can be included into your application +whenever you need to make direct system calls. + +The goal is to provide an API that generally mirrors the standard C library +while still making direct syscalls. We try to hide some of the differences +between arches when reasonably feasible. e.g. Newer architectures no longer +provide an `open` syscall, but do provide `openat`. We will still expose a +`sys_open` helper by default that calls into `openat` instead. + +We explicitly do not expose the raw syscall ABI including all of its historical +warts to the user. We want people to be able to easily make a syscall, not have +to worry that on some arches size args are swapped or they are shifted. + +Please be sure to review the Caveats section below however. + +## How to include linux\_syscall\_support.h in your project + +You can either copy the file into your project, or preferably, you can set up +Git submodules to automatically pull from our source repository. + +## Supported targets + +The following architectures/ABIs have been tested (at some point) and should +generally work. If you don't see your combo listed here, please double check +the header itself as this list might be out of date. + +* x86 32-bit (i.e. i386, i486, i586, i686, Intel, AMD, etc...) +* [x86_64 64-bit](https://en.wikipedia.org/wiki/X86-64) (i.e. x86-64, amd64, etc...) +* [x32 32-bit](https://sites.google.com/site/x32abi/) +* [ARM 32-bit](https://en.wikipedia.org/wiki/ARM_architecture) OABI +* [ARM 32-bit](https://en.wikipedia.org/wiki/ARM_architecture) EABI (i.e. armv6, armv7, etc...) +* AARCH64 64-bit (i.e. arm64, armv8, etc...) +* PowerPC 32-bit (i.e. ppc, ppc32, etc...) +* MIPS 32-bit o32 ABI +* MIPS 32-bit n32 ABI +* MIPS 64-bit n64 ABI +* LOONGARCH 64-bit ABI + +## API + +By default, you can just add a `sys_` prefix to any function you want to call. +So if you want to call `open(...)`, use `sys_open(...)` instead. + +### Knobs + +The linux\_syscall\_support.h header provides many knobs for you to control +the exported API. These are all documented in the top of the header in a big +comment block, so refer to that instead. + +## Caveats + +### ABI differences + +Some functions that the standard C library exposes use a different ABI than +what the Linux kernel uses. Care must be taken when making syscalls directly +that you use the right structure and flags. e.g. Most C libraries define a +`struct stat` (commonly in `sys/stat.h` or `bits/stat.h`) that is different +from the `struct stat` the kernel uses (commonly in `asm/stat.h`). If you use +the wrong structure layout, then you can see errors like memory corruption or +weird/shifted values. If you plan on making syscalls directly, you should +focus on headers that are available under the `linux/` and `asm/` namespaces. + +Note: LSS provides structs for most of these cases. For `sys_stat()`, it +provides `struct kernel_stat` for you to use. + +### Transparent backwards compatibility with older kernels + +While some C libraries (notably, glibc) take care to fallback to older syscalls +when running on older kernels, there is no such support in LSS. If you plan on +trying to run on older kernels, you will need to handle errors yourself (e.g. +`ENOSYS` when using a too new syscall). + +Remember that this can happen with new flag bits too. e.g. The `O_CLOEXEC` +flag was added to many syscalls, but if you try to run use it on older kernels, +it will fail with `EINVAL`. In that case, you must handle the fallback logic +yourself. + +### Variable arguments (varargs) + +We do not support vararg type functions. e.g. While the standard `open()` +function can accept 2 or 3 arguments (with the mode field being optional), +the `sys_open()` function always requires 3 arguments. + +## Bug reports & feature requests + +If you wish to report a problem or request a feature, please file them in our +[bug tracker](https://bugs.chromium.org/p/linux-syscall-support/issues/). + +Please do not post patches to the tracker. Instead, see below for how to send +patches to us directly. + +While we welcome feature requests, please keep in mind that it is unlikely that +anyone will find time to implement them for you. Sending patches is strongly +preferred and will often move things much faster. + +## Projects that use LSS + +* [Chromium](https://www.chromium.org/) +* [Breakpad](https://chromium.googlesource.com/breakpad/breakpad) +* [Native Client](https://developer.chrome.com/native-client), in nacl\_bootstrap.c + +## How to get an LSS change committed + +### Review + +You get your change reviewed, you can upload it to +[Gerrit](https://chromium-review.googlesource.com/q/project:linux-syscall-support+status:open) +using `git cl upload` from +[Chromium's depot-tools](https://commondatastorage.googleapis.com/chrome-infra-docs/flat/depot_tools/docs/html/depot_tools_tutorial.html). + +### Testing + +Tests are found in the [tests/](./tests/) subdirectory. It does not (yet) offer +100% coverage, but should grow over time. + +New commits that update/change/add syscall wrappers should include tests for +them too. Consult the [test documentation](./tests/README.md) for more details. + +To run, just run `make` inside the tests directory. It will compile & execute +the tests locally. + +There is some limited cross-compile coverage available if you run `make cross`. +It only compiles things (does not execute at all). + +### Rolling into Chromium + +If you commit a change to LSS, please also commit a Chromium change to update +`lss_revision` in +[Chromium's DEPS](https://chromium.googlesource.com/chromium/src/+/HEAD/DEPS) +file. + +This ensures that the LSS change gets tested, so that people who commit later +LSS changes don't run into problems with updating `lss_revision`. diff --git a/codereview.settings b/codereview.settings new file mode 100644 index 000000000000..b5082e8d307f --- /dev/null +++ b/codereview.settings @@ -0,0 +1,5 @@ +# This file is used by git cl to get repository specific information. +CC_LIST: chromium-reviews@chromium.org,mseaborn@chromium.org +CODE_REVIEW_SERVER: codereview.chromium.org +GERRIT_HOST: True +VIEW_VC: https://chromium.googlesource.com/linux-syscall-support/+/ diff --git a/linux_syscall_support.h b/linux_syscall_support.h new file mode 100644 index 000000000000..99a4b444918d --- /dev/null +++ b/linux_syscall_support.h @@ -0,0 +1,5334 @@ +/* Copyright (c) 2005-2011, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Markus Gutschke + */ + +/* This file includes Linux-specific support functions common to the + * coredumper and the thread lister; primarily, this is a collection + * of direct system calls, and a couple of symbols missing from + * standard header files. + * There are a few options that the including file can set to control + * the behavior of this file: + * + * SYS_CPLUSPLUS: + * The entire header file will normally be wrapped in 'extern "C" { }", + * making it suitable for compilation as both C and C++ source. If you + * do not want to do this, you can set the SYS_CPLUSPLUS macro to inhibit + * the wrapping. N.B. doing so will suppress inclusion of all prerequisite + * system header files, too. It is the caller's responsibility to provide + * the necessary definitions. + * + * SYS_ERRNO: + * All system calls will update "errno" unless overriden by setting the + * SYS_ERRNO macro prior to including this file. SYS_ERRNO should be + * an l-value. + * + * SYS_INLINE: + * New symbols will be defined "static inline", unless overridden by + * the SYS_INLINE macro. + * + * SYS_LINUX_SYSCALL_SUPPORT_H + * This macro is used to avoid multiple inclusions of this header file. + * If you need to include this file more than once, make sure to + * unset SYS_LINUX_SYSCALL_SUPPORT_H before each inclusion. + * + * SYS_PREFIX: + * New system calls will have a prefix of "sys_" unless overridden by + * the SYS_PREFIX macro. Valid values for this macro are [0..9] which + * results in prefixes "sys[0..9]_". It is also possible to set this + * macro to -1, which avoids all prefixes. + * + * SYS_SYSCALL_ENTRYPOINT: + * Some applications (such as sandboxes that filter system calls), need + * to be able to run custom-code each time a system call is made. If this + * macro is defined, it expands to the name of a "common" symbol. If + * this symbol is assigned a non-NULL pointer value, it is used as the + * address of the system call entrypoint. + * A pointer to this symbol can be obtained by calling + * get_syscall_entrypoint() + * + * This file defines a few internal symbols that all start with "LSS_". + * Do not access these symbols from outside this file. They are not part + * of the supported API. + */ +#ifndef SYS_LINUX_SYSCALL_SUPPORT_H +#define SYS_LINUX_SYSCALL_SUPPORT_H + +/* We currently only support x86-32, x86-64, ARM, MIPS, PPC, s390 and s390x + * on Linux. + * Porting to other related platforms should not be difficult. + */ +#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \ + defined(__mips__) || defined(__PPC__) || defined(__ARM_EABI__) || \ + defined(__aarch64__) || defined(__s390__) || defined(__e2k__) || \ + (defined(__riscv) && __riscv_xlen == 64) || defined(__loongarch_lp64)) \ + && (defined(__linux) || defined(__ANDROID__)) + +#ifndef SYS_CPLUSPLUS +#ifdef __cplusplus +/* Some system header files in older versions of gcc neglect to properly + * handle being included from C++. As it appears to be harmless to have + * multiple nested 'extern "C"' blocks, just add another one here. + */ +extern "C" { +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __mips__ +/* Include definitions of the ABI currently in use. */ +#ifdef __ANDROID__ +/* Android doesn't have sgidefs.h, but does have asm/sgidefs.h, + * which has the definitions we need. + */ +#include +#else +#include +#endif +#endif +#endif + +/* Some libcs, for example Android NDK and musl, #define these + * macros as aliases to their non-64 counterparts. To avoid naming + * conflict, remove them. + * + * These are restored by the corresponding #pragma pop_macro near + * the end of this file. + */ +#pragma push_macro("stat64") +#pragma push_macro("fstat64") +#pragma push_macro("lstat64") +#pragma push_macro("pread64") +#pragma push_macro("pwrite64") +#pragma push_macro("getdents64") +#undef stat64 +#undef fstat64 +#undef lstat64 +#undef pread64 +#undef pwrite64 +#undef getdents64 + +#if defined(__ANDROID__) && defined(__x86_64__) +// A number of x86_64 syscalls are blocked by seccomp on recent Android; +// undefine them so that modern alternatives will be used instead where +// possible. +// The alternative syscalls have been sanity checked against linux-3.4+; +// older versions might not work. +# undef __NR_getdents +# undef __NR_dup2 +# undef __NR_fork +# undef __NR_getpgrp +# undef __NR_open +# undef __NR_poll +# undef __NR_readlink +# undef __NR_stat +# undef __NR_unlink +# undef __NR_pipe +#endif + +#if defined(__ANDROID__) +// waitpid is blocked by seccomp on all architectures on recent Android. +# undef __NR_waitpid +#endif + +/* As glibc often provides subtly incompatible data structures (and implicit + * wrapper functions that convert them), we provide our own kernel data + * structures for use by the system calls. + * These structures have been developed by using Linux 2.6.23 headers for + * reference. Note though, we do not care about exact API compatibility + * with the kernel, and in fact the kernel often does not have a single + * API that works across architectures. Instead, we try to mimic the glibc + * API where reasonable, and only guarantee ABI compatibility with the + * kernel headers. + * Most notably, here are a few changes that were made to the structures + * defined by kernel headers: + * + * - we only define structures, but not symbolic names for kernel data + * types. For the latter, we directly use the native C datatype + * (i.e. "unsigned" instead of "mode_t"). + * - in a few cases, it is possible to define identical structures for + * both 32bit (e.g. i386) and 64bit (e.g. x86-64) platforms by + * standardizing on the 64bit version of the data types. In particular, + * this means that we use "unsigned" where the 32bit headers say + * "unsigned long". + * - overall, we try to minimize the number of cases where we need to + * conditionally define different structures. + * - the "struct kernel_sigaction" class of structures have been + * modified to more closely mimic glibc's API by introducing an + * anonymous union for the function pointer. + * - a small number of field names had to have an underscore appended to + * them, because glibc defines a global macro by the same name. + */ + +/* include/linux/dirent.h */ +struct kernel_dirent64 { + unsigned long long d_ino; + long long d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[256]; +}; + +/* include/linux/dirent.h */ +#if !defined(__NR_getdents) +// when getdents is not available, getdents64 is used for both. +#define kernel_dirent kernel_dirent64 +#else +struct kernel_dirent { + long d_ino; + long d_off; + unsigned short d_reclen; + char d_name[256]; +}; +#endif + +/* include/linux/uio.h */ +struct kernel_iovec { + void *iov_base; + unsigned long iov_len; +}; + +/* include/linux/socket.h */ +struct kernel_msghdr { + void *msg_name; + int msg_namelen; + struct kernel_iovec*msg_iov; + unsigned long msg_iovlen; + void *msg_control; + unsigned long msg_controllen; + unsigned msg_flags; +}; + +/* include/asm-generic/poll.h */ +struct kernel_pollfd { + int fd; + short events; + short revents; +}; + +/* include/linux/resource.h */ +struct kernel_rlimit { + unsigned long rlim_cur; + unsigned long rlim_max; +}; + +/* include/linux/time.h */ +struct kernel_timespec { + long tv_sec; + long tv_nsec; +}; + +/* include/linux/time.h */ +struct kernel_timeval { + long tv_sec; + long tv_usec; +}; + +/* include/linux/time.h */ +struct kernel_itimerval { + struct kernel_timeval it_interval; + struct kernel_timeval it_value; +}; + +/* include/linux/resource.h */ +struct kernel_rusage { + struct kernel_timeval ru_utime; + struct kernel_timeval ru_stime; + long ru_maxrss; + long ru_ixrss; + long ru_idrss; + long ru_isrss; + long ru_minflt; + long ru_majflt; + long ru_nswap; + long ru_inblock; + long ru_oublock; + long ru_msgsnd; + long ru_msgrcv; + long ru_nsignals; + long ru_nvcsw; + long ru_nivcsw; +}; + +#if defined(__i386__) || defined(__ARM_EABI__) || defined(__ARM_ARCH_3__) \ + || defined(__PPC__) || (defined(__s390__) && !defined(__s390x__)) \ + || defined(__e2k__) + +/* include/asm-{arm,i386,mips,ppc}/signal.h */ +struct kernel_old_sigaction { + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, siginfo_t *, void *); + }; + unsigned long sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +} __attribute__((packed,aligned(4))); +#elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) + #define kernel_old_sigaction kernel_sigaction +#elif defined(__aarch64__) || defined(__riscv) || defined(__loongarch_lp64) + // No kernel_old_sigaction defined for arm64 riscv and loongarch64. +#endif + +/* Some kernel functions (e.g. sigaction() in 2.6.23) require that the + * exactly match the size of the signal set, even though the API was + * intended to be extensible. We define our own KERNEL_NSIG to deal with + * this. + * Please note that glibc provides signals [1.._NSIG-1], whereas the + * kernel (and this header) provides the range [1..KERNEL_NSIG]. The + * actual number of signals is obviously the same, but the constants + * differ by one. + */ +#ifdef __mips__ +#define KERNEL_NSIG 128 +#else +#define KERNEL_NSIG 64 +#endif + +/* include/asm-{arm,aarch64,i386,mips,x86_64}/signal.h */ +struct kernel_sigset_t { + unsigned long sig[(KERNEL_NSIG + 8*sizeof(unsigned long) - 1)/ + (8*sizeof(unsigned long))]; +}; + +/* include/asm-{arm,i386,mips,x86_64,ppc}/signal.h */ +struct kernel_sigaction { +#ifdef __mips__ + unsigned long sa_flags; + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, siginfo_t *, void *); + }; + struct kernel_sigset_t sa_mask; +#else + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, siginfo_t *, void *); + }; + unsigned long sa_flags; +#if !defined(__riscv) && !defined(__loongarch_lp64) + void (*sa_restorer)(void); +#endif + struct kernel_sigset_t sa_mask; +#endif +}; + +/* include/linux/socket.h */ +struct kernel_sockaddr { + unsigned short sa_family; + char sa_data[14]; +}; + +/* include/asm-{arm,aarch64,i386,mips,ppc,s390}/stat.h */ +#ifdef __mips__ +#if _MIPS_SIM == _MIPS_SIM_ABI64 +typedef unsigned long long kernel_blkcnt_t; +typedef unsigned kernel_blksize_t; +typedef unsigned kernel_dev_t; +typedef unsigned kernel_gid_t; +typedef unsigned long long kernel_ino_t; +typedef unsigned kernel_mode_t; +typedef unsigned kernel_nlink_t; +typedef long long kernel_off_t; +typedef unsigned kernel_time_t; +typedef unsigned kernel_uid_t; +struct kernel_stat { +#else +struct kernel_stat64 { +#endif + unsigned st_dev; + unsigned __pad0[3]; + unsigned long long st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned st_rdev; + unsigned __pad1[3]; + long long st_size; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned st_blksize; + unsigned __pad2; + unsigned long long st_blocks; +}; +#elif defined __PPC__ +struct kernel_stat64 { + unsigned long long st_dev; + unsigned long long st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned long long st_rdev; + unsigned short int __pad2; + long long st_size; + long st_blksize; + long long st_blocks; + long st_atime_; + unsigned long st_atime_nsec_; + long st_mtime_; + unsigned long st_mtime_nsec_; + long st_ctime_; + unsigned long st_ctime_nsec_; + unsigned long __unused4; + unsigned long __unused5; +}; +#elif defined(__e2k__) +struct kernel_stat64 { + unsigned long long st_dev; + unsigned long long st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + unsigned long long st_rdev; + long long st_size; + int st_blksize; + int __pad2; + unsigned long long st_blocks; + int st_atime_; + unsigned int st_atime_nsec_; + int st_mtime_; + unsigned int st_mtime_nsec_; + int st_ctime_; + unsigned int st_ctime_nsec_; + unsigned int __unused4; + unsigned int __unused5; +}; +#else +struct kernel_stat64 { + unsigned long long st_dev; + unsigned char __pad0[4]; + unsigned __st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned long long st_rdev; + unsigned char __pad3[4]; + long long st_size; + unsigned st_blksize; + unsigned long long st_blocks; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned long long st_ino; +}; +#endif + +/* include/asm-{arm,aarch64,i386,mips,x86_64,ppc,s390}/stat.h */ +#if defined(__i386__) || defined(__ARM_ARCH_3__) || defined(__ARM_EABI__) +typedef unsigned kernel_blkcnt_t; +typedef unsigned kernel_blksize_t; +typedef unsigned short kernel_dev_t; +typedef unsigned short kernel_gid_t; +typedef unsigned kernel_ino_t; +typedef unsigned short kernel_mode_t; +typedef unsigned short kernel_nlink_t; +typedef unsigned kernel_off_t; +typedef unsigned kernel_time_t; +typedef unsigned short kernel_uid_t; +struct kernel_stat { + /* The kernel headers suggest that st_dev and st_rdev should be 32bit + * quantities encoding 12bit major and 20bit minor numbers in an interleaved + * format. In reality, we do not see useful data in the top bits. So, + * we'll leave the padding in here, until we find a better solution. + */ + kernel_dev_t st_dev; + short pad1; + kernel_ino_t st_ino; + kernel_mode_t st_mode; + kernel_nlink_t st_nlink; + kernel_uid_t st_uid; + kernel_gid_t st_gid; + kernel_dev_t st_rdev; + short pad2; + kernel_off_t st_size; + kernel_blksize_t st_blksize; + kernel_blkcnt_t st_blocks; + kernel_time_t st_atime_; + unsigned st_atime_nsec_; + kernel_time_t st_mtime_; + unsigned st_mtime_nsec_; + kernel_time_t st_ctime_; + unsigned st_ctime_nsec_; + unsigned __unused4; + unsigned __unused5; +}; +#elif defined(__x86_64__) +typedef int64_t kernel_blkcnt_t; +typedef int64_t kernel_blksize_t; +typedef uint64_t kernel_dev_t; +typedef unsigned kernel_gid_t; +typedef uint64_t kernel_ino_t; +typedef unsigned kernel_mode_t; +typedef uint64_t kernel_nlink_t; +typedef int64_t kernel_off_t; +typedef uint64_t kernel_time_t; +typedef unsigned kernel_uid_t; +struct kernel_stat { + kernel_dev_t st_dev; + kernel_ino_t st_ino; + kernel_nlink_t st_nlink; + kernel_mode_t st_mode; + kernel_uid_t st_uid; + kernel_gid_t st_gid; + unsigned __pad0; + kernel_dev_t st_rdev; + kernel_off_t st_size; + kernel_blksize_t st_blksize; + kernel_blkcnt_t st_blocks; + kernel_time_t st_atime_; + uint64_t st_atime_nsec_; + kernel_time_t st_mtime_; + uint64_t st_mtime_nsec_; + kernel_time_t st_ctime_; + uint64_t st_ctime_nsec_; + int64_t __unused4[3]; +}; +#elif defined(__PPC__) +typedef unsigned long kernel_blkcnt_t; +typedef unsigned long kernel_blksize_t; +typedef unsigned kernel_dev_t; +typedef unsigned kernel_gid_t; +typedef unsigned long kernel_ino_t; +typedef unsigned long kernel_mode_t; +typedef unsigned short kernel_nlink_t; +typedef long kernel_off_t; +typedef unsigned long kernel_time_t; +typedef unsigned kernel_uid_t; +struct kernel_stat { + kernel_dev_t st_dev; + kernel_ino_t st_ino; + kernel_mode_t st_mode; + kernel_nlink_t st_nlink; + kernel_gid_t st_uid; + kernel_uid_t st_gid; + kernel_dev_t st_rdev; + kernel_off_t st_size; + kernel_blksize_t st_blksize; + kernel_blkcnt_t st_blocks; + kernel_time_t st_atime_; + unsigned long st_atime_nsec_; + kernel_time_t st_mtime_; + unsigned long st_mtime_nsec_; + kernel_time_t st_ctime_; + unsigned long st_ctime_nsec_; + unsigned long __unused4; + unsigned long __unused5; +}; +#elif (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64) +typedef int kernel_blkcnt_t; +typedef int kernel_blksize_t; +typedef unsigned kernel_dev_t; +typedef unsigned kernel_gid_t; +typedef unsigned kernel_ino_t; +typedef unsigned kernel_mode_t; +typedef unsigned kernel_nlink_t; +typedef long kernel_off_t; +typedef long kernel_time_t; +typedef unsigned kernel_uid_t; +struct kernel_stat { + kernel_dev_t st_dev; + int st_pad1[3]; + kernel_ino_t st_ino; + kernel_mode_t st_mode; + kernel_nlink_t st_nlink; + kernel_uid_t st_uid; + kernel_gid_t st_gid; + kernel_dev_t st_rdev; + int st_pad2[2]; + kernel_off_t st_size; + int st_pad3; + kernel_time_t st_atime_; + long st_atime_nsec_; + kernel_time_t st_mtime_; + long st_mtime_nsec_; + kernel_time_t st_ctime_; + long st_ctime_nsec_; + kernel_blksize_t st_blksize; + kernel_blkcnt_t st_blocks; + int st_pad4[14]; +}; +#elif defined(__aarch64__) || defined(__riscv) || defined(__loongarch_lp64) +typedef long kernel_blkcnt_t; +typedef int kernel_blksize_t; +typedef unsigned long kernel_dev_t; +typedef unsigned int kernel_gid_t; +typedef unsigned long kernel_ino_t; +typedef unsigned int kernel_mode_t; +typedef unsigned int kernel_nlink_t; +typedef long kernel_off_t; +typedef long kernel_time_t; +typedef unsigned int kernel_uid_t; +struct kernel_stat { + kernel_dev_t st_dev; + kernel_ino_t st_ino; + kernel_mode_t st_mode; + kernel_nlink_t st_nlink; + kernel_uid_t st_uid; + kernel_gid_t st_gid; + kernel_dev_t st_rdev; + unsigned long __pad1; + kernel_off_t st_size; + kernel_blksize_t st_blksize; + int __pad2; + kernel_blkcnt_t st_blocks; + kernel_time_t st_atime_; + unsigned long st_atime_nsec_; + kernel_time_t st_mtime_; + unsigned long st_mtime_nsec_; + kernel_time_t st_ctime_; + unsigned long st_ctime_nsec_; + unsigned int __unused4; + unsigned int __unused5; +}; +#elif defined(__s390x__) +typedef long kernel_blkcnt_t; +typedef unsigned long kernel_blksize_t; +typedef unsigned long kernel_dev_t; +typedef unsigned int kernel_gid_t; +typedef unsigned long kernel_ino_t; +typedef unsigned int kernel_mode_t; +typedef unsigned long kernel_nlink_t; +typedef unsigned long kernel_off_t; +typedef unsigned long kernel_time_t; +typedef unsigned int kernel_uid_t; +struct kernel_stat { + kernel_dev_t st_dev; + kernel_ino_t st_ino; + kernel_nlink_t st_nlink; + kernel_mode_t st_mode; + kernel_uid_t st_uid; + kernel_gid_t st_gid; + unsigned int __pad1; + kernel_dev_t st_rdev; + kernel_off_t st_size; + kernel_time_t st_atime_; + unsigned long st_atime_nsec_; + kernel_time_t st_mtime_; + unsigned long st_mtime_nsec_; + kernel_time_t st_ctime_; + unsigned long st_ctime_nsec_; + kernel_blksize_t st_blksize; + kernel_blkcnt_t st_blocks; + unsigned long __unused[3]; +}; +#elif defined(__s390__) +typedef unsigned long kernel_blkcnt_t; +typedef unsigned long kernel_blksize_t; +typedef unsigned short kernel_dev_t; +typedef unsigned short kernel_gid_t; +typedef unsigned long kernel_ino_t; +typedef unsigned short kernel_mode_t; +typedef unsigned short kernel_nlink_t; +typedef unsigned long kernel_off_t; +typedef unsigned long kernel_time_t; +typedef unsigned short kernel_uid_t; +struct kernel_stat { + kernel_dev_t st_dev; + unsigned short __pad1; + kernel_ino_t st_ino; + kernel_mode_t st_mode; + kernel_nlink_t st_nlink; + kernel_uid_t st_uid; + kernel_gid_t st_gid; + kernel_dev_t st_rdev; + unsigned short __pad2; + kernel_off_t st_size; + kernel_blksize_t st_blksize; + kernel_blkcnt_t st_blocks; + kernel_time_t st_atime_; + unsigned long st_atime_nsec_; + kernel_time_t st_mtime_; + unsigned long st_mtime_nsec_; + kernel_time_t st_ctime_; + unsigned long st_ctime_nsec_; + unsigned long __unused4; + unsigned long __unused5; +}; +#elif defined(__e2k__) +typedef unsigned long kernel_blkcnt_t; +typedef unsigned long kernel_blksize_t; +typedef unsigned long kernel_dev_t; +typedef unsigned int kernel_gid_t; +typedef unsigned long kernel_ino_t; +typedef unsigned int kernel_mode_t; +typedef unsigned long kernel_nlink_t; +typedef unsigned long kernel_off_t; +typedef unsigned long kernel_time_t; +typedef unsigned int kernel_uid_t; +struct kernel_stat { + kernel_dev_t st_dev; + kernel_ino_t st_ino; + kernel_mode_t st_mode; + kernel_nlink_t st_nlink; + kernel_uid_t st_uid; + kernel_gid_t st_gid; + kernel_dev_t st_rdev; + kernel_off_t st_size; + kernel_blksize_t st_blksize; + kernel_blkcnt_t st_blocks; + kernel_time_t st_atime_; + unsigned long st_atime_nsec_; + kernel_time_t st_mtime_; + unsigned long st_mtime_nsec_; + kernel_time_t st_ctime_; + unsigned long st_ctime_nsec_; +}; +#endif + +/* include/asm-{arm,aarch64,i386,mips,x86_64,ppc,s390}/statfs.h */ +#ifdef __mips__ +#if _MIPS_SIM != _MIPS_SIM_ABI64 +struct kernel_statfs64 { + unsigned long f_type; + unsigned long f_bsize; + unsigned long f_frsize; + unsigned long __pad; + unsigned long long f_blocks; + unsigned long long f_bfree; + unsigned long long f_files; + unsigned long long f_ffree; + unsigned long long f_bavail; + struct { int val[2]; } f_fsid; + unsigned long f_namelen; + unsigned long f_spare[6]; +}; +#endif +#elif defined(__s390__) +/* See also arch/s390/include/asm/compat.h */ +struct kernel_statfs64 { + unsigned int f_type; + unsigned int f_bsize; + unsigned long long f_blocks; + unsigned long long f_bfree; + unsigned long long f_bavail; + unsigned long long f_files; + unsigned long long f_ffree; + struct { int val[2]; } f_fsid; + unsigned int f_namelen; + unsigned int f_frsize; + unsigned int f_flags; + unsigned int f_spare[4]; +}; +#elif !defined(__x86_64__) +struct kernel_statfs64 { + unsigned long f_type; + unsigned long f_bsize; + unsigned long long f_blocks; + unsigned long long f_bfree; + unsigned long long f_bavail; + unsigned long long f_files; + unsigned long long f_ffree; + struct { int val[2]; } f_fsid; + unsigned long f_namelen; + unsigned long f_frsize; + unsigned long f_spare[5]; +}; +#endif + +/* include/asm-{arm,i386,mips,x86_64,ppc,generic,s390}/statfs.h */ +#ifdef __mips__ +struct kernel_statfs { + long f_type; + long f_bsize; + long f_frsize; + long f_blocks; + long f_bfree; + long f_files; + long f_ffree; + long f_bavail; + struct { int val[2]; } f_fsid; + long f_namelen; + long f_spare[6]; +}; +#elif defined(__x86_64__) +struct kernel_statfs { + /* x86_64 actually defines all these fields as signed, whereas all other */ + /* platforms define them as unsigned. Leaving them at unsigned should not */ + /* cause any problems. Make sure these are 64-bit even on x32. */ + uint64_t f_type; + uint64_t f_bsize; + uint64_t f_blocks; + uint64_t f_bfree; + uint64_t f_bavail; + uint64_t f_files; + uint64_t f_ffree; + struct { int val[2]; } f_fsid; + uint64_t f_namelen; + uint64_t f_frsize; + uint64_t f_spare[5]; +}; +#elif defined(__s390__) +struct kernel_statfs { + unsigned int f_type; + unsigned int f_bsize; + unsigned long f_blocks; + unsigned long f_bfree; + unsigned long f_bavail; + unsigned long f_files; + unsigned long f_ffree; + struct { int val[2]; } f_fsid; + unsigned int f_namelen; + unsigned int f_frsize; + unsigned int f_flags; + unsigned int f_spare[4]; +}; +#else +struct kernel_statfs { + unsigned long f_type; + unsigned long f_bsize; + unsigned long f_blocks; + unsigned long f_bfree; + unsigned long f_bavail; + unsigned long f_files; + unsigned long f_ffree; + struct { int val[2]; } f_fsid; + unsigned long f_namelen; + unsigned long f_frsize; + unsigned long f_spare[5]; +}; +#endif + +struct kernel_statx_timestamp { + int64_t tv_sec; + uint32_t tv_nsec; + int32_t __reserved; +}; + +struct kernel_statx { + uint32_t stx_mask; + uint32_t stx_blksize; + uint64_t stx_attributes; + uint32_t stx_nlink; + uint32_t stx_uid; + uint32_t stx_gid; + uint16_t stx_mode; + uint16_t __spare0[1]; + uint64_t stx_ino; + uint64_t stx_size; + uint64_t stx_blocks; + uint64_t stx_attributes_mask; + struct kernel_statx_timestamp stx_atime; + struct kernel_statx_timestamp stx_btime; + struct kernel_statx_timestamp stx_ctime; + struct kernel_statx_timestamp stx_mtime; + uint32_t stx_rdev_major; + uint32_t stx_rdev_minor; + uint32_t stx_dev_major; + uint32_t stx_dev_minor; + uint64_t stx_mnt_id; + uint64_t __spare2; + uint64_t __spare3[12]; +}; + +/* Definitions missing from the standard header files */ +#ifndef O_DIRECTORY +#if defined(__ARM_ARCH_3__) || defined(__ARM_EABI__) || defined(__aarch64__) +#define O_DIRECTORY 0040000 +#else +#define O_DIRECTORY 0200000 +#endif +#endif +#ifndef NT_PRXFPREG +#define NT_PRXFPREG 0x46e62b7f +#endif +#ifndef PTRACE_GETFPXREGS +#define PTRACE_GETFPXREGS ((enum __ptrace_request)18) +#endif +#ifndef PR_GET_DUMPABLE +#define PR_GET_DUMPABLE 3 +#endif +#ifndef PR_SET_DUMPABLE +#define PR_SET_DUMPABLE 4 +#endif +#ifndef PR_GET_SECCOMP +#define PR_GET_SECCOMP 21 +#endif +#ifndef PR_SET_SECCOMP +#define PR_SET_SECCOMP 22 +#endif +#ifndef AT_FDCWD +#define AT_FDCWD (-100) +#endif +#ifndef AT_SYMLINK_NOFOLLOW +#define AT_SYMLINK_NOFOLLOW 0x100 +#endif +#ifndef AT_REMOVEDIR +#define AT_REMOVEDIR 0x200 +#endif +#ifndef AT_NO_AUTOMOUNT +#define AT_NO_AUTOMOUNT 0x800 +#endif +#ifndef AT_EMPTY_PATH +#define AT_EMPTY_PATH 0x1000 +#endif +#ifndef STATX_BASIC_STATS +#define STATX_BASIC_STATS 0x000007ffU +#endif +#ifndef AT_STATX_SYNC_AS_STAT +#define AT_STATX_SYNC_AS_STAT 0x0000 +#endif +#ifndef MREMAP_FIXED +#define MREMAP_FIXED 2 +#endif +#ifndef SA_RESTORER +#define SA_RESTORER 0x04000000 +#endif +#ifndef CPUCLOCK_PROF +#define CPUCLOCK_PROF 0 +#endif +#ifndef CPUCLOCK_VIRT +#define CPUCLOCK_VIRT 1 +#endif +#ifndef CPUCLOCK_SCHED +#define CPUCLOCK_SCHED 2 +#endif +#ifndef CPUCLOCK_PERTHREAD_MASK +#define CPUCLOCK_PERTHREAD_MASK 4 +#endif +#ifndef MAKE_PROCESS_CPUCLOCK +#define MAKE_PROCESS_CPUCLOCK(pid, clock) \ + ((int)(~(unsigned)(pid) << 3) | (int)(clock)) +#endif +#ifndef MAKE_THREAD_CPUCLOCK +#define MAKE_THREAD_CPUCLOCK(tid, clock) \ + ((int)(~(unsigned)(tid) << 3) | \ + (int)((clock) | CPUCLOCK_PERTHREAD_MASK)) +#endif + +#ifndef FUTEX_WAIT +#define FUTEX_WAIT 0 +#endif +#ifndef FUTEX_WAKE +#define FUTEX_WAKE 1 +#endif +#ifndef FUTEX_FD +#define FUTEX_FD 2 +#endif +#ifndef FUTEX_REQUEUE +#define FUTEX_REQUEUE 3 +#endif +#ifndef FUTEX_CMP_REQUEUE +#define FUTEX_CMP_REQUEUE 4 +#endif +#ifndef FUTEX_WAKE_OP +#define FUTEX_WAKE_OP 5 +#endif +#ifndef FUTEX_LOCK_PI +#define FUTEX_LOCK_PI 6 +#endif +#ifndef FUTEX_UNLOCK_PI +#define FUTEX_UNLOCK_PI 7 +#endif +#ifndef FUTEX_TRYLOCK_PI +#define FUTEX_TRYLOCK_PI 8 +#endif +#ifndef FUTEX_PRIVATE_FLAG +#define FUTEX_PRIVATE_FLAG 128 +#endif +#ifndef FUTEX_CMD_MASK +#define FUTEX_CMD_MASK ~FUTEX_PRIVATE_FLAG +#endif +#ifndef FUTEX_WAIT_PRIVATE +#define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | FUTEX_PRIVATE_FLAG) +#endif +#ifndef FUTEX_WAKE_PRIVATE +#define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | FUTEX_PRIVATE_FLAG) +#endif +#ifndef FUTEX_REQUEUE_PRIVATE +#define FUTEX_REQUEUE_PRIVATE (FUTEX_REQUEUE | FUTEX_PRIVATE_FLAG) +#endif +#ifndef FUTEX_CMP_REQUEUE_PRIVATE +#define FUTEX_CMP_REQUEUE_PRIVATE (FUTEX_CMP_REQUEUE | FUTEX_PRIVATE_FLAG) +#endif +#ifndef FUTEX_WAKE_OP_PRIVATE +#define FUTEX_WAKE_OP_PRIVATE (FUTEX_WAKE_OP | FUTEX_PRIVATE_FLAG) +#endif +#ifndef FUTEX_LOCK_PI_PRIVATE +#define FUTEX_LOCK_PI_PRIVATE (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG) +#endif +#ifndef FUTEX_UNLOCK_PI_PRIVATE +#define FUTEX_UNLOCK_PI_PRIVATE (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG) +#endif +#ifndef FUTEX_TRYLOCK_PI_PRIVATE +#define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) +#endif + + +#if defined(__x86_64__) +#ifndef ARCH_SET_GS +#define ARCH_SET_GS 0x1001 +#endif +#ifndef ARCH_GET_GS +#define ARCH_GET_GS 0x1004 +#endif +#endif + +#if defined(__i386__) +#ifndef __NR_quotactl +#define __NR_quotactl 131 +#endif +#ifndef __NR_setresuid +#define __NR_setresuid 164 +#define __NR_getresuid 165 +#define __NR_setresgid 170 +#define __NR_getresgid 171 +#endif +#ifndef __NR_rt_sigaction +#define __NR_rt_sigreturn 173 +#define __NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 +#define __NR_rt_sigpending 176 +#define __NR_rt_sigsuspend 179 +#endif +#ifndef __NR_pread64 +#define __NR_pread64 180 +#endif +#ifndef __NR_pwrite64 +#define __NR_pwrite64 181 +#endif +#ifndef __NR_ugetrlimit +#define __NR_ugetrlimit 191 +#endif +#ifndef __NR_stat64 +#define __NR_stat64 195 +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 197 +#endif +#ifndef __NR_setresuid32 +#define __NR_setresuid32 208 +#define __NR_getresuid32 209 +#define __NR_setresgid32 210 +#define __NR_getresgid32 211 +#endif +#ifndef __NR_setfsuid32 +#define __NR_setfsuid32 215 +#define __NR_setfsgid32 216 +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 220 +#endif +#ifndef __NR_gettid +#define __NR_gettid 224 +#endif +#ifndef __NR_readahead +#define __NR_readahead 225 +#endif +#ifndef __NR_setxattr +#define __NR_setxattr 226 +#endif +#ifndef __NR_lsetxattr +#define __NR_lsetxattr 227 +#endif +#ifndef __NR_getxattr +#define __NR_getxattr 229 +#endif +#ifndef __NR_lgetxattr +#define __NR_lgetxattr 230 +#endif +#ifndef __NR_listxattr +#define __NR_listxattr 232 +#endif +#ifndef __NR_llistxattr +#define __NR_llistxattr 233 +#endif +#ifndef __NR_tkill +#define __NR_tkill 238 +#endif +#ifndef __NR_futex +#define __NR_futex 240 +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 241 +#define __NR_sched_getaffinity 242 +#endif +#ifndef __NR_set_tid_address +#define __NR_set_tid_address 258 +#endif +#ifndef __NR_clock_gettime +#define __NR_clock_gettime 265 +#endif +#ifndef __NR_clock_getres +#define __NR_clock_getres 266 +#endif +#ifndef __NR_statfs64 +#define __NR_statfs64 268 +#endif +#ifndef __NR_fstatfs64 +#define __NR_fstatfs64 269 +#endif +#ifndef __NR_fadvise64_64 +#define __NR_fadvise64_64 272 +#endif +#ifndef __NR_ioprio_set +#define __NR_ioprio_set 289 +#endif +#ifndef __NR_ioprio_get +#define __NR_ioprio_get 290 +#endif +#ifndef __NR_openat +#define __NR_openat 295 +#endif +#ifndef __NR_fstatat64 +#define __NR_fstatat64 300 +#endif +#ifndef __NR_unlinkat +#define __NR_unlinkat 301 +#endif +#ifndef __NR_move_pages +#define __NR_move_pages 317 +#endif +#ifndef __NR_getcpu +#define __NR_getcpu 318 +#endif +#ifndef __NR_fallocate +#define __NR_fallocate 324 +#endif +#ifndef __NR_getrandom +#define __NR_getrandom 355 +#endif +/* End of i386 definitions */ +#elif defined(__ARM_ARCH_3__) || defined(__ARM_EABI__) +#ifndef __NR_setresuid +#define __NR_setresuid (__NR_SYSCALL_BASE + 164) +#define __NR_getresuid (__NR_SYSCALL_BASE + 165) +#define __NR_setresgid (__NR_SYSCALL_BASE + 170) +#define __NR_getresgid (__NR_SYSCALL_BASE + 171) +#endif +#ifndef __NR_rt_sigaction +#define __NR_rt_sigreturn (__NR_SYSCALL_BASE + 173) +#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174) +#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175) +#define __NR_rt_sigpending (__NR_SYSCALL_BASE + 176) +#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE + 179) +#endif +#ifndef __NR_pread64 +#define __NR_pread64 (__NR_SYSCALL_BASE + 180) +#endif +#ifndef __NR_pwrite64 +#define __NR_pwrite64 (__NR_SYSCALL_BASE + 181) +#endif +#ifndef __NR_ugetrlimit +#define __NR_ugetrlimit (__NR_SYSCALL_BASE + 191) +#endif +#ifndef __NR_stat64 +#define __NR_stat64 (__NR_SYSCALL_BASE + 195) +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 (__NR_SYSCALL_BASE + 197) +#endif +#ifndef __NR_setresuid32 +#define __NR_setresuid32 (__NR_SYSCALL_BASE + 208) +#define __NR_getresuid32 (__NR_SYSCALL_BASE + 209) +#define __NR_setresgid32 (__NR_SYSCALL_BASE + 210) +#define __NR_getresgid32 (__NR_SYSCALL_BASE + 211) +#endif +#ifndef __NR_setfsuid32 +#define __NR_setfsuid32 (__NR_SYSCALL_BASE + 215) +#define __NR_setfsgid32 (__NR_SYSCALL_BASE + 216) +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 (__NR_SYSCALL_BASE + 217) +#endif +#ifndef __NR_gettid +#define __NR_gettid (__NR_SYSCALL_BASE + 224) +#endif +#ifndef __NR_readahead +#define __NR_readahead (__NR_SYSCALL_BASE + 225) +#endif +#ifndef __NR_setxattr +#define __NR_setxattr (__NR_SYSCALL_BASE + 226) +#endif +#ifndef __NR_lsetxattr +#define __NR_lsetxattr (__NR_SYSCALL_BASE + 227) +#endif +#ifndef __NR_getxattr +#define __NR_getxattr (__NR_SYSCALL_BASE + 229) +#endif +#ifndef __NR_lgetxattr +#define __NR_lgetxattr (__NR_SYSCALL_BASE + 230) +#endif +#ifndef __NR_listxattr +#define __NR_listxattr (__NR_SYSCALL_BASE + 232) +#endif +#ifndef __NR_llistxattr +#define __NR_llistxattr (__NR_SYSCALL_BASE + 233) +#endif +#ifndef __NR_tkill +#define __NR_tkill (__NR_SYSCALL_BASE + 238) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_SYSCALL_BASE + 240) +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity (__NR_SYSCALL_BASE + 241) +#define __NR_sched_getaffinity (__NR_SYSCALL_BASE + 242) +#endif +#ifndef __NR_set_tid_address +#define __NR_set_tid_address (__NR_SYSCALL_BASE + 256) +#endif +#ifndef __NR_clock_gettime +#define __NR_clock_gettime (__NR_SYSCALL_BASE + 263) +#endif +#ifndef __NR_clock_getres +#define __NR_clock_getres (__NR_SYSCALL_BASE + 264) +#endif +#ifndef __NR_statfs64 +#define __NR_statfs64 (__NR_SYSCALL_BASE + 266) +#endif +#ifndef __NR_fstatfs64 +#define __NR_fstatfs64 (__NR_SYSCALL_BASE + 267) +#endif +#ifndef __NR_ioprio_set +#define __NR_ioprio_set (__NR_SYSCALL_BASE + 314) +#endif +#ifndef __NR_ioprio_get +#define __NR_ioprio_get (__NR_SYSCALL_BASE + 315) +#endif +#ifndef __NR_fstatat64 +#define __NR_fstatat64 (__NR_SYSCALL_BASE + 327) +#endif +#ifndef __NR_move_pages +#define __NR_move_pages (__NR_SYSCALL_BASE + 344) +#endif +#ifndef __NR_getcpu +#define __NR_getcpu (__NR_SYSCALL_BASE + 345) +#endif +#ifndef __NR_getrandom +#define __NR_getrandom (__NR_SYSCALL_BASE + 384) +#endif +/* End of ARM 3/EABI definitions */ +#elif defined(__aarch64__) || defined(__riscv) || defined(__loongarch_lp64) +#ifndef __NR_setxattr +#define __NR_setxattr 5 +#endif +#ifndef __NR_lsetxattr +#define __NR_lsetxattr 6 +#endif +#ifndef __NR_getxattr +#define __NR_getxattr 8 +#endif +#ifndef __NR_lgetxattr +#define __NR_lgetxattr 9 +#endif +#ifndef __NR_listxattr +#define __NR_listxattr 11 +#endif +#ifndef __NR_llistxattr +#define __NR_llistxattr 12 +#endif +#ifndef __NR_ioprio_set +#define __NR_ioprio_set 30 +#endif +#ifndef __NR_ioprio_get +#define __NR_ioprio_get 31 +#endif +#ifndef __NR_unlinkat +#define __NR_unlinkat 35 +#endif +#ifndef __NR_fallocate +#define __NR_fallocate 47 +#endif +#ifndef __NR_openat +#define __NR_openat 56 +#endif +#ifndef __NR_quotactl +#define __NR_quotactl 60 +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 61 +#endif +#ifndef __NR_getdents +// when getdents is not available, getdents64 is used for both. +#define __NR_getdents __NR_getdents64 +#endif +#ifndef __NR_pread64 +#define __NR_pread64 67 +#endif +#ifndef __NR_pwrite64 +#define __NR_pwrite64 68 +#endif +#ifndef __NR_ppoll +#define __NR_ppoll 73 +#endif +#ifndef __NR_readlinkat +#define __NR_readlinkat 78 +#endif +#if !defined(__loongarch_lp64) +#ifndef __NR_newfstatat +#define __NR_newfstatat 79 +#endif +#endif +#ifndef __NR_set_tid_address +#define __NR_set_tid_address 96 +#endif +#ifndef __NR_futex +#define __NR_futex 98 +#endif +#ifndef __NR_clock_gettime +#define __NR_clock_gettime 113 +#endif +#ifndef __NR_clock_getres +#define __NR_clock_getres 114 +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 122 +#define __NR_sched_getaffinity 123 +#endif +#ifndef __NR_tkill +#define __NR_tkill 130 +#endif +#ifndef __NR_setresuid +#define __NR_setresuid 147 +#define __NR_getresuid 148 +#define __NR_setresgid 149 +#define __NR_getresgid 150 +#endif +#ifndef __NR_gettid +#define __NR_gettid 178 +#endif +#ifndef __NR_readahead +#define __NR_readahead 213 +#endif +#ifndef __NR_fadvise64 +#define __NR_fadvise64 223 +#endif +#ifndef __NR_move_pages +#define __NR_move_pages 239 +#endif +#ifndef __NR_getrandom +#define __NR_getrandom 278 +#endif +#ifndef __NR_statx +#define __NR_statx 291 +#endif +#elif defined(__x86_64__) +#ifndef __NR_pread64 +#define __NR_pread64 17 +#endif +#ifndef __NR_pwrite64 +#define __NR_pwrite64 18 +#endif +#ifndef __NR_setresuid +#define __NR_setresuid 117 +#define __NR_getresuid 118 +#define __NR_setresgid 119 +#define __NR_getresgid 120 +#endif +#ifndef __NR_quotactl +#define __NR_quotactl 179 +#endif +#ifndef __NR_gettid +#define __NR_gettid 186 +#endif +#ifndef __NR_readahead +#define __NR_readahead 187 +#endif +#ifndef __NR_setxattr +#define __NR_setxattr 188 +#endif +#ifndef __NR_lsetxattr +#define __NR_lsetxattr 189 +#endif +#ifndef __NR_getxattr +#define __NR_getxattr 191 +#endif +#ifndef __NR_lgetxattr +#define __NR_lgetxattr 192 +#endif +#ifndef __NR_listxattr +#define __NR_listxattr 194 +#endif +#ifndef __NR_llistxattr +#define __NR_llistxattr 195 +#endif +#ifndef __NR_tkill +#define __NR_tkill 200 +#endif +#ifndef __NR_futex +#define __NR_futex 202 +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 203 +#define __NR_sched_getaffinity 204 +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 217 +#endif +#ifndef __NR_getdents +// when getdents is not available, getdents64 is used for both. +#define __NR_getdents __NR_getdents64 +#endif +#ifndef __NR_set_tid_address +#define __NR_set_tid_address 218 +#endif +#ifndef __NR_fadvise64 +#define __NR_fadvise64 221 +#endif +#ifndef __NR_clock_gettime +#define __NR_clock_gettime 228 +#endif +#ifndef __NR_clock_getres +#define __NR_clock_getres 229 +#endif +#ifndef __NR_ioprio_set +#define __NR_ioprio_set 251 +#endif +#ifndef __NR_ioprio_get +#define __NR_ioprio_get 252 +#endif +#ifndef __NR_openat +#define __NR_openat 257 +#endif +#ifndef __NR_newfstatat +#define __NR_newfstatat 262 +#endif +#ifndef __NR_unlinkat +#define __NR_unlinkat 263 +#endif +#ifndef __NR_move_pages +#define __NR_move_pages 279 +#endif +#ifndef __NR_fallocate +#define __NR_fallocate 285 +#endif +#ifndef __NR_getrandom +#define __NR_getrandom 318 +#endif +/* End of x86-64 definitions */ +#elif defined(__mips__) +#if _MIPS_SIM == _MIPS_SIM_ABI32 +#ifndef __NR_setresuid +#define __NR_setresuid (__NR_Linux + 185) +#define __NR_getresuid (__NR_Linux + 186) +#define __NR_setresgid (__NR_Linux + 190) +#define __NR_getresgid (__NR_Linux + 191) +#endif +#ifndef __NR_rt_sigaction +#define __NR_rt_sigreturn (__NR_Linux + 193) +#define __NR_rt_sigaction (__NR_Linux + 194) +#define __NR_rt_sigprocmask (__NR_Linux + 195) +#define __NR_rt_sigpending (__NR_Linux + 196) +#define __NR_rt_sigsuspend (__NR_Linux + 199) +#endif +#ifndef __NR_pread64 +#define __NR_pread64 (__NR_Linux + 200) +#endif +#ifndef __NR_pwrite64 +#define __NR_pwrite64 (__NR_Linux + 201) +#endif +#ifndef __NR_stat64 +#define __NR_stat64 (__NR_Linux + 213) +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 (__NR_Linux + 215) +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 (__NR_Linux + 219) +#endif +#ifndef __NR_gettid +#define __NR_gettid (__NR_Linux + 222) +#endif +#ifndef __NR_readahead +#define __NR_readahead (__NR_Linux + 223) +#endif +#ifndef __NR_setxattr +#define __NR_setxattr (__NR_Linux + 224) +#endif +#ifndef __NR_lsetxattr +#define __NR_lsetxattr (__NR_Linux + 225) +#endif +#ifndef __NR_getxattr +#define __NR_getxattr (__NR_Linux + 227) +#endif +#ifndef __NR_lgetxattr +#define __NR_lgetxattr (__NR_Linux + 228) +#endif +#ifndef __NR_listxattr +#define __NR_listxattr (__NR_Linux + 230) +#endif +#ifndef __NR_llistxattr +#define __NR_llistxattr (__NR_Linux + 231) +#endif +#ifndef __NR_tkill +#define __NR_tkill (__NR_Linux + 236) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_Linux + 238) +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity (__NR_Linux + 239) +#define __NR_sched_getaffinity (__NR_Linux + 240) +#endif +#ifndef __NR_set_tid_address +#define __NR_set_tid_address (__NR_Linux + 252) +#endif +#ifndef __NR_statfs64 +#define __NR_statfs64 (__NR_Linux + 255) +#endif +#ifndef __NR_fstatfs64 +#define __NR_fstatfs64 (__NR_Linux + 256) +#endif +#ifndef __NR_clock_gettime +#define __NR_clock_gettime (__NR_Linux + 263) +#endif +#ifndef __NR_clock_getres +#define __NR_clock_getres (__NR_Linux + 264) +#endif +#ifndef __NR_openat +#define __NR_openat (__NR_Linux + 288) +#endif +#ifndef __NR_fstatat +#define __NR_fstatat (__NR_Linux + 293) +#endif +#ifndef __NR_unlinkat +#define __NR_unlinkat (__NR_Linux + 294) +#endif +#ifndef __NR_move_pages +#define __NR_move_pages (__NR_Linux + 308) +#endif +#ifndef __NR_getcpu +#define __NR_getcpu (__NR_Linux + 312) +#endif +#ifndef __NR_ioprio_set +#define __NR_ioprio_set (__NR_Linux + 314) +#endif +#ifndef __NR_ioprio_get +#define __NR_ioprio_get (__NR_Linux + 315) +#endif +#ifndef __NR_getrandom +#define __NR_getrandom (__NR_Linux + 353) +#endif +/* End of MIPS (old 32bit API) definitions */ +#elif _MIPS_SIM == _MIPS_SIM_ABI64 +#ifndef __NR_pread64 +#define __NR_pread64 (__NR_Linux + 16) +#endif +#ifndef __NR_pwrite64 +#define __NR_pwrite64 (__NR_Linux + 17) +#endif +#ifndef __NR_setresuid +#define __NR_setresuid (__NR_Linux + 115) +#define __NR_getresuid (__NR_Linux + 116) +#define __NR_setresgid (__NR_Linux + 117) +#define __NR_getresgid (__NR_Linux + 118) +#endif +#ifndef __NR_gettid +#define __NR_gettid (__NR_Linux + 178) +#endif +#ifndef __NR_readahead +#define __NR_readahead (__NR_Linux + 179) +#endif +#ifndef __NR_setxattr +#define __NR_setxattr (__NR_Linux + 180) +#endif +#ifndef __NR_lsetxattr +#define __NR_lsetxattr (__NR_Linux + 181) +#endif +#ifndef __NR_getxattr +#define __NR_getxattr (__NR_Linux + 183) +#endif +#ifndef __NR_lgetxattr +#define __NR_lgetxattr (__NR_Linux + 184) +#endif +#ifndef __NR_listxattr +#define __NR_listxattr (__NR_Linux + 186) +#endif +#ifndef __NR_llistxattr +#define __NR_llistxattr (__NR_Linux + 187) +#endif +#ifndef __NR_tkill +#define __NR_tkill (__NR_Linux + 192) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_Linux + 194) +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity (__NR_Linux + 195) +#define __NR_sched_getaffinity (__NR_Linux + 196) +#endif +#ifndef __NR_set_tid_address +#define __NR_set_tid_address (__NR_Linux + 212) +#endif +#ifndef __NR_clock_gettime +#define __NR_clock_gettime (__NR_Linux + 222) +#endif +#ifndef __NR_clock_getres +#define __NR_clock_getres (__NR_Linux + 223) +#endif +#ifndef __NR_openat +#define __NR_openat (__NR_Linux + 247) +#endif +#ifndef __NR_fstatat +#define __NR_fstatat (__NR_Linux + 252) +#endif +#ifndef __NR_unlinkat +#define __NR_unlinkat (__NR_Linux + 253) +#endif +#ifndef __NR_move_pages +#define __NR_move_pages (__NR_Linux + 267) +#endif +#ifndef __NR_getcpu +#define __NR_getcpu (__NR_Linux + 271) +#endif +#ifndef __NR_ioprio_set +#define __NR_ioprio_set (__NR_Linux + 273) +#endif +#ifndef __NR_ioprio_get +#define __NR_ioprio_get (__NR_Linux + 274) +#endif +#ifndef __NR_getrandom +#define __NR_getrandom (__NR_Linux + 313) +#endif +/* End of MIPS (64bit API) definitions */ +#else +#ifndef __NR_setresuid +#define __NR_setresuid (__NR_Linux + 115) +#define __NR_getresuid (__NR_Linux + 116) +#define __NR_setresgid (__NR_Linux + 117) +#define __NR_getresgid (__NR_Linux + 118) +#endif +#ifndef __NR_gettid +#define __NR_gettid (__NR_Linux + 178) +#endif +#ifndef __NR_readahead +#define __NR_readahead (__NR_Linux + 179) +#endif +#ifndef __NR_setxattr +#define __NR_setxattr (__NR_Linux + 180) +#endif +#ifndef __NR_lsetxattr +#define __NR_lsetxattr (__NR_Linux + 181) +#endif +#ifndef __NR_getxattr +#define __NR_getxattr (__NR_Linux + 183) +#endif +#ifndef __NR_lgetxattr +#define __NR_lgetxattr (__NR_Linux + 184) +#endif +#ifndef __NR_listxattr +#define __NR_listxattr (__NR_Linux + 186) +#endif +#ifndef __NR_llistxattr +#define __NR_llistxattr (__NR_Linux + 187) +#endif +#ifndef __NR_tkill +#define __NR_tkill (__NR_Linux + 192) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_Linux + 194) +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity (__NR_Linux + 195) +#define __NR_sched_getaffinity (__NR_Linux + 196) +#endif +#ifndef __NR_set_tid_address +#define __NR_set_tid_address (__NR_Linux + 213) +#endif +#ifndef __NR_statfs64 +#define __NR_statfs64 (__NR_Linux + 217) +#endif +#ifndef __NR_fstatfs64 +#define __NR_fstatfs64 (__NR_Linux + 218) +#endif +#ifndef __NR_clock_gettime +#define __NR_clock_gettime (__NR_Linux + 226) +#endif +#ifndef __NR_clock_getres +#define __NR_clock_getres (__NR_Linux + 227) +#endif +#ifndef __NR_openat +#define __NR_openat (__NR_Linux + 251) +#endif +#ifndef __NR_fstatat +#define __NR_fstatat (__NR_Linux + 256) +#endif +#ifndef __NR_unlinkat +#define __NR_unlinkat (__NR_Linux + 257) +#endif +#ifndef __NR_move_pages +#define __NR_move_pages (__NR_Linux + 271) +#endif +#ifndef __NR_getcpu +#define __NR_getcpu (__NR_Linux + 275) +#endif +#ifndef __NR_ioprio_set +#define __NR_ioprio_set (__NR_Linux + 277) +#endif +#ifndef __NR_ioprio_get +#define __NR_ioprio_get (__NR_Linux + 278) +#endif +/* End of MIPS (new 32bit API) definitions */ +#endif +/* End of MIPS definitions */ +#elif defined(__PPC__) +#ifndef __NR_setfsuid +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#endif +#ifndef __NR_setresuid +#define __NR_setresuid 164 +#define __NR_getresuid 165 +#define __NR_setresgid 169 +#define __NR_getresgid 170 +#endif +#ifndef __NR_rt_sigaction +#define __NR_rt_sigreturn 172 +#define __NR_rt_sigaction 173 +#define __NR_rt_sigprocmask 174 +#define __NR_rt_sigpending 175 +#define __NR_rt_sigsuspend 178 +#endif +#ifndef __NR_pread64 +#define __NR_pread64 179 +#endif +#ifndef __NR_pwrite64 +#define __NR_pwrite64 180 +#endif +#ifndef __NR_ugetrlimit +#define __NR_ugetrlimit 190 +#endif +#ifndef __NR_readahead +#define __NR_readahead 191 +#endif +#ifndef __NR_stat64 +#define __NR_stat64 195 +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 197 +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 202 +#endif +#ifndef __NR_gettid +#define __NR_gettid 207 +#endif +#ifndef __NR_tkill +#define __NR_tkill 208 +#endif +#ifndef __NR_setxattr +#define __NR_setxattr 209 +#endif +#ifndef __NR_lsetxattr +#define __NR_lsetxattr 210 +#endif +#ifndef __NR_getxattr +#define __NR_getxattr 212 +#endif +#ifndef __NR_lgetxattr +#define __NR_lgetxattr 213 +#endif +#ifndef __NR_listxattr +#define __NR_listxattr 215 +#endif +#ifndef __NR_llistxattr +#define __NR_llistxattr 216 +#endif +#ifndef __NR_futex +#define __NR_futex 221 +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 222 +#define __NR_sched_getaffinity 223 +#endif +#ifndef __NR_set_tid_address +#define __NR_set_tid_address 232 +#endif +#ifndef __NR_clock_gettime +#define __NR_clock_gettime 246 +#endif +#ifndef __NR_clock_getres +#define __NR_clock_getres 247 +#endif +#ifndef __NR_statfs64 +#define __NR_statfs64 252 +#endif +#ifndef __NR_fstatfs64 +#define __NR_fstatfs64 253 +#endif +#ifndef __NR_fadvise64_64 +#define __NR_fadvise64_64 254 +#endif +#ifndef __NR_ioprio_set +#define __NR_ioprio_set 273 +#endif +#ifndef __NR_ioprio_get +#define __NR_ioprio_get 274 +#endif +#ifndef __NR_openat +#define __NR_openat 286 +#endif +#ifndef __NR_fstatat64 +#define __NR_fstatat64 291 +#endif +#ifndef __NR_unlinkat +#define __NR_unlinkat 292 +#endif +#ifndef __NR_move_pages +#define __NR_move_pages 301 +#endif +#ifndef __NR_getcpu +#define __NR_getcpu 302 +#endif +/* End of powerpc defininitions */ +#elif defined(__s390__) +#ifndef __NR_quotactl +#define __NR_quotactl 131 +#endif +#ifndef __NR_rt_sigreturn +#define __NR_rt_sigreturn 173 +#endif +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction 174 +#endif +#ifndef __NR_rt_sigprocmask +#define __NR_rt_sigprocmask 175 +#endif +#ifndef __NR_rt_sigpending +#define __NR_rt_sigpending 176 +#endif +#ifndef __NR_rt_sigsuspend +#define __NR_rt_sigsuspend 179 +#endif +#ifndef __NR_pread64 +#define __NR_pread64 180 +#endif +#ifndef __NR_pwrite64 +#define __NR_pwrite64 181 +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 220 +#endif +#ifndef __NR_readahead +#define __NR_readahead 222 +#endif +#ifndef __NR_setxattr +#define __NR_setxattr 224 +#endif +#ifndef __NR_lsetxattr +#define __NR_lsetxattr 225 +#endif +#ifndef __NR_getxattr +#define __NR_getxattr 227 +#endif +#ifndef __NR_lgetxattr +#define __NR_lgetxattr 228 +#endif +#ifndef __NR_listxattr +#define __NR_listxattr 230 +#endif +#ifndef __NR_llistxattr +#define __NR_llistxattr 231 +#endif +#ifndef __NR_gettid +#define __NR_gettid 236 +#endif +#ifndef __NR_tkill +#define __NR_tkill 237 +#endif +#ifndef __NR_futex +#define __NR_futex 238 +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 239 +#endif +#ifndef __NR_sched_getaffinity +#define __NR_sched_getaffinity 240 +#endif +#ifndef __NR_set_tid_address +#define __NR_set_tid_address 252 +#endif +#ifndef __NR_clock_gettime +#define __NR_clock_gettime 260 +#endif +#ifndef __NR_clock_getres +#define __NR_clock_getres 261 +#endif +#ifndef __NR_statfs64 +#define __NR_statfs64 265 +#endif +#ifndef __NR_fstatfs64 +#define __NR_fstatfs64 266 +#endif +#ifndef __NR_ioprio_set +#define __NR_ioprio_set 282 +#endif +#ifndef __NR_ioprio_get +#define __NR_ioprio_get 283 +#endif +#ifndef __NR_openat +#define __NR_openat 288 +#endif +#ifndef __NR_unlinkat +#define __NR_unlinkat 294 +#endif +#ifndef __NR_move_pages +#define __NR_move_pages 310 +#endif +#ifndef __NR_getcpu +#define __NR_getcpu 311 +#endif +#ifndef __NR_fallocate +#define __NR_fallocate 314 +#endif +/* Some syscalls are named/numbered differently between s390 and s390x. */ +#ifdef __s390x__ +# ifndef __NR_getrlimit +# define __NR_getrlimit 191 +# endif +# ifndef __NR_setresuid +# define __NR_setresuid 208 +# endif +# ifndef __NR_getresuid +# define __NR_getresuid 209 +# endif +# ifndef __NR_setresgid +# define __NR_setresgid 210 +# endif +# ifndef __NR_getresgid +# define __NR_getresgid 211 +# endif +# ifndef __NR_setfsuid +# define __NR_setfsuid 215 +# endif +# ifndef __NR_setfsgid +# define __NR_setfsgid 216 +# endif +# ifndef __NR_fadvise64 +# define __NR_fadvise64 253 +# endif +# ifndef __NR_newfstatat +# define __NR_newfstatat 293 +# endif +#else /* __s390x__ */ +# ifndef __NR_getrlimit +# define __NR_getrlimit 76 +# endif +# ifndef __NR_setfsuid +# define __NR_setfsuid 138 +# endif +# ifndef __NR_setfsgid +# define __NR_setfsgid 139 +# endif +# ifndef __NR_setresuid +# define __NR_setresuid 164 +# endif +# ifndef __NR_getresuid +# define __NR_getresuid 165 +# endif +# ifndef __NR_setresgid +# define __NR_setresgid 170 +# endif +# ifndef __NR_getresgid +# define __NR_getresgid 171 +# endif +# ifndef __NR_ugetrlimit +# define __NR_ugetrlimit 191 +# endif +# ifndef __NR_mmap2 +# define __NR_mmap2 192 +# endif +# ifndef __NR_setresuid32 +# define __NR_setresuid32 208 +# endif +# ifndef __NR_getresuid32 +# define __NR_getresuid32 209 +# endif +# ifndef __NR_setresgid32 +# define __NR_setresgid32 210 +# endif +# ifndef __NR_getresgid32 +# define __NR_getresgid32 211 +# endif +# ifndef __NR_setfsuid32 +# define __NR_setfsuid32 215 +# endif +# ifndef __NR_setfsgid32 +# define __NR_setfsgid32 216 +# endif +# ifndef __NR_fadvise64_64 +# define __NR_fadvise64_64 264 +# endif +# ifndef __NR_fstatat64 +# define __NR_fstatat64 293 +# endif +#endif /* __s390__ */ +/* End of s390/s390x definitions */ +#endif + + +/* After forking, we must make sure to only call system calls. */ +#if defined(__BOUNDED_POINTERS__) + #error "Need to port invocations of syscalls for bounded ptrs" +#else + /* The core dumper and the thread lister get executed after threads + * have been suspended. As a consequence, we cannot call any functions + * that acquire locks. Unfortunately, libc wraps most system calls + * (e.g. in order to implement pthread_atfork, and to make calls + * cancellable), which means we cannot call these functions. Instead, + * we have to call syscall() directly. + */ + #undef LSS_ERRNO + #ifdef SYS_ERRNO + /* Allow the including file to override the location of errno. This can + * be useful when using clone() with the CLONE_VM option. + */ + #define LSS_ERRNO SYS_ERRNO + #else + #define LSS_ERRNO errno + #endif + + #undef LSS_INLINE + #ifdef SYS_INLINE + #define LSS_INLINE SYS_INLINE + #else + #define LSS_INLINE static inline + #endif + + /* Allow the including file to override the prefix used for all new + * system calls. By default, it will be set to "sys_". + */ + #undef LSS_NAME + #ifndef SYS_PREFIX + #define LSS_NAME(name) sys_##name + #elif defined(SYS_PREFIX) && SYS_PREFIX < 0 + #define LSS_NAME(name) name + #elif defined(SYS_PREFIX) && SYS_PREFIX == 0 + #define LSS_NAME(name) sys0_##name + #elif defined(SYS_PREFIX) && SYS_PREFIX == 1 + #define LSS_NAME(name) sys1_##name + #elif defined(SYS_PREFIX) && SYS_PREFIX == 2 + #define LSS_NAME(name) sys2_##name + #elif defined(SYS_PREFIX) && SYS_PREFIX == 3 + #define LSS_NAME(name) sys3_##name + #elif defined(SYS_PREFIX) && SYS_PREFIX == 4 + #define LSS_NAME(name) sys4_##name + #elif defined(SYS_PREFIX) && SYS_PREFIX == 5 + #define LSS_NAME(name) sys5_##name + #elif defined(SYS_PREFIX) && SYS_PREFIX == 6 + #define LSS_NAME(name) sys6_##name + #elif defined(SYS_PREFIX) && SYS_PREFIX == 7 + #define LSS_NAME(name) sys7_##name + #elif defined(SYS_PREFIX) && SYS_PREFIX == 8 + #define LSS_NAME(name) sys8_##name + #elif defined(SYS_PREFIX) && SYS_PREFIX == 9 + #define LSS_NAME(name) sys9_##name + #endif + + #undef LSS_RETURN + #if defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) \ + || defined(__ARM_EABI__) || defined(__aarch64__) || defined(__s390__) \ + || defined(__e2k__) || defined(__riscv) || defined(__loongarch_lp64) + /* Failing system calls return a negative result in the range of + * -1..-4095. These are "errno" values with the sign inverted. + */ + #define LSS_RETURN(type, res) \ + do { \ + if ((unsigned long)(res) >= (unsigned long)(-4095)) { \ + LSS_ERRNO = (int)(-(res)); \ + res = -1; \ + } \ + return (type) (res); \ + } while (0) + #elif defined(__mips__) + /* On MIPS, failing system calls return -1, and set errno in a + * separate CPU register. + */ + #define LSS_RETURN(type, res, err) \ + do { \ + if (err) { \ + unsigned long __errnovalue = (res); \ + LSS_ERRNO = __errnovalue; \ + res = -1; \ + } \ + return (type) (res); \ + } while (0) + #elif defined(__PPC__) + /* On PPC, failing system calls return -1, and set errno in a + * separate CPU register. See linux/unistd.h. + */ + #define LSS_RETURN(type, res, err) \ + do { \ + if (err & 0x10000000 ) { \ + LSS_ERRNO = (res); \ + res = -1; \ + } \ + return (type) (res); \ + } while (0) + #endif + #if defined(__i386__) + /* In PIC mode (e.g. when building shared libraries), gcc for i386 + * reserves ebx. Unfortunately, most distribution ship with implementations + * of _syscallX() which clobber ebx. + * Also, most definitions of _syscallX() neglect to mark "memory" as being + * clobbered. This causes problems with compilers, that do a better job + * at optimizing across __asm__ calls. + * So, we just have to redefine all of the _syscallX() macros. + */ + #undef LSS_ENTRYPOINT + #ifdef SYS_SYSCALL_ENTRYPOINT + static inline void (**LSS_NAME(get_syscall_entrypoint)(void))(void) { + void (**entrypoint)(void); + asm volatile(".bss\n" + ".align 8\n" + ".globl " SYS_SYSCALL_ENTRYPOINT "\n" + ".common " SYS_SYSCALL_ENTRYPOINT ",8,8\n" + ".previous\n" + /* This logically does 'lea "SYS_SYSCALL_ENTRYPOINT", %0' */ + "call 0f\n" + "0:pop %0\n" + "add $_GLOBAL_OFFSET_TABLE_+[.-0b], %0\n" + "mov " SYS_SYSCALL_ENTRYPOINT "@GOT(%0), %0\n" + : "=r"(entrypoint)); + return entrypoint; + } + + #define LSS_ENTRYPOINT ".bss\n" \ + ".align 8\n" \ + ".globl " SYS_SYSCALL_ENTRYPOINT "\n" \ + ".common " SYS_SYSCALL_ENTRYPOINT ",8,8\n" \ + ".previous\n" \ + /* Check the SYS_SYSCALL_ENTRYPOINT vector */ \ + "push %%eax\n" \ + "call 10000f\n" \ + "10000:pop %%eax\n" \ + "add $_GLOBAL_OFFSET_TABLE_+[.-10000b], %%eax\n" \ + "mov " SYS_SYSCALL_ENTRYPOINT \ + "@GOT(%%eax), %%eax\n" \ + "mov 0(%%eax), %%eax\n" \ + "test %%eax, %%eax\n" \ + "jz 10002f\n" \ + "push %%eax\n" \ + "call 10001f\n" \ + "10001:pop %%eax\n" \ + "add $(10003f-10001b), %%eax\n" \ + "xchg 4(%%esp), %%eax\n" \ + "ret\n" \ + "10002:pop %%eax\n" \ + "int $0x80\n" \ + "10003:\n" + #else + #define LSS_ENTRYPOINT "int $0x80\n" + #endif + #undef LSS_BODY + #define LSS_BODY(type,args...) \ + long __res; \ + __asm__ __volatile__("push %%ebx\n" \ + "movl %2,%%ebx\n" \ + LSS_ENTRYPOINT \ + "pop %%ebx" \ + args \ + : "memory"); \ + LSS_RETURN(type,__res) + #undef _syscall0 + #define _syscall0(type,name) \ + type LSS_NAME(name)(void) { \ + long __res; \ + __asm__ volatile(LSS_ENTRYPOINT \ + : "=a" (__res) \ + : "0" (__NR_##name) \ + : "memory"); \ + LSS_RETURN(type,__res); \ + } + #undef _syscall1 + #define _syscall1(type,name,type1,arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name), "ri" ((long)(arg1))); \ + } + #undef _syscall2 + #define _syscall2(type,name,type1,arg1,type2,arg2) \ + type LSS_NAME(name)(type1 arg1,type2 arg2) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name),"ri" ((long)(arg1)), "c" ((long)(arg2))); \ + } + #undef _syscall3 + #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ + type LSS_NAME(name)(type1 arg1,type2 arg2,type3 arg3) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \ + "d" ((long)(arg3))); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \ + "d" ((long)(arg3)),"S" ((long)(arg4))); \ + } + #undef _syscall5 + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + long __res; \ + __asm__ __volatile__("push %%ebx\n" \ + "movl %2,%%ebx\n" \ + "movl %1,%%eax\n" \ + LSS_ENTRYPOINT \ + "pop %%ebx" \ + : "=a" (__res) \ + : "i" (__NR_##name), "ri" ((long)(arg1)), \ + "c" ((long)(arg2)), "d" ((long)(arg3)), \ + "S" ((long)(arg4)), "D" ((long)(arg5)) \ + : "memory"); \ + LSS_RETURN(type,__res); \ + } + #undef _syscall6 + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + long __res; \ + struct { long __a1; long __a6; } __s = { (long)arg1, (long) arg6 }; \ + __asm__ __volatile__("push %%ebp\n" \ + "push %%ebx\n" \ + "movl 4(%2),%%ebp\n" \ + "movl 0(%2), %%ebx\n" \ + "movl %1,%%eax\n" \ + LSS_ENTRYPOINT \ + "pop %%ebx\n" \ + "pop %%ebp" \ + : "=a" (__res) \ + : "i" (__NR_##name), "0" ((long)(&__s)), \ + "c" ((long)(arg2)), "d" ((long)(arg3)), \ + "S" ((long)(arg4)), "D" ((long)(arg5)) \ + : "memory"); \ + LSS_RETURN(type,__res); \ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long __res; + __asm__ __volatile__(/* if (fn == NULL) + * return -EINVAL; + */ + "movl %3,%%ecx\n" + "jecxz 1f\n" + + /* if (child_stack == NULL) + * return -EINVAL; + */ + "movl %4,%%ecx\n" + "jecxz 1f\n" + + /* Set up alignment of the child stack: + * child_stack = (child_stack & ~0xF) - 20; + */ + "andl $-16,%%ecx\n" + "subl $20,%%ecx\n" + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + "movl %6,%%eax\n" + "movl %%eax,4(%%ecx)\n" + "movl %3,%%eax\n" + "movl %%eax,(%%ecx)\n" + + /* %eax = syscall(%eax = __NR_clone, + * %ebx = flags, + * %ecx = child_stack, + * %edx = parent_tidptr, + * %esi = newtls, + * %edi = child_tidptr) + * Also, make sure that %ebx gets preserved as it is + * used in PIC mode. + */ + "movl %8,%%esi\n" + "movl %7,%%edx\n" + "movl %5,%%eax\n" + "movl %9,%%edi\n" + "pushl %%ebx\n" + "movl %%eax,%%ebx\n" + "movl %2,%%eax\n" + LSS_ENTRYPOINT + + /* In the parent: restore %ebx + * In the child: move "fn" into %ebx + */ + "popl %%ebx\n" + + /* if (%eax != 0) + * return %eax; + */ + "test %%eax,%%eax\n" + "jnz 1f\n" + + /* In the child, now. Terminate frame pointer chain. + */ + "movl $0,%%ebp\n" + + /* Call "fn". "arg" is already on the stack. + */ + "call *%%ebx\n" + + /* Call _exit(%ebx). Unfortunately older versions + * of gcc restrict the number of arguments that can + * be passed to asm(). So, we need to hard-code the + * system call number. + */ + "movl %%eax,%%ebx\n" + "movl $1,%%eax\n" + LSS_ENTRYPOINT + + /* Return to parent. + */ + "1:\n" + : "=a" (__res) + : "0"(-EINVAL), "i"(__NR_clone), + "m"(fn), "m"(child_stack), "m"(flags), "m"(arg), + "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr) + : "memory", "ecx", "edx", "esi", "edi"); + LSS_RETURN(int, __res); + } + + LSS_INLINE _syscall1(int, set_thread_area, void *, u) + LSS_INLINE _syscall1(int, get_thread_area, void *, u) + + LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { + /* On i386, the kernel does not know how to return from a signal + * handler. Instead, it relies on user space to provide a + * restorer function that calls the {rt_,}sigreturn() system call. + * Unfortunately, we cannot just reference the glibc version of this + * function, as glibc goes out of its way to make it inaccessible. + */ + void (*res)(void); + __asm__ __volatile__("call 2f\n" + "0:.align 16\n" + "1:movl %1,%%eax\n" + LSS_ENTRYPOINT + "2:popl %0\n" + "addl $(1b-0b),%0\n" + : "=a" (res) + : "i" (__NR_rt_sigreturn)); + return res; + } + LSS_INLINE void (*LSS_NAME(restore)(void))(void) { + /* On i386, the kernel does not know how to return from a signal + * handler. Instead, it relies on user space to provide a + * restorer function that calls the {rt_,}sigreturn() system call. + * Unfortunately, we cannot just reference the glibc version of this + * function, as glibc goes out of its way to make it inaccessible. + */ + void (*res)(void); + __asm__ __volatile__("call 2f\n" + "0:.align 16\n" + "1:pop %%eax\n" + "movl %1,%%eax\n" + LSS_ENTRYPOINT + "2:popl %0\n" + "addl $(1b-0b),%0\n" + : "=a" (res) + : "i" (__NR_sigreturn)); + return res; + } + #elif defined(__x86_64__) + /* There are no known problems with any of the _syscallX() macros + * currently shipping for x86_64, but we still need to be able to define + * our own version so that we can override the location of the errno + * location (e.g. when using the clone() system call with the CLONE_VM + * option). + */ + #undef LSS_ENTRYPOINT + #ifdef SYS_SYSCALL_ENTRYPOINT + static inline void (**LSS_NAME(get_syscall_entrypoint)(void))(void) { + void (**entrypoint)(void); + asm volatile(".bss\n" + ".align 8\n" + ".globl " SYS_SYSCALL_ENTRYPOINT "\n" + ".common " SYS_SYSCALL_ENTRYPOINT ",8,8\n" + ".previous\n" + "mov " SYS_SYSCALL_ENTRYPOINT "@GOTPCREL(%%rip), %0\n" + : "=r"(entrypoint)); + return entrypoint; + } + + #define LSS_ENTRYPOINT \ + ".bss\n" \ + ".align 8\n" \ + ".globl " SYS_SYSCALL_ENTRYPOINT "\n" \ + ".common " SYS_SYSCALL_ENTRYPOINT ",8,8\n" \ + ".previous\n" \ + "mov " SYS_SYSCALL_ENTRYPOINT "@GOTPCREL(%%rip), %%rcx\n" \ + "mov 0(%%rcx), %%rcx\n" \ + "test %%rcx, %%rcx\n" \ + "jz 10001f\n" \ + "call *%%rcx\n" \ + "jmp 10002f\n" \ + "10001:syscall\n" \ + "10002:\n" + + #else + #define LSS_ENTRYPOINT "syscall\n" + #endif + + /* The x32 ABI has 32 bit longs, but the syscall interface is 64 bit. + * We need to explicitly cast to an unsigned 64 bit type to avoid implicit + * sign extension. We can't cast pointers directly because those are + * 32 bits, and gcc will dump ugly warnings about casting from a pointer + * to an integer of a different size. + */ + #undef LSS_SYSCALL_ARG + #define LSS_SYSCALL_ARG(a) ((uint64_t)(uintptr_t)(a)) + #undef _LSS_RETURN + #define _LSS_RETURN(type, res, cast) \ + do { \ + if ((uint64_t)(res) >= (uint64_t)(-4095)) { \ + LSS_ERRNO = (int)(-(res)); \ + res = -1; \ + } \ + return (type)(cast)(res); \ + } while (0) + #undef LSS_RETURN + #define LSS_RETURN(type, res) _LSS_RETURN(type, res, uintptr_t) + + #undef _LSS_BODY + #define _LSS_BODY(nr, type, name, cast, ...) \ + long long __res; \ + __asm__ __volatile__(LSS_BODY_ASM##nr LSS_ENTRYPOINT \ + : "=a" (__res) \ + : "0" (__NR_##name) LSS_BODY_ARG##nr(__VA_ARGS__) \ + : LSS_BODY_CLOBBER##nr "r11", "rcx", "memory"); \ + _LSS_RETURN(type, __res, cast) + #undef LSS_BODY + #define LSS_BODY(nr, type, name, args...) \ + _LSS_BODY(nr, type, name, uintptr_t, ## args) + + #undef LSS_BODY_ASM0 + #undef LSS_BODY_ASM1 + #undef LSS_BODY_ASM2 + #undef LSS_BODY_ASM3 + #undef LSS_BODY_ASM4 + #undef LSS_BODY_ASM5 + #undef LSS_BODY_ASM6 + #define LSS_BODY_ASM0 + #define LSS_BODY_ASM1 LSS_BODY_ASM0 + #define LSS_BODY_ASM2 LSS_BODY_ASM1 + #define LSS_BODY_ASM3 LSS_BODY_ASM2 + #define LSS_BODY_ASM4 LSS_BODY_ASM3 "movq %5,%%r10;" + #define LSS_BODY_ASM5 LSS_BODY_ASM4 "movq %6,%%r8;" + #define LSS_BODY_ASM6 LSS_BODY_ASM5 "movq %7,%%r9;" + + #undef LSS_BODY_CLOBBER0 + #undef LSS_BODY_CLOBBER1 + #undef LSS_BODY_CLOBBER2 + #undef LSS_BODY_CLOBBER3 + #undef LSS_BODY_CLOBBER4 + #undef LSS_BODY_CLOBBER5 + #undef LSS_BODY_CLOBBER6 + #define LSS_BODY_CLOBBER0 + #define LSS_BODY_CLOBBER1 LSS_BODY_CLOBBER0 + #define LSS_BODY_CLOBBER2 LSS_BODY_CLOBBER1 + #define LSS_BODY_CLOBBER3 LSS_BODY_CLOBBER2 + #define LSS_BODY_CLOBBER4 LSS_BODY_CLOBBER3 "r10", + #define LSS_BODY_CLOBBER5 LSS_BODY_CLOBBER4 "r8", + #define LSS_BODY_CLOBBER6 LSS_BODY_CLOBBER5 "r9", + + #undef LSS_BODY_ARG0 + #undef LSS_BODY_ARG1 + #undef LSS_BODY_ARG2 + #undef LSS_BODY_ARG3 + #undef LSS_BODY_ARG4 + #undef LSS_BODY_ARG5 + #undef LSS_BODY_ARG6 + #define LSS_BODY_ARG0() + #define LSS_BODY_ARG1(arg1) \ + LSS_BODY_ARG0(), "D" (arg1) + #define LSS_BODY_ARG2(arg1, arg2) \ + LSS_BODY_ARG1(arg1), "S" (arg2) + #define LSS_BODY_ARG3(arg1, arg2, arg3) \ + LSS_BODY_ARG2(arg1, arg2), "d" (arg3) + #define LSS_BODY_ARG4(arg1, arg2, arg3, arg4) \ + LSS_BODY_ARG3(arg1, arg2, arg3), "r" (arg4) + #define LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5) \ + LSS_BODY_ARG4(arg1, arg2, arg3, arg4), "r" (arg5) + #define LSS_BODY_ARG6(arg1, arg2, arg3, arg4, arg5, arg6) \ + LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5), "r" (arg6) + + #undef _syscall0 + #define _syscall0(type,name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(0, type, name); \ + } + #undef _syscall1 + #define _syscall1(type,name,type1,arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(1, type, name, LSS_SYSCALL_ARG(arg1)); \ + } + #undef _syscall2 + #define _syscall2(type,name,type1,arg1,type2,arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_BODY(2, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2));\ + } + #undef _syscall3 + #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_BODY(3, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ + LSS_SYSCALL_ARG(arg3)); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_BODY(4, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ + LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4));\ + } + #undef _syscall5 + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_BODY(5, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ + LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \ + LSS_SYSCALL_ARG(arg5)); \ + } + #undef _syscall6 + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_BODY(6, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ + LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \ + LSS_SYSCALL_ARG(arg5), LSS_SYSCALL_ARG(arg6));\ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long long __res; + { + __asm__ __volatile__(/* if (fn == NULL) + * return -EINVAL; + */ + "testq %4,%4\n" + "jz 1f\n" + + /* if (child_stack == NULL) + * return -EINVAL; + */ + "testq %5,%5\n" + "jz 1f\n" + + /* childstack -= 2*sizeof(void *); + */ + "subq $16,%5\n" + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + "movq %7,8(%5)\n" + "movq %4,0(%5)\n" + + /* %rax = syscall(%rax = __NR_clone, + * %rdi = flags, + * %rsi = child_stack, + * %rdx = parent_tidptr, + * %r8 = new_tls, + * %r10 = child_tidptr) + */ + "movq %2,%%rax\n" + "movq %9,%%r8\n" + "movq %10,%%r10\n" + LSS_ENTRYPOINT + + /* if (%rax != 0) + * return; + */ + "testq %%rax,%%rax\n" + "jnz 1f\n" + + /* In the child. Terminate frame pointer chain. + */ + "xorq %%rbp,%%rbp\n" + + /* Call "fn(arg)". + */ + "popq %%rax\n" + "popq %%rdi\n" + "call *%%rax\n" + + /* Call _exit(%ebx). + */ + "movq %%rax,%%rdi\n" + "movq %3,%%rax\n" + LSS_ENTRYPOINT + + /* Return to parent. + */ + "1:\n" + : "=a" (__res) + : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), + "r"(LSS_SYSCALL_ARG(fn)), + "S"(LSS_SYSCALL_ARG(child_stack)), + "D"(LSS_SYSCALL_ARG(flags)), + "r"(LSS_SYSCALL_ARG(arg)), + "d"(LSS_SYSCALL_ARG(parent_tidptr)), + "r"(LSS_SYSCALL_ARG(newtls)), + "r"(LSS_SYSCALL_ARG(child_tidptr)) + : "memory", "r8", "r10", "r11", "rcx"); + } + LSS_RETURN(int, __res); + } + LSS_INLINE _syscall2(int, arch_prctl, int, c, void *, a) + + LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { + /* On x86-64, the kernel does not know how to return from + * a signal handler. Instead, it relies on user space to provide a + * restorer function that calls the rt_sigreturn() system call. + * Unfortunately, we cannot just reference the glibc version of this + * function, as glibc goes out of its way to make it inaccessible. + */ + long long res; + __asm__ __volatile__("jmp 2f\n" + ".align 16\n" + "1:movq %1,%%rax\n" + LSS_ENTRYPOINT + "2:leaq 1b(%%rip),%0\n" + : "=r" (res) + : "i" (__NR_rt_sigreturn)); + return (void (*)(void))(uintptr_t)res; + } + #elif defined(__ARM_ARCH_3__) + /* Most definitions of _syscallX() neglect to mark "memory" as being + * clobbered. This causes problems with compilers, that do a better job + * at optimizing across __asm__ calls. + * So, we just have to redefine all of the _syscallX() macros. + */ + #undef LSS_REG + #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a + #undef LSS_BODY + #define LSS_BODY(type,name,args...) \ + register long __res_r0 __asm__("r0"); \ + long __res; \ + __asm__ __volatile__ (__syscall(name) \ + : "=r"(__res_r0) : args : "lr", "memory"); \ + __res = __res_r0; \ + LSS_RETURN(type, __res) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__r0)); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \ + } + #undef _syscall5 + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); LSS_REG(4, arg5); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4)); \ + } + #undef _syscall6 + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4), "r"(__r5)); \ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long __res; + { + register int __flags __asm__("r0") = flags; + register void *__stack __asm__("r1") = child_stack; + register void *__ptid __asm__("r2") = parent_tidptr; + register void *__tls __asm__("r3") = newtls; + register int *__ctid __asm__("r4") = child_tidptr; + __asm__ __volatile__(/* if (fn == NULL || child_stack == NULL) + * return -EINVAL; + */ + "cmp %2,#0\n" + "cmpne %3,#0\n" + "moveq %0,%1\n" + "beq 1f\n" + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + "str %5,[%3,#-4]!\n" + "str %2,[%3,#-4]!\n" + + /* %r0 = syscall(%r0 = flags, + * %r1 = child_stack, + * %r2 = parent_tidptr, + * %r3 = newtls, + * %r4 = child_tidptr) + */ + __syscall(clone)"\n" + + /* if (%r0 != 0) + * return %r0; + */ + "movs %0,r0\n" + "bne 1f\n" + + /* In the child, now. Call "fn(arg)". + */ + "ldr r0,[sp, #4]\n" + "mov lr,pc\n" + "ldr pc,[sp]\n" + + /* Call _exit(%r0). + */ + __syscall(exit)"\n" + "1:\n" + : "=r" (__res) + : "i"(-EINVAL), + "r"(fn), "r"(__stack), "r"(__flags), "r"(arg), + "r"(__ptid), "r"(__tls), "r"(__ctid) + : "cc", "lr", "memory"); + } + LSS_RETURN(int, __res); + } + #elif defined(__ARM_EABI__) + /* Most definitions of _syscallX() neglect to mark "memory" as being + * clobbered. This causes problems with compilers, that do a better job + * at optimizing across __asm__ calls. + * So, we just have to redefine all fo the _syscallX() macros. + */ + #undef LSS_REG + #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a + #undef LSS_BODY + #define LSS_BODY(type,name,args...) \ + register long __res_r0 __asm__("r0"); \ + long __res; \ + __asm__ __volatile__ ("push {r7}\n" \ + "mov r7, %1\n" \ + "swi 0x0\n" \ + "pop {r7}\n" \ + : "=r"(__res_r0) \ + : "i"(__NR_##name) , ## args \ + : "lr", "memory"); \ + __res = __res_r0; \ + LSS_RETURN(type, __res) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__r0)); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \ + } + #undef _syscall5 + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); LSS_REG(4, arg5); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4)); \ + } + #undef _syscall6 + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4), "r"(__r5)); \ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long __res; + if (fn == NULL || child_stack == NULL) { + __res = -EINVAL; + LSS_RETURN(int, __res); + } + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + { + uintptr_t* cstack = (uintptr_t*)child_stack - 2; + cstack[0] = (uintptr_t)fn; + cstack[1] = (uintptr_t)arg; + child_stack = cstack; + } + { + register int __flags __asm__("r0") = flags; + register void *__stack __asm__("r1") = child_stack; + register void *__ptid __asm__("r2") = parent_tidptr; + register void *__tls __asm__("r3") = newtls; + register int *__ctid __asm__("r4") = child_tidptr; + __asm__ __volatile__( +#ifdef __thumb2__ + "push {r7}\n" +#endif + /* %r0 = syscall(%r0 = flags, + * %r1 = child_stack, + * %r2 = parent_tidptr, + * %r3 = newtls, + * %r4 = child_tidptr) + */ + "mov r7, %6\n" + "swi 0x0\n" + + /* if (%r0 != 0) + * return %r0; + */ + "cmp r0, #0\n" + "bne 1f\n" + + /* In the child, now. Call "fn(arg)". + */ + "ldr r0,[sp, #4]\n" + + "ldr lr,[sp]\n" + "blx lr\n" + + /* Call _exit(%r0). + */ + "mov r7, %7\n" + "swi 0x0\n" + /* Unreachable */ + "bkpt #0\n" + "1:\n" +#ifdef __thumb2__ + "pop {r7}\n" +#endif + "movs %0,r0\n" + : "=r"(__res) + : "r"(__stack), "r"(__flags), "r"(__ptid), "r"(__tls), "r"(__ctid), + "i"(__NR_clone), "i"(__NR_exit) + : "cc", "lr", "memory" +#ifndef __thumb2__ + , "r7" +#endif + ); + } + LSS_RETURN(int, __res); + } + #elif defined(__aarch64__) + /* Most definitions of _syscallX() neglect to mark "memory" as being + * clobbered. This causes problems with compilers, that do a better job + * at optimizing across __asm__ calls. + * So, we just have to redefine all of the _syscallX() macros. + */ + #undef LSS_REG + #define LSS_REG(r,a) register int64_t __r##r __asm__("x"#r) = (int64_t)a + #undef LSS_BODY + #define LSS_BODY(type,name,args...) \ + register int64_t __res_x0 __asm__("x0"); \ + int64_t __res; \ + __asm__ __volatile__ ("mov x8, %1\n" \ + "svc 0x0\n" \ + : "=r"(__res_x0) \ + : "i"(__NR_##name) , ## args \ + : "x8", "memory"); \ + __res = __res_x0; \ + LSS_RETURN(type, __res) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__r0)); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \ + } + #undef _syscall5 + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); LSS_REG(4, arg5); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4)); \ + } + #undef _syscall6 + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4), "r"(__r5)); \ + } + + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + int64_t __res; + { + register uint64_t __flags __asm__("x0") = (uint64_t)flags; + register void *__stack __asm__("x1") = child_stack; + register void *__ptid __asm__("x2") = parent_tidptr; + register void *__tls __asm__("x3") = newtls; + register int *__ctid __asm__("x4") = child_tidptr; + __asm__ __volatile__(/* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + "stp %1, %4, [%2, #-16]!\n" + + /* %x0 = syscall(%x0 = flags, + * %x1 = child_stack, + * %x2 = parent_tidptr, + * %x3 = newtls, + * %x4 = child_tidptr) + */ + "mov x8, %8\n" + "svc 0x0\n" + + /* if (%r0 != 0) + * return %r0; + */ + "mov %0, x0\n" + "cbnz x0, 1f\n" + + /* In the child, now. Call "fn(arg)". + */ + "ldp x1, x0, [sp], #16\n" + "blr x1\n" + + /* Call _exit(%r0). + */ + "mov x8, %9\n" + "svc 0x0\n" + "1:\n" + : "=r" (__res) + : "r"(fn), "r"(__stack), "r"(__flags), "r"(arg), + "r"(__ptid), "r"(__tls), "r"(__ctid), + "i"(__NR_clone), "i"(__NR_exit) + : "cc", "x8", "memory"); + } + LSS_RETURN(int, __res); + } + #elif defined(__mips__) + #undef LSS_REG + #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) = \ + (unsigned long)(a) + #undef LSS_BODY + #undef LSS_SYSCALL_CLOBBERS + #if _MIPS_SIM == _MIPS_SIM_ABI32 + #define LSS_SYSCALL_CLOBBERS "$1", "$3", "$8", "$9", "$10", \ + "$11", "$12", "$13", "$14", "$15", \ + "$24", "$25", "hi", "lo", "memory" + #else + #define LSS_SYSCALL_CLOBBERS "$1", "$3", "$10", "$11", "$12", \ + "$13", "$14", "$15", "$24", "$25", \ + "hi", "lo", "memory" + #endif + #define LSS_BODY(type,name,r7,...) \ + register unsigned long __v0 __asm__("$2") = __NR_##name; \ + __asm__ __volatile__ ("syscall\n" \ + : "=r"(__v0), r7 (__r7) \ + : "0"(__v0), ##__VA_ARGS__ \ + : LSS_SYSCALL_CLOBBERS); \ + LSS_RETURN(type, __v0, __r7) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + register unsigned long __r7 __asm__("$7"); \ + LSS_BODY(type, name, "=r"); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + register unsigned long __r7 __asm__("$7"); \ + LSS_REG(4, arg1); LSS_BODY(type, name, "=r", "r"(__r4)); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + register unsigned long __r7 __asm__("$7"); \ + LSS_REG(4, arg1); LSS_REG(5, arg2); \ + LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5)); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + register unsigned long __r7 __asm__("$7"); \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5), "r"(__r6)); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ + LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6)); \ + } + #undef _syscall5 + #if _MIPS_SIM == _MIPS_SIM_ABI32 + /* The old 32bit MIPS system call API passes the fifth and sixth argument + * on the stack, whereas the new APIs use registers "r8" and "r9". + */ + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ + register unsigned long __v0 __asm__("$2") = __NR_##name; \ + __asm__ __volatile__ (".set noreorder\n" \ + "subu $29, 32\n" \ + "sw %5, 16($29)\n" \ + "syscall\n" \ + "addiu $29, 32\n" \ + ".set reorder\n" \ + : "+r"(__v0), "+r" (__r7) \ + : "r"(__r4), "r"(__r5), \ + "r"(__r6), "r" ((unsigned long)arg5) \ + : "$8", "$9", "$10", "$11", "$12", \ + "$13", "$14", "$15", "$24", "$25", \ + "memory"); \ + LSS_RETURN(type, __v0, __r7); \ + } + #else + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); LSS_REG(8, arg5); \ + LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \ + "r"(__r8)); \ + } + #endif + #undef _syscall6 + #if _MIPS_SIM == _MIPS_SIM_ABI32 + /* The old 32bit MIPS system call API passes the fifth and sixth argument + * on the stack, whereas the new APIs use registers "r8" and "r9". + */ + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ + register unsigned long __v0 __asm__("$2") = __NR_##name; \ + __asm__ __volatile__ (".set noreorder\n" \ + "subu $29, 32\n" \ + "sw %5, 16($29)\n" \ + "sw %6, 20($29)\n" \ + "syscall\n" \ + "addiu $29, 32\n" \ + ".set reorder\n" \ + : "+r"(__v0), "+r" (__r7) \ + : "r"(__r4), "r"(__r5), \ + "r"(__r6), "r" ((unsigned long)arg5), \ + "r" ((unsigned long)arg6) \ + : "$8", "$9", "$10", "$11", "$12", \ + "$13", "$14", "$15", "$24", "$25", \ + "memory"); \ + LSS_RETURN(type, __v0, __r7); \ + } + #else + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5,type6 arg6) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); LSS_REG(8, arg5); LSS_REG(9, arg6); \ + LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \ + "r"(__r8), "r"(__r9)); \ + } + #endif + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + register unsigned long __v0 __asm__("$2") = -EINVAL; + register unsigned long __r7 __asm__("$7") = (unsigned long)newtls; + { + register int __flags __asm__("$4") = flags; + register void *__stack __asm__("$5") = child_stack; + register void *__ptid __asm__("$6") = parent_tidptr; + register int *__ctid __asm__("$8") = child_tidptr; + __asm__ __volatile__( + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "subu $29,24\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "sub $29,16\n" + #else + "dsubu $29,16\n" + #endif + + /* if (fn == NULL || child_stack == NULL) + * return -EINVAL; + */ + "beqz %4,1f\n" + "beqz %5,1f\n" + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "subu %5,32\n" + "sw %4,0(%5)\n" + "sw %7,4(%5)\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "sub %5,32\n" + "sw %4,0(%5)\n" + "sw %7,8(%5)\n" + #else + "dsubu %5,32\n" + "sd %4,0(%5)\n" + "sd %7,8(%5)\n" + #endif + + /* $7 = syscall($4 = flags, + * $5 = child_stack, + * $6 = parent_tidptr, + * $7 = newtls, + * $8 = child_tidptr) + */ + "li $2,%2\n" + "syscall\n" + + /* if ($7 != 0) + * return $2; + */ + "bnez $7,1f\n" + "bnez $2,1f\n" + + /* In the child, now. Call "fn(arg)". + */ + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "lw $25,0($29)\n" + "lw $4,4($29)\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "lw $25,0($29)\n" + "lw $4,8($29)\n" + #else + "ld $25,0($29)\n" + "ld $4,8($29)\n" + #endif + "jalr $25\n" + + /* Call _exit($2) + */ + "move $4,$2\n" + "li $2,%3\n" + "syscall\n" + + "1:\n" + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "addu $29, 24\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "add $29, 16\n" + #else + "daddu $29,16\n" + #endif + : "+r" (__v0), "+r" (__r7) + : "i"(__NR_clone), "i"(__NR_exit), "r"(fn), + "r"(__stack), "r"(__flags), "r"(arg), + "r"(__ptid), "r"(__ctid) + : "$9", "$10", "$11", "$12", "$13", "$14", "$15", + "$24", "$25", "memory"); + } + LSS_RETURN(int, __v0, __r7); + } + #elif defined (__PPC__) + #undef LSS_LOADARGS_0 + #define LSS_LOADARGS_0(name, dummy...) \ + __sc_0 = __NR_##name + #undef LSS_LOADARGS_1 + #define LSS_LOADARGS_1(name, arg1) \ + LSS_LOADARGS_0(name); \ + __sc_3 = (unsigned long) (arg1) + #undef LSS_LOADARGS_2 + #define LSS_LOADARGS_2(name, arg1, arg2) \ + LSS_LOADARGS_1(name, arg1); \ + __sc_4 = (unsigned long) (arg2) + #undef LSS_LOADARGS_3 + #define LSS_LOADARGS_3(name, arg1, arg2, arg3) \ + LSS_LOADARGS_2(name, arg1, arg2); \ + __sc_5 = (unsigned long) (arg3) + #undef LSS_LOADARGS_4 + #define LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4) \ + LSS_LOADARGS_3(name, arg1, arg2, arg3); \ + __sc_6 = (unsigned long) (arg4) + #undef LSS_LOADARGS_5 + #define LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5) \ + LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4); \ + __sc_7 = (unsigned long) (arg5) + #undef LSS_LOADARGS_6 + #define LSS_LOADARGS_6(name, arg1, arg2, arg3, arg4, arg5, arg6) \ + LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5); \ + __sc_8 = (unsigned long) (arg6) + #undef LSS_ASMINPUT_0 + #define LSS_ASMINPUT_0 "0" (__sc_0) + #undef LSS_ASMINPUT_1 + #define LSS_ASMINPUT_1 LSS_ASMINPUT_0, "1" (__sc_3) + #undef LSS_ASMINPUT_2 + #define LSS_ASMINPUT_2 LSS_ASMINPUT_1, "2" (__sc_4) + #undef LSS_ASMINPUT_3 + #define LSS_ASMINPUT_3 LSS_ASMINPUT_2, "3" (__sc_5) + #undef LSS_ASMINPUT_4 + #define LSS_ASMINPUT_4 LSS_ASMINPUT_3, "4" (__sc_6) + #undef LSS_ASMINPUT_5 + #define LSS_ASMINPUT_5 LSS_ASMINPUT_4, "5" (__sc_7) + #undef LSS_ASMINPUT_6 + #define LSS_ASMINPUT_6 LSS_ASMINPUT_5, "6" (__sc_8) + #undef LSS_BODY + #define LSS_BODY(nr, type, name, args...) \ + long __sc_ret, __sc_err; \ + { \ + register unsigned long __sc_0 __asm__ ("r0"); \ + register unsigned long __sc_3 __asm__ ("r3"); \ + register unsigned long __sc_4 __asm__ ("r4"); \ + register unsigned long __sc_5 __asm__ ("r5"); \ + register unsigned long __sc_6 __asm__ ("r6"); \ + register unsigned long __sc_7 __asm__ ("r7"); \ + register unsigned long __sc_8 __asm__ ("r8"); \ + \ + LSS_LOADARGS_##nr(name, args); \ + __asm__ __volatile__ \ + ("sc\n\t" \ + "mfcr %0" \ + : "=&r" (__sc_0), \ + "=&r" (__sc_3), "=&r" (__sc_4), \ + "=&r" (__sc_5), "=&r" (__sc_6), \ + "=&r" (__sc_7), "=&r" (__sc_8) \ + : LSS_ASMINPUT_##nr \ + : "cr0", "ctr", "memory", \ + "r9", "r10", "r11", "r12"); \ + __sc_ret = __sc_3; \ + __sc_err = __sc_0; \ + } \ + LSS_RETURN(type, __sc_ret, __sc_err) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(0, type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(1, type, name, arg1); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_BODY(2, type, name, arg1, arg2); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_BODY(3, type, name, arg1, arg2, arg3); \ + } + #undef _syscall4 + #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_BODY(4, type, name, arg1, arg2, arg3, arg4); \ + } + #undef _syscall5 + #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5); \ + } + #undef _syscall6 + #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5, type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6); \ + } + /* clone function adapted from glibc 2.3.6 clone.S */ + /* TODO(csilvers): consider wrapping some args up in a struct, like we + * do for i386's _syscall6, so we can compile successfully on gcc 2.95 + */ + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long __ret, __err; + { + register int (*__fn)(void *) __asm__ ("r8") = fn; + register void *__cstack __asm__ ("r4") = child_stack; + register int __flags __asm__ ("r3") = flags; + register void * __arg __asm__ ("r9") = arg; + register int * __ptidptr __asm__ ("r5") = parent_tidptr; + register void * __newtls __asm__ ("r6") = newtls; + register int * __ctidptr __asm__ ("r7") = child_tidptr; + __asm__ __volatile__( + /* check for fn == NULL + * and child_stack == NULL + */ + "cmpwi cr0, %6, 0\n\t" + "cmpwi cr1, %7, 0\n\t" + "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t" + "beq- cr0, 1f\n\t" + + /* set up stack frame for child */ + "clrrwi %7, %7, 4\n\t" + "li 0, 0\n\t" + "stwu 0, -16(%7)\n\t" + + /* fn, arg, child_stack are saved across the syscall: r28-30 */ + "mr 28, %6\n\t" + "mr 29, %7\n\t" + "mr 27, %9\n\t" + + /* syscall */ + "li 0, %4\n\t" + /* flags already in r3 + * child_stack already in r4 + * ptidptr already in r5 + * newtls already in r6 + * ctidptr already in r7 + */ + "sc\n\t" + + /* Test if syscall was successful */ + "cmpwi cr1, 3, 0\n\t" + "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t" + "bne- cr1, 1f\n\t" + + /* Do the function call */ + "mtctr 28\n\t" + "mr 3, 27\n\t" + "bctrl\n\t" + + /* Call _exit(r3) */ + "li 0, %5\n\t" + "sc\n\t" + + /* Return to parent */ + "1:\n" + "mfcr %1\n\t" + "mr %0, 3\n\t" + : "=r" (__ret), "=r" (__err) + : "0" (-1), "1" (EINVAL), + "i" (__NR_clone), "i" (__NR_exit), + "r" (__fn), "r" (__cstack), "r" (__flags), + "r" (__arg), "r" (__ptidptr), "r" (__newtls), + "r" (__ctidptr) + : "cr0", "cr1", "memory", "ctr", + "r0", "r29", "r27", "r28"); + } + LSS_RETURN(int, __ret, __err); + } + #elif defined(__s390__) + #undef LSS_REG + #define LSS_REG(r, a) register unsigned long __r##r __asm__("r"#r) = (unsigned long) a + #undef LSS_BODY + #define LSS_BODY(type, name, args...) \ + register unsigned long __nr __asm__("r1") \ + = (unsigned long)(__NR_##name); \ + register long __res_r2 __asm__("r2"); \ + long __res; \ + __asm__ __volatile__ \ + ("svc 0\n\t" \ + : "=d"(__res_r2) \ + : "d"(__nr), ## args \ + : "memory"); \ + __res = __res_r2; \ + LSS_RETURN(type, __res) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_REG(2, arg1); \ + LSS_BODY(type, name, "0"(__r2)); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_REG(2, arg1); LSS_REG(3, arg2); \ + LSS_BODY(type, name, "0"(__r2), "d"(__r3)); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ + LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4)); \ + } + #undef _syscall4 + #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, \ + type4 arg4) { \ + LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ + LSS_REG(5, arg4); \ + LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4), \ + "d"(__r5)); \ + } + #undef _syscall5 + #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, \ + type4 arg4, type5 arg5) { \ + LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ + LSS_REG(5, arg4); LSS_REG(6, arg5); \ + LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4), \ + "d"(__r5), "d"(__r6)); \ + } + #undef _syscall6 + #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5, type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, \ + type4 arg4, type5 arg5, type6 arg6) { \ + LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ + LSS_REG(5, arg4); LSS_REG(6, arg5); LSS_REG(7, arg6); \ + LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4), \ + "d"(__r5), "d"(__r6), "d"(__r7)); \ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long __ret; + { + register int (*__fn)(void *) __asm__ ("r1") = fn; + register void *__cstack __asm__ ("r2") = child_stack; + register int __flags __asm__ ("r3") = flags; + register void *__arg __asm__ ("r0") = arg; + register int *__ptidptr __asm__ ("r4") = parent_tidptr; + register void *__newtls __asm__ ("r6") = newtls; + register int *__ctidptr __asm__ ("r5") = child_tidptr; + __asm__ __volatile__ ( + #ifndef __s390x__ + /* arg already in r0 */ + "ltr %4, %4\n\t" /* check fn, which is already in r1 */ + "jz 1f\n\t" /* NULL function pointer, return -EINVAL */ + "ltr %5, %5\n\t" /* check child_stack, which is already in r2 */ + "jz 1f\n\t" /* NULL stack pointer, return -EINVAL */ + /* flags already in r3 */ + /* parent_tidptr already in r4 */ + /* child_tidptr already in r5 */ + /* newtls already in r6 */ + "svc %2\n\t" /* invoke clone syscall */ + "ltr %0,%%r2\n\t" /* load return code into __ret and test */ + "jnz 1f\n\t" /* return to parent if non-zero */ + /* start child thread */ + "lr %%r2, %7\n\t" /* set first parameter to void *arg */ + "ahi %%r15, -96\n\t" /* make room on the stack for the save area */ + "xc 0(4,%%r15), 0(%%r15)\n\t" + "basr %%r14, %4\n\t" /* jump to fn */ + "svc %3\n" /* invoke exit syscall */ + "1:\n" + #else + /* arg already in r0 */ + "ltgr %4, %4\n\t" /* check fn, which is already in r1 */ + "jz 1f\n\t" /* NULL function pointer, return -EINVAL */ + "ltgr %5, %5\n\t" /* check child_stack, which is already in r2 */ + "jz 1f\n\t" /* NULL stack pointer, return -EINVAL */ + /* flags already in r3 */ + /* parent_tidptr already in r4 */ + /* child_tidptr already in r5 */ + /* newtls already in r6 */ + "svc %2\n\t" /* invoke clone syscall */ + "ltgr %0, %%r2\n\t" /* load return code into __ret and test */ + "jnz 1f\n\t" /* return to parent if non-zero */ + /* start child thread */ + "lgr %%r2, %7\n\t" /* set first parameter to void *arg */ + "aghi %%r15, -160\n\t" /* make room on the stack for the save area */ + "xc 0(8,%%r15), 0(%%r15)\n\t" + "basr %%r14, %4\n\t" /* jump to fn */ + "svc %3\n" /* invoke exit syscall */ + "1:\n" + #endif + : "=r" (__ret) + : "0" (-EINVAL), "i" (__NR_clone), "i" (__NR_exit), + "d" (__fn), "d" (__cstack), "d" (__flags), "d" (__arg), + "d" (__ptidptr), "d" (__newtls), "d" (__ctidptr) + : "cc", "r14", "memory" + ); + } + LSS_RETURN(int, __ret); + } + #elif defined(__riscv) && __riscv_xlen == 64 + #undef LSS_REG + #define LSS_REG(r,a) register int64_t __r##r __asm__("a"#r) = (int64_t)a + #undef LSS_BODY + #define LSS_BODY(type,name,args...) \ + register int64_t __res_a0 __asm__("a0"); \ + register int64_t __a7 __asm__("a7") = __NR_##name; \ + int64_t __res; \ + __asm__ __volatile__ ("scall\n" \ + : "=r"(__res_a0) \ + : "r"(__a7) , ## args \ + : "memory"); \ + __res = __res_a0; \ + LSS_RETURN(type, __res) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__r0)); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \ + } + #undef _syscall5 + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); LSS_REG(4, arg5); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4)); \ + } + #undef _syscall6 + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4), "r"(__r5)); \ + } + + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + int64_t __res; + { + register int64_t __res_a0 __asm__("a0"); + register uint64_t __flags __asm__("a0") = flags; + register void *__stack __asm__("a1") = child_stack; + register void *__ptid __asm__("a2") = parent_tidptr; + register void *__tls __asm__("a3") = newtls; + register int *__ctid __asm__("a4") = child_tidptr; + __asm__ __volatile__(/* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + "addi %2,%2,-16\n" + "sd %1, 0(%2)\n" + "sd %4, 8(%2)\n" + + /* %a0 = syscall(%a0 = flags, + * %a1 = child_stack, + * %a2 = parent_tidptr, + * %a3 = newtls, + * %a4 = child_tidptr) + */ + "li a7, %8\n" + "scall\n" + + /* if (%a0 != 0) + * return %a0; + */ + "bnez %0, 1f\n" + + /* In the child, now. Call "fn(arg)". + */ + "ld a1, 0(sp)\n" + "ld a0, 8(sp)\n" + "jalr a1\n" + + /* Call _exit(%a0). + */ + "li a7, %9\n" + "scall\n" + "1:\n" + : "=r" (__res_a0) + : "r"(fn), "r"(__stack), "r"(__flags), "r"(arg), + "r"(__ptid), "r"(__tls), "r"(__ctid), + "i"(__NR_clone), "i"(__NR_exit) + : "cc", "memory"); + __res = __res_a0; + } + LSS_RETURN(int, __res); + } + #elif defined(__e2k__) + + #undef _LSS_BODY + #define _LSS_BODY(nr, type, name, ...) \ + register unsigned long long __res; \ + __asm__ __volatile__ \ + ( \ + "{\n\t" \ + " sdisp %%ctpr1, 0x3\n\t" \ + " addd, s 0x0, %[sys_num], %%b[0]\n\t" \ + LSS_BODY_ASM##nr \ + "}\n\t" \ + "{\n\t" \ + " call %%ctpr1, wbs = %#\n\t" \ + "}\n\t" \ + "{\n\t" \ + " addd, s 0x0, %%b[0], %[res]\n\t" \ + "}\n\t" \ + : [res] "=r" (__res) \ + : \ + LSS_BODY_ARG##nr(__VA_ARGS__) \ + [sys_num] "ri" (__NR_##name) \ + : "ctpr1", "ctpr2", "ctpr3", \ + "b[0]", "b[1]", "b[2]", "b[3]", \ + "b[4]", "b[5]", "b[6]", "b[7]" \ + ); \ + LSS_RETURN(type, __res); + + #undef LSS_BODY + #define LSS_BODY(nr, type, name, args...) \ + _LSS_BODY(nr, type, name, ## args) + + #undef LSS_BODY_ASM0 + #undef LSS_BODY_ASM1 + #undef LSS_BODY_ASM2 + #undef LSS_BODY_ASM3 + #undef LSS_BODY_ASM4 + #undef LSS_BODY_ASM5 + #undef LSS_BODY_ASM6 + + #define LSS_BODY_ASM0 + #define LSS_BODY_ASM1 LSS_BODY_ASM0 \ + " addd, s 0x0, %[arg1], %%b[1]\n\t" + #define LSS_BODY_ASM2 LSS_BODY_ASM1 \ + " addd, s 0x0, %[arg2], %%b[2]\n\t" + #define LSS_BODY_ASM3 LSS_BODY_ASM2 \ + " addd, s 0x0, %[arg3], %%b[3]\n\t" + #define LSS_BODY_ASM4 LSS_BODY_ASM3 \ + " addd, s 0x0, %[arg4], %%b[4]\n\t" + #define LSS_BODY_ASM5 LSS_BODY_ASM4 \ + " addd, s 0x0, %[arg5], %%b[5]\n\t" + #define LSS_BODY_ASM6 LSS_BODY_ASM5 \ + "}\n\t" \ + "{\n\t" \ + " addd, s 0x0, %[arg6], %%b[6]\n\t" + + #undef LSS_SYSCALL_ARG + #define LSS_SYSCALL_ARG(a) ((unsigned long long)(uintptr_t)(a)) + + #undef LSS_BODY_ARG0 + #undef LSS_BODY_ARG1 + #undef LSS_BODY_ARG2 + #undef LSS_BODY_ARG3 + #undef LSS_BODY_ARG4 + #undef LSS_BODY_ARG5 + #undef LSS_BODY_ARG6 + + #define LSS_BODY_ARG0() + #define LSS_BODY_ARG1(_arg1) \ + [arg1] "ri" LSS_SYSCALL_ARG(_arg1), + #define LSS_BODY_ARG2(_arg1, _arg2) \ + LSS_BODY_ARG1(_arg1) \ + [arg2] "ri" LSS_SYSCALL_ARG(_arg2), + #define LSS_BODY_ARG3(_arg1, _arg2, _arg3) \ + LSS_BODY_ARG2(_arg1, _arg2) \ + [arg3] "ri" LSS_SYSCALL_ARG(_arg3), + #define LSS_BODY_ARG4(_arg1, _arg2, _arg3, _arg4) \ + LSS_BODY_ARG3(_arg1, _arg2, _arg3) \ + [arg4] "ri" LSS_SYSCALL_ARG(_arg4), + #define LSS_BODY_ARG5(_arg1, _arg2, _arg3, _arg4, _arg5) \ + LSS_BODY_ARG4(_arg1, _arg2, _arg3, _arg4) \ + [arg5] "ri" LSS_SYSCALL_ARG(_arg5), + #define LSS_BODY_ARG6(_arg1, _arg2, _arg3, _arg4, _arg5, _arg6) \ + LSS_BODY_ARG5(_arg1, _arg2, _arg3, _arg4, _arg5) \ + [arg6] "ri" LSS_SYSCALL_ARG(_arg6), + + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(0, type, name); \ + } + + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(1, type, name, arg1) \ + } + + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_BODY(2, type, name, arg1, arg2) \ + } + + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_BODY(3, type, name, arg1, arg2, arg3) \ + } + + #undef _syscall4 + #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_BODY(4, type, name, arg1, arg2, arg3, arg4) \ + } + + #undef _syscall5 + #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5) \ + } + + #undef _syscall6 + #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5, type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6) \ + } + + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + unsigned long long __res; + + __asm__ __volatile__ ( + "{\n\t" + " addd,s 0x0, %[nr_clone], %%b[0]\n\t" + " addd,s 0x0, %[flags], %%db[1]\n\t" + " addd,s 0x0, %[child_stack], %%db[2]\n\t" + " addd,s 0x0, %[parent_tidptr], %%db[3]\n\t" + " addd,s 0x0, %[child_tidptr], %%db[4]\n\t" + " addd,s 0x0, %[newtls], %%db[5]\n\t" + "}\n\t" + /* if (fn == NULL) + * return -EINVAL; + */ + + "{\n\t" + " disp %%ctpr1, .L1\n\t" + "}\n\t" + "{\n\t" + " cmpesb,s 0x0, %[fn], %%pred0\n\t" + "}\n\t" + "{\n\t" + " ct %%ctpr1 ? %%pred0\n\t" + "}\n\t" + + /* if (child_stack == NULL) + * return -EINVAL; + */ + "{\n\t" + " cmpesb,s 0x0, %%db[2], %%pred0\n\t" + "}\n\t" + "{\n\t" + " ct %%ctpr1 ? %%pred0\n\t" + "}\n\t" + + /* b[0] = syscall(%b[0] = __NR_clone, + * %db[1] = flags, + * %db[2] = child_stack, + * %db[3] = parent_tidptr, + * %db[4] = child_tidptr, + * %db[5] = newtls) + */ + "{\n\t" + " sdisp %%ctpr1, 0x3\n\t" + "}\n\t" + "{\n\t" + " call %%ctpr1, wbs = %#\n\t" + "}\n\t" + + /* if (%[b0] != 0) + * return %b[0]; + */ + "{\n\t" + " disp %%ctpr1, .L2\n\t" + " cmpesb,s 0x0, %%b[0], %%pred0\n\t" + "}\n\t" + "{\n\t" + " ct %%ctpr1 ? ~%%pred0\n\t" + "}\n\t" + /* In the child, now. Call "fn(arg)". + */ + + "{\n\t" + " movtd,s %[fn], %%ctpr1\n\t" + "}\n\t" + "{\n\t" + " addd,s 0x0, %[arg], %%db[0]\n\t" + "}\n\t" + "{\n\t" + " call %%ctpr1, wbs = %#\n\t" + "}\n\t" + /* Call _exit(%b[0]). + */ + + "{\n\t" + " sdisp %%ctpr1, 0x3\n\t" + " addd,s 0x0, %%b[0], %%b[1]\n\t" + "}\n\t" + "{\n\t" + " addd,s 0x0, %[nr_exit], %%b[0]\n\t" + "}\n\t" + "{\n\t" + " call %%ctpr1, wbs = %#\n\t" + "}\n\t" + "{\n\t" + " disp %%ctpr1, .L2\n\t" + " adds,s 0x0, 0x0, %%b[0]\n\t" + "}\n\t" + "{\n\t" + " ct %%ctpr1\n\t" + "}\n\t" + ".L1:\n\t" + "{\n\t" + " addd,s 0x0, %[einval], %%b[0]\n\t" + "}\n\t" + ".L2:\n\t" + "{\n\t" + " addd,s 0x0, %%b[0], %[res]\n\t" + "}\n\t" + : [res] "=r" LSS_SYSCALL_ARG(__res) + : [nr_clone] "ri" LSS_SYSCALL_ARG(__NR_clone) + [arg] "ri" LSS_SYSCALL_ARG(arg) + [nr_exit] "ri" LSS_SYSCALL_ARG(__NR_exit) + [flags] "ri" LSS_SYSCALL_ARG(flags) + [child_stack] "ri" LSS_SYSCALL_ARG(child_stack) + [parent_tidptr] "ri" + LSS_SYSCALL_ARG(parent_tidptr) + [newtls] "ri" LSS_SYSCALL_ARG(newtls) + [child_tidptr] "ri" + LSS_SYSCALL_ARG(child_tidptr) + [fn] "ri" LSS_SYSCALL_ARG(fn) + [einval] "ri" LSS_SYSCALL_ARG(-EINVAL) + : "ctpr1", "b[0]", "b[1]", "b[2]", "b[3]", + "b[4]", "b[5]", "pred0"); + LSS_RETURN(int, __res); + } + #elif defined(__loongarch_lp64) + /* Most definitions of _syscallX() neglect to mark "memory" as being + * clobbered. This causes problems with compilers, that do a better job + * at optimizing across __asm__ calls. + * So, we just have to redefine all of the _syscallX() macros. + */ + #undef LSS_REG + #define LSS_REG(ar,a) register int64_t __r##ar __asm__("a"#ar) = (int64_t)a + /* syscall is like subroutine calls, all caller-saved registers may be + * clobbered, we should add them to the |Clobbers| list. + * a0 is not included because it's in the output list. + */ + #define LSS_SYSCALL_CLOBBERS "t0", "t1", "t2", "t3", "t4", "t5", "t6", \ + "t7", "t8", "memory" + #undef LSS_BODY + #define LSS_BODY(type,name,args...) \ + register int64_t __res_a0 __asm__("a0"); \ + int64_t __res; \ + __asm__ __volatile__ ("li.d $a7, %1\n" \ + "syscall 0x0\n" \ + : "=r"(__res_a0) \ + : "i"(__NR_##name) , ## args \ + : LSS_SYSCALL_CLOBBERS); \ + __res = __res_a0; \ + LSS_RETURN(type, __res) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__r0)); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \ + } + #undef _syscall5 + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); LSS_REG(4, arg5); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4)); \ + } + #undef _syscall6 + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4), "r"(__r5)); \ + } + + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + int64_t __res; + { + register int64_t __res_a0 __asm__("a0"); + register uint64_t __flags __asm__("a0") = flags; + register void *__stack __asm__("a1") = child_stack; + register void *__ptid __asm__("a2") = parent_tidptr; + register void *__tls __asm__("a3") = newtls; + register int *__ctid __asm__("a4") = child_tidptr; + __asm__ __volatile__(/* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + "addi.d %2, %2, -16\n" + "st.d %1, %2, 8\n" + "st.d %4, %2, 0\n" + + /* %a0 = syscall(%a0 = flags, + * %a1 = child_stack, + * %a2 = parent_tidptr, + * %a3 = newtls, + * %a4 = child_tidptr) + */ + "li.d $a7, %8\n" + "syscall 0x0\n" + + /* if (%a0 != 0) + * return %a0; + */ + "bnez $a0, 1f\n" + + /* In the child, now. Call "fn(arg)". + */ + "ld.d $a0, $sp, 0\n" + "ld.d $a1, $sp, 8\n" + "addi.d $sp, $sp, 16\n" + "jirl $ra, $a1, 0\n" + + /* Call _exit(%a0). + */ + "li.d $a7, %9\n" + "syscall 0x0\n" + "1:\n" + : "=r" (__res_a0) + : "r"(fn), "r"(__stack), "r"(__flags), "r"(arg), + "r"(__ptid), "r"(__tls), "r"(__ctid), + "i"(__NR_clone), "i"(__NR_exit) + : LSS_SYSCALL_CLOBBERS); + __res = __res_a0; + } + LSS_RETURN(int, __res); + } + + #endif + #define __NR__exit __NR_exit + #define __NR__gettid __NR_gettid + #define __NR__mremap __NR_mremap + LSS_INLINE _syscall1(void *, brk, void *, e) + LSS_INLINE _syscall1(int, chdir, const char *,p) + LSS_INLINE _syscall1(int, close, int, f) + LSS_INLINE _syscall2(int, clock_getres, int, c, + struct kernel_timespec*, t) + LSS_INLINE _syscall2(int, clock_gettime, int, c, + struct kernel_timespec*, t) + LSS_INLINE _syscall1(int, dup, int, f) + #if defined(__NR_dup2) + // dup2 is polyfilled below when not available. + LSS_INLINE _syscall2(int, dup2, int, s, + int, d) + #endif + #if defined(__NR_dup3) + LSS_INLINE _syscall3(int, dup3, int, s, int, d, int, f) + #endif + LSS_INLINE _syscall3(int, execve, const char*, f, + const char*const*,a,const char*const*, e) + LSS_INLINE _syscall1(int, _exit, int, e) + LSS_INLINE _syscall1(int, exit_group, int, e) + LSS_INLINE _syscall3(int, fcntl, int, f, + int, c, long, a) + #if defined(__NR_fork) + // fork is polyfilled below when not available. + LSS_INLINE _syscall0(pid_t, fork) + #endif + #if defined(__NR_fstat) + LSS_INLINE _syscall2(int, fstat, int, f, + struct kernel_stat*, b) + #endif + LSS_INLINE _syscall2(int, fstatfs, int, f, + struct kernel_statfs*, b) + #if defined(__x86_64__) + /* Need to make sure off_t isn't truncated to 32-bits under x32. */ + LSS_INLINE int LSS_NAME(ftruncate)(int f, off_t l) { + LSS_BODY(2, int, ftruncate, LSS_SYSCALL_ARG(f), (uint64_t)(l)); + } + #else + LSS_INLINE _syscall2(int, ftruncate, int, f, + off_t, l) + #endif + LSS_INLINE _syscall6(int, futex, int*, u, + int, o, int, v, + struct kernel_timespec*, t, + int*, u2, int, v2) + LSS_INLINE _syscall3(int, getdents, int, f, + struct kernel_dirent*, d, int, c) + LSS_INLINE _syscall3(int, getdents64, int, f, + struct kernel_dirent64*, d, int, c) + LSS_INLINE _syscall0(gid_t, getegid) + LSS_INLINE _syscall0(uid_t, geteuid) + LSS_INLINE _syscall2(int, getitimer, int, w, + struct kernel_itimerval*, c) + #if defined(__NR_getpgrp) + LSS_INLINE _syscall0(pid_t, getpgrp) + #endif + LSS_INLINE _syscall0(pid_t, getpid) + LSS_INLINE _syscall0(pid_t, getppid) + LSS_INLINE _syscall2(int, getpriority, int, a, + int, b) + LSS_INLINE _syscall3(int, getresgid, gid_t *, r, + gid_t *, e, gid_t *, s) + LSS_INLINE _syscall3(int, getresuid, uid_t *, r, + uid_t *, e, uid_t *, s) + #if defined(__NR_getrlimit) + LSS_INLINE _syscall2(int, getrlimit, int, r, + struct kernel_rlimit*, l) + #endif + LSS_INLINE _syscall1(pid_t, getsid, pid_t, p) + LSS_INLINE _syscall0(pid_t, _gettid) + LSS_INLINE _syscall2(pid_t, gettimeofday, struct kernel_timeval*, t, + void*, tz) + LSS_INLINE _syscall5(int, setxattr, const char *,p, + const char *, n, const void *,v, + size_t, s, int, f) + LSS_INLINE _syscall5(int, lsetxattr, const char *,p, + const char *, n, const void *,v, + size_t, s, int, f) + LSS_INLINE _syscall4(ssize_t, getxattr, const char *,p, + const char *, n, void *, v, size_t, s) + LSS_INLINE _syscall4(ssize_t, lgetxattr, const char *,p, + const char *, n, void *, v, size_t, s) + LSS_INLINE _syscall3(ssize_t, listxattr, const char *,p, + char *, l, size_t, s) + LSS_INLINE _syscall3(ssize_t, llistxattr, const char *,p, + char *, l, size_t, s) + LSS_INLINE _syscall3(int, ioctl, int, d, + int, r, void *, a) + LSS_INLINE _syscall2(int, ioprio_get, int, which, + int, who) + LSS_INLINE _syscall3(int, ioprio_set, int, which, + int, who, int, ioprio) + LSS_INLINE _syscall2(int, kill, pid_t, p, + int, s) + #if defined(__x86_64__) + /* Need to make sure off_t isn't truncated to 32-bits under x32. */ + LSS_INLINE off_t LSS_NAME(lseek)(int f, off_t o, int w) { + _LSS_BODY(3, off_t, lseek, off_t, LSS_SYSCALL_ARG(f), (uint64_t)(o), + LSS_SYSCALL_ARG(w)); + } + #else + LSS_INLINE _syscall3(off_t, lseek, int, f, + off_t, o, int, w) + #endif + LSS_INLINE _syscall2(int, munmap, void*, s, + size_t, l) + LSS_INLINE _syscall6(long, move_pages, pid_t, p, + unsigned long, n, void **,g, int *, d, + int *, s, int, f) + LSS_INLINE _syscall3(int, mprotect, const void *,a, + size_t, l, int, p) + LSS_INLINE _syscall5(void*, _mremap, void*, o, + size_t, os, size_t, ns, + unsigned long, f, void *, a) + #if defined(__NR_open) + // open is polyfilled below when not available. + LSS_INLINE _syscall3(int, open, const char*, p, + int, f, int, m) + #endif + #if defined(__NR_poll) + // poll is polyfilled below when not available. + LSS_INLINE _syscall3(int, poll, struct kernel_pollfd*, u, + unsigned int, n, int, t) + #endif + #if defined(__NR_ppoll) + LSS_INLINE _syscall5(int, ppoll, struct kernel_pollfd *, u, + unsigned int, n, const struct kernel_timespec *, t, + const struct kernel_sigset_t *, sigmask, size_t, s) + #endif + LSS_INLINE _syscall5(int, prctl, int, option, + unsigned long, arg2, + unsigned long, arg3, + unsigned long, arg4, + unsigned long, arg5) + LSS_INLINE _syscall4(long, ptrace, int, r, + pid_t, p, void *, a, void *, d) + #if defined(__NR_quotactl) + // Defined on x86_64 / i386 only + LSS_INLINE _syscall4(int, quotactl, int, cmd, const char *, special, + int, id, caddr_t, addr) + #endif + LSS_INLINE _syscall3(ssize_t, read, int, f, + void *, b, size_t, c) + #if defined(__NR_readlink) + // readlink is polyfilled below when not available. + LSS_INLINE _syscall3(int, readlink, const char*, p, + char*, b, size_t, s) + #endif + #if defined(__NR_readlinkat) + LSS_INLINE _syscall4(int, readlinkat, int, d, const char *, p, char *, b, + size_t, s) + #endif + LSS_INLINE _syscall4(int, rt_sigaction, int, s, + const struct kernel_sigaction*, a, + struct kernel_sigaction*, o, size_t, c) + LSS_INLINE _syscall2(int, rt_sigpending, struct kernel_sigset_t *, s, + size_t, c) + LSS_INLINE _syscall4(int, rt_sigprocmask, int, h, + const struct kernel_sigset_t*, s, + struct kernel_sigset_t*, o, size_t, c) + LSS_INLINE _syscall2(int, rt_sigsuspend, + const struct kernel_sigset_t*, s, size_t, c) + LSS_INLINE _syscall4(int, rt_sigtimedwait, const struct kernel_sigset_t*, s, + siginfo_t*, i, const struct timespec*, t, size_t, c) + LSS_INLINE _syscall3(int, sched_getaffinity,pid_t, p, + unsigned int, l, unsigned long *, m) + LSS_INLINE _syscall3(int, sched_setaffinity,pid_t, p, + unsigned int, l, unsigned long *, m) + LSS_INLINE _syscall0(int, sched_yield) + LSS_INLINE _syscall1(long, set_tid_address, int *, t) + LSS_INLINE _syscall1(int, setfsgid, gid_t, g) + LSS_INLINE _syscall1(int, setfsuid, uid_t, u) + LSS_INLINE _syscall1(int, setuid, uid_t, u) + LSS_INLINE _syscall1(int, setgid, gid_t, g) + LSS_INLINE _syscall3(int, setitimer, int, w, + const struct kernel_itimerval*, n, + struct kernel_itimerval*, o) + LSS_INLINE _syscall2(int, setpgid, pid_t, p, + pid_t, g) + LSS_INLINE _syscall3(int, setpriority, int, a, + int, b, int, p) + LSS_INLINE _syscall3(int, setresgid, gid_t, r, + gid_t, e, gid_t, s) + LSS_INLINE _syscall3(int, setresuid, uid_t, r, + uid_t, e, uid_t, s) + #if defined(__NR_setrlimit) + LSS_INLINE _syscall2(int, setrlimit, int, r, + const struct kernel_rlimit*, l) + #endif + LSS_INLINE _syscall0(pid_t, setsid) + LSS_INLINE _syscall2(int, sigaltstack, const stack_t*, s, + const stack_t*, o) + #if defined(__NR_sigreturn) + LSS_INLINE _syscall1(int, sigreturn, unsigned long, u) + #endif + #if defined(__NR_stat) + // stat and lstat are polyfilled below when not available. + LSS_INLINE _syscall2(int, stat, const char*, f, + struct kernel_stat*, b) + #endif + #if defined(__NR_lstat) + LSS_INLINE _syscall2(int, lstat, const char*, f, + struct kernel_stat*, b) + #endif + LSS_INLINE _syscall2(int, statfs, const char*, f, + struct kernel_statfs*, b) + LSS_INLINE _syscall3(int, tgkill, pid_t, p, + pid_t, t, int, s) + LSS_INLINE _syscall2(int, tkill, pid_t, p, + int, s) + #if defined(__NR_unlink) + // unlink is polyfilled below when not available. + LSS_INLINE _syscall1(int, unlink, const char*, f) + #endif + LSS_INLINE _syscall3(ssize_t, write, int, f, + const void *, b, size_t, c) + LSS_INLINE _syscall3(ssize_t, writev, int, f, + const struct kernel_iovec*, v, size_t, c) + #if defined(__NR_getcpu) + LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu, + unsigned *, node, void *, unused) + #endif + #if defined(__NR_fadvise64) + #if defined(__x86_64__) + /* Need to make sure loff_t isn't truncated to 32-bits under x32. */ + LSS_INLINE int LSS_NAME(fadvise64)(int fd, loff_t offset, loff_t len, + int advice) { + LSS_BODY(4, int, fadvise64, LSS_SYSCALL_ARG(fd), (uint64_t)(offset), + (uint64_t)(len), LSS_SYSCALL_ARG(advice)); + } + #else + LSS_INLINE _syscall4(int, fadvise64, + int, fd, loff_t, offset, loff_t, len, int, advice) + #endif + #elif defined(__i386__) + #define __NR__fadvise64_64 __NR_fadvise64_64 + LSS_INLINE _syscall6(int, _fadvise64_64, int, fd, + unsigned, offset_lo, unsigned, offset_hi, + unsigned, len_lo, unsigned, len_hi, + int, advice) + + LSS_INLINE int LSS_NAME(fadvise64)(int fd, loff_t offset, + loff_t len, int advice) { + return LSS_NAME(_fadvise64_64)(fd, + (unsigned)offset, (unsigned)(offset >>32), + (unsigned)len, (unsigned)(len >> 32), + advice); + } + + #elif defined(__s390__) && !defined(__s390x__) + #define __NR__fadvise64_64 __NR_fadvise64_64 + struct kernel_fadvise64_64_args { + int fd; + long long offset; + long long len; + int advice; + }; + + LSS_INLINE _syscall1(int, _fadvise64_64, + struct kernel_fadvise64_64_args *args) + + LSS_INLINE int LSS_NAME(fadvise64)(int fd, loff_t offset, + loff_t len, int advice) { + struct kernel_fadvise64_64_args args = { fd, offset, len, advice }; + return LSS_NAME(_fadvise64_64)(&args); + } + #endif + #if defined(__NR_fallocate) + #if defined(__x86_64__) + /* Need to make sure loff_t isn't truncated to 32-bits under x32. */ + LSS_INLINE int LSS_NAME(fallocate)(int f, int mode, loff_t offset, + loff_t len) { + LSS_BODY(4, int, fallocate, LSS_SYSCALL_ARG(f), LSS_SYSCALL_ARG(mode), + (uint64_t)(offset), (uint64_t)(len)); + } + #elif (defined(__i386__) || (defined(__s390__) && !defined(__s390x__)) \ + || defined(__ARM_ARCH_3__) || defined(__ARM_EABI__) \ + || (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) \ + || defined(__PPC__)) + #define __NR__fallocate __NR_fallocate + LSS_INLINE _syscall6(int, _fallocate, int, fd, + int, mode, + unsigned, offset_lo, unsigned, offset_hi, + unsigned, len_lo, unsigned, len_hi) + + LSS_INLINE int LSS_NAME(fallocate)(int fd, int mode, + loff_t offset, loff_t len) { + union { loff_t off; unsigned w[2]; } o = { offset }, l = { len }; + return LSS_NAME(_fallocate)(fd, mode, o.w[0], o.w[1], l.w[0], l.w[1]); + } + #else + LSS_INLINE _syscall4(int, fallocate, + int, f, int, mode, loff_t, offset, loff_t, len) + #endif + #endif + #if defined(__NR_getrandom) + LSS_INLINE _syscall3(ssize_t, getrandom, void*, buffer, size_t, length, + unsigned int, flags) + #endif + #if defined(__NR_newfstatat) + LSS_INLINE _syscall4(int, newfstatat, int, d, + const char *, p, + struct kernel_stat*, b, int, f) + #endif + #if defined(__NR_statx) + LSS_INLINE _syscall5(int, statx, int, d, + const char *, p, + int, f, int, m, + struct kernel_statx*, b) + #endif + #if defined(__x86_64__) || defined(__s390x__) + LSS_INLINE int LSS_NAME(getresgid32)(gid_t *rgid, + gid_t *egid, + gid_t *sgid) { + return LSS_NAME(getresgid)(rgid, egid, sgid); + } + + LSS_INLINE int LSS_NAME(getresuid32)(uid_t *ruid, + uid_t *euid, + uid_t *suid) { + return LSS_NAME(getresuid)(ruid, euid, suid); + } + + LSS_INLINE int LSS_NAME(setfsgid32)(gid_t gid) { + return LSS_NAME(setfsgid)(gid); + } + + LSS_INLINE int LSS_NAME(setfsuid32)(uid_t uid) { + return LSS_NAME(setfsuid)(uid); + } + + LSS_INLINE int LSS_NAME(setresgid32)(gid_t rgid, gid_t egid, gid_t sgid) { + return LSS_NAME(setresgid)(rgid, egid, sgid); + } + + LSS_INLINE int LSS_NAME(setresuid32)(uid_t ruid, uid_t euid, uid_t suid) { + return LSS_NAME(setresuid)(ruid, euid, suid); + } + + LSS_INLINE int LSS_NAME(sigaction)(int signum, + const struct kernel_sigaction *act, + struct kernel_sigaction *oldact) { + #if defined(__x86_64__) + /* On x86_64, the kernel requires us to always set our own + * SA_RESTORER in order to be able to return from a signal handler. + * This function must have a "magic" signature that the "gdb" + * (and maybe the kernel?) can recognize. + */ + if (act != NULL && !(act->sa_flags & SA_RESTORER)) { + struct kernel_sigaction a = *act; + a.sa_flags |= SA_RESTORER; + a.sa_restorer = LSS_NAME(restore_rt)(); + return LSS_NAME(rt_sigaction)(signum, &a, oldact, + (KERNEL_NSIG+7)/8); + } else + #endif + return LSS_NAME(rt_sigaction)(signum, act, oldact, + (KERNEL_NSIG+7)/8); + } + + LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) { + return LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8); + } + + LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) { + return LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8); + } + #endif + #if defined(__NR_rt_sigprocmask) + LSS_INLINE int LSS_NAME(sigprocmask)(int how, + const struct kernel_sigset_t *set, + struct kernel_sigset_t *oldset) { + return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); + } + #endif + #if defined(__NR_rt_sigtimedwait) + LSS_INLINE int LSS_NAME(sigtimedwait)(const struct kernel_sigset_t *set, + siginfo_t *info, + const struct timespec *timeout) { + return LSS_NAME(rt_sigtimedwait)(set, info, timeout, (KERNEL_NSIG+7)/8); + } + #endif + #if defined(__NR_wait4) + LSS_INLINE _syscall4(pid_t, wait4, pid_t, p, + int*, s, int, o, + struct kernel_rusage*, r) + #endif + #if defined(__NR_openat) + LSS_INLINE _syscall4(int, openat, int, d, const char *, p, int, f, int, m) + #endif + #if defined(__NR_unlinkat) + LSS_INLINE _syscall3(int, unlinkat, int, d, const char *, p, int, f) + #endif + #if defined(__i386__) || defined(__ARM_ARCH_3__) || defined(__ARM_EABI__) || \ + (defined(__s390__) && !defined(__s390x__)) + #define __NR__getresgid32 __NR_getresgid32 + #define __NR__getresuid32 __NR_getresuid32 + #define __NR__setfsgid32 __NR_setfsgid32 + #define __NR__setfsuid32 __NR_setfsuid32 + #define __NR__setresgid32 __NR_setresgid32 + #define __NR__setresuid32 __NR_setresuid32 +#if defined(__ARM_EABI__) + LSS_INLINE _syscall2(int, ugetrlimit, int, r, + struct kernel_rlimit*, l) +#endif + LSS_INLINE _syscall3(int, _getresgid32, gid_t *, r, + gid_t *, e, gid_t *, s) + LSS_INLINE _syscall3(int, _getresuid32, uid_t *, r, + uid_t *, e, uid_t *, s) + LSS_INLINE _syscall1(int, _setfsgid32, gid_t, f) + LSS_INLINE _syscall1(int, _setfsuid32, uid_t, f) + LSS_INLINE _syscall3(int, _setresgid32, gid_t, r, + gid_t, e, gid_t, s) + LSS_INLINE _syscall3(int, _setresuid32, uid_t, r, + uid_t, e, uid_t, s) + + LSS_INLINE int LSS_NAME(getresgid32)(gid_t *rgid, + gid_t *egid, + gid_t *sgid) { + int rc; + if ((rc = LSS_NAME(_getresgid32)(rgid, egid, sgid)) < 0 && + LSS_ERRNO == ENOSYS) { + if ((rgid == NULL) || (egid == NULL) || (sgid == NULL)) { + return EFAULT; + } + // Clear the high bits first, since getresgid only sets 16 bits + *rgid = *egid = *sgid = 0; + rc = LSS_NAME(getresgid)(rgid, egid, sgid); + } + return rc; + } + + LSS_INLINE int LSS_NAME(getresuid32)(uid_t *ruid, + uid_t *euid, + uid_t *suid) { + int rc; + if ((rc = LSS_NAME(_getresuid32)(ruid, euid, suid)) < 0 && + LSS_ERRNO == ENOSYS) { + if ((ruid == NULL) || (euid == NULL) || (suid == NULL)) { + return EFAULT; + } + // Clear the high bits first, since getresuid only sets 16 bits + *ruid = *euid = *suid = 0; + rc = LSS_NAME(getresuid)(ruid, euid, suid); + } + return rc; + } + + LSS_INLINE int LSS_NAME(setfsgid32)(gid_t gid) { + int rc; + if ((rc = LSS_NAME(_setfsgid32)(gid)) < 0 && + LSS_ERRNO == ENOSYS) { + if ((unsigned int)gid & ~0xFFFFu) { + rc = EINVAL; + } else { + rc = LSS_NAME(setfsgid)(gid); + } + } + return rc; + } + + LSS_INLINE int LSS_NAME(setfsuid32)(uid_t uid) { + int rc; + if ((rc = LSS_NAME(_setfsuid32)(uid)) < 0 && + LSS_ERRNO == ENOSYS) { + if ((unsigned int)uid & ~0xFFFFu) { + rc = EINVAL; + } else { + rc = LSS_NAME(setfsuid)(uid); + } + } + return rc; + } + + LSS_INLINE int LSS_NAME(setresgid32)(gid_t rgid, gid_t egid, gid_t sgid) { + int rc; + if ((rc = LSS_NAME(_setresgid32)(rgid, egid, sgid)) < 0 && + LSS_ERRNO == ENOSYS) { + if ((unsigned int)rgid & ~0xFFFFu || + (unsigned int)egid & ~0xFFFFu || + (unsigned int)sgid & ~0xFFFFu) { + rc = EINVAL; + } else { + rc = LSS_NAME(setresgid)(rgid, egid, sgid); + } + } + return rc; + } + + LSS_INLINE int LSS_NAME(setresuid32)(uid_t ruid, uid_t euid, uid_t suid) { + int rc; + if ((rc = LSS_NAME(_setresuid32)(ruid, euid, suid)) < 0 && + LSS_ERRNO == ENOSYS) { + if ((unsigned int)ruid & ~0xFFFFu || + (unsigned int)euid & ~0xFFFFu || + (unsigned int)suid & ~0xFFFFu) { + rc = EINVAL; + } else { + rc = LSS_NAME(setresuid)(ruid, euid, suid); + } + } + return rc; + } + #endif + LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) { + memset(&set->sig, 0, sizeof(set->sig)); + return 0; + } + + LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) { + memset(&set->sig, -1, sizeof(set->sig)); + return 0; + } + + LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set, + int signum) { + if (signum < 1 || (size_t)signum > (8*sizeof(set->sig))) { + LSS_ERRNO = EINVAL; + return -1; + } else { + set->sig[(size_t)(signum - 1)/(8*sizeof(set->sig[0]))] + |= 1UL << ((size_t)(signum - 1) % (8*sizeof(set->sig[0]))); + return 0; + } + } + + LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set, + int signum) { + if (signum < 1 || (size_t)signum > (8*sizeof(set->sig))) { + LSS_ERRNO = EINVAL; + return -1; + } else { + set->sig[(size_t)(signum - 1)/(8*sizeof(set->sig[0]))] + &= ~(1UL << ((size_t)(signum - 1) % (8*sizeof(set->sig[0])))); + return 0; + } + } + + LSS_INLINE int LSS_NAME(sigismember)(struct kernel_sigset_t *set, + int signum) { + if (signum < 1 || (size_t)signum > (8*sizeof(set->sig))) { + LSS_ERRNO = EINVAL; + return -1; + } else { + return !!(set->sig[(size_t)(signum - 1)/(8*sizeof(set->sig[0]))] & + (1UL << ((size_t)(signum - 1) % (8*sizeof(set->sig[0]))))); + } + } + #if defined(__i386__) || \ + defined(__ARM_ARCH_3__) || defined(__ARM_EABI__) || \ + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || \ + defined(__PPC__) || \ + (defined(__s390__) && !defined(__s390x__)) || defined(__e2k__) + #define __NR__sigaction __NR_sigaction + #define __NR__sigpending __NR_sigpending + #define __NR__sigsuspend __NR_sigsuspend + #define __NR__socketcall __NR_socketcall + LSS_INLINE _syscall2(int, fstat64, int, f, + struct kernel_stat64 *, b) + LSS_INLINE _syscall5(int, _llseek, uint, fd, + unsigned long, hi, unsigned long, lo, + loff_t *, res, uint, wh) +#if defined(__s390__) && !defined(__s390x__) + /* On s390, mmap2() arguments are passed in memory. */ + LSS_INLINE void* LSS_NAME(_mmap2)(void *s, size_t l, int p, int f, int d, + off_t o) { + unsigned long buf[6] = { (unsigned long) s, (unsigned long) l, + (unsigned long) p, (unsigned long) f, + (unsigned long) d, (unsigned long) o }; + LSS_REG(2, buf); + LSS_BODY(void*, mmap2, "0"(__r2)); + } +#else + #define __NR__mmap2 __NR_mmap2 + LSS_INLINE _syscall6(void*, _mmap2, void*, s, + size_t, l, int, p, + int, f, int, d, + off_t, o) +#endif + LSS_INLINE _syscall3(int, _sigaction, int, s, + const struct kernel_old_sigaction*, a, + struct kernel_old_sigaction*, o) + LSS_INLINE _syscall1(int, _sigpending, unsigned long*, s) + #ifdef __PPC__ + LSS_INLINE _syscall1(int, _sigsuspend, unsigned long, s) + #else + LSS_INLINE _syscall3(int, _sigsuspend, const void*, a, + int, b, + unsigned long, s) + #endif + LSS_INLINE _syscall2(int, stat64, const char *, p, + struct kernel_stat64 *, b) + + LSS_INLINE int LSS_NAME(sigaction)(int signum, + const struct kernel_sigaction *act, + struct kernel_sigaction *oldact) { + int old_errno = LSS_ERRNO; + int rc; + struct kernel_sigaction a; + if (act != NULL) { + a = *act; + #ifdef __i386__ + /* On i386, the kernel requires us to always set our own + * SA_RESTORER when using realtime signals. Otherwise, it does not + * know how to return from a signal handler. This function must have + * a "magic" signature that the "gdb" (and maybe the kernel?) can + * recognize. + * Apparently, a SA_RESTORER is implicitly set by the kernel, when + * using non-realtime signals. + * + * TODO: Test whether ARM needs a restorer + */ + if (!(a.sa_flags & SA_RESTORER)) { + a.sa_flags |= SA_RESTORER; + a.sa_restorer = (a.sa_flags & SA_SIGINFO) + ? LSS_NAME(restore_rt)() : LSS_NAME(restore)(); + } + #endif + } + rc = LSS_NAME(rt_sigaction)(signum, act ? &a : act, oldact, + (KERNEL_NSIG+7)/8); + if (rc < 0 && LSS_ERRNO == ENOSYS) { + struct kernel_old_sigaction oa, ooa, *ptr_a = &oa, *ptr_oa = &ooa; + if (!act) { + ptr_a = NULL; + } else { + oa.sa_handler_ = act->sa_handler_; + memcpy(&oa.sa_mask, &act->sa_mask, sizeof(oa.sa_mask)); + #ifndef __mips__ + oa.sa_restorer = act->sa_restorer; + #endif + oa.sa_flags = act->sa_flags; + } + if (!oldact) { + ptr_oa = NULL; + } + LSS_ERRNO = old_errno; + rc = LSS_NAME(_sigaction)(signum, ptr_a, ptr_oa); + if (rc == 0 && oldact) { + if (act) { + memcpy(oldact, act, sizeof(*act)); + } else { + memset(oldact, 0, sizeof(*oldact)); + } + oldact->sa_handler_ = ptr_oa->sa_handler_; + oldact->sa_flags = ptr_oa->sa_flags; + memcpy(&oldact->sa_mask, &ptr_oa->sa_mask, sizeof(ptr_oa->sa_mask)); + #ifndef __mips__ + oldact->sa_restorer = ptr_oa->sa_restorer; + #endif + } + } + return rc; + } + + LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) { + int old_errno = LSS_ERRNO; + int rc = LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8); + if (rc < 0 && LSS_ERRNO == ENOSYS) { + LSS_ERRNO = old_errno; + LSS_NAME(sigemptyset)(set); + rc = LSS_NAME(_sigpending)(&set->sig[0]); + } + return rc; + } + + LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) { + int olderrno = LSS_ERRNO; + int rc = LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8); + if (rc < 0 && LSS_ERRNO == ENOSYS) { + LSS_ERRNO = olderrno; + rc = LSS_NAME(_sigsuspend)( + #ifndef __PPC__ + set, 0, + #endif + set->sig[0]); + } + return rc; + } + #endif + #if defined(__i386__) || \ + defined(__ARM_ARCH_3__) || defined(__ARM_EABI__) || \ + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || \ + defined(__PPC__) || \ + (defined(__s390__) && !defined(__s390x__)) + /* On these architectures, implement mmap() with mmap2(). */ + LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d, + int64_t o) { + if (o % 4096) { + LSS_ERRNO = EINVAL; + return (void *) -1; + } + return LSS_NAME(_mmap2)(s, l, p, f, d, (o / 4096)); + } + #elif defined(__s390x__) + /* On s390x, mmap() arguments are passed in memory. */ + LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d, + int64_t o) { + unsigned long buf[6] = { (unsigned long) s, (unsigned long) l, + (unsigned long) p, (unsigned long) f, + (unsigned long) d, (unsigned long) o }; + LSS_REG(2, buf); + LSS_BODY(void*, mmap, "0"(__r2)); + } + #elif defined(__x86_64__) + /* Need to make sure __off64_t isn't truncated to 32-bits under x32. */ + LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d, + int64_t o) { + LSS_BODY(6, void*, mmap, LSS_SYSCALL_ARG(s), LSS_SYSCALL_ARG(l), + LSS_SYSCALL_ARG(p), LSS_SYSCALL_ARG(f), + LSS_SYSCALL_ARG(d), (uint64_t)(o)); + } + #else + /* Remaining 64-bit architectures. */ + LSS_INLINE _syscall6(void*, mmap, void*, addr, size_t, length, int, prot, + int, flags, int, fd, int64_t, offset) + #endif + #if defined(__PPC__) + #undef LSS_SC_LOADARGS_0 + #define LSS_SC_LOADARGS_0(dummy...) + #undef LSS_SC_LOADARGS_1 + #define LSS_SC_LOADARGS_1(arg1) \ + __sc_4 = (unsigned long) (arg1) + #undef LSS_SC_LOADARGS_2 + #define LSS_SC_LOADARGS_2(arg1, arg2) \ + LSS_SC_LOADARGS_1(arg1); \ + __sc_5 = (unsigned long) (arg2) + #undef LSS_SC_LOADARGS_3 + #define LSS_SC_LOADARGS_3(arg1, arg2, arg3) \ + LSS_SC_LOADARGS_2(arg1, arg2); \ + __sc_6 = (unsigned long) (arg3) + #undef LSS_SC_LOADARGS_4 + #define LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4) \ + LSS_SC_LOADARGS_3(arg1, arg2, arg3); \ + __sc_7 = (unsigned long) (arg4) + #undef LSS_SC_LOADARGS_5 + #define LSS_SC_LOADARGS_5(arg1, arg2, arg3, arg4, arg5) \ + LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4); \ + __sc_8 = (unsigned long) (arg5) + #undef LSS_SC_BODY + #define LSS_SC_BODY(nr, type, opt, args...) \ + long __sc_ret, __sc_err; \ + { \ + register unsigned long __sc_0 __asm__ ("r0") = __NR_socketcall; \ + register unsigned long __sc_3 __asm__ ("r3") = opt; \ + register unsigned long __sc_4 __asm__ ("r4"); \ + register unsigned long __sc_5 __asm__ ("r5"); \ + register unsigned long __sc_6 __asm__ ("r6"); \ + register unsigned long __sc_7 __asm__ ("r7"); \ + register unsigned long __sc_8 __asm__ ("r8"); \ + LSS_SC_LOADARGS_##nr(args); \ + __asm__ __volatile__ \ + ("stwu 1, -48(1)\n\t" \ + "stw 4, 20(1)\n\t" \ + "stw 5, 24(1)\n\t" \ + "stw 6, 28(1)\n\t" \ + "stw 7, 32(1)\n\t" \ + "stw 8, 36(1)\n\t" \ + "addi 4, 1, 20\n\t" \ + "sc\n\t" \ + "mfcr %0" \ + : "=&r" (__sc_0), \ + "=&r" (__sc_3), "=&r" (__sc_4), \ + "=&r" (__sc_5), "=&r" (__sc_6), \ + "=&r" (__sc_7), "=&r" (__sc_8) \ + : LSS_ASMINPUT_##nr \ + : "cr0", "ctr", "memory"); \ + __sc_ret = __sc_3; \ + __sc_err = __sc_0; \ + } \ + LSS_RETURN(type, __sc_ret, __sc_err) + + LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg, + int flags){ + LSS_SC_BODY(3, ssize_t, 17, s, msg, flags); + } + + LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s, + const struct kernel_msghdr *msg, + int flags) { + LSS_SC_BODY(3, ssize_t, 16, s, msg, flags); + } + + // TODO(csilvers): why is this ifdef'ed out? +#if 0 + LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len, + int flags, + const struct kernel_sockaddr *to, + unsigned int tolen) { + LSS_BODY(6, ssize_t, 11, s, buf, len, flags, to, tolen); + } +#endif + + LSS_INLINE int LSS_NAME(shutdown)(int s, int how) { + LSS_SC_BODY(2, int, 13, s, how); + } + + LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) { + LSS_SC_BODY(3, int, 1, domain, type, protocol); + } + + LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol, + int sv[2]) { + LSS_SC_BODY(4, int, 8, d, type, protocol, sv); + } + #endif + #if defined(__NR_recvmsg) + LSS_INLINE _syscall3(ssize_t, recvmsg, int, s, struct kernel_msghdr*, msg, + int, flags) + #endif + #if defined(__NR_sendmsg) + LSS_INLINE _syscall3(ssize_t, sendmsg, int, s, const struct kernel_msghdr*, + msg, int, flags) + #endif + #if defined(__NR_sendto) + LSS_INLINE _syscall6(ssize_t, sendto, int, s, const void*, buf, size_t,len, + int, flags, const struct kernel_sockaddr*, to, + unsigned int, tolen) + #endif + #if defined(__NR_shutdown) + LSS_INLINE _syscall2(int, shutdown, int, s, int, how) + #endif + #if defined(__NR_socket) + LSS_INLINE _syscall3(int, socket, int, domain, int, type, int, protocol) + #endif + #if defined(__NR_socketpair) + LSS_INLINE _syscall4(int, socketpair, int, d, int, type, int, protocol, + int*, sv) + #endif + + #if defined(__NR_socketcall) + LSS_INLINE _syscall2(int, _socketcall, int, c, + va_list, a) + LSS_INLINE int LSS_NAME(socketcall)(int op, ...) { + int rc; + va_list ap; + va_start(ap, op); + rc = LSS_NAME(_socketcall)(op, ap); + va_end(ap); + return rc; + } + + # if !defined(__NR_recvmsg) + LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg, + int flags){ + return (ssize_t)LSS_NAME(socketcall)(17, s, msg, flags); + } + # endif + # if !defined(__NR_sendmsg) + LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s, + const struct kernel_msghdr *msg, + int flags) { + return (ssize_t)LSS_NAME(socketcall)(16, s, msg, flags); + } + # endif + # if !defined(__NR_sendto) + LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len, + int flags, + const struct kernel_sockaddr *to, + unsigned int tolen) { + return (ssize_t)LSS_NAME(socketcall)(11, s, buf, len, flags, to, tolen); + } + # endif + # if !defined(__NR_shutdown) + LSS_INLINE int LSS_NAME(shutdown)(int s, int how) { + return LSS_NAME(socketcall)(13, s, how); + } + # endif + # if !defined(__NR_socket) + LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) { + return LSS_NAME(socketcall)(1, domain, type, protocol); + } + # endif + # if !defined(__NR_socketpair) + LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol, + int sv[2]) { + return LSS_NAME(socketcall)(8, d, type, protocol, sv); + } + # endif + #endif + #if defined(__NR_fstatat64) + LSS_INLINE _syscall4(int, fstatat64, int, d, + const char *, p, + struct kernel_stat64 *, b, int, f) + #endif + #if defined(__NR_waitpid) + // waitpid is polyfilled below when not available. + LSS_INLINE _syscall3(pid_t, waitpid, pid_t, p, + int*, s, int, o) + #endif + #if defined(__mips__) + /* sys_pipe() on MIPS has non-standard calling conventions, as it returns + * both file handles through CPU registers. + */ + LSS_INLINE int LSS_NAME(pipe)(int *p) { + register unsigned long __v0 __asm__("$2") = __NR_pipe; + register unsigned long __v1 __asm__("$3"); + register unsigned long __r7 __asm__("$7"); + __asm__ __volatile__ ("syscall\n" + : "=r"(__v0), "=r"(__v1), "=r" (__r7) + : "0"(__v0) + : "$8", "$9", "$10", "$11", "$12", + "$13", "$14", "$15", "$24", "$25", "memory"); + if (__r7) { + unsigned long __errnovalue = __v0; + LSS_ERRNO = __errnovalue; + return -1; + } else { + p[0] = __v0; + p[1] = __v1; + return 0; + } + } + #elif defined(__NR_pipe) + // pipe is polyfilled below when not available. + LSS_INLINE _syscall1(int, pipe, int *, p) + #endif + #if defined(__NR_pipe2) + LSS_INLINE _syscall2(int, pipe2, int *, pipefd, int, flags) + #endif + /* TODO(csilvers): see if ppc can/should support this as well */ + #if defined(__i386__) || defined(__ARM_ARCH_3__) || \ + defined(__ARM_EABI__) || \ + (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64) || \ + (defined(__s390__) && !defined(__s390x__)) + #define __NR__statfs64 __NR_statfs64 + #define __NR__fstatfs64 __NR_fstatfs64 + LSS_INLINE _syscall3(int, _statfs64, const char*, p, + size_t, s,struct kernel_statfs64*, b) + LSS_INLINE _syscall3(int, _fstatfs64, int, f, + size_t, s,struct kernel_statfs64*, b) + LSS_INLINE int LSS_NAME(statfs64)(const char *p, + struct kernel_statfs64 *b) { + return LSS_NAME(_statfs64)(p, sizeof(*b), b); + } + LSS_INLINE int LSS_NAME(fstatfs64)(int f,struct kernel_statfs64 *b) { + return LSS_NAME(_fstatfs64)(f, sizeof(*b), b); + } + #endif + + LSS_INLINE int LSS_NAME(execv)(const char *path, const char *const argv[]) { + extern char **environ; + return LSS_NAME(execve)(path, argv, (const char *const *)environ); + } + + LSS_INLINE pid_t LSS_NAME(gettid)(void) { + pid_t tid = LSS_NAME(_gettid)(); + if (tid != -1) { + return tid; + } + return LSS_NAME(getpid)(); + } + + LSS_INLINE void *LSS_NAME(mremap)(void *old_address, size_t old_size, + size_t new_size, int flags, ...) { + va_list ap; + void *new_address, *rc; + va_start(ap, flags); + new_address = va_arg(ap, void *); + rc = LSS_NAME(_mremap)(old_address, old_size, new_size, + (unsigned long)flags, new_address); + va_end(ap); + return rc; + } + + LSS_INLINE long LSS_NAME(ptrace_detach)(pid_t pid) { + /* PTRACE_DETACH can sometimes forget to wake up the tracee and it + * then sends job control signals to the real parent, rather than to + * the tracer. We reduce the risk of this happening by starting a + * whole new time slice, and then quickly sending a SIGCONT signal + * right after detaching from the tracee. + * + * We use tkill to ensure that we only issue a wakeup for the thread being + * detached. Large multi threaded apps can take a long time in the kernel + * processing SIGCONT. + */ + long rc; + int err; + LSS_NAME(sched_yield)(); + rc = LSS_NAME(ptrace)(PTRACE_DETACH, pid, (void *)0, (void *)0); + err = LSS_ERRNO; + LSS_NAME(tkill)(pid, SIGCONT); + /* Old systems don't have tkill */ + if (LSS_ERRNO == ENOSYS) + LSS_NAME(kill)(pid, SIGCONT); + LSS_ERRNO = err; + return rc; + } + + LSS_INLINE int LSS_NAME(raise)(int sig) { + return LSS_NAME(kill)(LSS_NAME(getpid)(), sig); + } + + LSS_INLINE int LSS_NAME(setpgrp)(void) { + return LSS_NAME(setpgid)(0, 0); + } + + #if defined(__x86_64__) + /* Need to make sure loff_t isn't truncated to 32-bits under x32. */ + LSS_INLINE ssize_t LSS_NAME(pread64)(int f, void *b, size_t c, loff_t o) { + LSS_BODY(4, ssize_t, pread64, LSS_SYSCALL_ARG(f), LSS_SYSCALL_ARG(b), + LSS_SYSCALL_ARG(c), (uint64_t)(o)); + } + + LSS_INLINE ssize_t LSS_NAME(pwrite64)(int f, const void *b, size_t c, + loff_t o) { + LSS_BODY(4, ssize_t, pwrite64, LSS_SYSCALL_ARG(f), LSS_SYSCALL_ARG(b), + LSS_SYSCALL_ARG(c), (uint64_t)(o)); + } + + LSS_INLINE int LSS_NAME(readahead)(int f, loff_t o, size_t c) { + LSS_BODY(3, int, readahead, LSS_SYSCALL_ARG(f), (uint64_t)(o), + LSS_SYSCALL_ARG(c)); + } + #elif defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI64 + LSS_INLINE _syscall4(ssize_t, pread64, int, f, + void *, b, size_t, c, + loff_t, o) + LSS_INLINE _syscall4(ssize_t, pwrite64, int, f, + const void *, b, size_t, c, + loff_t, o) + LSS_INLINE _syscall3(int, readahead, int, f, + loff_t, o, unsigned, c) + #else + #define __NR__pread64 __NR_pread64 + #define __NR__pwrite64 __NR_pwrite64 + #define __NR__readahead __NR_readahead + #if defined(__ARM_EABI__) || defined(__mips__) + /* On ARM and MIPS, a 64-bit parameter has to be in an even-odd register + * pair. Hence these calls ignore their fourth argument (r3) so that their + * fifth and sixth make such a pair (r4,r5). + */ + #define LSS_LLARG_PAD 0, + LSS_INLINE _syscall6(ssize_t, _pread64, int, f, + void *, b, size_t, c, + unsigned, skip, unsigned, o1, unsigned, o2) + LSS_INLINE _syscall6(ssize_t, _pwrite64, int, f, + const void *, b, size_t, c, + unsigned, skip, unsigned, o1, unsigned, o2) + LSS_INLINE _syscall5(int, _readahead, int, f, + unsigned, skip, + unsigned, o1, unsigned, o2, size_t, c) + #else + #define LSS_LLARG_PAD + LSS_INLINE _syscall5(ssize_t, _pread64, int, f, + void *, b, size_t, c, unsigned, o1, + unsigned, o2) + LSS_INLINE _syscall5(ssize_t, _pwrite64, int, f, + const void *, b, size_t, c, unsigned, o1, + unsigned, o2) + LSS_INLINE _syscall4(int, _readahead, int, f, + unsigned, o1, unsigned, o2, size_t, c) + #endif + /* We force 64bit-wide parameters onto the stack, then access each + * 32-bit component individually. This guarantees that we build the + * correct parameters independent of the native byte-order of the + * underlying architecture. + */ + LSS_INLINE ssize_t LSS_NAME(pread64)(int fd, void *buf, size_t count, + loff_t off) { + union { loff_t off; unsigned arg[2]; } o = { off }; + return LSS_NAME(_pread64)(fd, buf, count, + LSS_LLARG_PAD o.arg[0], o.arg[1]); + } + LSS_INLINE ssize_t LSS_NAME(pwrite64)(int fd, const void *buf, + size_t count, loff_t off) { + union { loff_t off; unsigned arg[2]; } o = { off }; + return LSS_NAME(_pwrite64)(fd, buf, count, + LSS_LLARG_PAD o.arg[0], o.arg[1]); + } + LSS_INLINE int LSS_NAME(readahead)(int fd, loff_t off, size_t count) { + union { loff_t off; unsigned arg[2]; } o = { off }; + return LSS_NAME(_readahead)(fd, LSS_LLARG_PAD o.arg[0], o.arg[1], count); + } + #endif +#endif + +/* + * Polyfills for deprecated syscalls. + */ + +#if !defined(__NR_dup2) + LSS_INLINE int LSS_NAME(dup2)(int s, int d) { + return LSS_NAME(dup3)(s, d, 0); + } +#endif + +#if !defined(__NR_open) + LSS_INLINE int LSS_NAME(open)(const char *pathname, int flags, int mode) { + return LSS_NAME(openat)(AT_FDCWD, pathname, flags, mode); + } +#endif + +#if !defined(__NR_unlink) + LSS_INLINE int LSS_NAME(unlink)(const char *pathname) { + return LSS_NAME(unlinkat)(AT_FDCWD, pathname, 0); + } +#endif + +#if !defined(__NR_readlink) + LSS_INLINE int LSS_NAME(readlink)(const char *pathname, char *buffer, + size_t size) { + return LSS_NAME(readlinkat)(AT_FDCWD, pathname, buffer, size); + } +#endif + +#if !defined(__NR_pipe) + LSS_INLINE int LSS_NAME(pipe)(int *pipefd) { + return LSS_NAME(pipe2)(pipefd, 0); + } +#endif + +#if !defined(__NR_poll) + LSS_INLINE int LSS_NAME(poll)(struct kernel_pollfd *fds, unsigned int nfds, + int timeout) { + struct kernel_timespec timeout_ts; + struct kernel_timespec *timeout_ts_p = NULL; + + if (timeout >= 0) { + timeout_ts.tv_sec = timeout / 1000; + timeout_ts.tv_nsec = (timeout % 1000) * 1000000; + timeout_ts_p = &timeout_ts; + } + return LSS_NAME(ppoll)(fds, nfds, timeout_ts_p, NULL, 0); + } +#endif + +#if defined(__NR_statx) + /* copy the contents of kernel_statx to the kernel_stat structure. */ + LSS_INLINE void LSS_NAME(cp_stat_statx)(struct kernel_stat *to, + struct kernel_statx *from) { + memset(to, 0, sizeof(struct kernel_stat)); + to->st_dev = (kernel_dev_t)((from->stx_dev_minor & 0xff) | + ((from->stx_dev_major & 0xfff) << 8) | + ((from->stx_dev_minor & ~0xffu) << 12)); + to->st_rdev = (kernel_dev_t)((from->stx_rdev_minor & 0xff) | + ((from->stx_rdev_major & 0xfff) << 8) | + ((from->stx_rdev_minor & ~0xffu) << 12)); + to->st_ino = (kernel_ino_t)from->stx_ino; + to->st_mode = (kernel_mode_t)from->stx_mode; + to->st_nlink = (kernel_nlink_t)from->stx_nlink; + to->st_uid = (kernel_uid_t)from->stx_uid; + to->st_gid = (kernel_gid_t)from->stx_gid; + to->st_atime_ = (kernel_time_t)(from->stx_atime.tv_sec); + to->st_atime_nsec_ = from->stx_atime.tv_nsec; + to->st_mtime_ = (kernel_time_t)(from->stx_mtime.tv_sec); + to->st_mtime_nsec_ = from->stx_mtime.tv_nsec; + to->st_ctime_ = (kernel_time_t)(from->stx_ctime.tv_sec); + to->st_ctime_nsec_ = from->stx_ctime.tv_nsec; + to->st_size = (kernel_off_t)(from->stx_size); + to->st_blocks = (kernel_blkcnt_t)(from->stx_blocks); + to->st_blksize = (kernel_blksize_t)from->stx_blksize; + } +#endif + +#if !defined(__NR_fstat) + LSS_INLINE int LSS_NAME(fstat)(int fd, + struct kernel_stat *buf) { + #if defined(__NR_newfstatat) + return LSS_NAME(newfstatat)(fd, "", buf, AT_EMPTY_PATH); + #elif defined(__NR_statx) + struct kernel_statx stx; + int flags = AT_NO_AUTOMOUNT | AT_EMPTY_PATH; + int mask = STATX_BASIC_STATS; + int res = LSS_NAME(statx)(fd, "", flags, mask, &stx); + LSS_NAME(cp_stat_statx)(buf, &stx); + return res; + #endif + } +#endif + +#if !defined(__NR_stat) + LSS_INLINE int LSS_NAME(stat)(const char *pathname, + struct kernel_stat *buf) { + #if defined(__NR_newfstatat) + return LSS_NAME(newfstatat)(AT_FDCWD, pathname, buf, 0); + #elif defined(__NR_statx) + struct kernel_statx stx; + int flags = AT_NO_AUTOMOUNT | AT_STATX_SYNC_AS_STAT; + int mask = STATX_BASIC_STATS; + int res = LSS_NAME(statx)(AT_FDCWD, pathname, flags, mask, &stx); + LSS_NAME(cp_stat_statx)(buf, &stx); + return res; + #endif + } +#endif + +#if !defined(__NR_lstat) + LSS_INLINE int LSS_NAME(lstat)(const char *pathname, + struct kernel_stat *buf) { + #if defined(__NR_newfstatat) + return LSS_NAME(newfstatat)(AT_FDCWD, pathname, buf, AT_SYMLINK_NOFOLLOW); + #elif defined(__NR_statx) + struct kernel_statx stx; + int flags = AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW; + int mask = STATX_BASIC_STATS; + int res = LSS_NAME(statx)(AT_FDCWD, pathname, flags, mask, &stx); + LSS_NAME(cp_stat_statx)(buf, &stx); + return res; + #endif + } +#endif + +#if !defined(__NR_waitpid) + LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options) { + return LSS_NAME(wait4)(pid, status, options, 0); + } +#endif + +#if !defined(__NR_fork) +// TODO: define this in an arch-independant way instead of inlining the clone +// syscall body. + +# if defined(__aarch64__) || defined(__riscv) || defined(__loongarch_lp64) + LSS_INLINE pid_t LSS_NAME(fork)(void) { + // No fork syscall on aarch64 - implement by means of the clone syscall. + // Note that this does not reset glibc's cached view of the PID/TID, so + // some glibc interfaces might go wrong in the forked subprocess. + int flags = SIGCHLD; + void *child_stack = NULL; + void *parent_tidptr = NULL; + void *newtls = NULL; + void *child_tidptr = NULL; + + LSS_REG(0, flags); + LSS_REG(1, child_stack); + LSS_REG(2, parent_tidptr); + LSS_REG(3, newtls); + LSS_REG(4, child_tidptr); + LSS_BODY(pid_t, clone, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), + "r"(__r4)); + } +# elif defined(__x86_64__) + LSS_INLINE pid_t LSS_NAME(fork)(void) { + // Android disallows the fork syscall on x86_64 - implement by means of the + // clone syscall as above for aarch64. + int flags = SIGCHLD; + void *child_stack = NULL; + void *parent_tidptr = NULL; + void *newtls = NULL; + void *child_tidptr = NULL; + + LSS_BODY(5, pid_t, clone, LSS_SYSCALL_ARG(flags), + LSS_SYSCALL_ARG(child_stack), LSS_SYSCALL_ARG(parent_tidptr), + LSS_SYSCALL_ARG(newtls), LSS_SYSCALL_ARG(child_tidptr)); + } +# else +# error missing fork polyfill for this architecture +# endif +#endif + +/* These restore the original values of these macros saved by the + * corresponding #pragma push_macro near the top of this file. */ +#pragma pop_macro("stat64") +#pragma pop_macro("fstat64") +#pragma pop_macro("lstat64") +#pragma pop_macro("pread64") +#pragma pop_macro("pwrite64") +#pragma pop_macro("getdents64") + +#if defined(__cplusplus) && !defined(SYS_CPLUSPLUS) +} +#endif + +#endif +#endif diff --git a/tests/.gitignore b/tests/.gitignore new file mode 100644 index 000000000000..89c5d3c934d7 --- /dev/null +++ b/tests/.gitignore @@ -0,0 +1,4 @@ +/*_test + +# Some tests create temp files. +/tempfile.* diff --git a/tests/Makefile b/tests/Makefile new file mode 100644 index 000000000000..554813d8bb69 --- /dev/null +++ b/tests/Makefile @@ -0,0 +1,147 @@ +# Copyright 2018, Google Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +top_srcdir ?= .. + +DEF_FLAGS = -g -pipe +DEF_WFLAGS = -Wall +CFLAGS ?= $(DEF_FLAGS) +CXXFLAGS ?= $(DEF_FLAGS) +CFLAGS += $(DEF_WFLAGS) -Wstrict-prototypes +CXXFLAGS += $(DEF_WFLAGS) +CPPFLAGS += -I$(top_srcdir) +# We use static linking here so that if people run through qemu/etc... by hand, +# it's a lot easier to run/debug. Same for strace output. +LDFLAGS += -static + +TESTS = \ + fallocate \ + getitimer \ + getrandom \ + lstat \ + setitimer \ + sigaction \ + sigtimedwait \ + stat \ + unlink \ + +all: check + +%_test: %.c test_skel.h $(top_srcdir)/linux_syscall_support.h + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o $@ $< + +# Force building C as C++ code to improve compile-time coverage. +%_cc_test: %.c test_skel.h $(top_srcdir)/linux_syscall_support.h + $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) -o $@ $< + +%_test: %.cc test_skel.h $(top_srcdir)/linux_syscall_support.h + $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) -o $@ $< + +%_run: %_test + @t=$(@:_run=_test); \ + echo "./$$t"; \ + env -i ./$$t; \ + exit_status=$$?; \ + if [ $$exit_status = 77 ]; then \ + echo "SKIP: $$t"; \ + elif [ $$exit_status != 0 ]; then \ + echo "FAIL: $$t"; \ + env -i strace -f -v ./$$t; \ + echo "TRY: gdb -q -ex r -ex bt ./$$t"; \ + exit 1; \ + fi + +ALL_TEST_TARGETS = $(TESTS:=_test) $(TESTS:=_cc_test) +compile_tests: $(ALL_TEST_TARGETS) + +ALL_RUN_TARGETS = $(TESTS:=_run) $(TESTS:=_cc_run) +check: $(ALL_RUN_TARGETS) + +# The "tempfile" targets are the names we use with temp files. +# Clean them out in case some tests crashed in the middle. +clean: + rm -f *~ *.o tempfile.* a.out core $(ALL_TEST_TARGETS) + +.SUFFIXES: +.PHONY: all check clean compile_tests +.SECONDARY: $(ALL_TEST_TARGETS) + +# Try to cross-compile the tests for all our supported arches. We test with +# both gcc and clang. We don't support execution (yet?), but just compiling +# & linking helps catch common bugs. +.PHONY: cross compile_cross +cross_compile: + @echo "Running: $(MAKE) $@ CC='$(CC)' CXX='$(CXX)'"; \ + if (echo '#include ' | $(CC) -x c -c -o /dev/null -) 2>/dev/null; then \ + $(MAKE) -s clean; \ + $(MAKE) -k --no-print-directory compile_tests; \ + else \ + echo "Skipping $(CC) test: not installed"; \ + fi; \ + echo + +# The names here are a best effort. Not easy to probe for. +cross: + @for cc in \ + "x86_64-pc-linux-gnu-gcc" \ + "i686-pc-linux-gnu-gcc" \ + "x86_64-pc-linux-gnu-gcc -mx32" \ + "armv7a-unknown-linux-gnueabi-gcc -marm -mhard-float" \ + "armv7a-unknown-linux-gnueabi-gcc -mthumb -mhard-float" \ + "powerpc-unknown-linux-gnu-gcc" \ + "aarch64-unknown-linux-gnu-gcc" \ + "mips64-unknown-linux-gnu-gcc -mabi=64" \ + "mips64-unknown-linux-gnu-gcc -mabi=32" \ + "mips64-unknown-linux-gnu-gcc -mabi=n32" \ + "s390-ibm-linux-gnu-gcc" \ + "s390x-ibm-linux-gnu-gcc" \ + "loongarch64-unknown-linux-gnu-gcc" \ + ; do \ + cxx=`echo "$$cc" | sed 's:-gcc:-g++:'`; \ + $(MAKE) --no-print-directory CC="$$cc" CXX="$$cxx" cross_compile; \ + \ + sysroot=`$$cc --print-sysroot 2>/dev/null`; \ + gccdir=`$$cc -print-file-name=libgcc.a 2>/dev/null`; \ + gccdir=`dirname "$$gccdir"`; \ + : Skip building for clang for mips/o32 and s390/31-bit until it works.; \ + case $$cc in \ + mips64*-mabi=32) continue;; \ + s390-*) continue;; \ + esac; \ + set -- $$cc; \ + tuple=$${1%-gcc}; \ + shift; \ + cc="clang -target $$tuple $$*"; \ + : Assume the build system is x86_64 based, so ignore the sysroot.; \ + case $$tuple in \ + x86_64*) ;; \ + *) cc="$$cc --sysroot $$sysroot -B$$gccdir -L$$gccdir";; \ + esac; \ + cxx=`echo "$$cc" | sed 's:^clang:clang++:'`; \ + $(MAKE) --no-print-directory CC="$$cc" CXX="$$cxx" cross_compile; \ + done diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 000000000000..45af3c37ed3f --- /dev/null +++ b/tests/README.md @@ -0,0 +1,52 @@ +# LSS Tests + +## Source Layout + +The general layout of the tests: +* [test_skel.h]: Test helpers for common checks/etc... +* xxx.c: Unittest for the xxx syscall (e.g. `open.c`). +* [Makefile]: New tests should be registered in the `TESTS` variable. + +## Test Guidelines + +The unittest itself generally follows the conventions: +* Written in C (unless a very specific language behavior is needed). +* You should only need to `#include "test_skel.h"`. For new system headers, try + to add them here rather than copying to exact unittest (if possible). + It might slow compilation down slightly, but makes the code easier to manage. + Make sure it is included first. +* Use `assert()` on everything to check return values. +* Use `sys_xxx()` to access the syscall via LSS (compared to `xxx()` which tends + to come from the C library). +* If you need a tempfile, use `tempfile.XXXXXX` for templates with helpers like + `mkstemp`. Try to clean them up when you're done with them. + These will be created in the cwd, but that's fine. +* Don't worry about trying to verify the kernel/C library API and various edge + cases. The goal of LSS is to make sure that we pass args along correctly to + the syscall only. +* Make sure to leave comments in the test so it's clear what behavior you're + trying to verify (and how). + +Feel free to extend [test_skel.h] with more helpers if they're useful to more +than one test. + +If you're looking for a simple example, start with [unlink.c](./unlink.c). +You should be able to copy this over and replace the content of `main()`. + +## Running The Tests + +Simply run `make`. This will compile & execute all the tests on your local +system. A standard `make clean` will clean up all the objects. + +If you need to debug something, then the programs are simply named `xxx_test` +and can easily be thrown into `gdb ./xxx_test`. + +We have rudimentary cross-compile testing via gcc and clang. Try running +`make cross` -- for any toolchains you don't have available, it should skip +things automatically. This only verifies the compilation & linking stages +though. + +The cross-compilers can be created using . + +[Makefile]: ./Makefile +[test_skel.h]: ./test_skel.h diff --git a/tests/fallocate.c b/tests/fallocate.c new file mode 100644 index 000000000000..f6589070f7f0 --- /dev/null +++ b/tests/fallocate.c @@ -0,0 +1,69 @@ +/* Copyright 2019, Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "test_skel.h" + +int main(int argc, char *argv[]) { + int fd = 0, mode = 0; + loff_t offset = 0, len = 0; + + // Bad file descriptor. + fd = -1; + assert(sys_fallocate(fd, mode, offset, len) == -1); + assert(errno == EBADF); + + char filename[] = "tempfile.XXXXXX"; + fd = mkstemp(filename); + assert(fd >= 0); + + // Invalid len. + assert(sys_fallocate(fd, mode, offset, len) == -1); + assert(errno == EINVAL); + + // Small offset and length succeeds. + len = 4096; + assert(sys_fallocate(fd, mode, offset, len) == 0); + + // Large offset succeeds and isn't truncated. + offset = 1llu + UINT32_MAX; + assert(sys_fallocate(fd, mode , offset, len) == 0); + +#if defined(__NR_fstat64) + struct kernel_stat64 st; + assert(sys_fstat64(fd, &st) == 0); +#else + struct kernel_stat st; + assert(sys_fstat(fd, &st) == 0); +#endif + assert(st.st_size == offset + len); + + sys_unlink(filename); + + return 0; +} diff --git a/tests/getitimer.c b/tests/getitimer.c new file mode 100644 index 000000000000..507948fea089 --- /dev/null +++ b/tests/getitimer.c @@ -0,0 +1,84 @@ +/* Copyright 2022, Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "test_skel.h" + +int main(int argc, char *argv[]) { + // We need an invalid timer value. The assert()'s below should + // be static asserts but it is not avalible in older C versions. +#define kInvalidTimer 9999 + assert(kInvalidTimer != ITIMER_REAL); + assert(kInvalidTimer != ITIMER_VIRTUAL); + assert(kInvalidTimer != ITIMER_PROF); + + // This should fail with EINVAL. + struct kernel_itimerval curr_itimer; + assert(sys_getitimer(kInvalidTimer, &curr_itimer) == -1); + assert(errno == EINVAL); + + // Create a read-only page. + size_t page_size = getpagesize(); + void* read_only_page = sys_mmap(NULL, page_size, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + assert(read_only_page != MAP_FAILED); + + // This should fail with EFAULT. + assert(sys_getitimer(ITIMER_REAL, + (struct kernel_itimerval*) read_only_page) == -1); + assert(errno == EFAULT); + + // This should complete without an error. + assert(sys_getitimer(ITIMER_REAL, &curr_itimer) == 0); + + // Set up a real time timer with very long interval and value so that + // we do not need to handle SIGALARM in test. + struct kernel_itimerval new_itimer; + const time_t kIntervalSec = 60 * 60 * 24 * 365; // One year. + const long kIntervalUSec = 123; + new_itimer.it_interval.tv_sec = kIntervalSec; + new_itimer.it_interval.tv_usec = kIntervalUSec; + new_itimer.it_value = new_itimer.it_interval; + assert(sys_setitimer(ITIMER_REAL, &new_itimer, NULL) == 0); + + assert(sys_getitimer(ITIMER_REAL, &curr_itimer) == 0); + assert(kernel_timeval_eq(&curr_itimer.it_interval, &new_itimer.it_interval)); + + // Disable timer. + struct kernel_itimerval empty_itimer; + empty_itimer.it_interval.tv_sec = 0; + empty_itimer.it_interval.tv_usec = 0; + empty_itimer.it_value = empty_itimer.it_interval; + assert(sys_setitimer(ITIMER_REAL, &empty_itimer, NULL) == 0); + + // We should read back an empty itimer. + assert(sys_getitimer(ITIMER_REAL, &curr_itimer) == 0); + assert(kernel_itimerval_eq(&curr_itimer, &empty_itimer)); + + return 0; +} diff --git a/tests/getrandom.c b/tests/getrandom.c new file mode 100644 index 000000000000..eb1f16228413 --- /dev/null +++ b/tests/getrandom.c @@ -0,0 +1,59 @@ +/* Copyright 2020, Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "test_skel.h" + +#define BUFFER_SIZE 256 + +int main(int argc, char *argv[]) { + char buffer[BUFFER_SIZE]; + // Zero it out so we can check later that it's at least not all 0s. + memset(buffer, 0, BUFFER_SIZE); + bool buffer_contains_all_zeros = true; + + // Don't bother passing any flags. (If we're using lss, we might not have the + // right header files with the flags defined anyway, and we'd have to copy + // this in here too, and risk getting out of sync in yet another way.) + const ssize_t r = sys_getrandom(buffer, BUFFER_SIZE, 0); + + // Make sure it either worked, or that it's just not supported. + assert(r == BUFFER_SIZE || errno == ENOSYS); + + if (r == BUFFER_SIZE) { + // If all the bytes are 0, it didn't really work. + for (size_t i = 0; i < BUFFER_SIZE; ++i) { + if (buffer[i] != 0) { + buffer_contains_all_zeros = false; + } + } + assert(!buffer_contains_all_zeros); + } + + return 0; +} diff --git a/tests/lstat.c b/tests/lstat.c new file mode 100644 index 000000000000..33848a9817fe --- /dev/null +++ b/tests/lstat.c @@ -0,0 +1,97 @@ +/* Copyright 2021, Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "test_skel.h" + +int main(int argc, char *argv[]) { + int exit_status = 0; + + // Get two unique paths to play with. + char foo[] = "tempfile.XXXXXX"; + char bar[] = "tempfile.XXXXXX"; + int fd_foo = mkstemp(foo); + int fd_bar = mkstemp(bar); + assert(fd_foo != -1); + assert(fd_bar != -1); + + // Then delete foo. + assert(sys_unlink(foo) == 0); + + // Now make foo a symlink to bar. + assert(symlink(bar, foo) == 0); + + // Make sure sys_stat() and sys_lstat() implementation return different + // information. + + // We need to check our stat syscalls for EOVERFLOW, as sometimes the integer + // types used in the stat structures are too small to fit the actual value. + // E.g. on some systems st_ino is 32-bit, but some filesystems have 64-bit + // inodes. + + struct kernel_stat lstat_info; + int rc = sys_lstat(foo, &lstat_info); + if (rc < 0 && errno == EOVERFLOW) { + // Bail out since we had an overflow in the stat structure. + exit_status = SKIP_TEST_EXIT_STATUS; + goto cleanup; + } + assert(rc == 0); + + struct kernel_stat stat_info; + rc = sys_stat(foo, &stat_info); + if (rc < 0 && errno == EOVERFLOW) { + // Bail out since we had an overflow in the stat structure. + exit_status = SKIP_TEST_EXIT_STATUS; + goto cleanup; + } + assert(rc == 0); + + struct kernel_stat bar_stat_info; + rc = sys_stat(bar, &bar_stat_info); + if (rc < 0 && errno == EOVERFLOW) { + // Bail out since we had an overflow in the stat structure. + exit_status = SKIP_TEST_EXIT_STATUS; + goto cleanup; + } + assert(rc == 0); + + // lstat should produce information about a symlink. + assert((lstat_info.st_mode & S_IFMT) == S_IFLNK); + + // stat-ing foo and bar should produce the same inode. + assert(stat_info.st_ino == bar_stat_info.st_ino); + + // lstat-ing foo should give a different inode than stat-ing foo. + assert(stat_info.st_ino != lstat_info.st_ino); + +cleanup: + sys_unlink(foo); + sys_unlink(bar); + return exit_status; +} diff --git a/tests/setitimer.c b/tests/setitimer.c new file mode 100644 index 000000000000..1bd71141de12 --- /dev/null +++ b/tests/setitimer.c @@ -0,0 +1,90 @@ +/* Copyright 2022, Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "test_skel.h" + +int main(int argc, char *argv[]) { + // We need an invalid timer value. The assert()'s below should + // be static asserts but it is not avalible in older C versions. +#define kInvalidTimer 9999 + assert(kInvalidTimer != ITIMER_REAL); + assert(kInvalidTimer != ITIMER_VIRTUAL); + assert(kInvalidTimer != ITIMER_PROF); + + // Invalid timer returns EINVAL. + assert(sys_setitimer(kInvalidTimer, NULL, NULL) == -1); + assert(errno == EINVAL); + + const int kSignal = SIGALRM; + const size_t kSigsetSize = sizeof(struct kernel_sigset_t); + + // Block SIGALRM. + struct kernel_sigset_t sigalarm_only; + struct kernel_sigset_t old_sigset; + assert(sys_sigemptyset(&sigalarm_only) == 0); + assert(sys_sigaddset(&sigalarm_only, kSignal) == 0); + assert(sys_rt_sigprocmask(SIG_BLOCK, &sigalarm_only, &old_sigset, + kSigsetSize) == 0); + + // Set up a real time timer. + struct kernel_itimerval new_itimer = {}; + const long kIntervalUSec = 123; + new_itimer.it_interval.tv_sec = 0; + new_itimer.it_interval.tv_usec = kIntervalUSec; + new_itimer.it_value = new_itimer.it_interval; + assert(sys_setitimer(ITIMER_REAL, &new_itimer, NULL) == 0); + + // Wait for alarm. + struct timespec timeout; + const unsigned long kNanoSecsPerSec = 1000000000; + const unsigned long kNanoSecsPerMicroSec = 1000; + + // Use a timeout 3 times of the timer interval. + unsigned long duration_ns = kIntervalUSec * kNanoSecsPerMicroSec * 3; + timeout.tv_sec = duration_ns / kNanoSecsPerSec ; + timeout.tv_nsec = duration_ns % kNanoSecsPerSec; + + int sig; + do { + sig = sys_sigtimedwait(&sigalarm_only, NULL, &timeout); + } while (sig == -1 && errno == EINTR); + assert(sig == kSignal); + + // Disable timer, check saving of old timer value. + struct kernel_itimerval empty_itimer = {}; + struct kernel_itimerval old_itimer; + empty_itimer.it_interval.tv_sec = 0; + empty_itimer.it_interval.tv_usec = 0; + empty_itimer.it_value = empty_itimer.it_interval; + + assert(sys_setitimer(ITIMER_REAL, &empty_itimer, &old_itimer) == 0); + assert(kernel_timeval_eq(&old_itimer.it_interval, &new_itimer.it_interval)); + + return 0; +} diff --git a/tests/sigaction.c b/tests/sigaction.c new file mode 100644 index 000000000000..f247aca7c87b --- /dev/null +++ b/tests/sigaction.c @@ -0,0 +1,54 @@ +/* Copyright 2020, Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "test_skel.h" + +void test_handler(int sig) {} + +int main(int argc, char *argv[]) { + const size_t kSigsetSize = sizeof(struct kernel_sigset_t); + struct kernel_sigaction action = {}; + // Invalid signal returns EINVAL. + assert(sys_rt_sigaction(SIGKILL, &action, NULL, kSigsetSize) == -1); + assert(errno == EINVAL); + + // Set an action. + action.sa_handler_ = test_handler; + action.sa_flags = SA_SIGINFO; + assert(sys_sigemptyset(&action.sa_mask) == 0); + assert(sys_sigaddset(&action.sa_mask, SIGPIPE) == 0); + assert(sys_rt_sigaction(SIGSEGV, &action, NULL, kSigsetSize) == 0); + + // Retrieve the action. + struct kernel_sigaction old_action = {}; + assert(sys_rt_sigaction(SIGSEGV, NULL, &old_action, kSigsetSize) == 0); + assert(memcmp(&action, &old_action, sizeof(action)) == 0); + + return 0; +} diff --git a/tests/sigtimedwait.c b/tests/sigtimedwait.c new file mode 100644 index 000000000000..ee2f740fe4e4 --- /dev/null +++ b/tests/sigtimedwait.c @@ -0,0 +1,58 @@ +/* Copyright 2019, Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "test_skel.h" + +int main(int argc, char *argv[]) { + + struct kernel_sigset_t sigset = {}; + siginfo_t siginfo = {}; + struct timespec timeout = {}; + + // Invalid timeouts. + timeout.tv_sec = -1; + assert(sys_sigtimedwait(&sigset, &siginfo, &timeout) == -1); + assert(errno == EINVAL); + + // Expired timeouts. + timeout.tv_sec = 0; + assert(sys_sigtimedwait(&sigset, &siginfo, &timeout) == -1); + assert(errno == EAGAIN); + + // Success. + const int kTestSignal = SIGCONT; + assert(sys_sigemptyset(&sigset) == 0); + assert(sys_sigaddset(&sigset, kTestSignal) == 0); + assert(sys_sigprocmask(SIG_BLOCK, &sigset, NULL) == 0); + assert(raise(kTestSignal) == 0); + assert(sys_sigtimedwait(&sigset, &siginfo, &timeout) == kTestSignal); + assert(siginfo.si_signo == kTestSignal); + + return 0; +} diff --git a/tests/stat.c b/tests/stat.c new file mode 100644 index 000000000000..48cac620cbcd --- /dev/null +++ b/tests/stat.c @@ -0,0 +1,67 @@ +/* Copyright 2021, Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "test_skel.h" + +int main(int argc, char *argv[]) { + int exit_status = 0; + + // Get two unique paths to play with. + char foo[] = "tempfile.XXXXXX"; + int fd_foo = mkstemp(foo); + assert(fd_foo != -1); + + // Make sure it exists. + assert(access(foo, F_OK) == 0); + + // Make sure sys_stat() and a libc stat() implementation return the same + // information. + struct stat libc_stat; + assert(stat(foo, &libc_stat) == 0); + + struct kernel_stat raw_stat; + // We need to check our stat syscall for EOVERFLOW, as sometimes the integer + // types used in the stat structures are too small to fit the actual value. + // E.g. on some systems st_ino is 32-bit, but some filesystems have 64-bit + // inodes. + int rc = sys_stat(foo, &raw_stat); + if (rc < 0 && errno == EOVERFLOW) { + // Bail out since we had an overflow in the stat structure. + exit_status = SKIP_TEST_EXIT_STATUS; + goto cleanup; + } + assert(rc == 0); + + assert(libc_stat.st_ino == raw_stat.st_ino); + + +cleanup: + sys_unlink(foo); + return exit_status; +} diff --git a/tests/test_skel.h b/tests/test_skel.h new file mode 100644 index 000000000000..024906cc9b55 --- /dev/null +++ b/tests/test_skel.h @@ -0,0 +1,89 @@ +/* Copyright 2018, Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Make sure it's defined before including anything else. A number of syscalls + * are GNU extensions and rely on being exported by glibc. + */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +/* + * Make sure the assert checks aren't removed as all the unittests are based + * on them. + */ +#undef NDEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "linux_syscall_support.h" + +#define SKIP_TEST_EXIT_STATUS 77 + +void assert_buffers_eq_len(const void *buf1, const void *buf2, size_t len) { + const uint8_t *u8_1 = (const uint8_t *)buf1; + const uint8_t *u8_2 = (const uint8_t *)buf2; + size_t i; + + for (i = 0; i < len; ++i) { + if (u8_1[i] != u8_2[i]) + printf("offset %zu: %02x != %02x\n", i, u8_1[i], u8_2[i]); + } +} +#define assert_buffers_eq(obj1, obj2) assert_buffers_eq_len(obj1, obj2, sizeof(*obj1)) + +// Returns true iff pointed timevals are equal. +static inline bool kernel_timeval_eq(const struct kernel_timeval* lhs, + const struct kernel_timeval* rhs) { + return (lhs->tv_sec == rhs->tv_sec) && (lhs->tv_usec == rhs->tv_usec); +} + +// Returns true iff pointed itimervals are equal. +static inline bool kernel_itimerval_eq(const struct kernel_itimerval* lhs, + const struct kernel_itimerval* rhs) { + return kernel_timeval_eq(&lhs->it_interval, &rhs->it_interval) && + kernel_timeval_eq(&lhs->it_value, &rhs->it_value); +} + diff --git a/tests/unlink.c b/tests/unlink.c new file mode 100644 index 000000000000..70c8bc9a5e5c --- /dev/null +++ b/tests/unlink.c @@ -0,0 +1,48 @@ +/* Copyright 2018, Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "test_skel.h" + +int main(int argc, char *argv[]) { + // Get a unique path to play with. + char foo[] = "tempfile.XXXXXX"; + int fd = mkstemp(foo); + assert(fd != -1); + + // Make sure it exists. + assert(access(foo, F_OK) == 0); + + // Then delete it. + assert(sys_unlink(foo) == 0); + + // Make sure it's gone. + assert(access(foo, F_OK) != 0); + + return 0; +} From 718c97d2317444f56c3616a7716883f0af54b3d7 Mon Sep 17 00:00:00 2001 From: "Chrome Release Bot (LUCI)" Date: Thu, 21 Mar 2024 17:19:53 +0000 Subject: [PATCH 2/2] Publish DEPS for 114.0.5735.358 git-subtree-dir: third_party/ply git-subtree-split: 1759c6ae9316996b9f150c0ce9d0ca78a3d15c02 --- DIR_METADATA | 3 + LICENSE | 30 + OWNERS | 3 + README.chromium | 25 + README.md | 275 ++++ __init__.py | 37 + lex.py | 1098 +++++++++++++++ license.patch | 42 + ply.gni | 5 + yacc.py | 3502 +++++++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 5020 insertions(+) create mode 100644 DIR_METADATA create mode 100644 LICENSE create mode 100644 OWNERS create mode 100644 README.chromium create mode 100644 README.md create mode 100644 __init__.py create mode 100644 lex.py create mode 100644 license.patch create mode 100644 ply.gni create mode 100644 yacc.py diff --git a/DIR_METADATA b/DIR_METADATA new file mode 100644 index 000000000000..983ea0140f45 --- /dev/null +++ b/DIR_METADATA @@ -0,0 +1,3 @@ +monorail { + component: "Tools" +} diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000000..b2738663454b --- /dev/null +++ b/LICENSE @@ -0,0 +1,30 @@ +PLY (Python Lex-Yacc) Version 3.4 + +Copyright (C) 2001-2011, +David M. Beazley (Dabeaz LLC) +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +* Neither the name of the David Beazley or Dabeaz LLC may be used to + endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/OWNERS b/OWNERS new file mode 100644 index 000000000000..93a641dd7a3e --- /dev/null +++ b/OWNERS @@ -0,0 +1,3 @@ +rockot@google.com + +file://tools/idl_parser/OWNERS diff --git a/README.chromium b/README.chromium new file mode 100644 index 000000000000..45e10b87b6b6 --- /dev/null +++ b/README.chromium @@ -0,0 +1,25 @@ +Name: PLY (Python Lex-Yacc) +Current version: 3.11 +URL: http://www.dabeaz.com/ply/ply-3.11.tar.gz +License: BSD +License File: LICENSE +Security Critical: no +Version: 3.11 + +PLY is used by (at least) the Mojo python bindings, the PPAPI +IDL generator, and the Blink IDL generator. + +This directory contains a copy of these ply-3.11 components: + +README ply-3.11/README.md +Sources ply-3.11/ply/__init__.py + ply-3.11/ply/lex.py + ply-3.11/ply/yacc.py + + +The license is in LICENSE. + +Modifications made: +- Added the file README.chromium (this file) +- Applies license.patch +- Added ply.gni to list sources diff --git a/README.md b/README.md new file mode 100644 index 000000000000..05df32a5b9fc --- /dev/null +++ b/README.md @@ -0,0 +1,275 @@ +# PLY (Python Lex-Yacc) Version 3.11 + +[![Build Status](https://travis-ci.org/dabeaz/ply.svg?branch=master)](https://travis-ci.org/dabeaz/ply) + +Copyright (C) 2001-2018 +David M. Beazley (Dabeaz LLC) +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +* Neither the name of the David Beazley or Dabeaz LLC may be used to + endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Introduction +============ + +PLY is a 100% Python implementation of the common parsing tools lex +and yacc. Here are a few highlights: + + - PLY is very closely modeled after traditional lex/yacc. + If you know how to use these tools in C, you will find PLY + to be similar. + + - PLY provides *very* extensive error reporting and diagnostic + information to assist in parser construction. The original + implementation was developed for instructional purposes. As + a result, the system tries to identify the most common types + of errors made by novice users. + + - PLY provides full support for empty productions, error recovery, + precedence specifiers, and moderately ambiguous grammars. + + - Parsing is based on LR-parsing which is fast, memory efficient, + better suited to large grammars, and which has a number of nice + properties when dealing with syntax errors and other parsing problems. + Currently, PLY builds its parsing tables using the LALR(1) + algorithm used in yacc. + + - PLY uses Python introspection features to build lexers and parsers. + This greatly simplifies the task of parser construction since it reduces + the number of files and eliminates the need to run a separate lex/yacc + tool before running your program. + + - PLY can be used to build parsers for "real" programming languages. + Although it is not ultra-fast due to its Python implementation, + PLY can be used to parse grammars consisting of several hundred + rules (as might be found for a language like C). The lexer and LR + parser are also reasonably efficient when parsing typically + sized programs. People have used PLY to build parsers for + C, C++, ADA, and other real programming languages. + +How to Use +========== + +PLY consists of two files : lex.py and yacc.py. These are contained +within the 'ply' directory which may also be used as a Python package. +To use PLY, simply copy the 'ply' directory to your project and import +lex and yacc from the associated 'ply' package. For example: + + import ply.lex as lex + import ply.yacc as yacc + +Alternatively, you can copy just the files lex.py and yacc.py +individually and use them as modules. For example: + + import lex + import yacc + +The file setup.py can be used to install ply using distutils. + +The file doc/ply.html contains complete documentation on how to use +the system. + +The example directory contains several different examples including a +PLY specification for ANSI C as given in K&R 2nd Ed. + +A simple example is found at the end of this document + +Requirements +============ +PLY requires the use of Python 2.6 or greater. However, you should +use the latest Python release if possible. It should work on just +about any platform. PLY has been tested with both CPython and Jython. +It also seems to work with IronPython. + +Resources +========= +More information about PLY can be obtained on the PLY webpage at: + + http://www.dabeaz.com/ply + +For a detailed overview of parsing theory, consult the excellent +book "Compilers : Principles, Techniques, and Tools" by Aho, Sethi, and +Ullman. The topics found in "Lex & Yacc" by Levine, Mason, and Brown +may also be useful. + +The GitHub page for PLY can be found at: + + https://github.com/dabeaz/ply + +An old and relatively inactive discussion group for PLY is found at: + + http://groups.google.com/group/ply-hack + +Acknowledgments +=============== +A special thanks is in order for all of the students in CS326 who +suffered through about 25 different versions of these tools :-). + +The CHANGES file acknowledges those who have contributed patches. + +Elias Ioup did the first implementation of LALR(1) parsing in PLY-1.x. +Andrew Waters and Markus Schoepflin were instrumental in reporting bugs +and testing a revised LALR(1) implementation for PLY-2.0. + +Special Note for PLY-3.0 +======================== +PLY-3.0 the first PLY release to support Python 3. However, backwards +compatibility with Python 2.6 is still preserved. PLY provides dual +Python 2/3 compatibility by restricting its implementation to a common +subset of basic language features. You should not convert PLY using +2to3--it is not necessary and may in fact break the implementation. + +Example +======= + +Here is a simple example showing a PLY implementation of a calculator +with variables. + + # ----------------------------------------------------------------------------- + # calc.py + # + # A simple calculator with variables. + # ----------------------------------------------------------------------------- + + tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + + # Tokens + + t_PLUS = r'\+' + t_MINUS = r'-' + t_TIMES = r'\*' + t_DIVIDE = r'/' + t_EQUALS = r'=' + t_LPAREN = r'\(' + t_RPAREN = r'\)' + t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + + def t_NUMBER(t): + r'\d+' + t.value = int(t.value) + return t + + # Ignored characters + t_ignore = " \t" + + def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + + def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + + # Build the lexer + import ply.lex as lex + lex.lex() + + # Precedence rules for the arithmetic operators + precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + + # dictionary of names (for storing variables) + names = { } + + def p_statement_assign(p): + 'statement : NAME EQUALS expression' + names[p[1]] = p[3] + + def p_statement_expr(p): + 'statement : expression' + print(p[1]) + + def p_expression_binop(p): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if p[2] == '+' : p[0] = p[1] + p[3] + elif p[2] == '-': p[0] = p[1] - p[3] + elif p[2] == '*': p[0] = p[1] * p[3] + elif p[2] == '/': p[0] = p[1] / p[3] + + def p_expression_uminus(p): + 'expression : MINUS expression %prec UMINUS' + p[0] = -p[2] + + def p_expression_group(p): + 'expression : LPAREN expression RPAREN' + p[0] = p[2] + + def p_expression_number(p): + 'expression : NUMBER' + p[0] = p[1] + + def p_expression_name(p): + 'expression : NAME' + try: + p[0] = names[p[1]] + except LookupError: + print("Undefined name '%s'" % p[1]) + p[0] = 0 + + def p_error(p): + print("Syntax error at '%s'" % p.value) + + import ply.yacc as yacc + yacc.yacc() + + while True: + try: + s = raw_input('calc > ') # use input() on Python 3 + except EOFError: + break + yacc.parse(s) + + +Bug Reports and Patches +======================= +My goal with PLY is to simply have a decent lex/yacc implementation +for Python. As a general rule, I don't spend huge amounts of time +working on it unless I receive very specific bug reports and/or +patches to fix problems. I also try to incorporate submitted feature +requests and enhancements into each new version. Please visit the PLY +github page at https://github.com/dabeaz/ply to submit issues and pull +requests. To contact me about bugs and/or new features, please send +email to dave@dabeaz.com. + +-- Dave + + + + + + + + + diff --git a/__init__.py b/__init__.py new file mode 100644 index 000000000000..e48880f50fbc --- /dev/null +++ b/__init__.py @@ -0,0 +1,37 @@ +# PLY package +# Author: David Beazley (dave@dabeaz.com) +# ----------------------------------------------------------------------------- +# ply: __init__.py +# +# Copyright (C) 2001-2018 +# David M. Beazley (Dabeaz LLC) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the name of the David Beazley or Dabeaz LLC may be used to +# endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ----------------------------------------------------------------------------- + +__version__ = '3.11' +__all__ = ['lex','yacc'] diff --git a/lex.py b/lex.py new file mode 100644 index 000000000000..f95bcdbf1bb5 --- /dev/null +++ b/lex.py @@ -0,0 +1,1098 @@ +# ----------------------------------------------------------------------------- +# ply: lex.py +# +# Copyright (C) 2001-2018 +# David M. Beazley (Dabeaz LLC) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the name of the David Beazley or Dabeaz LLC may be used to +# endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ----------------------------------------------------------------------------- + +__version__ = '3.11' +__tabversion__ = '3.10' + +import re +import sys +import types +import copy +import os +import inspect + +# This tuple contains known string types +try: + # Python 2.6 + StringTypes = (types.StringType, types.UnicodeType) +except AttributeError: + # Python 3.0 + StringTypes = (str, bytes) + +# This regular expression is used to match valid token names +_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') + +# Exception thrown when invalid token encountered and no default error +# handler is defined. +class LexError(Exception): + def __init__(self, message, s): + self.args = (message,) + self.text = s + + +# Token class. This class is used to represent the tokens produced. +class LexToken(object): + def __str__(self): + return 'LexToken(%s,%r,%d,%d)' % (self.type, self.value, self.lineno, self.lexpos) + + def __repr__(self): + return str(self) + + +# This object is a stand-in for a logging object created by the +# logging module. + +class PlyLogger(object): + def __init__(self, f): + self.f = f + + def critical(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') + + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') + + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') + + info = critical + debug = critical + + +# Null logger is used when no output is generated. Does nothing. +class NullLogger(object): + def __getattribute__(self, name): + return self + + def __call__(self, *args, **kwargs): + return self + + +# ----------------------------------------------------------------------------- +# === Lexing Engine === +# +# The following Lexer class implements the lexer runtime. There are only +# a few public methods and attributes: +# +# input() - Store a new string in the lexer +# token() - Get the next token +# clone() - Clone the lexer +# +# lineno - Current line number +# lexpos - Current position in the input string +# ----------------------------------------------------------------------------- + +class Lexer: + def __init__(self): + self.lexre = None # Master regular expression. This is a list of + # tuples (re, findex) where re is a compiled + # regular expression and findex is a list + # mapping regex group numbers to rules + self.lexretext = None # Current regular expression strings + self.lexstatere = {} # Dictionary mapping lexer states to master regexs + self.lexstateretext = {} # Dictionary mapping lexer states to regex strings + self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names + self.lexstate = 'INITIAL' # Current lexer state + self.lexstatestack = [] # Stack of lexer states + self.lexstateinfo = None # State information + self.lexstateignore = {} # Dictionary of ignored characters for each state + self.lexstateerrorf = {} # Dictionary of error functions for each state + self.lexstateeoff = {} # Dictionary of eof functions for each state + self.lexreflags = 0 # Optional re compile flags + self.lexdata = None # Actual input data (as a string) + self.lexpos = 0 # Current position in input text + self.lexlen = 0 # Length of the input text + self.lexerrorf = None # Error rule (if any) + self.lexeoff = None # EOF rule (if any) + self.lextokens = None # List of valid tokens + self.lexignore = '' # Ignored characters + self.lexliterals = '' # Literal characters that can be passed through + self.lexmodule = None # Module + self.lineno = 1 # Current line number + self.lexoptimize = False # Optimized mode + + def clone(self, object=None): + c = copy.copy(self) + + # If the object parameter has been supplied, it means we are attaching the + # lexer to a new object. In this case, we have to rebind all methods in + # the lexstatere and lexstateerrorf tables. + + if object: + newtab = {} + for key, ritem in self.lexstatere.items(): + newre = [] + for cre, findex in ritem: + newfindex = [] + for f in findex: + if not f or not f[0]: + newfindex.append(f) + continue + newfindex.append((getattr(object, f[0].__name__), f[1])) + newre.append((cre, newfindex)) + newtab[key] = newre + c.lexstatere = newtab + c.lexstateerrorf = {} + for key, ef in self.lexstateerrorf.items(): + c.lexstateerrorf[key] = getattr(object, ef.__name__) + c.lexmodule = object + return c + + # ------------------------------------------------------------ + # writetab() - Write lexer information to a table file + # ------------------------------------------------------------ + def writetab(self, lextab, outputdir=''): + if isinstance(lextab, types.ModuleType): + raise IOError("Won't overwrite existing lextab module") + basetabmodule = lextab.split('.')[-1] + filename = os.path.join(outputdir, basetabmodule) + '.py' + with open(filename, 'w') as tf: + tf.write('# %s.py. This file automatically created by PLY (version %s). Don\'t edit!\n' % (basetabmodule, __version__)) + tf.write('_tabversion = %s\n' % repr(__tabversion__)) + tf.write('_lextokens = set(%s)\n' % repr(tuple(sorted(self.lextokens)))) + tf.write('_lexreflags = %s\n' % repr(int(self.lexreflags))) + tf.write('_lexliterals = %s\n' % repr(self.lexliterals)) + tf.write('_lexstateinfo = %s\n' % repr(self.lexstateinfo)) + + # Rewrite the lexstatere table, replacing function objects with function names + tabre = {} + for statename, lre in self.lexstatere.items(): + titem = [] + for (pat, func), retext, renames in zip(lre, self.lexstateretext[statename], self.lexstaterenames[statename]): + titem.append((retext, _funcs_to_names(func, renames))) + tabre[statename] = titem + + tf.write('_lexstatere = %s\n' % repr(tabre)) + tf.write('_lexstateignore = %s\n' % repr(self.lexstateignore)) + + taberr = {} + for statename, ef in self.lexstateerrorf.items(): + taberr[statename] = ef.__name__ if ef else None + tf.write('_lexstateerrorf = %s\n' % repr(taberr)) + + tabeof = {} + for statename, ef in self.lexstateeoff.items(): + tabeof[statename] = ef.__name__ if ef else None + tf.write('_lexstateeoff = %s\n' % repr(tabeof)) + + # ------------------------------------------------------------ + # readtab() - Read lexer information from a tab file + # ------------------------------------------------------------ + def readtab(self, tabfile, fdict): + if isinstance(tabfile, types.ModuleType): + lextab = tabfile + else: + exec('import %s' % tabfile) + lextab = sys.modules[tabfile] + + if getattr(lextab, '_tabversion', '0.0') != __tabversion__: + raise ImportError('Inconsistent PLY version') + + self.lextokens = lextab._lextokens + self.lexreflags = lextab._lexreflags + self.lexliterals = lextab._lexliterals + self.lextokens_all = self.lextokens | set(self.lexliterals) + self.lexstateinfo = lextab._lexstateinfo + self.lexstateignore = lextab._lexstateignore + self.lexstatere = {} + self.lexstateretext = {} + for statename, lre in lextab._lexstatere.items(): + titem = [] + txtitem = [] + for pat, func_name in lre: + titem.append((re.compile(pat, lextab._lexreflags), _names_to_funcs(func_name, fdict))) + + self.lexstatere[statename] = titem + self.lexstateretext[statename] = txtitem + + self.lexstateerrorf = {} + for statename, ef in lextab._lexstateerrorf.items(): + self.lexstateerrorf[statename] = fdict[ef] + + self.lexstateeoff = {} + for statename, ef in lextab._lexstateeoff.items(): + self.lexstateeoff[statename] = fdict[ef] + + self.begin('INITIAL') + + # ------------------------------------------------------------ + # input() - Push a new string into the lexer + # ------------------------------------------------------------ + def input(self, s): + # Pull off the first character to see if s looks like a string + c = s[:1] + if not isinstance(c, StringTypes): + raise ValueError('Expected a string') + self.lexdata = s + self.lexpos = 0 + self.lexlen = len(s) + + # ------------------------------------------------------------ + # begin() - Changes the lexing state + # ------------------------------------------------------------ + def begin(self, state): + if state not in self.lexstatere: + raise ValueError('Undefined state') + self.lexre = self.lexstatere[state] + self.lexretext = self.lexstateretext[state] + self.lexignore = self.lexstateignore.get(state, '') + self.lexerrorf = self.lexstateerrorf.get(state, None) + self.lexeoff = self.lexstateeoff.get(state, None) + self.lexstate = state + + # ------------------------------------------------------------ + # push_state() - Changes the lexing state and saves old on stack + # ------------------------------------------------------------ + def push_state(self, state): + self.lexstatestack.append(self.lexstate) + self.begin(state) + + # ------------------------------------------------------------ + # pop_state() - Restores the previous state + # ------------------------------------------------------------ + def pop_state(self): + self.begin(self.lexstatestack.pop()) + + # ------------------------------------------------------------ + # current_state() - Returns the current lexing state + # ------------------------------------------------------------ + def current_state(self): + return self.lexstate + + # ------------------------------------------------------------ + # skip() - Skip ahead n characters + # ------------------------------------------------------------ + def skip(self, n): + self.lexpos += n + + # ------------------------------------------------------------ + # opttoken() - Return the next token from the Lexer + # + # Note: This function has been carefully implemented to be as fast + # as possible. Don't make changes unless you really know what + # you are doing + # ------------------------------------------------------------ + def token(self): + # Make local copies of frequently referenced attributes + lexpos = self.lexpos + lexlen = self.lexlen + lexignore = self.lexignore + lexdata = self.lexdata + + while lexpos < lexlen: + # This code provides some short-circuit code for whitespace, tabs, and other ignored characters + if lexdata[lexpos] in lexignore: + lexpos += 1 + continue + + # Look for a regular expression match + for lexre, lexindexfunc in self.lexre: + m = lexre.match(lexdata, lexpos) + if not m: + continue + + # Create a token for return + tok = LexToken() + tok.value = m.group() + tok.lineno = self.lineno + tok.lexpos = lexpos + + i = m.lastindex + func, tok.type = lexindexfunc[i] + + if not func: + # If no token type was set, it's an ignored token + if tok.type: + self.lexpos = m.end() + return tok + else: + lexpos = m.end() + break + + lexpos = m.end() + + # If token is processed by a function, call it + + tok.lexer = self # Set additional attributes useful in token rules + self.lexmatch = m + self.lexpos = lexpos + + newtok = func(tok) + + # Every function must return a token, if nothing, we just move to next token + if not newtok: + lexpos = self.lexpos # This is here in case user has updated lexpos. + lexignore = self.lexignore # This is here in case there was a state change + break + + # Verify type of the token. If not in the token map, raise an error + if not self.lexoptimize: + if newtok.type not in self.lextokens_all: + raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( + func.__code__.co_filename, func.__code__.co_firstlineno, + func.__name__, newtok.type), lexdata[lexpos:]) + + return newtok + else: + # No match, see if in literals + if lexdata[lexpos] in self.lexliterals: + tok = LexToken() + tok.value = lexdata[lexpos] + tok.lineno = self.lineno + tok.type = tok.value + tok.lexpos = lexpos + self.lexpos = lexpos + 1 + return tok + + # No match. Call t_error() if defined. + if self.lexerrorf: + tok = LexToken() + tok.value = self.lexdata[lexpos:] + tok.lineno = self.lineno + tok.type = 'error' + tok.lexer = self + tok.lexpos = lexpos + self.lexpos = lexpos + newtok = self.lexerrorf(tok) + if lexpos == self.lexpos: + # Error method didn't change text position at all. This is an error. + raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) + lexpos = self.lexpos + if not newtok: + continue + return newtok + + self.lexpos = lexpos + raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos), lexdata[lexpos:]) + + if self.lexeoff: + tok = LexToken() + tok.type = 'eof' + tok.value = '' + tok.lineno = self.lineno + tok.lexpos = lexpos + tok.lexer = self + self.lexpos = lexpos + newtok = self.lexeoff(tok) + return newtok + + self.lexpos = lexpos + 1 + if self.lexdata is None: + raise RuntimeError('No input string given with input()') + return None + + # Iterator interface + def __iter__(self): + return self + + def next(self): + t = self.token() + if t is None: + raise StopIteration + return t + + __next__ = next + +# ----------------------------------------------------------------------------- +# ==== Lex Builder === +# +# The functions and classes below are used to collect lexing information +# and build a Lexer object from it. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# _get_regex(func) +# +# Returns the regular expression assigned to a function either as a doc string +# or as a .regex attribute attached by the @TOKEN decorator. +# ----------------------------------------------------------------------------- +def _get_regex(func): + return getattr(func, 'regex', func.__doc__) + +# ----------------------------------------------------------------------------- +# get_caller_module_dict() +# +# This function returns a dictionary containing all of the symbols defined within +# a caller further down the call stack. This is used to get the environment +# associated with the yacc() call if none was provided. +# ----------------------------------------------------------------------------- +def get_caller_module_dict(levels): + f = sys._getframe(levels) + ldict = f.f_globals.copy() + if f.f_globals != f.f_locals: + ldict.update(f.f_locals) + return ldict + +# ----------------------------------------------------------------------------- +# _funcs_to_names() +# +# Given a list of regular expression functions, this converts it to a list +# suitable for output to a table file +# ----------------------------------------------------------------------------- +def _funcs_to_names(funclist, namelist): + result = [] + for f, name in zip(funclist, namelist): + if f and f[0]: + result.append((name, f[1])) + else: + result.append(f) + return result + +# ----------------------------------------------------------------------------- +# _names_to_funcs() +# +# Given a list of regular expression function names, this converts it back to +# functions. +# ----------------------------------------------------------------------------- +def _names_to_funcs(namelist, fdict): + result = [] + for n in namelist: + if n and n[0]: + result.append((fdict[n[0]], n[1])) + else: + result.append(n) + return result + +# ----------------------------------------------------------------------------- +# _form_master_re() +# +# This function takes a list of all of the regex components and attempts to +# form the master regular expression. Given limitations in the Python re +# module, it may be necessary to break the master regex into separate expressions. +# ----------------------------------------------------------------------------- +def _form_master_re(relist, reflags, ldict, toknames): + if not relist: + return [] + regex = '|'.join(relist) + try: + lexre = re.compile(regex, reflags) + + # Build the index to function map for the matching engine + lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) + lexindexnames = lexindexfunc[:] + + for f, i in lexre.groupindex.items(): + handle = ldict.get(f, None) + if type(handle) in (types.FunctionType, types.MethodType): + lexindexfunc[i] = (handle, toknames[f]) + lexindexnames[i] = f + elif handle is not None: + lexindexnames[i] = f + if f.find('ignore_') > 0: + lexindexfunc[i] = (None, None) + else: + lexindexfunc[i] = (None, toknames[f]) + + return [(lexre, lexindexfunc)], [regex], [lexindexnames] + except Exception: + m = int(len(relist)/2) + if m == 0: + m = 1 + llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames) + rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames) + return (llist+rlist), (lre+rre), (lnames+rnames) + +# ----------------------------------------------------------------------------- +# def _statetoken(s,names) +# +# Given a declaration name s of the form "t_" and a dictionary whose keys are +# state names, this function returns a tuple (states,tokenname) where states +# is a tuple of state names and tokenname is the name of the token. For example, +# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') +# ----------------------------------------------------------------------------- +def _statetoken(s, names): + parts = s.split('_') + for i, part in enumerate(parts[1:], 1): + if part not in names and part != 'ANY': + break + + if i > 1: + states = tuple(parts[1:i]) + else: + states = ('INITIAL',) + + if 'ANY' in states: + states = tuple(names) + + tokenname = '_'.join(parts[i:]) + return (states, tokenname) + + +# ----------------------------------------------------------------------------- +# LexerReflect() +# +# This class represents information needed to build a lexer as extracted from a +# user's input file. +# ----------------------------------------------------------------------------- +class LexerReflect(object): + def __init__(self, ldict, log=None, reflags=0): + self.ldict = ldict + self.error_func = None + self.tokens = [] + self.reflags = reflags + self.stateinfo = {'INITIAL': 'inclusive'} + self.modules = set() + self.error = False + self.log = PlyLogger(sys.stderr) if log is None else log + + # Get all of the basic information + def get_all(self): + self.get_tokens() + self.get_literals() + self.get_states() + self.get_rules() + + # Validate all of the information + def validate_all(self): + self.validate_tokens() + self.validate_literals() + self.validate_rules() + return self.error + + # Get the tokens map + def get_tokens(self): + tokens = self.ldict.get('tokens', None) + if not tokens: + self.log.error('No token list is defined') + self.error = True + return + + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True + return + + if not tokens: + self.log.error('tokens is empty') + self.error = True + return + + self.tokens = tokens + + # Validate the tokens + def validate_tokens(self): + terminals = {} + for n in self.tokens: + if not _is_identifier.match(n): + self.log.error("Bad token name '%s'", n) + self.error = True + if n in terminals: + self.log.warning("Token '%s' multiply defined", n) + terminals[n] = 1 + + # Get the literals specifier + def get_literals(self): + self.literals = self.ldict.get('literals', '') + if not self.literals: + self.literals = '' + + # Validate literals + def validate_literals(self): + try: + for c in self.literals: + if not isinstance(c, StringTypes) or len(c) > 1: + self.log.error('Invalid literal %s. Must be a single character', repr(c)) + self.error = True + + except TypeError: + self.log.error('Invalid literals specification. literals must be a sequence of characters') + self.error = True + + def get_states(self): + self.states = self.ldict.get('states', None) + # Build statemap + if self.states: + if not isinstance(self.states, (tuple, list)): + self.log.error('states must be defined as a tuple or list') + self.error = True + else: + for s in self.states: + if not isinstance(s, tuple) or len(s) != 2: + self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')", repr(s)) + self.error = True + continue + name, statetype = s + if not isinstance(name, StringTypes): + self.log.error('State name %s must be a string', repr(name)) + self.error = True + continue + if not (statetype == 'inclusive' or statetype == 'exclusive'): + self.log.error("State type for state %s must be 'inclusive' or 'exclusive'", name) + self.error = True + continue + if name in self.stateinfo: + self.log.error("State '%s' already defined", name) + self.error = True + continue + self.stateinfo[name] = statetype + + # Get all of the symbols with a t_ prefix and sort them into various + # categories (functions, strings, error functions, and ignore characters) + + def get_rules(self): + tsymbols = [f for f in self.ldict if f[:2] == 't_'] + + # Now build up a list of functions and a list of strings + self.toknames = {} # Mapping of symbols to token names + self.funcsym = {} # Symbols defined as functions + self.strsym = {} # Symbols defined as strings + self.ignore = {} # Ignore strings by state + self.errorf = {} # Error functions by state + self.eoff = {} # EOF functions by state + + for s in self.stateinfo: + self.funcsym[s] = [] + self.strsym[s] = [] + + if len(tsymbols) == 0: + self.log.error('No rules of the form t_rulename are defined') + self.error = True + return + + for f in tsymbols: + t = self.ldict[f] + states, tokname = _statetoken(f, self.stateinfo) + self.toknames[f] = tokname + + if hasattr(t, '__call__'): + if tokname == 'error': + for s in states: + self.errorf[s] = t + elif tokname == 'eof': + for s in states: + self.eoff[s] = t + elif tokname == 'ignore': + line = t.__code__.co_firstlineno + file = t.__code__.co_filename + self.log.error("%s:%d: Rule '%s' must be defined as a string", file, line, t.__name__) + self.error = True + else: + for s in states: + self.funcsym[s].append((f, t)) + elif isinstance(t, StringTypes): + if tokname == 'ignore': + for s in states: + self.ignore[s] = t + if '\\' in t: + self.log.warning("%s contains a literal backslash '\\'", f) + + elif tokname == 'error': + self.log.error("Rule '%s' must be defined as a function", f) + self.error = True + else: + for s in states: + self.strsym[s].append((f, t)) + else: + self.log.error('%s not defined as a function or string', f) + self.error = True + + # Sort the functions by line number + for f in self.funcsym.values(): + f.sort(key=lambda x: x[1].__code__.co_firstlineno) + + # Sort the strings by regular expression length + for s in self.strsym.values(): + s.sort(key=lambda x: len(x[1]), reverse=True) + + # Validate all of the t_rules collected + def validate_rules(self): + for state in self.stateinfo: + # Validate all rules defined by functions + + for fname, f in self.funcsym[state]: + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) + + tokname = self.toknames[fname] + if isinstance(f, types.MethodType): + reqargs = 2 + else: + reqargs = 1 + nargs = f.__code__.co_argcount + if nargs > reqargs: + self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__) + self.error = True + continue + + if nargs < reqargs: + self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) + self.error = True + continue + + if not _get_regex(f): + self.log.error("%s:%d: No regular expression defined for rule '%s'", file, line, f.__name__) + self.error = True + continue + + try: + c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags) + if c.match(''): + self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file, line, f.__name__) + self.error = True + except re.error as e: + self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) + if '#' in _get_regex(f): + self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'", file, line, f.__name__) + self.error = True + + # Validate all rules defined by strings + for name, r in self.strsym[state]: + tokname = self.toknames[name] + if tokname == 'error': + self.log.error("Rule '%s' must be defined as a function", name) + self.error = True + continue + + if tokname not in self.tokens and tokname.find('ignore_') < 0: + self.log.error("Rule '%s' defined for an unspecified token %s", name, tokname) + self.error = True + continue + + try: + c = re.compile('(?P<%s>%s)' % (name, r), self.reflags) + if (c.match('')): + self.log.error("Regular expression for rule '%s' matches empty string", name) + self.error = True + except re.error as e: + self.log.error("Invalid regular expression for rule '%s'. %s", name, e) + if '#' in r: + self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'", name) + self.error = True + + if not self.funcsym[state] and not self.strsym[state]: + self.log.error("No rules defined for state '%s'", state) + self.error = True + + # Validate the error function + efunc = self.errorf.get(state, None) + if efunc: + f = efunc + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) + + if isinstance(f, types.MethodType): + reqargs = 2 + else: + reqargs = 1 + nargs = f.__code__.co_argcount + if nargs > reqargs: + self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__) + self.error = True + + if nargs < reqargs: + self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) + self.error = True + + for module in self.modules: + self.validate_module(module) + + # ----------------------------------------------------------------------------- + # validate_module() + # + # This checks to see if there are duplicated t_rulename() functions or strings + # in the parser input file. This is done using a simple regular expression + # match on each line in the source code of the given module. + # ----------------------------------------------------------------------------- + + def validate_module(self, module): + try: + lines, linen = inspect.getsourcelines(module) + except IOError: + return + + fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') + sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') + + counthash = {} + linen += 1 + for line in lines: + m = fre.match(line) + if not m: + m = sre.match(line) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + filename = inspect.getsourcefile(module) + self.log.error('%s:%d: Rule %s redefined. Previously defined on line %d', filename, linen, name, prev) + self.error = True + linen += 1 + +# ----------------------------------------------------------------------------- +# lex(module) +# +# Build all of the regular expression rules from definitions in the supplied module +# ----------------------------------------------------------------------------- +def lex(module=None, object=None, debug=False, optimize=False, lextab='lextab', + reflags=int(re.VERBOSE), nowarn=False, outputdir=None, debuglog=None, errorlog=None): + + if lextab is None: + lextab = 'lextab' + + global lexer + + ldict = None + stateinfo = {'INITIAL': 'inclusive'} + lexobj = Lexer() + lexobj.lexoptimize = optimize + global token, input + + if errorlog is None: + errorlog = PlyLogger(sys.stderr) + + if debug: + if debuglog is None: + debuglog = PlyLogger(sys.stderr) + + # Get the module dictionary used for the lexer + if object: + module = object + + # Get the module dictionary used for the parser + if module: + _items = [(k, getattr(module, k)) for k in dir(module)] + ldict = dict(_items) + # If no __file__ attribute is available, try to obtain it from the __module__ instead + if '__file__' not in ldict: + ldict['__file__'] = sys.modules[ldict['__module__']].__file__ + else: + ldict = get_caller_module_dict(2) + + # Determine if the module is package of a package or not. + # If so, fix the tabmodule setting so that tables load correctly + pkg = ldict.get('__package__') + if pkg and isinstance(lextab, str): + if '.' not in lextab: + lextab = pkg + '.' + lextab + + # Collect parser information from the dictionary + linfo = LexerReflect(ldict, log=errorlog, reflags=reflags) + linfo.get_all() + if not optimize: + if linfo.validate_all(): + raise SyntaxError("Can't build lexer") + + if optimize and lextab: + try: + lexobj.readtab(lextab, ldict) + token = lexobj.token + input = lexobj.input + lexer = lexobj + return lexobj + + except ImportError: + pass + + # Dump some basic debugging information + if debug: + debuglog.info('lex: tokens = %r', linfo.tokens) + debuglog.info('lex: literals = %r', linfo.literals) + debuglog.info('lex: states = %r', linfo.stateinfo) + + # Build a dictionary of valid token names + lexobj.lextokens = set() + for n in linfo.tokens: + lexobj.lextokens.add(n) + + # Get literals specification + if isinstance(linfo.literals, (list, tuple)): + lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) + else: + lexobj.lexliterals = linfo.literals + + lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals) + + # Get the stateinfo dictionary + stateinfo = linfo.stateinfo + + regexs = {} + # Build the master regular expressions + for state in stateinfo: + regex_list = [] + + # Add rules defined by functions first + for fname, f in linfo.funcsym[state]: + regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f))) + if debug: + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state) + + # Now add all of the simple rules + for name, r in linfo.strsym[state]: + regex_list.append('(?P<%s>%s)' % (name, r)) + if debug: + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) + + regexs[state] = regex_list + + # Build the master regular expressions + + if debug: + debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====') + + for state in regexs: + lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames) + lexobj.lexstatere[state] = lexre + lexobj.lexstateretext[state] = re_text + lexobj.lexstaterenames[state] = re_names + if debug: + for i, text in enumerate(re_text): + debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) + + # For inclusive states, we need to add the regular expressions from the INITIAL state + for state, stype in stateinfo.items(): + if state != 'INITIAL' and stype == 'inclusive': + lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) + lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) + lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) + + lexobj.lexstateinfo = stateinfo + lexobj.lexre = lexobj.lexstatere['INITIAL'] + lexobj.lexretext = lexobj.lexstateretext['INITIAL'] + lexobj.lexreflags = reflags + + # Set up ignore variables + lexobj.lexstateignore = linfo.ignore + lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '') + + # Set up error functions + lexobj.lexstateerrorf = linfo.errorf + lexobj.lexerrorf = linfo.errorf.get('INITIAL', None) + if not lexobj.lexerrorf: + errorlog.warning('No t_error rule is defined') + + # Set up eof functions + lexobj.lexstateeoff = linfo.eoff + lexobj.lexeoff = linfo.eoff.get('INITIAL', None) + + # Check state information for ignore and error rules + for s, stype in stateinfo.items(): + if stype == 'exclusive': + if s not in linfo.errorf: + errorlog.warning("No error rule is defined for exclusive state '%s'", s) + if s not in linfo.ignore and lexobj.lexignore: + errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) + elif stype == 'inclusive': + if s not in linfo.errorf: + linfo.errorf[s] = linfo.errorf.get('INITIAL', None) + if s not in linfo.ignore: + linfo.ignore[s] = linfo.ignore.get('INITIAL', '') + + # Create global versions of the token() and input() functions + token = lexobj.token + input = lexobj.input + lexer = lexobj + + # If in optimize mode, we write the lextab + if lextab and optimize: + if outputdir is None: + # If no output directory is set, the location of the output files + # is determined according to the following rules: + # - If lextab specifies a package, files go into that package directory + # - Otherwise, files go in the same directory as the specifying module + if isinstance(lextab, types.ModuleType): + srcfile = lextab.__file__ + else: + if '.' not in lextab: + srcfile = ldict['__file__'] + else: + parts = lextab.split('.') + pkgname = '.'.join(parts[:-1]) + exec('import %s' % pkgname) + srcfile = getattr(sys.modules[pkgname], '__file__', '') + outputdir = os.path.dirname(srcfile) + try: + lexobj.writetab(lextab, outputdir) + if lextab in sys.modules: + del sys.modules[lextab] + except IOError as e: + errorlog.warning("Couldn't write lextab module %r. %s" % (lextab, e)) + + return lexobj + +# ----------------------------------------------------------------------------- +# runmain() +# +# This runs the lexer as a main program +# ----------------------------------------------------------------------------- + +def runmain(lexer=None, data=None): + if not data: + try: + filename = sys.argv[1] + f = open(filename) + data = f.read() + f.close() + except IndexError: + sys.stdout.write('Reading from standard input (type EOF to end):\n') + data = sys.stdin.read() + + if lexer: + _input = lexer.input + else: + _input = input + _input(data) + if lexer: + _token = lexer.token + else: + _token = token + + while True: + tok = _token() + if not tok: + break + sys.stdout.write('(%s,%r,%d,%d)\n' % (tok.type, tok.value, tok.lineno, tok.lexpos)) + +# ----------------------------------------------------------------------------- +# @TOKEN(regex) +# +# This decorator function can be used to set the regex expression on a function +# when its docstring might need to be set in an alternative way +# ----------------------------------------------------------------------------- + +def TOKEN(r): + def set_regex(f): + if hasattr(r, '__call__'): + f.regex = _get_regex(r) + else: + f.regex = r + return f + return set_regex + +# Alternative spelling of the TOKEN decorator +Token = TOKEN diff --git a/license.patch b/license.patch new file mode 100644 index 000000000000..b6cdff196c4b --- /dev/null +++ b/license.patch @@ -0,0 +1,42 @@ +diff --git a/third_party/ply/__init__.py b/third_party/ply/__init__.py +index 853a985..f3da03e 100644 +--- a/third_party/ply/__init__.py ++++ b/third_party/ply/__init__.py +@@ -1,4 +1,36 @@ + # PLY package + # Author: David Beazley (dave@dabeaz.com) ++# ----------------------------------------------------------------------------- ++# ply: __init__.py ++# ++# Copyright (C) 2001-2018 ++# David M. Beazley (Dabeaz LLC) ++# All rights reserved. ++# ++# Redistribution and use in source and binary forms, with or without ++# modification, are permitted provided that the following conditions are ++# met: ++# ++# * Redistributions of source code must retain the above copyright notice, ++# this list of conditions and the following disclaimer. ++# * Redistributions in binary form must reproduce the above copyright notice, ++# this list of conditions and the following disclaimer in the documentation ++# and/or other materials provided with the distribution. ++# * Neither the name of the David Beazley or Dabeaz LLC may be used to ++# endorse or promote products derived from this software without ++# specific prior written permission. ++# ++# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++# ----------------------------------------------------------------------------- + + __version__ = '3.11' + __all__ = ['lex','yacc'] diff --git a/ply.gni b/ply.gni new file mode 100644 index 000000000000..6a2db436f125 --- /dev/null +++ b/ply.gni @@ -0,0 +1,5 @@ +ply_sources = [ + "//third_party/ply/__init__.py", + "//third_party/ply/lex.py", + "//third_party/ply/yacc.py", +] diff --git a/yacc.py b/yacc.py new file mode 100644 index 000000000000..88188a1e8ead --- /dev/null +++ b/yacc.py @@ -0,0 +1,3502 @@ +# ----------------------------------------------------------------------------- +# ply: yacc.py +# +# Copyright (C) 2001-2018 +# David M. Beazley (Dabeaz LLC) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the name of the David Beazley or Dabeaz LLC may be used to +# endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ----------------------------------------------------------------------------- +# +# This implements an LR parser that is constructed from grammar rules defined +# as Python functions. The grammar is specified by supplying the BNF inside +# Python documentation strings. The inspiration for this technique was borrowed +# from John Aycock's Spark parsing system. PLY might be viewed as cross between +# Spark and the GNU bison utility. +# +# The current implementation is only somewhat object-oriented. The +# LR parser itself is defined in terms of an object (which allows multiple +# parsers to co-exist). However, most of the variables used during table +# construction are defined in terms of global variables. Users shouldn't +# notice unless they are trying to define multiple parsers at the same +# time using threads (in which case they should have their head examined). +# +# This implementation supports both SLR and LALR(1) parsing. LALR(1) +# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), +# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, +# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced +# by the more efficient DeRemer and Pennello algorithm. +# +# :::::::: WARNING ::::::: +# +# Construction of LR parsing tables is fairly complicated and expensive. +# To make this module run fast, a *LOT* of work has been put into +# optimization---often at the expensive of readability and what might +# consider to be good Python "coding style." Modify the code at your +# own risk! +# ---------------------------------------------------------------------------- + +import re +import types +import sys +import os.path +import inspect +import warnings + +__version__ = '3.11' +__tabversion__ = '3.10' + +#----------------------------------------------------------------------------- +# === User configurable parameters === +# +# Change these to modify the default behavior of yacc (if you wish) +#----------------------------------------------------------------------------- + +yaccdebug = True # Debugging mode. If set, yacc generates a + # a 'parser.out' file in the current directory + +debug_file = 'parser.out' # Default name of the debugging file +tab_module = 'parsetab' # Default name of the table module +default_lr = 'LALR' # Default LR table generation method + +error_count = 3 # Number of symbols that must be shifted to leave recovery mode + +yaccdevel = False # Set to True if developing yacc. This turns off optimized + # implementations of certain functions. + +resultlimit = 40 # Size limit of results when running in debug mode. + +pickle_protocol = 0 # Protocol to use when writing pickle files + +# String type-checking compatibility +if sys.version_info[0] < 3: + string_types = basestring +else: + string_types = str + +MAXINT = sys.maxsize + +# This object is a stand-in for a logging object created by the +# logging module. PLY will use this by default to create things +# such as the parser.out file. If a user wants more detailed +# information, they can create their own logging object and pass +# it into PLY. + +class PlyLogger(object): + def __init__(self, f): + self.f = f + + def debug(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') + + info = debug + + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') + + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') + + critical = debug + +# Null logger is used when no output is generated. Does nothing. +class NullLogger(object): + def __getattribute__(self, name): + return self + + def __call__(self, *args, **kwargs): + return self + +# Exception raised for yacc-related errors +class YaccError(Exception): + pass + +# Format the result message that the parser produces when running in debug mode. +def format_result(r): + repr_str = repr(r) + if '\n' in repr_str: + repr_str = repr(repr_str) + if len(repr_str) > resultlimit: + repr_str = repr_str[:resultlimit] + ' ...' + result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) + return result + +# Format stack entries when the parser is running in debug mode +def format_stack_entry(r): + repr_str = repr(r) + if '\n' in repr_str: + repr_str = repr(repr_str) + if len(repr_str) < 16: + return repr_str + else: + return '<%s @ 0x%x>' % (type(r).__name__, id(r)) + +# Panic mode error recovery support. This feature is being reworked--much of the +# code here is to offer a deprecation/backwards compatible transition + +_errok = None +_token = None +_restart = None +_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error(). +Instead, invoke the methods on the associated parser instance: + + def p_error(p): + ... + # Use parser.errok(), parser.token(), parser.restart() + ... + + parser = yacc.yacc() +''' + +def errok(): + warnings.warn(_warnmsg) + return _errok() + +def restart(): + warnings.warn(_warnmsg) + return _restart() + +def token(): + warnings.warn(_warnmsg) + return _token() + +# Utility function to call the p_error() function with some deprecation hacks +def call_errorfunc(errorfunc, token, parser): + global _errok, _token, _restart + _errok = parser.errok + _token = parser.token + _restart = parser.restart + r = errorfunc(token) + try: + del _errok, _token, _restart + except NameError: + pass + return r + +#----------------------------------------------------------------------------- +# === LR Parsing Engine === +# +# The following classes are used for the LR parser itself. These are not +# used during table construction and are independent of the actual LR +# table generation algorithm +#----------------------------------------------------------------------------- + +# This class is used to hold non-terminal grammar symbols during parsing. +# It normally has the following attributes set: +# .type = Grammar symbol type +# .value = Symbol value +# .lineno = Starting line number +# .endlineno = Ending line number (optional, set automatically) +# .lexpos = Starting lex position +# .endlexpos = Ending lex position (optional, set automatically) + +class YaccSymbol: + def __str__(self): + return self.type + + def __repr__(self): + return str(self) + +# This class is a wrapper around the objects actually passed to each +# grammar rule. Index lookup and assignment actually assign the +# .value attribute of the underlying YaccSymbol object. +# The lineno() method returns the line number of a given +# item (or 0 if not defined). The linespan() method returns +# a tuple of (startline,endline) representing the range of lines +# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) +# representing the range of positional information for a symbol. + +class YaccProduction: + def __init__(self, s, stack=None): + self.slice = s + self.stack = stack + self.lexer = None + self.parser = None + + def __getitem__(self, n): + if isinstance(n, slice): + return [s.value for s in self.slice[n]] + elif n >= 0: + return self.slice[n].value + else: + return self.stack[n].value + + def __setitem__(self, n, v): + self.slice[n].value = v + + def __getslice__(self, i, j): + return [s.value for s in self.slice[i:j]] + + def __len__(self): + return len(self.slice) + + def lineno(self, n): + return getattr(self.slice[n], 'lineno', 0) + + def set_lineno(self, n, lineno): + self.slice[n].lineno = lineno + + def linespan(self, n): + startline = getattr(self.slice[n], 'lineno', 0) + endline = getattr(self.slice[n], 'endlineno', startline) + return startline, endline + + def lexpos(self, n): + return getattr(self.slice[n], 'lexpos', 0) + + def set_lexpos(self, n, lexpos): + self.slice[n].lexpos = lexpos + + def lexspan(self, n): + startpos = getattr(self.slice[n], 'lexpos', 0) + endpos = getattr(self.slice[n], 'endlexpos', startpos) + return startpos, endpos + + def error(self): + raise SyntaxError + +# ----------------------------------------------------------------------------- +# == LRParser == +# +# The LR Parsing engine. +# ----------------------------------------------------------------------------- + +class LRParser: + def __init__(self, lrtab, errorf): + self.productions = lrtab.lr_productions + self.action = lrtab.lr_action + self.goto = lrtab.lr_goto + self.errorfunc = errorf + self.set_defaulted_states() + self.errorok = True + + def errok(self): + self.errorok = True + + def restart(self): + del self.statestack[:] + del self.symstack[:] + sym = YaccSymbol() + sym.type = '$end' + self.symstack.append(sym) + self.statestack.append(0) + + # Defaulted state support. + # This method identifies parser states where there is only one possible reduction action. + # For such states, the parser can make a choose to make a rule reduction without consuming + # the next look-ahead token. This delayed invocation of the tokenizer can be useful in + # certain kinds of advanced parsing situations where the lexer and parser interact with + # each other or change states (i.e., manipulation of scope, lexer states, etc.). + # + # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions + def set_defaulted_states(self): + self.defaulted_states = {} + for state, actions in self.action.items(): + rules = list(actions.values()) + if len(rules) == 1 and rules[0] < 0: + self.defaulted_states[state] = rules[0] + + def disable_defaulted_states(self): + self.defaulted_states = {} + + def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + if debug or yaccdevel: + if isinstance(debug, int): + debug = PlyLogger(sys.stderr) + return self.parsedebug(input, lexer, debug, tracking, tokenfunc) + elif tracking: + return self.parseopt(input, lexer, debug, tracking, tokenfunc) + else: + return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc) + + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # parsedebug(). + # + # This is the debugging enabled version of parse(). All changes made to the + # parsing engine should be made here. Optimized versions of this function + # are automatically created by the ply/ygen.py script. This script cuts out + # sections enclosed in markers such as this: + # + # #--! DEBUG + # statements + # #--! DEBUG + # + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + #--! parsedebug-start + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery + + #--! DEBUG + debug.info('PLY: PARSE DEBUG START') + #--! DEBUG + + # If no lexer was given, we will try to use the lex module + if not lexer: + from . import lex + lexer = lex.lexer + + # Set up the lexer and parser objects on pslice + pslice.lexer = lexer + pslice.parser = self + + # If input was supplied, pass to lexer + if input is not None: + lexer.input(input) + + if tokenfunc is None: + # Tokenize function + get_token = lexer.token + else: + get_token = tokenfunc + + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token + + # Set up the state and symbol stacks + + statestack = [] # Stack of parsing states + self.statestack = statestack + symstack = [] # Stack of grammar symbols + self.symstack = symstack + + pslice.stack = symstack # Put in the production + errtoken = None # Err token + + # The start state is assumed to be (0,$end) + + statestack.append(0) + sym = YaccSymbol() + sym.type = '$end' + symstack.append(sym) + state = 0 + while True: + # Get the next symbol on the input. If a lookahead symbol + # is already set, we just use that. Otherwise, we'll pull + # the next token off of the lookaheadstack or from the lexer + + #--! DEBUG + debug.debug('') + debug.debug('State : %s', state) + #--! DEBUG + + if state not in defaulted_states: + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + #--! DEBUG + debug.debug('Defaulted state %s: Reduce using %d', state, -t) + #--! DEBUG + + #--! DEBUG + debug.debug('Stack : %s', + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + #--! DEBUG + + if t is not None: + if t > 0: + # shift a symbol on the stack + statestack.append(t) + state = t + + #--! DEBUG + debug.debug('Action : Shift and goto state %s', t) + #--! DEBUG + + symstack.append(lookahead) + lookahead = None + + # Decrease error count on successful shift + if errorcount: + errorcount -= 1 + continue + + if t < 0: + # reduce a symbol on the stack, emit a production + p = prod[-t] + pname = p.name + plen = p.len + + # Get production function + sym = YaccSymbol() + sym.type = pname # Production name + sym.value = None + + #--! DEBUG + if plen: + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, + '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']', + goto[statestack[-1-plen]][pname]) + else: + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], + goto[statestack[-1]][pname]) + + #--! DEBUG + + if plen: + targ = symstack[-plen-1:] + targ[0] = sym + + #--! TRACKING + if tracking: + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1, 'endlineno', t1.lineno) + sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) + #--! TRACKING + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # below as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + del symstack[-plen:] + self.state = state + p.callable(pslice) + del statestack[-plen:] + #--! DEBUG + debug.info('Result : %s', format_result(pslice[0])) + #--! DEBUG + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = error_count + self.errorok = False + + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + else: + + #--! TRACKING + if tracking: + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos + #--! TRACKING + + targ = [sym] + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # above as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + self.state = state + p.callable(pslice) + #--! DEBUG + debug.info('Result : %s', format_result(pslice[0])) + #--! DEBUG + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = error_count + self.errorok = False + + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + if t == 0: + n = symstack[-1] + result = getattr(n, 'value', None) + #--! DEBUG + debug.info('Done : Returning %s', format_result(result)) + debug.info('PLY: PARSE DEBUG END') + #--! DEBUG + return result + + if t is None: + + #--! DEBUG + debug.error('Error : %s', + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + #--! DEBUG + + # We have some kind of parsing error here. To handle + # this, we are going to push the current token onto + # the tokenstack and replace it with an 'error' token. + # If there are any synchronization rules, they may + # catch it. + # + # In addition to pushing the error token, we call call + # the user defined p_error() function if this is the + # first syntax error. This function is only called if + # errorcount == 0. + if errorcount == 0 or self.errorok: + errorcount = error_count + self.errorok = False + errtoken = lookahead + if errtoken.type == '$end': + errtoken = None # End of file! + if self.errorfunc: + if errtoken and not hasattr(errtoken, 'lexer'): + errtoken.lexer = lexer + self.state = state + tok = call_errorfunc(self.errorfunc, errtoken, self) + if self.errorok: + # User must have done some kind of panic + # mode recovery on their own. The + # returned token is the next lookahead + lookahead = tok + errtoken = None + continue + else: + if errtoken: + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno + else: + lineno = 0 + if lineno: + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) + else: + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) + else: + sys.stderr.write('yacc: Parse error in input. EOF\n') + return + + else: + errorcount = error_count + + # case 1: the statestack only has 1 entry on it. If we're in this state, the + # entire parse has been rolled back and we're completely hosed. The token is + # discarded and we just keep going. + + if len(statestack) <= 1 and lookahead.type != '$end': + lookahead = None + errtoken = None + state = 0 + # Nuke the pushback stack + del lookaheadstack[:] + continue + + # case 2: the statestack has a couple of entries on it, but we're + # at the end of the file. nuke the top entry and generate an error token + + # Start nuking entries on the stack + if lookahead.type == '$end': + # Whoa. We're really hosed here. Bail out + return + + if lookahead.type != 'error': + sym = symstack[-1] + if sym.type == 'error': + # Hmmm. Error is on top of stack, we'll just nuke input + # symbol and continue + #--! TRACKING + if tracking: + sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) + sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) + #--! TRACKING + lookahead = None + continue + + # Create the error symbol for the first time and make it the new lookahead symbol + t = YaccSymbol() + t.type = 'error' + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos + t.value = lookahead + lookaheadstack.append(lookahead) + lookahead = t + else: + sym = symstack.pop() + #--! TRACKING + if tracking: + lookahead.lineno = sym.lineno + lookahead.lexpos = sym.lexpos + #--! TRACKING + statestack.pop() + state = statestack[-1] + + continue + + # Call an error function here + raise RuntimeError('yacc: internal parser error!!!\n') + + #--! parsedebug-end + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # parseopt(). + # + # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY! + # This code is automatically generated by the ply/ygen.py script. Make + # changes to the parsedebug() method instead. + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + #--! parseopt-start + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery + + + # If no lexer was given, we will try to use the lex module + if not lexer: + from . import lex + lexer = lex.lexer + + # Set up the lexer and parser objects on pslice + pslice.lexer = lexer + pslice.parser = self + + # If input was supplied, pass to lexer + if input is not None: + lexer.input(input) + + if tokenfunc is None: + # Tokenize function + get_token = lexer.token + else: + get_token = tokenfunc + + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token + + # Set up the state and symbol stacks + + statestack = [] # Stack of parsing states + self.statestack = statestack + symstack = [] # Stack of grammar symbols + self.symstack = symstack + + pslice.stack = symstack # Put in the production + errtoken = None # Err token + + # The start state is assumed to be (0,$end) + + statestack.append(0) + sym = YaccSymbol() + sym.type = '$end' + symstack.append(sym) + state = 0 + while True: + # Get the next symbol on the input. If a lookahead symbol + # is already set, we just use that. Otherwise, we'll pull + # the next token off of the lookaheadstack or from the lexer + + + if state not in defaulted_states: + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + + + if t is not None: + if t > 0: + # shift a symbol on the stack + statestack.append(t) + state = t + + + symstack.append(lookahead) + lookahead = None + + # Decrease error count on successful shift + if errorcount: + errorcount -= 1 + continue + + if t < 0: + # reduce a symbol on the stack, emit a production + p = prod[-t] + pname = p.name + plen = p.len + + # Get production function + sym = YaccSymbol() + sym.type = pname # Production name + sym.value = None + + + if plen: + targ = symstack[-plen-1:] + targ[0] = sym + + #--! TRACKING + if tracking: + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1, 'endlineno', t1.lineno) + sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) + #--! TRACKING + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # below as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + del symstack[-plen:] + self.state = state + p.callable(pslice) + del statestack[-plen:] + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = error_count + self.errorok = False + + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + else: + + #--! TRACKING + if tracking: + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos + #--! TRACKING + + targ = [sym] + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # above as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + self.state = state + p.callable(pslice) + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = error_count + self.errorok = False + + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + if t == 0: + n = symstack[-1] + result = getattr(n, 'value', None) + return result + + if t is None: + + + # We have some kind of parsing error here. To handle + # this, we are going to push the current token onto + # the tokenstack and replace it with an 'error' token. + # If there are any synchronization rules, they may + # catch it. + # + # In addition to pushing the error token, we call call + # the user defined p_error() function if this is the + # first syntax error. This function is only called if + # errorcount == 0. + if errorcount == 0 or self.errorok: + errorcount = error_count + self.errorok = False + errtoken = lookahead + if errtoken.type == '$end': + errtoken = None # End of file! + if self.errorfunc: + if errtoken and not hasattr(errtoken, 'lexer'): + errtoken.lexer = lexer + self.state = state + tok = call_errorfunc(self.errorfunc, errtoken, self) + if self.errorok: + # User must have done some kind of panic + # mode recovery on their own. The + # returned token is the next lookahead + lookahead = tok + errtoken = None + continue + else: + if errtoken: + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno + else: + lineno = 0 + if lineno: + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) + else: + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) + else: + sys.stderr.write('yacc: Parse error in input. EOF\n') + return + + else: + errorcount = error_count + + # case 1: the statestack only has 1 entry on it. If we're in this state, the + # entire parse has been rolled back and we're completely hosed. The token is + # discarded and we just keep going. + + if len(statestack) <= 1 and lookahead.type != '$end': + lookahead = None + errtoken = None + state = 0 + # Nuke the pushback stack + del lookaheadstack[:] + continue + + # case 2: the statestack has a couple of entries on it, but we're + # at the end of the file. nuke the top entry and generate an error token + + # Start nuking entries on the stack + if lookahead.type == '$end': + # Whoa. We're really hosed here. Bail out + return + + if lookahead.type != 'error': + sym = symstack[-1] + if sym.type == 'error': + # Hmmm. Error is on top of stack, we'll just nuke input + # symbol and continue + #--! TRACKING + if tracking: + sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) + sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) + #--! TRACKING + lookahead = None + continue + + # Create the error symbol for the first time and make it the new lookahead symbol + t = YaccSymbol() + t.type = 'error' + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos + t.value = lookahead + lookaheadstack.append(lookahead) + lookahead = t + else: + sym = symstack.pop() + #--! TRACKING + if tracking: + lookahead.lineno = sym.lineno + lookahead.lexpos = sym.lexpos + #--! TRACKING + statestack.pop() + state = statestack[-1] + + continue + + # Call an error function here + raise RuntimeError('yacc: internal parser error!!!\n') + + #--! parseopt-end + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # parseopt_notrack(). + # + # Optimized version of parseopt() with line number tracking removed. + # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated + # by the ply/ygen.py script. Make changes to the parsedebug() method instead. + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + #--! parseopt-notrack-start + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery + + + # If no lexer was given, we will try to use the lex module + if not lexer: + from . import lex + lexer = lex.lexer + + # Set up the lexer and parser objects on pslice + pslice.lexer = lexer + pslice.parser = self + + # If input was supplied, pass to lexer + if input is not None: + lexer.input(input) + + if tokenfunc is None: + # Tokenize function + get_token = lexer.token + else: + get_token = tokenfunc + + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token + + # Set up the state and symbol stacks + + statestack = [] # Stack of parsing states + self.statestack = statestack + symstack = [] # Stack of grammar symbols + self.symstack = symstack + + pslice.stack = symstack # Put in the production + errtoken = None # Err token + + # The start state is assumed to be (0,$end) + + statestack.append(0) + sym = YaccSymbol() + sym.type = '$end' + symstack.append(sym) + state = 0 + while True: + # Get the next symbol on the input. If a lookahead symbol + # is already set, we just use that. Otherwise, we'll pull + # the next token off of the lookaheadstack or from the lexer + + + if state not in defaulted_states: + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + + + if t is not None: + if t > 0: + # shift a symbol on the stack + statestack.append(t) + state = t + + + symstack.append(lookahead) + lookahead = None + + # Decrease error count on successful shift + if errorcount: + errorcount -= 1 + continue + + if t < 0: + # reduce a symbol on the stack, emit a production + p = prod[-t] + pname = p.name + plen = p.len + + # Get production function + sym = YaccSymbol() + sym.type = pname # Production name + sym.value = None + + + if plen: + targ = symstack[-plen-1:] + targ[0] = sym + + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # below as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + del symstack[-plen:] + self.state = state + p.callable(pslice) + del statestack[-plen:] + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = error_count + self.errorok = False + + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + else: + + + targ = [sym] + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # above as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + self.state = state + p.callable(pslice) + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = error_count + self.errorok = False + + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + if t == 0: + n = symstack[-1] + result = getattr(n, 'value', None) + return result + + if t is None: + + + # We have some kind of parsing error here. To handle + # this, we are going to push the current token onto + # the tokenstack and replace it with an 'error' token. + # If there are any synchronization rules, they may + # catch it. + # + # In addition to pushing the error token, we call call + # the user defined p_error() function if this is the + # first syntax error. This function is only called if + # errorcount == 0. + if errorcount == 0 or self.errorok: + errorcount = error_count + self.errorok = False + errtoken = lookahead + if errtoken.type == '$end': + errtoken = None # End of file! + if self.errorfunc: + if errtoken and not hasattr(errtoken, 'lexer'): + errtoken.lexer = lexer + self.state = state + tok = call_errorfunc(self.errorfunc, errtoken, self) + if self.errorok: + # User must have done some kind of panic + # mode recovery on their own. The + # returned token is the next lookahead + lookahead = tok + errtoken = None + continue + else: + if errtoken: + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno + else: + lineno = 0 + if lineno: + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) + else: + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) + else: + sys.stderr.write('yacc: Parse error in input. EOF\n') + return + + else: + errorcount = error_count + + # case 1: the statestack only has 1 entry on it. If we're in this state, the + # entire parse has been rolled back and we're completely hosed. The token is + # discarded and we just keep going. + + if len(statestack) <= 1 and lookahead.type != '$end': + lookahead = None + errtoken = None + state = 0 + # Nuke the pushback stack + del lookaheadstack[:] + continue + + # case 2: the statestack has a couple of entries on it, but we're + # at the end of the file. nuke the top entry and generate an error token + + # Start nuking entries on the stack + if lookahead.type == '$end': + # Whoa. We're really hosed here. Bail out + return + + if lookahead.type != 'error': + sym = symstack[-1] + if sym.type == 'error': + # Hmmm. Error is on top of stack, we'll just nuke input + # symbol and continue + lookahead = None + continue + + # Create the error symbol for the first time and make it the new lookahead symbol + t = YaccSymbol() + t.type = 'error' + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos + t.value = lookahead + lookaheadstack.append(lookahead) + lookahead = t + else: + sym = symstack.pop() + statestack.pop() + state = statestack[-1] + + continue + + # Call an error function here + raise RuntimeError('yacc: internal parser error!!!\n') + + #--! parseopt-notrack-end + +# ----------------------------------------------------------------------------- +# === Grammar Representation === +# +# The following functions, classes, and variables are used to represent and +# manipulate the rules that make up a grammar. +# ----------------------------------------------------------------------------- + +# regex matching identifiers +_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') + +# ----------------------------------------------------------------------------- +# class Production: +# +# This class stores the raw information about a single production or grammar rule. +# A grammar rule refers to a specification such as this: +# +# expr : expr PLUS term +# +# Here are the basic attributes defined on all productions +# +# name - Name of the production. For example 'expr' +# prod - A list of symbols on the right side ['expr','PLUS','term'] +# prec - Production precedence level +# number - Production number. +# func - Function that executes on reduce +# file - File where production function is defined +# lineno - Line number where production function is defined +# +# The following attributes are defined or optional. +# +# len - Length of the production (number of symbols on right hand side) +# usyms - Set of unique symbols found in the production +# ----------------------------------------------------------------------------- + +class Production(object): + reduced = 0 + def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): + self.name = name + self.prod = tuple(prod) + self.number = number + self.func = func + self.callable = None + self.file = file + self.line = line + self.prec = precedence + + # Internal settings used during table construction + + self.len = len(self.prod) # Length of the production + + # Create a list of unique production symbols used in the production + self.usyms = [] + for s in self.prod: + if s not in self.usyms: + self.usyms.append(s) + + # List of all LR items for the production + self.lr_items = [] + self.lr_next = None + + # Create a string representation + if self.prod: + self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) + else: + self.str = '%s -> ' % self.name + + def __str__(self): + return self.str + + def __repr__(self): + return 'Production(' + str(self) + ')' + + def __len__(self): + return len(self.prod) + + def __nonzero__(self): + return 1 + + def __getitem__(self, index): + return self.prod[index] + + # Return the nth lr_item from the production (or None if at the end) + def lr_item(self, n): + if n > len(self.prod): + return None + p = LRItem(self, n) + # Precompute the list of productions immediately following. + try: + p.lr_after = self.Prodnames[p.prod[n+1]] + except (IndexError, KeyError): + p.lr_after = [] + try: + p.lr_before = p.prod[n-1] + except IndexError: + p.lr_before = None + return p + + # Bind the production function name to a callable + def bind(self, pdict): + if self.func: + self.callable = pdict[self.func] + +# This class serves as a minimal standin for Production objects when +# reading table data from files. It only contains information +# actually used by the LR parsing engine, plus some additional +# debugging information. +class MiniProduction(object): + def __init__(self, str, name, len, func, file, line): + self.name = name + self.len = len + self.func = func + self.callable = None + self.file = file + self.line = line + self.str = str + + def __str__(self): + return self.str + + def __repr__(self): + return 'MiniProduction(%s)' % self.str + + # Bind the production function name to a callable + def bind(self, pdict): + if self.func: + self.callable = pdict[self.func] + + +# ----------------------------------------------------------------------------- +# class LRItem +# +# This class represents a specific stage of parsing a production rule. For +# example: +# +# expr : expr . PLUS term +# +# In the above, the "." represents the current location of the parse. Here +# basic attributes: +# +# name - Name of the production. For example 'expr' +# prod - A list of symbols on the right side ['expr','.', 'PLUS','term'] +# number - Production number. +# +# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term' +# then lr_next refers to 'expr -> expr PLUS . term' +# lr_index - LR item index (location of the ".") in the prod list. +# lookaheads - LALR lookahead symbols for this item +# len - Length of the production (number of symbols on right hand side) +# lr_after - List of all productions that immediately follow +# lr_before - Grammar symbol immediately before +# ----------------------------------------------------------------------------- + +class LRItem(object): + def __init__(self, p, n): + self.name = p.name + self.prod = list(p.prod) + self.number = p.number + self.lr_index = n + self.lookaheads = {} + self.prod.insert(n, '.') + self.prod = tuple(self.prod) + self.len = len(self.prod) + self.usyms = p.usyms + + def __str__(self): + if self.prod: + s = '%s -> %s' % (self.name, ' '.join(self.prod)) + else: + s = '%s -> ' % self.name + return s + + def __repr__(self): + return 'LRItem(' + str(self) + ')' + +# ----------------------------------------------------------------------------- +# rightmost_terminal() +# +# Return the rightmost terminal from a list of symbols. Used in add_production() +# ----------------------------------------------------------------------------- +def rightmost_terminal(symbols, terminals): + i = len(symbols) - 1 + while i >= 0: + if symbols[i] in terminals: + return symbols[i] + i -= 1 + return None + +# ----------------------------------------------------------------------------- +# === GRAMMAR CLASS === +# +# The following class represents the contents of the specified grammar along +# with various computed properties such as first sets, follow sets, LR items, etc. +# This data is used for critical parts of the table generation process later. +# ----------------------------------------------------------------------------- + +class GrammarError(YaccError): + pass + +class Grammar(object): + def __init__(self, terminals): + self.Productions = [None] # A list of all of the productions. The first + # entry is always reserved for the purpose of + # building an augmented grammar + + self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all + # productions of that nonterminal. + + self.Prodmap = {} # A dictionary that is only used to detect duplicate + # productions. + + self.Terminals = {} # A dictionary mapping the names of terminal symbols to a + # list of the rules where they are used. + + for term in terminals: + self.Terminals[term] = [] + + self.Terminals['error'] = [] + + self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list + # of rule numbers where they are used. + + self.First = {} # A dictionary of precomputed FIRST(x) symbols + + self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols + + self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the + # form ('right',level) or ('nonassoc', level) or ('left',level) + + self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer. + # This is only used to provide error checking and to generate + # a warning about unused precedence rules. + + self.Start = None # Starting symbol for the grammar + + + def __len__(self): + return len(self.Productions) + + def __getitem__(self, index): + return self.Productions[index] + + # ----------------------------------------------------------------------------- + # set_precedence() + # + # Sets the precedence for a given terminal. assoc is the associativity such as + # 'left','right', or 'nonassoc'. level is a numeric level. + # + # ----------------------------------------------------------------------------- + + def set_precedence(self, term, assoc, level): + assert self.Productions == [None], 'Must call set_precedence() before add_production()' + if term in self.Precedence: + raise GrammarError('Precedence already specified for terminal %r' % term) + if assoc not in ['left', 'right', 'nonassoc']: + raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") + self.Precedence[term] = (assoc, level) + + # ----------------------------------------------------------------------------- + # add_production() + # + # Given an action function, this function assembles a production rule and + # computes its precedence level. + # + # The production rule is supplied as a list of symbols. For example, + # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and + # symbols ['expr','PLUS','term']. + # + # Precedence is determined by the precedence of the right-most non-terminal + # or the precedence of a terminal specified by %prec. + # + # A variety of error checks are performed to make sure production symbols + # are valid and that %prec is used correctly. + # ----------------------------------------------------------------------------- + + def add_production(self, prodname, syms, func=None, file='', line=0): + + if prodname in self.Terminals: + raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname)) + if prodname == 'error': + raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname)) + if not _is_identifier.match(prodname): + raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname)) + + # Look for literal tokens + for n, s in enumerate(syms): + if s[0] in "'\"": + try: + c = eval(s) + if (len(c) > 1): + raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' % + (file, line, s, prodname)) + if c not in self.Terminals: + self.Terminals[c] = [] + syms[n] = c + continue + except SyntaxError: + pass + if not _is_identifier.match(s) and s != '%prec': + raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname)) + + # Determine the precedence level + if '%prec' in syms: + if syms[-1] == '%prec': + raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line)) + if syms[-2] != '%prec': + raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' % + (file, line)) + precname = syms[-1] + prodprec = self.Precedence.get(precname) + if not prodprec: + raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname)) + else: + self.UsedPrecedence.add(precname) + del syms[-2:] # Drop %prec from the rule + else: + # If no %prec, precedence is determined by the rightmost terminal symbol + precname = rightmost_terminal(syms, self.Terminals) + prodprec = self.Precedence.get(precname, ('right', 0)) + + # See if the rule is already in the rulemap + map = '%s -> %s' % (prodname, syms) + if map in self.Prodmap: + m = self.Prodmap[map] + raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) + + 'Previous definition at %s:%d' % (m.file, m.line)) + + # From this point on, everything is valid. Create a new Production instance + pnumber = len(self.Productions) + if prodname not in self.Nonterminals: + self.Nonterminals[prodname] = [] + + # Add the production number to Terminals and Nonterminals + for t in syms: + if t in self.Terminals: + self.Terminals[t].append(pnumber) + else: + if t not in self.Nonterminals: + self.Nonterminals[t] = [] + self.Nonterminals[t].append(pnumber) + + # Create a production and add it to the list of productions + p = Production(pnumber, prodname, syms, prodprec, func, file, line) + self.Productions.append(p) + self.Prodmap[map] = p + + # Add to the global productions list + try: + self.Prodnames[prodname].append(p) + except KeyError: + self.Prodnames[prodname] = [p] + + # ----------------------------------------------------------------------------- + # set_start() + # + # Sets the starting symbol and creates the augmented grammar. Production + # rule 0 is S' -> start where start is the start symbol. + # ----------------------------------------------------------------------------- + + def set_start(self, start=None): + if not start: + start = self.Productions[1].name + if start not in self.Nonterminals: + raise GrammarError('start symbol %s undefined' % start) + self.Productions[0] = Production(0, "S'", [start]) + self.Nonterminals[start].append(0) + self.Start = start + + # ----------------------------------------------------------------------------- + # find_unreachable() + # + # Find all of the nonterminal symbols that can't be reached from the starting + # symbol. Returns a list of nonterminals that can't be reached. + # ----------------------------------------------------------------------------- + + def find_unreachable(self): + + # Mark all symbols that are reachable from a symbol s + def mark_reachable_from(s): + if s in reachable: + return + reachable.add(s) + for p in self.Prodnames.get(s, []): + for r in p.prod: + mark_reachable_from(r) + + reachable = set() + mark_reachable_from(self.Productions[0].prod[0]) + return [s for s in self.Nonterminals if s not in reachable] + + # ----------------------------------------------------------------------------- + # infinite_cycles() + # + # This function looks at the various parsing rules and tries to detect + # infinite recursion cycles (grammar rules where there is no possible way + # to derive a string of only terminals). + # ----------------------------------------------------------------------------- + + def infinite_cycles(self): + terminates = {} + + # Terminals: + for t in self.Terminals: + terminates[t] = True + + terminates['$end'] = True + + # Nonterminals: + + # Initialize to false: + for n in self.Nonterminals: + terminates[n] = False + + # Then propagate termination until no change: + while True: + some_change = False + for (n, pl) in self.Prodnames.items(): + # Nonterminal n terminates iff any of its productions terminates. + for p in pl: + # Production p terminates iff all of its rhs symbols terminate. + for s in p.prod: + if not terminates[s]: + # The symbol s does not terminate, + # so production p does not terminate. + p_terminates = False + break + else: + # didn't break from the loop, + # so every symbol s terminates + # so production p terminates. + p_terminates = True + + if p_terminates: + # symbol n terminates! + if not terminates[n]: + terminates[n] = True + some_change = True + # Don't need to consider any more productions for this n. + break + + if not some_change: + break + + infinite = [] + for (s, term) in terminates.items(): + if not term: + if s not in self.Prodnames and s not in self.Terminals and s != 'error': + # s is used-but-not-defined, and we've already warned of that, + # so it would be overkill to say that it's also non-terminating. + pass + else: + infinite.append(s) + + return infinite + + # ----------------------------------------------------------------------------- + # undefined_symbols() + # + # Find all symbols that were used the grammar, but not defined as tokens or + # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol + # and prod is the production where the symbol was used. + # ----------------------------------------------------------------------------- + def undefined_symbols(self): + result = [] + for p in self.Productions: + if not p: + continue + + for s in p.prod: + if s not in self.Prodnames and s not in self.Terminals and s != 'error': + result.append((s, p)) + return result + + # ----------------------------------------------------------------------------- + # unused_terminals() + # + # Find all terminals that were defined, but not used by the grammar. Returns + # a list of all symbols. + # ----------------------------------------------------------------------------- + def unused_terminals(self): + unused_tok = [] + for s, v in self.Terminals.items(): + if s != 'error' and not v: + unused_tok.append(s) + + return unused_tok + + # ------------------------------------------------------------------------------ + # unused_rules() + # + # Find all grammar rules that were defined, but not used (maybe not reachable) + # Returns a list of productions. + # ------------------------------------------------------------------------------ + + def unused_rules(self): + unused_prod = [] + for s, v in self.Nonterminals.items(): + if not v: + p = self.Prodnames[s][0] + unused_prod.append(p) + return unused_prod + + # ----------------------------------------------------------------------------- + # unused_precedence() + # + # Returns a list of tuples (term,precedence) corresponding to precedence + # rules that were never used by the grammar. term is the name of the terminal + # on which precedence was applied and precedence is a string such as 'left' or + # 'right' corresponding to the type of precedence. + # ----------------------------------------------------------------------------- + + def unused_precedence(self): + unused = [] + for termname in self.Precedence: + if not (termname in self.Terminals or termname in self.UsedPrecedence): + unused.append((termname, self.Precedence[termname][0])) + + return unused + + # ------------------------------------------------------------------------- + # _first() + # + # Compute the value of FIRST1(beta) where beta is a tuple of symbols. + # + # During execution of compute_first1, the result may be incomplete. + # Afterward (e.g., when called from compute_follow()), it will be complete. + # ------------------------------------------------------------------------- + def _first(self, beta): + + # We are computing First(x1,x2,x3,...,xn) + result = [] + for x in beta: + x_produces_empty = False + + # Add all the non- symbols of First[x] to the result. + for f in self.First[x]: + if f == '': + x_produces_empty = True + else: + if f not in result: + result.append(f) + + if x_produces_empty: + # We have to consider the next x in beta, + # i.e. stay in the loop. + pass + else: + # We don't have to consider any further symbols in beta. + break + else: + # There was no 'break' from the loop, + # so x_produces_empty was true for all x in beta, + # so beta produces empty as well. + result.append('') + + return result + + # ------------------------------------------------------------------------- + # compute_first() + # + # Compute the value of FIRST1(X) for all symbols + # ------------------------------------------------------------------------- + def compute_first(self): + if self.First: + return self.First + + # Terminals: + for t in self.Terminals: + self.First[t] = [t] + + self.First['$end'] = ['$end'] + + # Nonterminals: + + # Initialize to the empty set: + for n in self.Nonterminals: + self.First[n] = [] + + # Then propagate symbols until no change: + while True: + some_change = False + for n in self.Nonterminals: + for p in self.Prodnames[n]: + for f in self._first(p.prod): + if f not in self.First[n]: + self.First[n].append(f) + some_change = True + if not some_change: + break + + return self.First + + # --------------------------------------------------------------------- + # compute_follow() + # + # Computes all of the follow sets for every non-terminal symbol. The + # follow set is the set of all symbols that might follow a given + # non-terminal. See the Dragon book, 2nd Ed. p. 189. + # --------------------------------------------------------------------- + def compute_follow(self, start=None): + # If already computed, return the result + if self.Follow: + return self.Follow + + # If first sets not computed yet, do that first. + if not self.First: + self.compute_first() + + # Add '$end' to the follow list of the start symbol + for k in self.Nonterminals: + self.Follow[k] = [] + + if not start: + start = self.Productions[1].name + + self.Follow[start] = ['$end'] + + while True: + didadd = False + for p in self.Productions[1:]: + # Here is the production set + for i, B in enumerate(p.prod): + if B in self.Nonterminals: + # Okay. We got a non-terminal in a production + fst = self._first(p.prod[i+1:]) + hasempty = False + for f in fst: + if f != '' and f not in self.Follow[B]: + self.Follow[B].append(f) + didadd = True + if f == '': + hasempty = True + if hasempty or i == (len(p.prod)-1): + # Add elements of follow(a) to follow(b) + for f in self.Follow[p.name]: + if f not in self.Follow[B]: + self.Follow[B].append(f) + didadd = True + if not didadd: + break + return self.Follow + + + # ----------------------------------------------------------------------------- + # build_lritems() + # + # This function walks the list of productions and builds a complete set of the + # LR items. The LR items are stored in two ways: First, they are uniquely + # numbered and placed in the list _lritems. Second, a linked list of LR items + # is built for each production. For example: + # + # E -> E PLUS E + # + # Creates the list + # + # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] + # ----------------------------------------------------------------------------- + + def build_lritems(self): + for p in self.Productions: + lastlri = p + i = 0 + lr_items = [] + while True: + if i > len(p): + lri = None + else: + lri = LRItem(p, i) + # Precompute the list of productions immediately following + try: + lri.lr_after = self.Prodnames[lri.prod[i+1]] + except (IndexError, KeyError): + lri.lr_after = [] + try: + lri.lr_before = lri.prod[i-1] + except IndexError: + lri.lr_before = None + + lastlri.lr_next = lri + if not lri: + break + lr_items.append(lri) + lastlri = lri + i += 1 + p.lr_items = lr_items + +# ----------------------------------------------------------------------------- +# == Class LRTable == +# +# This basic class represents a basic table of LR parsing information. +# Methods for generating the tables are not defined here. They are defined +# in the derived class LRGeneratedTable. +# ----------------------------------------------------------------------------- + +class VersionError(YaccError): + pass + +class LRTable(object): + def __init__(self): + self.lr_action = None + self.lr_goto = None + self.lr_productions = None + self.lr_method = None + + def read_table(self, module): + if isinstance(module, types.ModuleType): + parsetab = module + else: + exec('import %s' % module) + parsetab = sys.modules[module] + + if parsetab._tabversion != __tabversion__: + raise VersionError('yacc table file version is out of date') + + self.lr_action = parsetab._lr_action + self.lr_goto = parsetab._lr_goto + + self.lr_productions = [] + for p in parsetab._lr_productions: + self.lr_productions.append(MiniProduction(*p)) + + self.lr_method = parsetab._lr_method + return parsetab._lr_signature + + def read_pickle(self, filename): + try: + import cPickle as pickle + except ImportError: + import pickle + + if not os.path.exists(filename): + raise ImportError + + in_f = open(filename, 'rb') + + tabversion = pickle.load(in_f) + if tabversion != __tabversion__: + raise VersionError('yacc table file version is out of date') + self.lr_method = pickle.load(in_f) + signature = pickle.load(in_f) + self.lr_action = pickle.load(in_f) + self.lr_goto = pickle.load(in_f) + productions = pickle.load(in_f) + + self.lr_productions = [] + for p in productions: + self.lr_productions.append(MiniProduction(*p)) + + in_f.close() + return signature + + # Bind all production function names to callable objects in pdict + def bind_callables(self, pdict): + for p in self.lr_productions: + p.bind(pdict) + + +# ----------------------------------------------------------------------------- +# === LR Generator === +# +# The following classes and functions are used to generate LR parsing tables on +# a grammar. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# digraph() +# traverse() +# +# The following two functions are used to compute set valued functions +# of the form: +# +# F(x) = F'(x) U U{F(y) | x R y} +# +# This is used to compute the values of Read() sets as well as FOLLOW sets +# in LALR(1) generation. +# +# Inputs: X - An input set +# R - A relation +# FP - Set-valued function +# ------------------------------------------------------------------------------ + +def digraph(X, R, FP): + N = {} + for x in X: + N[x] = 0 + stack = [] + F = {} + for x in X: + if N[x] == 0: + traverse(x, N, stack, F, X, R, FP) + return F + +def traverse(x, N, stack, F, X, R, FP): + stack.append(x) + d = len(stack) + N[x] = d + F[x] = FP(x) # F(X) <- F'(x) + + rel = R(x) # Get y's related to x + for y in rel: + if N[y] == 0: + traverse(y, N, stack, F, X, R, FP) + N[x] = min(N[x], N[y]) + for a in F.get(y, []): + if a not in F[x]: + F[x].append(a) + if N[x] == d: + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() + while element != x: + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() + +class LALRError(YaccError): + pass + +# ----------------------------------------------------------------------------- +# == LRGeneratedTable == +# +# This class implements the LR table generation algorithm. There are no +# public methods except for write() +# ----------------------------------------------------------------------------- + +class LRGeneratedTable(LRTable): + def __init__(self, grammar, method='LALR', log=None): + if method not in ['SLR', 'LALR']: + raise LALRError('Unsupported method %s' % method) + + self.grammar = grammar + self.lr_method = method + + # Set up the logger + if not log: + log = NullLogger() + self.log = log + + # Internal attributes + self.lr_action = {} # Action table + self.lr_goto = {} # Goto table + self.lr_productions = grammar.Productions # Copy of grammar Production array + self.lr_goto_cache = {} # Cache of computed gotos + self.lr0_cidhash = {} # Cache of closures + + self._add_count = 0 # Internal counter used to detect cycles + + # Diagonistic information filled in by the table generator + self.sr_conflict = 0 + self.rr_conflict = 0 + self.conflicts = [] # List of conflicts + + self.sr_conflicts = [] + self.rr_conflicts = [] + + # Build the tables + self.grammar.build_lritems() + self.grammar.compute_first() + self.grammar.compute_follow() + self.lr_parse_table() + + # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. + + def lr0_closure(self, I): + self._add_count += 1 + + # Add everything in I to J + J = I[:] + didadd = True + while didadd: + didadd = False + for j in J: + for x in j.lr_after: + if getattr(x, 'lr0_added', 0) == self._add_count: + continue + # Add B --> .G to J + J.append(x.lr_next) + x.lr0_added = self._add_count + didadd = True + + return J + + # Compute the LR(0) goto function goto(I,X) where I is a set + # of LR(0) items and X is a grammar symbol. This function is written + # in a way that guarantees uniqueness of the generated goto sets + # (i.e. the same goto set will never be returned as two different Python + # objects). With uniqueness, we can later do fast set comparisons using + # id(obj) instead of element-wise comparison. + + def lr0_goto(self, I, x): + # First we look for a previously cached entry + g = self.lr_goto_cache.get((id(I), x)) + if g: + return g + + # Now we generate the goto set in a way that guarantees uniqueness + # of the result + + s = self.lr_goto_cache.get(x) + if not s: + s = {} + self.lr_goto_cache[x] = s + + gs = [] + for p in I: + n = p.lr_next + if n and n.lr_before == x: + s1 = s.get(id(n)) + if not s1: + s1 = {} + s[id(n)] = s1 + gs.append(n) + s = s1 + g = s.get('$end') + if not g: + if gs: + g = self.lr0_closure(gs) + s['$end'] = g + else: + s['$end'] = gs + self.lr_goto_cache[(id(I), x)] = g + return g + + # Compute the LR(0) sets of item function + def lr0_items(self): + C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] + i = 0 + for I in C: + self.lr0_cidhash[id(I)] = i + i += 1 + + # Loop over the items in C and each grammar symbols + i = 0 + while i < len(C): + I = C[i] + i += 1 + + # Collect all of the symbols that could possibly be in the goto(I,X) sets + asyms = {} + for ii in I: + for s in ii.usyms: + asyms[s] = None + + for x in asyms: + g = self.lr0_goto(I, x) + if not g or id(g) in self.lr0_cidhash: + continue + self.lr0_cidhash[id(g)] = len(C) + C.append(g) + + return C + + # ----------------------------------------------------------------------------- + # ==== LALR(1) Parsing ==== + # + # LALR(1) parsing is almost exactly the same as SLR except that instead of + # relying upon Follow() sets when performing reductions, a more selective + # lookahead set that incorporates the state of the LR(0) machine is utilized. + # Thus, we mainly just have to focus on calculating the lookahead sets. + # + # The method used here is due to DeRemer and Pennelo (1982). + # + # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1) + # Lookahead Sets", ACM Transactions on Programming Languages and Systems, + # Vol. 4, No. 4, Oct. 1982, pp. 615-649 + # + # Further details can also be found in: + # + # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", + # McGraw-Hill Book Company, (1985). + # + # ----------------------------------------------------------------------------- + + # ----------------------------------------------------------------------------- + # compute_nullable_nonterminals() + # + # Creates a dictionary containing all of the non-terminals that might produce + # an empty production. + # ----------------------------------------------------------------------------- + + def compute_nullable_nonterminals(self): + nullable = set() + num_nullable = 0 + while True: + for p in self.grammar.Productions[1:]: + if p.len == 0: + nullable.add(p.name) + continue + for t in p.prod: + if t not in nullable: + break + else: + nullable.add(p.name) + if len(nullable) == num_nullable: + break + num_nullable = len(nullable) + return nullable + + # ----------------------------------------------------------------------------- + # find_nonterminal_trans(C) + # + # Given a set of LR(0) items, this functions finds all of the non-terminal + # transitions. These are transitions in which a dot appears immediately before + # a non-terminal. Returns a list of tuples of the form (state,N) where state + # is the state number and N is the nonterminal symbol. + # + # The input C is the set of LR(0) items. + # ----------------------------------------------------------------------------- + + def find_nonterminal_transitions(self, C): + trans = [] + for stateno, state in enumerate(C): + for p in state: + if p.lr_index < p.len - 1: + t = (stateno, p.prod[p.lr_index+1]) + if t[1] in self.grammar.Nonterminals: + if t not in trans: + trans.append(t) + return trans + + # ----------------------------------------------------------------------------- + # dr_relation() + # + # Computes the DR(p,A) relationships for non-terminal transitions. The input + # is a tuple (state,N) where state is a number and N is a nonterminal symbol. + # + # Returns a list of terminals. + # ----------------------------------------------------------------------------- + + def dr_relation(self, C, trans, nullable): + state, N = trans + terms = [] + + g = self.lr0_goto(C[state], N) + for p in g: + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index+1] + if a in self.grammar.Terminals: + if a not in terms: + terms.append(a) + + # This extra bit is to handle the start state + if state == 0 and N == self.grammar.Productions[0].prod[0]: + terms.append('$end') + + return terms + + # ----------------------------------------------------------------------------- + # reads_relation() + # + # Computes the READS() relation (p,A) READS (t,C). + # ----------------------------------------------------------------------------- + + def reads_relation(self, C, trans, empty): + # Look for empty transitions + rel = [] + state, N = trans + + g = self.lr0_goto(C[state], N) + j = self.lr0_cidhash.get(id(g), -1) + for p in g: + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index + 1] + if a in empty: + rel.append((j, a)) + + return rel + + # ----------------------------------------------------------------------------- + # compute_lookback_includes() + # + # Determines the lookback and includes relations + # + # LOOKBACK: + # + # This relation is determined by running the LR(0) state machine forward. + # For example, starting with a production "N : . A B C", we run it forward + # to obtain "N : A B C ." We then build a relationship between this final + # state and the starting state. These relationships are stored in a dictionary + # lookdict. + # + # INCLUDES: + # + # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). + # + # This relation is used to determine non-terminal transitions that occur + # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) + # if the following holds: + # + # B -> LAT, where T -> epsilon and p' -L-> p + # + # L is essentially a prefix (which may be empty), T is a suffix that must be + # able to derive an empty string. State p' must lead to state p with the string L. + # + # ----------------------------------------------------------------------------- + + def compute_lookback_includes(self, C, trans, nullable): + lookdict = {} # Dictionary of lookback relations + includedict = {} # Dictionary of include relations + + # Make a dictionary of non-terminal transitions + dtrans = {} + for t in trans: + dtrans[t] = 1 + + # Loop over all transitions and compute lookbacks and includes + for state, N in trans: + lookb = [] + includes = [] + for p in C[state]: + if p.name != N: + continue + + # Okay, we have a name match. We now follow the production all the way + # through the state machine until we get the . on the right hand side + + lr_index = p.lr_index + j = state + while lr_index < p.len - 1: + lr_index = lr_index + 1 + t = p.prod[lr_index] + + # Check to see if this symbol and state are a non-terminal transition + if (j, t) in dtrans: + # Yes. Okay, there is some chance that this is an includes relation + # the only way to know for certain is whether the rest of the + # production derives empty + + li = lr_index + 1 + while li < p.len: + if p.prod[li] in self.grammar.Terminals: + break # No forget it + if p.prod[li] not in nullable: + break + li = li + 1 + else: + # Appears to be a relation between (j,t) and (state,N) + includes.append((j, t)) + + g = self.lr0_goto(C[j], t) # Go to next set + j = self.lr0_cidhash.get(id(g), -1) # Go to next state + + # When we get here, j is the final state, now we have to locate the production + for r in C[j]: + if r.name != p.name: + continue + if r.len != p.len: + continue + i = 0 + # This look is comparing a production ". A B C" with "A B C ." + while i < r.lr_index: + if r.prod[i] != p.prod[i+1]: + break + i = i + 1 + else: + lookb.append((j, r)) + for i in includes: + if i not in includedict: + includedict[i] = [] + includedict[i].append((state, N)) + lookdict[(state, N)] = lookb + + return lookdict, includedict + + # ----------------------------------------------------------------------------- + # compute_read_sets() + # + # Given a set of LR(0) items, this function computes the read sets. + # + # Inputs: C = Set of LR(0) items + # ntrans = Set of nonterminal transitions + # nullable = Set of empty transitions + # + # Returns a set containing the read sets + # ----------------------------------------------------------------------------- + + def compute_read_sets(self, C, ntrans, nullable): + FP = lambda x: self.dr_relation(C, x, nullable) + R = lambda x: self.reads_relation(C, x, nullable) + F = digraph(ntrans, R, FP) + return F + + # ----------------------------------------------------------------------------- + # compute_follow_sets() + # + # Given a set of LR(0) items, a set of non-terminal transitions, a readset, + # and an include set, this function computes the follow sets + # + # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} + # + # Inputs: + # ntrans = Set of nonterminal transitions + # readsets = Readset (previously computed) + # inclsets = Include sets (previously computed) + # + # Returns a set containing the follow sets + # ----------------------------------------------------------------------------- + + def compute_follow_sets(self, ntrans, readsets, inclsets): + FP = lambda x: readsets[x] + R = lambda x: inclsets.get(x, []) + F = digraph(ntrans, R, FP) + return F + + # ----------------------------------------------------------------------------- + # add_lookaheads() + # + # Attaches the lookahead symbols to grammar rules. + # + # Inputs: lookbacks - Set of lookback relations + # followset - Computed follow set + # + # This function directly attaches the lookaheads to productions contained + # in the lookbacks set + # ----------------------------------------------------------------------------- + + def add_lookaheads(self, lookbacks, followset): + for trans, lb in lookbacks.items(): + # Loop over productions in lookback + for state, p in lb: + if state not in p.lookaheads: + p.lookaheads[state] = [] + f = followset.get(trans, []) + for a in f: + if a not in p.lookaheads[state]: + p.lookaheads[state].append(a) + + # ----------------------------------------------------------------------------- + # add_lalr_lookaheads() + # + # This function does all of the work of adding lookahead information for use + # with LALR parsing + # ----------------------------------------------------------------------------- + + def add_lalr_lookaheads(self, C): + # Determine all of the nullable nonterminals + nullable = self.compute_nullable_nonterminals() + + # Find all non-terminal transitions + trans = self.find_nonterminal_transitions(C) + + # Compute read sets + readsets = self.compute_read_sets(C, trans, nullable) + + # Compute lookback/includes relations + lookd, included = self.compute_lookback_includes(C, trans, nullable) + + # Compute LALR FOLLOW sets + followsets = self.compute_follow_sets(trans, readsets, included) + + # Add all of the lookaheads + self.add_lookaheads(lookd, followsets) + + # ----------------------------------------------------------------------------- + # lr_parse_table() + # + # This function constructs the parse tables for SLR or LALR + # ----------------------------------------------------------------------------- + def lr_parse_table(self): + Productions = self.grammar.Productions + Precedence = self.grammar.Precedence + goto = self.lr_goto # Goto array + action = self.lr_action # Action array + log = self.log # Logger for output + + actionp = {} # Action production array (temporary) + + log.info('Parsing method: %s', self.lr_method) + + # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items + # This determines the number of states + + C = self.lr0_items() + + if self.lr_method == 'LALR': + self.add_lalr_lookaheads(C) + + # Build the parser table, state by state + st = 0 + for I in C: + # Loop over each production in I + actlist = [] # List of actions + st_action = {} + st_actionp = {} + st_goto = {} + log.info('') + log.info('state %d', st) + log.info('') + for p in I: + log.info(' (%d) %s', p.number, p) + log.info('') + + for p in I: + if p.len == p.lr_index + 1: + if p.name == "S'": + # Start symbol. Accept! + st_action['$end'] = 0 + st_actionp['$end'] = p + else: + # We are at the end of a production. Reduce! + if self.lr_method == 'LALR': + laheads = p.lookaheads[st] + else: + laheads = self.grammar.Follow[p.name] + for a in laheads: + actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p))) + r = st_action.get(a) + if r is not None: + # Whoa. Have a shift/reduce or reduce/reduce conflict + if r > 0: + # Need to decide on shift or reduce here + # By default we favor shifting. Need to add + # some precedence rules here. + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from rule being reduced (p) + rprec, rlevel = Productions[p.number].prec + + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): + # We really need to reduce here. + st_action[a] = -p.number + st_actionp[a] = p + if not slevel and not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) + Productions[p.number].reduced += 1 + elif (slevel == rlevel) and (rprec == 'nonassoc'): + st_action[a] = None + else: + # Hmmm. Guess we'll keep the shift + if not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) + elif r < 0: + # Reduce/reduce conflict. In this case, we favor the rule + # that was defined first in the grammar file + oldp = Productions[-r] + pp = Productions[p.number] + if oldp.line > pp.line: + st_action[a] = -p.number + st_actionp[a] = p + chosenp, rejectp = pp, oldp + Productions[p.number].reduced += 1 + Productions[oldp.number].reduced -= 1 + else: + chosenp, rejectp = oldp, pp + self.rr_conflicts.append((st, chosenp, rejectp)) + log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)', + a, st_actionp[a].number, st_actionp[a]) + else: + raise LALRError('Unknown conflict in state %d' % st) + else: + st_action[a] = -p.number + st_actionp[a] = p + Productions[p.number].reduced += 1 + else: + i = p.lr_index + a = p.prod[i+1] # Get symbol right after the "." + if a in self.grammar.Terminals: + g = self.lr0_goto(I, a) + j = self.lr0_cidhash.get(id(g), -1) + if j >= 0: + # We are in a shift state + actlist.append((a, p, 'shift and go to state %d' % j)) + r = st_action.get(a) + if r is not None: + # Whoa have a shift/reduce or shift/shift conflict + if r > 0: + if r != j: + raise LALRError('Shift/shift conflict in state %d' % st) + elif r < 0: + # Do a precedence check. + # - if precedence of reduce rule is higher, we reduce. + # - if precedence of reduce is same and left assoc, we reduce. + # - otherwise we shift + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from the rule that could have been reduced + rprec, rlevel = Productions[st_actionp[a].number].prec + + if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): + # We decide to shift here... highest precedence to shift + Productions[st_actionp[a].number].reduced -= 1 + st_action[a] = j + st_actionp[a] = p + if not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) + elif (slevel == rlevel) and (rprec == 'nonassoc'): + st_action[a] = None + else: + # Hmmm. Guess we'll keep the reduce + if not slevel and not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) + + else: + raise LALRError('Unknown conflict in state %d' % st) + else: + st_action[a] = j + st_actionp[a] = p + + # Print the actions associated with each terminal + _actprint = {} + for a, p, m in actlist: + if a in st_action: + if p is st_actionp[a]: + log.info(' %-15s %s', a, m) + _actprint[(a, m)] = 1 + log.info('') + # Print the actions that were not used. (debugging) + not_used = 0 + for a, p, m in actlist: + if a in st_action: + if p is not st_actionp[a]: + if not (a, m) in _actprint: + log.debug(' ! %-15s [ %s ]', a, m) + not_used = 1 + _actprint[(a, m)] = 1 + if not_used: + log.debug('') + + # Construct the goto table for this state + + nkeys = {} + for ii in I: + for s in ii.usyms: + if s in self.grammar.Nonterminals: + nkeys[s] = None + for n in nkeys: + g = self.lr0_goto(I, n) + j = self.lr0_cidhash.get(id(g), -1) + if j >= 0: + st_goto[n] = j + log.info(' %-30s shift and go to state %d', n, j) + + action[st] = st_action + actionp[st] = st_actionp + goto[st] = st_goto + st += 1 + + # ----------------------------------------------------------------------------- + # write() + # + # This function writes the LR parsing tables to a file + # ----------------------------------------------------------------------------- + + def write_table(self, tabmodule, outputdir='', signature=''): + if isinstance(tabmodule, types.ModuleType): + raise IOError("Won't overwrite existing tabmodule") + + basemodulename = tabmodule.split('.')[-1] + filename = os.path.join(outputdir, basemodulename) + '.py' + try: + f = open(filename, 'w') + + f.write(''' +# %s +# This file is automatically generated. Do not edit. +# pylint: disable=W,C,R +_tabversion = %r + +_lr_method = %r + +_lr_signature = %r + ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature)) + + # Change smaller to 0 to go back to original tables + smaller = 1 + + # Factor out names to try and make smaller + if smaller: + items = {} + + for s, nd in self.lr_action.items(): + for name, v in nd.items(): + i = items.get(name) + if not i: + i = ([], []) + items[name] = i + i[0].append(s) + i[1].append(v) + + f.write('\n_lr_action_items = {') + for k, v in items.items(): + f.write('%r:([' % k) + for i in v[0]: + f.write('%r,' % i) + f.write('],[') + for i in v[1]: + f.write('%r,' % i) + + f.write(']),') + f.write('}\n') + + f.write(''' +_lr_action = {} +for _k, _v in _lr_action_items.items(): + for _x,_y in zip(_v[0],_v[1]): + if not _x in _lr_action: _lr_action[_x] = {} + _lr_action[_x][_k] = _y +del _lr_action_items +''') + + else: + f.write('\n_lr_action = { ') + for k, v in self.lr_action.items(): + f.write('(%r,%r):%r,' % (k[0], k[1], v)) + f.write('}\n') + + if smaller: + # Factor out names to try and make smaller + items = {} + + for s, nd in self.lr_goto.items(): + for name, v in nd.items(): + i = items.get(name) + if not i: + i = ([], []) + items[name] = i + i[0].append(s) + i[1].append(v) + + f.write('\n_lr_goto_items = {') + for k, v in items.items(): + f.write('%r:([' % k) + for i in v[0]: + f.write('%r,' % i) + f.write('],[') + for i in v[1]: + f.write('%r,' % i) + + f.write(']),') + f.write('}\n') + + f.write(''' +_lr_goto = {} +for _k, _v in _lr_goto_items.items(): + for _x, _y in zip(_v[0], _v[1]): + if not _x in _lr_goto: _lr_goto[_x] = {} + _lr_goto[_x][_k] = _y +del _lr_goto_items +''') + else: + f.write('\n_lr_goto = { ') + for k, v in self.lr_goto.items(): + f.write('(%r,%r):%r,' % (k[0], k[1], v)) + f.write('}\n') + + # Write production table + f.write('_lr_productions = [\n') + for p in self.lr_productions: + if p.func: + f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len, + p.func, os.path.basename(p.file), p.line)) + else: + f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len)) + f.write(']\n') + f.close() + + except IOError as e: + raise + + + # ----------------------------------------------------------------------------- + # pickle_table() + # + # This function pickles the LR parsing tables to a supplied file object + # ----------------------------------------------------------------------------- + + def pickle_table(self, filename, signature=''): + try: + import cPickle as pickle + except ImportError: + import pickle + with open(filename, 'wb') as outf: + pickle.dump(__tabversion__, outf, pickle_protocol) + pickle.dump(self.lr_method, outf, pickle_protocol) + pickle.dump(signature, outf, pickle_protocol) + pickle.dump(self.lr_action, outf, pickle_protocol) + pickle.dump(self.lr_goto, outf, pickle_protocol) + + outp = [] + for p in self.lr_productions: + if p.func: + outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line)) + else: + outp.append((str(p), p.name, p.len, None, None, None)) + pickle.dump(outp, outf, pickle_protocol) + +# ----------------------------------------------------------------------------- +# === INTROSPECTION === +# +# The following functions and classes are used to implement the PLY +# introspection features followed by the yacc() function itself. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# get_caller_module_dict() +# +# This function returns a dictionary containing all of the symbols defined within +# a caller further down the call stack. This is used to get the environment +# associated with the yacc() call if none was provided. +# ----------------------------------------------------------------------------- + +def get_caller_module_dict(levels): + f = sys._getframe(levels) + ldict = f.f_globals.copy() + if f.f_globals != f.f_locals: + ldict.update(f.f_locals) + return ldict + +# ----------------------------------------------------------------------------- +# parse_grammar() +# +# This takes a raw grammar rule string and parses it into production data +# ----------------------------------------------------------------------------- +def parse_grammar(doc, file, line): + grammar = [] + # Split the doc string into lines + pstrings = doc.splitlines() + lastp = None + dline = line + for ps in pstrings: + dline += 1 + p = ps.split() + if not p: + continue + try: + if p[0] == '|': + # This is a continuation of a previous rule + if not lastp: + raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) + prodname = lastp + syms = p[1:] + else: + prodname = p[0] + lastp = prodname + syms = p[2:] + assign = p[1] + if assign != ':' and assign != '::=': + raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline)) + + grammar.append((file, dline, prodname, syms)) + except SyntaxError: + raise + except Exception: + raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) + + return grammar + +# ----------------------------------------------------------------------------- +# ParserReflect() +# +# This class represents information extracted for building a parser including +# start symbol, error function, tokens, precedence list, action functions, +# etc. +# ----------------------------------------------------------------------------- +class ParserReflect(object): + def __init__(self, pdict, log=None): + self.pdict = pdict + self.start = None + self.error_func = None + self.tokens = None + self.modules = set() + self.grammar = [] + self.error = False + + if log is None: + self.log = PlyLogger(sys.stderr) + else: + self.log = log + + # Get all of the basic information + def get_all(self): + self.get_start() + self.get_error_func() + self.get_tokens() + self.get_precedence() + self.get_pfunctions() + + # Validate all of the information + def validate_all(self): + self.validate_start() + self.validate_error_func() + self.validate_tokens() + self.validate_precedence() + self.validate_pfunctions() + self.validate_modules() + return self.error + + # Compute a signature over the grammar + def signature(self): + parts = [] + try: + if self.start: + parts.append(self.start) + if self.prec: + parts.append(''.join([''.join(p) for p in self.prec])) + if self.tokens: + parts.append(' '.join(self.tokens)) + for f in self.pfuncs: + if f[3]: + parts.append(f[3]) + except (TypeError, ValueError): + pass + return ''.join(parts) + + # ----------------------------------------------------------------------------- + # validate_modules() + # + # This method checks to see if there are duplicated p_rulename() functions + # in the parser module file. Without this function, it is really easy for + # users to make mistakes by cutting and pasting code fragments (and it's a real + # bugger to try and figure out why the resulting parser doesn't work). Therefore, + # we just do a little regular expression pattern matching of def statements + # to try and detect duplicates. + # ----------------------------------------------------------------------------- + + def validate_modules(self): + # Match def p_funcname( + fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') + + for module in self.modules: + try: + lines, linen = inspect.getsourcelines(module) + except IOError: + continue + + counthash = {} + for linen, line in enumerate(lines): + linen += 1 + m = fre.match(line) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + filename = inspect.getsourcefile(module) + self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d', + filename, linen, name, prev) + + # Get the start symbol + def get_start(self): + self.start = self.pdict.get('start') + + # Validate the start symbol + def validate_start(self): + if self.start is not None: + if not isinstance(self.start, string_types): + self.log.error("'start' must be a string") + + # Look for error handler + def get_error_func(self): + self.error_func = self.pdict.get('p_error') + + # Validate the error function + def validate_error_func(self): + if self.error_func: + if isinstance(self.error_func, types.FunctionType): + ismethod = 0 + elif isinstance(self.error_func, types.MethodType): + ismethod = 1 + else: + self.log.error("'p_error' defined, but is not a function or method") + self.error = True + return + + eline = self.error_func.__code__.co_firstlineno + efile = self.error_func.__code__.co_filename + module = inspect.getmodule(self.error_func) + self.modules.add(module) + + argcount = self.error_func.__code__.co_argcount - ismethod + if argcount != 1: + self.log.error('%s:%d: p_error() requires 1 argument', efile, eline) + self.error = True + + # Get the tokens map + def get_tokens(self): + tokens = self.pdict.get('tokens') + if not tokens: + self.log.error('No token list is defined') + self.error = True + return + + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True + return + + if not tokens: + self.log.error('tokens is empty') + self.error = True + return + + self.tokens = sorted(tokens) + + # Validate the tokens + def validate_tokens(self): + # Validate the tokens. + if 'error' in self.tokens: + self.log.error("Illegal token name 'error'. Is a reserved word") + self.error = True + return + + terminals = set() + for n in self.tokens: + if n in terminals: + self.log.warning('Token %r multiply defined', n) + terminals.add(n) + + # Get the precedence map (if any) + def get_precedence(self): + self.prec = self.pdict.get('precedence') + + # Validate and parse the precedence map + def validate_precedence(self): + preclist = [] + if self.prec: + if not isinstance(self.prec, (list, tuple)): + self.log.error('precedence must be a list or tuple') + self.error = True + return + for level, p in enumerate(self.prec): + if not isinstance(p, (list, tuple)): + self.log.error('Bad precedence table') + self.error = True + return + + if len(p) < 2: + self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p) + self.error = True + return + assoc = p[0] + if not isinstance(assoc, string_types): + self.log.error('precedence associativity must be a string') + self.error = True + return + for term in p[1:]: + if not isinstance(term, string_types): + self.log.error('precedence items must be strings') + self.error = True + return + preclist.append((term, assoc, level+1)) + self.preclist = preclist + + # Get all p_functions from the grammar + def get_pfunctions(self): + p_functions = [] + for name, item in self.pdict.items(): + if not name.startswith('p_') or name == 'p_error': + continue + if isinstance(item, (types.FunctionType, types.MethodType)): + line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno) + module = inspect.getmodule(item) + p_functions.append((line, module, name, item.__doc__)) + + # Sort all of the actions by line number; make sure to stringify + # modules to make them sortable, since `line` may not uniquely sort all + # p functions + p_functions.sort(key=lambda p_function: ( + p_function[0], + str(p_function[1]), + p_function[2], + p_function[3])) + self.pfuncs = p_functions + + # Validate all of the p_functions + def validate_pfunctions(self): + grammar = [] + # Check for non-empty symbols + if len(self.pfuncs) == 0: + self.log.error('no rules of the form p_rulename are defined') + self.error = True + return + + for line, module, name, doc in self.pfuncs: + file = inspect.getsourcefile(module) + func = self.pdict[name] + if isinstance(func, types.MethodType): + reqargs = 2 + else: + reqargs = 1 + if func.__code__.co_argcount > reqargs: + self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__) + self.error = True + elif func.__code__.co_argcount < reqargs: + self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__) + self.error = True + elif not func.__doc__: + self.log.warning('%s:%d: No documentation string specified in function %r (ignored)', + file, line, func.__name__) + else: + try: + parsed_g = parse_grammar(doc, file, line) + for g in parsed_g: + grammar.append((name, g)) + except SyntaxError as e: + self.log.error(str(e)) + self.error = True + + # Looks like a valid grammar rule + # Mark the file in which defined. + self.modules.add(module) + + # Secondary validation step that looks for p_ definitions that are not functions + # or functions that look like they might be grammar rules. + + for n, v in self.pdict.items(): + if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): + continue + if n.startswith('t_'): + continue + if n.startswith('p_') and n != 'p_error': + self.log.warning('%r not defined as a function', n) + if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or + (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): + if v.__doc__: + try: + doc = v.__doc__.split(' ') + if doc[1] == ':': + self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix', + v.__code__.co_filename, v.__code__.co_firstlineno, n) + except IndexError: + pass + + self.grammar = grammar + +# ----------------------------------------------------------------------------- +# yacc(module) +# +# Build a parser +# ----------------------------------------------------------------------------- + +def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, + check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file, + outputdir=None, debuglog=None, errorlog=None, picklefile=None): + + if tabmodule is None: + tabmodule = tab_module + + # Reference to the parsing method of the last built parser + global parse + + # If pickling is enabled, table files are not created + if picklefile: + write_tables = 0 + + if errorlog is None: + errorlog = PlyLogger(sys.stderr) + + # Get the module dictionary used for the parser + if module: + _items = [(k, getattr(module, k)) for k in dir(module)] + pdict = dict(_items) + # If no __file__ or __package__ attributes are available, try to obtain them + # from the __module__ instead + if '__file__' not in pdict: + pdict['__file__'] = sys.modules[pdict['__module__']].__file__ + if '__package__' not in pdict and '__module__' in pdict: + if hasattr(sys.modules[pdict['__module__']], '__package__'): + pdict['__package__'] = sys.modules[pdict['__module__']].__package__ + else: + pdict = get_caller_module_dict(2) + + if outputdir is None: + # If no output directory is set, the location of the output files + # is determined according to the following rules: + # - If tabmodule specifies a package, files go into that package directory + # - Otherwise, files go in the same directory as the specifying module + if isinstance(tabmodule, types.ModuleType): + srcfile = tabmodule.__file__ + else: + if '.' not in tabmodule: + srcfile = pdict['__file__'] + else: + parts = tabmodule.split('.') + pkgname = '.'.join(parts[:-1]) + exec('import %s' % pkgname) + srcfile = getattr(sys.modules[pkgname], '__file__', '') + outputdir = os.path.dirname(srcfile) + + # Determine if the module is package of a package or not. + # If so, fix the tabmodule setting so that tables load correctly + pkg = pdict.get('__package__') + if pkg and isinstance(tabmodule, str): + if '.' not in tabmodule: + tabmodule = pkg + '.' + tabmodule + + + + # Set start symbol if it's specified directly using an argument + if start is not None: + pdict['start'] = start + + # Collect parser information from the dictionary + pinfo = ParserReflect(pdict, log=errorlog) + pinfo.get_all() + + if pinfo.error: + raise YaccError('Unable to build parser') + + # Check signature against table files (if any) + signature = pinfo.signature() + + # Read the tables + try: + lr = LRTable() + if picklefile: + read_signature = lr.read_pickle(picklefile) + else: + read_signature = lr.read_table(tabmodule) + if optimize or (read_signature == signature): + try: + lr.bind_callables(pinfo.pdict) + parser = LRParser(lr, pinfo.error_func) + parse = parser.parse + return parser + except Exception as e: + errorlog.warning('There was a problem loading the table file: %r', e) + except VersionError as e: + errorlog.warning(str(e)) + except ImportError: + pass + + if debuglog is None: + if debug: + try: + debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w')) + except IOError as e: + errorlog.warning("Couldn't open %r. %s" % (debugfile, e)) + debuglog = NullLogger() + else: + debuglog = NullLogger() + + debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__) + + errors = False + + # Validate the parser information + if pinfo.validate_all(): + raise YaccError('Unable to build parser') + + if not pinfo.error_func: + errorlog.warning('no p_error() function is defined') + + # Create a grammar object + grammar = Grammar(pinfo.tokens) + + # Set precedence level for terminals + for term, assoc, level in pinfo.preclist: + try: + grammar.set_precedence(term, assoc, level) + except GrammarError as e: + errorlog.warning('%s', e) + + # Add productions to the grammar + for funcname, gram in pinfo.grammar: + file, line, prodname, syms = gram + try: + grammar.add_production(prodname, syms, funcname, file, line) + except GrammarError as e: + errorlog.error('%s', e) + errors = True + + # Set the grammar start symbols + try: + if start is None: + grammar.set_start(pinfo.start) + else: + grammar.set_start(start) + except GrammarError as e: + errorlog.error(str(e)) + errors = True + + if errors: + raise YaccError('Unable to build parser') + + # Verify the grammar structure + undefined_symbols = grammar.undefined_symbols() + for sym, prod in undefined_symbols: + errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym) + errors = True + + unused_terminals = grammar.unused_terminals() + if unused_terminals: + debuglog.info('') + debuglog.info('Unused terminals:') + debuglog.info('') + for term in unused_terminals: + errorlog.warning('Token %r defined, but not used', term) + debuglog.info(' %s', term) + + # Print out all productions to the debug log + if debug: + debuglog.info('') + debuglog.info('Grammar') + debuglog.info('') + for n, p in enumerate(grammar.Productions): + debuglog.info('Rule %-5d %s', n, p) + + # Find unused non-terminals + unused_rules = grammar.unused_rules() + for prod in unused_rules: + errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name) + + if len(unused_terminals) == 1: + errorlog.warning('There is 1 unused token') + if len(unused_terminals) > 1: + errorlog.warning('There are %d unused tokens', len(unused_terminals)) + + if len(unused_rules) == 1: + errorlog.warning('There is 1 unused rule') + if len(unused_rules) > 1: + errorlog.warning('There are %d unused rules', len(unused_rules)) + + if debug: + debuglog.info('') + debuglog.info('Terminals, with rules where they appear') + debuglog.info('') + terms = list(grammar.Terminals) + terms.sort() + for term in terms: + debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]])) + + debuglog.info('') + debuglog.info('Nonterminals, with rules where they appear') + debuglog.info('') + nonterms = list(grammar.Nonterminals) + nonterms.sort() + for nonterm in nonterms: + debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]])) + debuglog.info('') + + if check_recursion: + unreachable = grammar.find_unreachable() + for u in unreachable: + errorlog.warning('Symbol %r is unreachable', u) + + infinite = grammar.infinite_cycles() + for inf in infinite: + errorlog.error('Infinite recursion detected for symbol %r', inf) + errors = True + + unused_prec = grammar.unused_precedence() + for term, assoc in unused_prec: + errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term) + errors = True + + if errors: + raise YaccError('Unable to build parser') + + # Run the LRGeneratedTable on the grammar + if debug: + errorlog.debug('Generating %s tables', method) + + lr = LRGeneratedTable(grammar, method, debuglog) + + if debug: + num_sr = len(lr.sr_conflicts) + + # Report shift/reduce and reduce/reduce conflicts + if num_sr == 1: + errorlog.warning('1 shift/reduce conflict') + elif num_sr > 1: + errorlog.warning('%d shift/reduce conflicts', num_sr) + + num_rr = len(lr.rr_conflicts) + if num_rr == 1: + errorlog.warning('1 reduce/reduce conflict') + elif num_rr > 1: + errorlog.warning('%d reduce/reduce conflicts', num_rr) + + # Write out conflicts to the output file + if debug and (lr.sr_conflicts or lr.rr_conflicts): + debuglog.warning('') + debuglog.warning('Conflicts:') + debuglog.warning('') + + for state, tok, resolution in lr.sr_conflicts: + debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) + + already_reported = set() + for state, rule, rejected in lr.rr_conflicts: + if (state, id(rule), id(rejected)) in already_reported: + continue + debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + debuglog.warning('rejected rule (%s) in state %d', rejected, state) + errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + errorlog.warning('rejected rule (%s) in state %d', rejected, state) + already_reported.add((state, id(rule), id(rejected))) + + warned_never = [] + for state, rule, rejected in lr.rr_conflicts: + if not rejected.reduced and (rejected not in warned_never): + debuglog.warning('Rule (%s) is never reduced', rejected) + errorlog.warning('Rule (%s) is never reduced', rejected) + warned_never.append(rejected) + + # Write the table file if requested + if write_tables: + try: + lr.write_table(tabmodule, outputdir, signature) + if tabmodule in sys.modules: + del sys.modules[tabmodule] + except IOError as e: + errorlog.warning("Couldn't create %r. %s" % (tabmodule, e)) + + # Write a pickled version of the tables + if picklefile: + try: + lr.pickle_table(picklefile, signature) + except IOError as e: + errorlog.warning("Couldn't create %r. %s" % (picklefile, e)) + + # Build the parser + lr.bind_callables(pinfo.pdict) + parser = LRParser(lr, pinfo.error_func) + + parse = parser.parse + return parser