diff --git a/third_party/lss/DIR_METADATA b/third_party/lss/DIR_METADATA new file mode 100644 index 000000000000..eccfd35fab8f --- /dev/null +++ b/third_party/lss/DIR_METADATA @@ -0,0 +1,12 @@ +# Metadata information for this directory. +# +# For more information on DIR_METADATA files, see: +# https://source.chromium.org/chromium/infra/infra/+/HEAD:go/src/infra/tools/dirmd/README.md +# +# For the schema of this file, see Metadata message: +# https://source.chromium.org/chromium/infra/infra/+/HEAD:go/src/infra/tools/dirmd/proto/dir_metadata.proto + +os: LINUX +monorail { + project: "linux-syscall-support" +} diff --git a/third_party/lss/LICENSE b/third_party/lss/LICENSE new file mode 100644 index 000000000000..58ab3fba9d4c --- /dev/null +++ b/third_party/lss/LICENSE @@ -0,0 +1,28 @@ +Copyright (c) 2005-2011, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/third_party/lss/METADATA b/third_party/lss/METADATA index f8d3a787278e..02195ba108bd 100644 --- a/third_party/lss/METADATA +++ b/third_party/lss/METADATA @@ -6,13 +6,13 @@ description: third_party { identifier { type: "ChromiumVersion" - value: "86.0.4240.199" # from https://chromereleases.googleblog.com/2020/11/stable-channel-update-for-chrome-os_30.html + value: "114.0.5735.358" # from https://chromereleases.googleblog.com/2024/03/long-term-support-channel-update-for_26.html } identifier { type: "Git" value: "https://chromium.googlesource.com/linux-syscall-support.git" - version: "29f7c7e018f4ce706a709f0b0afbf8bacf869480" - # from https://chromium.googlesource.com/chromium/src/+/86.0.4240.199/DEPS#242 + version: "ce877209e11aa69dcfffbd53ef90ea1d07136521" + # from https://chromium.googlesource.com/chromium/src/+/114.0.5735.358/DEPS#339 } last_upgrade_date { year: 2020 diff --git a/third_party/lss/OWNERS b/third_party/lss/OWNERS new file mode 100644 index 000000000000..29b02fa1143e --- /dev/null +++ b/third_party/lss/OWNERS @@ -0,0 +1,2 @@ +mseaborn@chromium.org +vapier@chromium.org diff --git a/third_party/lss/README.md b/third_party/lss/README.md index 70cbc85311f9..e25abdd218fe 100644 --- a/third_party/lss/README.md +++ b/third_party/lss/README.md @@ -39,6 +39,7 @@ the header itself as this list might be out of date. * MIPS 32-bit o32 ABI * MIPS 32-bit n32 ABI * MIPS 64-bit n64 ABI +* LOONGARCH 64-bit ABI ## API @@ -130,7 +131,7 @@ It only compiles things (does not execute at all). If you commit a change to LSS, please also commit a Chromium change to update `lss_revision` in -[Chromium's DEPS](https://chromium.googlesource.com/chromium/src/+/master/DEPS) +[Chromium's DEPS](https://chromium.googlesource.com/chromium/src/+/HEAD/DEPS) file. This ensures that the LSS change gets tested, so that people who commit later diff --git a/third_party/lss/linux_syscall_support.h b/third_party/lss/linux_syscall_support.h index e4ac22644c0c..99a4b444918d 100644 --- a/third_party/lss/linux_syscall_support.h +++ b/third_party/lss/linux_syscall_support.h @@ -88,7 +88,8 @@ */ #if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \ defined(__mips__) || defined(__PPC__) || defined(__ARM_EABI__) || \ - defined(__aarch64__) || defined(__s390__)) \ + defined(__aarch64__) || defined(__s390__) || defined(__e2k__) || \ + (defined(__riscv) && __riscv_xlen == 64) || defined(__loongarch_lp64)) \ && (defined(__linux) || defined(__ANDROID__)) #ifndef SYS_CPLUSPLUS @@ -266,6 +267,12 @@ struct kernel_timeval { long tv_usec; }; +/* include/linux/time.h */ +struct kernel_itimerval { + struct kernel_timeval it_interval; + struct kernel_timeval it_value; +}; + /* include/linux/resource.h */ struct kernel_rusage { struct kernel_timeval ru_utime; @@ -287,7 +294,8 @@ struct kernel_rusage { }; #if defined(__i386__) || defined(__ARM_EABI__) || defined(__ARM_ARCH_3__) \ - || defined(__PPC__) || (defined(__s390__) && !defined(__s390x__)) + || defined(__PPC__) || (defined(__s390__) && !defined(__s390x__)) \ + || defined(__e2k__) /* include/asm-{arm,i386,mips,ppc}/signal.h */ struct kernel_old_sigaction { @@ -301,8 +309,8 @@ struct kernel_old_sigaction { } __attribute__((packed,aligned(4))); #elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) #define kernel_old_sigaction kernel_sigaction -#elif defined(__aarch64__) - // No kernel_old_sigaction defined for arm64. 
+#elif defined(__aarch64__) || defined(__riscv) || defined(__loongarch_lp64)
+ // No kernel_old_sigaction defined for arm64, riscv, or loongarch64.
 #endif
 /* Some kernel functions (e.g. sigaction() in 2.6.23) require that the
@@ -341,7 +349,9 @@ struct kernel_sigaction {
     void             (*sa_sigaction_)(int, siginfo_t *, void *);
   };
   unsigned long      sa_flags;
+#if !defined(__riscv) && !defined(__loongarch_lp64)
   void               (*sa_restorer)(void);
+#endif
   struct kernel_sigset_t sa_mask;
 #endif
 };
@@ -355,6 +365,16 @@ struct kernel_sockaddr {
 /* include/asm-{arm,aarch64,i386,mips,ppc,s390}/stat.h */
 #ifdef __mips__
 #if _MIPS_SIM == _MIPS_SIM_ABI64
+typedef unsigned long long kernel_blkcnt_t;
+typedef unsigned kernel_blksize_t;
+typedef unsigned kernel_dev_t;
+typedef unsigned kernel_gid_t;
+typedef unsigned long long kernel_ino_t;
+typedef unsigned kernel_mode_t;
+typedef unsigned kernel_nlink_t;
+typedef long long kernel_off_t;
+typedef unsigned kernel_time_t;
+typedef unsigned kernel_uid_t;
 struct kernel_stat {
 #else
 struct kernel_stat64 {
 #endif
@@ -401,6 +421,28 @@ struct kernel_stat64 {
   unsigned long      __unused4;
   unsigned long      __unused5;
 };
+#elif defined(__e2k__)
+struct kernel_stat64 {
+  unsigned long long st_dev;
+  unsigned long long st_ino;
+  unsigned int       st_mode;
+  unsigned int       st_nlink;
+  unsigned int       st_uid;
+  unsigned int       st_gid;
+  unsigned long long st_rdev;
+  long long          st_size;
+  int                st_blksize;
+  int                __pad2;
+  unsigned long long st_blocks;
+  int                st_atime_;
+  unsigned int       st_atime_nsec_;
+  int                st_mtime_;
+  unsigned int       st_mtime_nsec_;
+  int                st_ctime_;
+  unsigned int       st_ctime_nsec_;
+  unsigned int       __unused4;
+  unsigned int       __unused5;
+};
 #else
 struct kernel_stat64 {
   unsigned long long st_dev;
@@ -427,165 +469,264 @@
 /* include/asm-{arm,aarch64,i386,mips,x86_64,ppc,s390}/stat.h */
 #if defined(__i386__) || defined(__ARM_ARCH_3__) || defined(__ARM_EABI__)
+typedef unsigned kernel_blkcnt_t;
+typedef unsigned kernel_blksize_t;
+typedef unsigned short kernel_dev_t;
+typedef unsigned short kernel_gid_t;
+typedef unsigned kernel_ino_t;
+typedef unsigned short kernel_mode_t;
+typedef unsigned short kernel_nlink_t;
+typedef unsigned kernel_off_t;
+typedef unsigned kernel_time_t;
+typedef unsigned short kernel_uid_t;
 struct kernel_stat {
   /* The kernel headers suggest that st_dev and st_rdev should be 32bit
    * quantities encoding 12bit major and 20bit minor numbers in an interleaved
    * format. In reality, we do not see useful data in the top bits. So,
    * we'll leave the padding in here, until we find a better solution.
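+   * (For a concrete sense of that encoding: cp_stat_statx() later in this
+   * patch packs device numbers as (minor & 0xff) | ((major & 0xfff) << 8) |
+   * ((minor & ~0xffu) << 12), so e.g. major 8, minor 3 packs to 0x803.)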
 */
-  unsigned short     st_dev;
+  kernel_dev_t       st_dev;
   short              pad1;
-  unsigned           st_ino;
-  unsigned short     st_mode;
-  unsigned short     st_nlink;
-  unsigned short     st_uid;
-  unsigned short     st_gid;
-  unsigned short     st_rdev;
+  kernel_ino_t       st_ino;
+  kernel_mode_t      st_mode;
+  kernel_nlink_t     st_nlink;
+  kernel_uid_t       st_uid;
+  kernel_gid_t       st_gid;
+  kernel_dev_t       st_rdev;
   short              pad2;
-  unsigned           st_size;
-  unsigned           st_blksize;
-  unsigned           st_blocks;
-  unsigned           st_atime_;
+  kernel_off_t       st_size;
+  kernel_blksize_t   st_blksize;
+  kernel_blkcnt_t    st_blocks;
+  kernel_time_t      st_atime_;
   unsigned           st_atime_nsec_;
-  unsigned           st_mtime_;
+  kernel_time_t      st_mtime_;
   unsigned           st_mtime_nsec_;
-  unsigned           st_ctime_;
+  kernel_time_t      st_ctime_;
   unsigned           st_ctime_nsec_;
   unsigned           __unused4;
   unsigned           __unused5;
 };
 #elif defined(__x86_64__)
+typedef int64_t kernel_blkcnt_t;
+typedef int64_t kernel_blksize_t;
+typedef uint64_t kernel_dev_t;
+typedef unsigned kernel_gid_t;
+typedef uint64_t kernel_ino_t;
+typedef unsigned kernel_mode_t;
+typedef uint64_t kernel_nlink_t;
+typedef int64_t kernel_off_t;
+typedef uint64_t kernel_time_t;
+typedef unsigned kernel_uid_t;
 struct kernel_stat {
-  uint64_t           st_dev;
-  uint64_t           st_ino;
-  uint64_t           st_nlink;
-  unsigned           st_mode;
-  unsigned           st_uid;
-  unsigned           st_gid;
+  kernel_dev_t       st_dev;
+  kernel_ino_t       st_ino;
+  kernel_nlink_t     st_nlink;
+  kernel_mode_t      st_mode;
+  kernel_uid_t       st_uid;
+  kernel_gid_t       st_gid;
   unsigned           __pad0;
-  uint64_t           st_rdev;
-  int64_t            st_size;
-  int64_t            st_blksize;
-  int64_t            st_blocks;
-  uint64_t           st_atime_;
+  kernel_dev_t       st_rdev;
+  kernel_off_t       st_size;
+  kernel_blksize_t   st_blksize;
+  kernel_blkcnt_t    st_blocks;
+  kernel_time_t      st_atime_;
   uint64_t           st_atime_nsec_;
-  uint64_t           st_mtime_;
+  kernel_time_t      st_mtime_;
   uint64_t           st_mtime_nsec_;
-  uint64_t           st_ctime_;
+  kernel_time_t      st_ctime_;
   uint64_t           st_ctime_nsec_;
   int64_t            __unused4[3];
 };
 #elif defined(__PPC__)
+typedef unsigned long kernel_blkcnt_t;
+typedef unsigned long kernel_blksize_t;
+typedef unsigned kernel_dev_t;
+typedef unsigned kernel_gid_t;
+typedef unsigned long kernel_ino_t;
+typedef unsigned long kernel_mode_t;
+typedef unsigned short kernel_nlink_t;
+typedef long kernel_off_t;
+typedef unsigned long kernel_time_t;
+typedef unsigned kernel_uid_t;
 struct kernel_stat {
-  unsigned           st_dev;
-  unsigned long      st_ino;      // ino_t
-  unsigned long      st_mode;     // mode_t
-  unsigned short     st_nlink;    // nlink_t
-  unsigned           st_uid;      // uid_t
-  unsigned           st_gid;      // gid_t
-  unsigned           st_rdev;
-  long               st_size;     // off_t
-  unsigned long      st_blksize;
-  unsigned long      st_blocks;
-  unsigned long      st_atime_;
+  kernel_dev_t       st_dev;
+  kernel_ino_t       st_ino;
+  kernel_mode_t      st_mode;
+  kernel_nlink_t     st_nlink;
+  kernel_uid_t       st_uid;
+  kernel_gid_t       st_gid;
+  kernel_dev_t       st_rdev;
+  kernel_off_t       st_size;
+  kernel_blksize_t   st_blksize;
+  kernel_blkcnt_t    st_blocks;
+  kernel_time_t      st_atime_;
   unsigned long      st_atime_nsec_;
-  unsigned long      st_mtime_;
+  kernel_time_t      st_mtime_;
   unsigned long      st_mtime_nsec_;
-  unsigned long      st_ctime_;
+  kernel_time_t      st_ctime_;
   unsigned long      st_ctime_nsec_;
   unsigned long      __unused4;
   unsigned long      __unused5;
 };
 #elif (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64)
+typedef int kernel_blkcnt_t;
+typedef int kernel_blksize_t;
+typedef unsigned kernel_dev_t;
+typedef unsigned kernel_gid_t;
+typedef unsigned kernel_ino_t;
+typedef unsigned kernel_mode_t;
+typedef unsigned kernel_nlink_t;
+typedef long kernel_off_t;
+typedef long kernel_time_t;
+typedef unsigned kernel_uid_t;
 struct kernel_stat {
-  unsigned           st_dev;
+  kernel_dev_t       st_dev;
  int
st_pad1[3]; - unsigned st_ino; - unsigned st_mode; - unsigned st_nlink; - unsigned st_uid; - unsigned st_gid; - unsigned st_rdev; + kernel_ino_t st_ino; + kernel_mode_t st_mode; + kernel_nlink_t st_nlink; + kernel_uid_t st_uid; + kernel_gid_t st_gid; + kernel_dev_t st_rdev; int st_pad2[2]; - long st_size; + kernel_off_t st_size; int st_pad3; - long st_atime_; + kernel_time_t st_atime_; long st_atime_nsec_; - long st_mtime_; + kernel_time_t st_mtime_; long st_mtime_nsec_; - long st_ctime_; + kernel_time_t st_ctime_; long st_ctime_nsec_; - int st_blksize; - int st_blocks; + kernel_blksize_t st_blksize; + kernel_blkcnt_t st_blocks; int st_pad4[14]; }; -#elif defined(__aarch64__) +#elif defined(__aarch64__) || defined(__riscv) || defined(__loongarch_lp64) +typedef long kernel_blkcnt_t; +typedef int kernel_blksize_t; +typedef unsigned long kernel_dev_t; +typedef unsigned int kernel_gid_t; +typedef unsigned long kernel_ino_t; +typedef unsigned int kernel_mode_t; +typedef unsigned int kernel_nlink_t; +typedef long kernel_off_t; +typedef long kernel_time_t; +typedef unsigned int kernel_uid_t; struct kernel_stat { - unsigned long st_dev; - unsigned long st_ino; - unsigned int st_mode; - unsigned int st_nlink; - unsigned int st_uid; - unsigned int st_gid; - unsigned long st_rdev; + kernel_dev_t st_dev; + kernel_ino_t st_ino; + kernel_mode_t st_mode; + kernel_nlink_t st_nlink; + kernel_uid_t st_uid; + kernel_gid_t st_gid; + kernel_dev_t st_rdev; unsigned long __pad1; - long st_size; - int st_blksize; + kernel_off_t st_size; + kernel_blksize_t st_blksize; int __pad2; - long st_blocks; - long st_atime_; + kernel_blkcnt_t st_blocks; + kernel_time_t st_atime_; unsigned long st_atime_nsec_; - long st_mtime_; + kernel_time_t st_mtime_; unsigned long st_mtime_nsec_; - long st_ctime_; + kernel_time_t st_ctime_; unsigned long st_ctime_nsec_; unsigned int __unused4; unsigned int __unused5; }; #elif defined(__s390x__) +typedef long kernel_blkcnt_t; +typedef unsigned long kernel_blksize_t; +typedef unsigned long kernel_dev_t; +typedef unsigned int kernel_gid_t; +typedef unsigned long kernel_ino_t; +typedef unsigned int kernel_mode_t; +typedef unsigned long kernel_nlink_t; +typedef unsigned long kernel_off_t; +typedef unsigned long kernel_time_t; +typedef unsigned int kernel_uid_t; struct kernel_stat { - unsigned long st_dev; - unsigned long st_ino; - unsigned long st_nlink; - unsigned int st_mode; - unsigned int st_uid; - unsigned int st_gid; + kernel_dev_t st_dev; + kernel_ino_t st_ino; + kernel_nlink_t st_nlink; + kernel_mode_t st_mode; + kernel_uid_t st_uid; + kernel_gid_t st_gid; unsigned int __pad1; - unsigned long st_rdev; - unsigned long st_size; - unsigned long st_atime_; + kernel_dev_t st_rdev; + kernel_off_t st_size; + kernel_time_t st_atime_; unsigned long st_atime_nsec_; - unsigned long st_mtime_; + kernel_time_t st_mtime_; unsigned long st_mtime_nsec_; - unsigned long st_ctime_; + kernel_time_t st_ctime_; unsigned long st_ctime_nsec_; - unsigned long st_blksize; - long st_blocks; + kernel_blksize_t st_blksize; + kernel_blkcnt_t st_blocks; unsigned long __unused[3]; }; #elif defined(__s390__) +typedef unsigned long kernel_blkcnt_t; +typedef unsigned long kernel_blksize_t; +typedef unsigned short kernel_dev_t; +typedef unsigned short kernel_gid_t; +typedef unsigned long kernel_ino_t; +typedef unsigned short kernel_mode_t; +typedef unsigned short kernel_nlink_t; +typedef unsigned long kernel_off_t; +typedef unsigned long kernel_time_t; +typedef unsigned short kernel_uid_t; struct kernel_stat { - 
unsigned short st_dev; + kernel_dev_t st_dev; unsigned short __pad1; - unsigned long st_ino; - unsigned short st_mode; - unsigned short st_nlink; - unsigned short st_uid; - unsigned short st_gid; - unsigned short st_rdev; + kernel_ino_t st_ino; + kernel_mode_t st_mode; + kernel_nlink_t st_nlink; + kernel_uid_t st_uid; + kernel_gid_t st_gid; + kernel_dev_t st_rdev; unsigned short __pad2; - unsigned long st_size; - unsigned long st_blksize; - unsigned long st_blocks; - unsigned long st_atime_; + kernel_off_t st_size; + kernel_blksize_t st_blksize; + kernel_blkcnt_t st_blocks; + kernel_time_t st_atime_; unsigned long st_atime_nsec_; - unsigned long st_mtime_; + kernel_time_t st_mtime_; unsigned long st_mtime_nsec_; - unsigned long st_ctime_; + kernel_time_t st_ctime_; unsigned long st_ctime_nsec_; unsigned long __unused4; unsigned long __unused5; }; +#elif defined(__e2k__) +typedef unsigned long kernel_blkcnt_t; +typedef unsigned long kernel_blksize_t; +typedef unsigned long kernel_dev_t; +typedef unsigned int kernel_gid_t; +typedef unsigned long kernel_ino_t; +typedef unsigned int kernel_mode_t; +typedef unsigned long kernel_nlink_t; +typedef unsigned long kernel_off_t; +typedef unsigned long kernel_time_t; +typedef unsigned int kernel_uid_t; +struct kernel_stat { + kernel_dev_t st_dev; + kernel_ino_t st_ino; + kernel_mode_t st_mode; + kernel_nlink_t st_nlink; + kernel_uid_t st_uid; + kernel_gid_t st_gid; + kernel_dev_t st_rdev; + kernel_off_t st_size; + kernel_blksize_t st_blksize; + kernel_blkcnt_t st_blocks; + kernel_time_t st_atime_; + unsigned long st_atime_nsec_; + kernel_time_t st_mtime_; + unsigned long st_mtime_nsec_; + kernel_time_t st_ctime_; + unsigned long st_ctime_nsec_; +}; #endif /* include/asm-{arm,aarch64,i386,mips,x86_64,ppc,s390}/statfs.h */ @@ -701,6 +842,37 @@ struct kernel_statfs { }; #endif +struct kernel_statx_timestamp { + int64_t tv_sec; + uint32_t tv_nsec; + int32_t __reserved; +}; + +struct kernel_statx { + uint32_t stx_mask; + uint32_t stx_blksize; + uint64_t stx_attributes; + uint32_t stx_nlink; + uint32_t stx_uid; + uint32_t stx_gid; + uint16_t stx_mode; + uint16_t __spare0[1]; + uint64_t stx_ino; + uint64_t stx_size; + uint64_t stx_blocks; + uint64_t stx_attributes_mask; + struct kernel_statx_timestamp stx_atime; + struct kernel_statx_timestamp stx_btime; + struct kernel_statx_timestamp stx_ctime; + struct kernel_statx_timestamp stx_mtime; + uint32_t stx_rdev_major; + uint32_t stx_rdev_minor; + uint32_t stx_dev_major; + uint32_t stx_dev_minor; + uint64_t stx_mnt_id; + uint64_t __spare2; + uint64_t __spare3[12]; +}; /* Definitions missing from the standard header files */ #ifndef O_DIRECTORY @@ -737,6 +909,18 @@ struct kernel_statfs { #ifndef AT_REMOVEDIR #define AT_REMOVEDIR 0x200 #endif +#ifndef AT_NO_AUTOMOUNT +#define AT_NO_AUTOMOUNT 0x800 +#endif +#ifndef AT_EMPTY_PATH +#define AT_EMPTY_PATH 0x1000 +#endif +#ifndef STATX_BASIC_STATS +#define STATX_BASIC_STATS 0x000007ffU +#endif +#ifndef AT_STATX_SYNC_AS_STAT +#define AT_STATX_SYNC_AS_STAT 0x0000 +#endif #ifndef MREMAP_FIXED #define MREMAP_FIXED 2 #endif @@ -1055,6 +1239,9 @@ struct kernel_statfs { #ifndef __NR_ioprio_get #define __NR_ioprio_get (__NR_SYSCALL_BASE + 315) #endif +#ifndef __NR_fstatat64 +#define __NR_fstatat64 (__NR_SYSCALL_BASE + 327) +#endif #ifndef __NR_move_pages #define __NR_move_pages (__NR_SYSCALL_BASE + 344) #endif @@ -1065,7 +1252,7 @@ struct kernel_statfs { #define __NR_getrandom (__NR_SYSCALL_BASE + 384) #endif /* End of ARM 3/EABI definitions */ -#elif defined(__aarch64__) 
+#elif defined(__aarch64__) || defined(__riscv) || defined(__loongarch_lp64) #ifndef __NR_setxattr #define __NR_setxattr 5 #endif @@ -1121,9 +1308,11 @@ struct kernel_statfs { #ifndef __NR_readlinkat #define __NR_readlinkat 78 #endif +#if !defined(__loongarch_lp64) #ifndef __NR_newfstatat #define __NR_newfstatat 79 #endif +#endif #ifndef __NR_set_tid_address #define __NR_set_tid_address 96 #endif @@ -1164,7 +1353,9 @@ struct kernel_statfs { #ifndef __NR_getrandom #define __NR_getrandom 278 #endif -/* End of aarch64 definitions */ +#ifndef __NR_statx +#define __NR_statx 291 +#endif #elif defined(__x86_64__) #ifndef __NR_pread64 #define __NR_pread64 17 @@ -1440,8 +1631,8 @@ struct kernel_statfs { #ifndef __NR_ioprio_get #define __NR_ioprio_get (__NR_Linux + 274) #endif -#ifndef __NT_getrandom -#define (__NR_Linux + 313) +#ifndef __NR_getrandom +#define __NR_getrandom (__NR_Linux + 313) #endif /* End of MIPS (64bit API) definitions */ #else @@ -1879,15 +2070,16 @@ struct kernel_statfs { #endif #undef LSS_RETURN - #if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) \ - || defined(__ARM_EABI__) || defined(__aarch64__) || defined(__s390__)) + #if defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) \ + || defined(__ARM_EABI__) || defined(__aarch64__) || defined(__s390__) \ + || defined(__e2k__) || defined(__riscv) || defined(__loongarch_lp64) /* Failing system calls return a negative result in the range of * -1..-4095. These are "errno" values with the sign inverted. */ #define LSS_RETURN(type, res) \ do { \ if ((unsigned long)(res) >= (unsigned long)(-4095)) { \ - LSS_ERRNO = -(res); \ + LSS_ERRNO = (int)(-(res)); \ res = -1; \ } \ return (type) (res); \ @@ -2241,7 +2433,7 @@ struct kernel_statfs { #define _LSS_RETURN(type, res, cast) \ do { \ if ((uint64_t)(res) >= (uint64_t)(-4095)) { \ - LSS_ERRNO = -(res); \ + LSS_ERRNO = (int)(-(res)); \ res = -1; \ } \ return (type)(cast)(res); \ @@ -2790,7 +2982,7 @@ struct kernel_statfs { void *newtls, int *child_tidptr) { int64_t __res; { - register uint64_t __flags __asm__("x0") = flags; + register uint64_t __flags __asm__("x0") = (uint64_t)flags; register void *__stack __asm__("x1") = child_stack; register void *__ptid __asm__("x2") = parent_tidptr; register void *__tls __asm__("x3") = newtls; @@ -3373,6 +3565,503 @@ struct kernel_statfs { } LSS_RETURN(int, __ret); } + #elif defined(__riscv) && __riscv_xlen == 64 + #undef LSS_REG + #define LSS_REG(r,a) register int64_t __r##r __asm__("a"#r) = (int64_t)a + #undef LSS_BODY + #define LSS_BODY(type,name,args...) 
\ + register int64_t __res_a0 __asm__("a0"); \ + register int64_t __a7 __asm__("a7") = __NR_##name; \ + int64_t __res; \ + __asm__ __volatile__ ("scall\n" \ + : "=r"(__res_a0) \ + : "r"(__a7) , ## args \ + : "memory"); \ + __res = __res_a0; \ + LSS_RETURN(type, __res) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__r0)); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \ + } + #undef _syscall5 + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); LSS_REG(4, arg5); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4)); \ + } + #undef _syscall6 + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4), "r"(__r5)); \ + } + + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + int64_t __res; + { + register int64_t __res_a0 __asm__("a0"); + register uint64_t __flags __asm__("a0") = flags; + register void *__stack __asm__("a1") = child_stack; + register void *__ptid __asm__("a2") = parent_tidptr; + register void *__tls __asm__("a3") = newtls; + register int *__ctid __asm__("a4") = child_tidptr; + __asm__ __volatile__(/* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + "addi %2,%2,-16\n" + "sd %1, 0(%2)\n" + "sd %4, 8(%2)\n" + + /* %a0 = syscall(%a0 = flags, + * %a1 = child_stack, + * %a2 = parent_tidptr, + * %a3 = newtls, + * %a4 = child_tidptr) + */ + "li a7, %8\n" + "scall\n" + + /* if (%a0 != 0) + * return %a0; + */ + "bnez %0, 1f\n" + + /* In the child, now. Call "fn(arg)". + */ + "ld a1, 0(sp)\n" + "ld a0, 8(sp)\n" + "jalr a1\n" + + /* Call _exit(%a0). + */ + "li a7, %9\n" + "scall\n" + "1:\n" + : "=r" (__res_a0) + : "r"(fn), "r"(__stack), "r"(__flags), "r"(arg), + "r"(__ptid), "r"(__tls), "r"(__ctid), + "i"(__NR_clone), "i"(__NR_exit) + : "cc", "memory"); + __res = __res_a0; + } + LSS_RETURN(int, __res); + } + #elif defined(__e2k__) + + #undef _LSS_BODY + #define _LSS_BODY(nr, type, name, ...) 
\ + register unsigned long long __res; \ + __asm__ __volatile__ \ + ( \ + "{\n\t" \ + " sdisp %%ctpr1, 0x3\n\t" \ + " addd, s 0x0, %[sys_num], %%b[0]\n\t" \ + LSS_BODY_ASM##nr \ + "}\n\t" \ + "{\n\t" \ + " call %%ctpr1, wbs = %#\n\t" \ + "}\n\t" \ + "{\n\t" \ + " addd, s 0x0, %%b[0], %[res]\n\t" \ + "}\n\t" \ + : [res] "=r" (__res) \ + : \ + LSS_BODY_ARG##nr(__VA_ARGS__) \ + [sys_num] "ri" (__NR_##name) \ + : "ctpr1", "ctpr2", "ctpr3", \ + "b[0]", "b[1]", "b[2]", "b[3]", \ + "b[4]", "b[5]", "b[6]", "b[7]" \ + ); \ + LSS_RETURN(type, __res); + + #undef LSS_BODY + #define LSS_BODY(nr, type, name, args...) \ + _LSS_BODY(nr, type, name, ## args) + + #undef LSS_BODY_ASM0 + #undef LSS_BODY_ASM1 + #undef LSS_BODY_ASM2 + #undef LSS_BODY_ASM3 + #undef LSS_BODY_ASM4 + #undef LSS_BODY_ASM5 + #undef LSS_BODY_ASM6 + + #define LSS_BODY_ASM0 + #define LSS_BODY_ASM1 LSS_BODY_ASM0 \ + " addd, s 0x0, %[arg1], %%b[1]\n\t" + #define LSS_BODY_ASM2 LSS_BODY_ASM1 \ + " addd, s 0x0, %[arg2], %%b[2]\n\t" + #define LSS_BODY_ASM3 LSS_BODY_ASM2 \ + " addd, s 0x0, %[arg3], %%b[3]\n\t" + #define LSS_BODY_ASM4 LSS_BODY_ASM3 \ + " addd, s 0x0, %[arg4], %%b[4]\n\t" + #define LSS_BODY_ASM5 LSS_BODY_ASM4 \ + " addd, s 0x0, %[arg5], %%b[5]\n\t" + #define LSS_BODY_ASM6 LSS_BODY_ASM5 \ + "}\n\t" \ + "{\n\t" \ + " addd, s 0x0, %[arg6], %%b[6]\n\t" + + #undef LSS_SYSCALL_ARG + #define LSS_SYSCALL_ARG(a) ((unsigned long long)(uintptr_t)(a)) + + #undef LSS_BODY_ARG0 + #undef LSS_BODY_ARG1 + #undef LSS_BODY_ARG2 + #undef LSS_BODY_ARG3 + #undef LSS_BODY_ARG4 + #undef LSS_BODY_ARG5 + #undef LSS_BODY_ARG6 + + #define LSS_BODY_ARG0() + #define LSS_BODY_ARG1(_arg1) \ + [arg1] "ri" LSS_SYSCALL_ARG(_arg1), + #define LSS_BODY_ARG2(_arg1, _arg2) \ + LSS_BODY_ARG1(_arg1) \ + [arg2] "ri" LSS_SYSCALL_ARG(_arg2), + #define LSS_BODY_ARG3(_arg1, _arg2, _arg3) \ + LSS_BODY_ARG2(_arg1, _arg2) \ + [arg3] "ri" LSS_SYSCALL_ARG(_arg3), + #define LSS_BODY_ARG4(_arg1, _arg2, _arg3, _arg4) \ + LSS_BODY_ARG3(_arg1, _arg2, _arg3) \ + [arg4] "ri" LSS_SYSCALL_ARG(_arg4), + #define LSS_BODY_ARG5(_arg1, _arg2, _arg3, _arg4, _arg5) \ + LSS_BODY_ARG4(_arg1, _arg2, _arg3, _arg4) \ + [arg5] "ri" LSS_SYSCALL_ARG(_arg5), + #define LSS_BODY_ARG6(_arg1, _arg2, _arg3, _arg4, _arg5, _arg6) \ + LSS_BODY_ARG5(_arg1, _arg2, _arg3, _arg4, _arg5) \ + [arg6] "ri" LSS_SYSCALL_ARG(_arg6), + + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(0, type, name); \ + } + + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(1, type, name, arg1) \ + } + + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_BODY(2, type, name, arg1, arg2) \ + } + + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_BODY(3, type, name, arg1, arg2, arg3) \ + } + + #undef _syscall4 + #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_BODY(4, type, name, arg1, arg2, arg3, arg4) \ + } + + #undef _syscall5 + #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5) \ + } + + #undef _syscall6 + #define _syscall6(type, 
name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5, type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6) \ + } + + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + unsigned long long __res; + + __asm__ __volatile__ ( + "{\n\t" + " addd,s 0x0, %[nr_clone], %%b[0]\n\t" + " addd,s 0x0, %[flags], %%db[1]\n\t" + " addd,s 0x0, %[child_stack], %%db[2]\n\t" + " addd,s 0x0, %[parent_tidptr], %%db[3]\n\t" + " addd,s 0x0, %[child_tidptr], %%db[4]\n\t" + " addd,s 0x0, %[newtls], %%db[5]\n\t" + "}\n\t" + /* if (fn == NULL) + * return -EINVAL; + */ + + "{\n\t" + " disp %%ctpr1, .L1\n\t" + "}\n\t" + "{\n\t" + " cmpesb,s 0x0, %[fn], %%pred0\n\t" + "}\n\t" + "{\n\t" + " ct %%ctpr1 ? %%pred0\n\t" + "}\n\t" + + /* if (child_stack == NULL) + * return -EINVAL; + */ + "{\n\t" + " cmpesb,s 0x0, %%db[2], %%pred0\n\t" + "}\n\t" + "{\n\t" + " ct %%ctpr1 ? %%pred0\n\t" + "}\n\t" + + /* b[0] = syscall(%b[0] = __NR_clone, + * %db[1] = flags, + * %db[2] = child_stack, + * %db[3] = parent_tidptr, + * %db[4] = child_tidptr, + * %db[5] = newtls) + */ + "{\n\t" + " sdisp %%ctpr1, 0x3\n\t" + "}\n\t" + "{\n\t" + " call %%ctpr1, wbs = %#\n\t" + "}\n\t" + + /* if (%[b0] != 0) + * return %b[0]; + */ + "{\n\t" + " disp %%ctpr1, .L2\n\t" + " cmpesb,s 0x0, %%b[0], %%pred0\n\t" + "}\n\t" + "{\n\t" + " ct %%ctpr1 ? ~%%pred0\n\t" + "}\n\t" + /* In the child, now. Call "fn(arg)". + */ + + "{\n\t" + " movtd,s %[fn], %%ctpr1\n\t" + "}\n\t" + "{\n\t" + " addd,s 0x0, %[arg], %%db[0]\n\t" + "}\n\t" + "{\n\t" + " call %%ctpr1, wbs = %#\n\t" + "}\n\t" + /* Call _exit(%b[0]). + */ + + "{\n\t" + " sdisp %%ctpr1, 0x3\n\t" + " addd,s 0x0, %%b[0], %%b[1]\n\t" + "}\n\t" + "{\n\t" + " addd,s 0x0, %[nr_exit], %%b[0]\n\t" + "}\n\t" + "{\n\t" + " call %%ctpr1, wbs = %#\n\t" + "}\n\t" + "{\n\t" + " disp %%ctpr1, .L2\n\t" + " adds,s 0x0, 0x0, %%b[0]\n\t" + "}\n\t" + "{\n\t" + " ct %%ctpr1\n\t" + "}\n\t" + ".L1:\n\t" + "{\n\t" + " addd,s 0x0, %[einval], %%b[0]\n\t" + "}\n\t" + ".L2:\n\t" + "{\n\t" + " addd,s 0x0, %%b[0], %[res]\n\t" + "}\n\t" + : [res] "=r" LSS_SYSCALL_ARG(__res) + : [nr_clone] "ri" LSS_SYSCALL_ARG(__NR_clone) + [arg] "ri" LSS_SYSCALL_ARG(arg) + [nr_exit] "ri" LSS_SYSCALL_ARG(__NR_exit) + [flags] "ri" LSS_SYSCALL_ARG(flags) + [child_stack] "ri" LSS_SYSCALL_ARG(child_stack) + [parent_tidptr] "ri" + LSS_SYSCALL_ARG(parent_tidptr) + [newtls] "ri" LSS_SYSCALL_ARG(newtls) + [child_tidptr] "ri" + LSS_SYSCALL_ARG(child_tidptr) + [fn] "ri" LSS_SYSCALL_ARG(fn) + [einval] "ri" LSS_SYSCALL_ARG(-EINVAL) + : "ctpr1", "b[0]", "b[1]", "b[2]", "b[3]", + "b[4]", "b[5]", "pred0"); + LSS_RETURN(int, __res); + } + #elif defined(__loongarch_lp64) + /* Most definitions of _syscallX() neglect to mark "memory" as being + * clobbered. This causes problems with compilers, that do a better job + * at optimizing across __asm__ calls. + * So, we just have to redefine all of the _syscallX() macros. + */ + #undef LSS_REG + #define LSS_REG(ar,a) register int64_t __r##ar __asm__("a"#ar) = (int64_t)a + /* syscall is like subroutine calls, all caller-saved registers may be + * clobbered, we should add them to the |Clobbers| list. + * a0 is not included because it's in the output list. 
+ */ + #define LSS_SYSCALL_CLOBBERS "t0", "t1", "t2", "t3", "t4", "t5", "t6", \ + "t7", "t8", "memory" + #undef LSS_BODY + #define LSS_BODY(type,name,args...) \ + register int64_t __res_a0 __asm__("a0"); \ + int64_t __res; \ + __asm__ __volatile__ ("li.d $a7, %1\n" \ + "syscall 0x0\n" \ + : "=r"(__res_a0) \ + : "i"(__NR_##name) , ## args \ + : LSS_SYSCALL_CLOBBERS); \ + __res = __res_a0; \ + LSS_RETURN(type, __res) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__r0)); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \ + } + #undef _syscall5 + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); LSS_REG(4, arg5); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4)); \ + } + #undef _syscall6 + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4), "r"(__r5)); \ + } + + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + int64_t __res; + { + register int64_t __res_a0 __asm__("a0"); + register uint64_t __flags __asm__("a0") = flags; + register void *__stack __asm__("a1") = child_stack; + register void *__ptid __asm__("a2") = parent_tidptr; + register void *__tls __asm__("a3") = newtls; + register int *__ctid __asm__("a4") = child_tidptr; + __asm__ __volatile__(/* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + "addi.d %2, %2, -16\n" + "st.d %1, %2, 8\n" + "st.d %4, %2, 0\n" + + /* %a0 = syscall(%a0 = flags, + * %a1 = child_stack, + * %a2 = parent_tidptr, + * %a3 = newtls, + * %a4 = child_tidptr) + */ + "li.d $a7, %8\n" + "syscall 0x0\n" + + /* if (%a0 != 0) + * return %a0; + */ + "bnez $a0, 1f\n" + + /* In the child, now. Call "fn(arg)". + */ + "ld.d $a0, $sp, 0\n" + "ld.d $a1, $sp, 8\n" + "addi.d $sp, $sp, 16\n" + "jirl $ra, $a1, 0\n" + + /* Call _exit(%a0). 
+ */ + "li.d $a7, %9\n" + "syscall 0x0\n" + "1:\n" + : "=r" (__res_a0) + : "r"(fn), "r"(__stack), "r"(__flags), "r"(arg), + "r"(__ptid), "r"(__tls), "r"(__ctid), + "i"(__NR_clone), "i"(__NR_exit) + : LSS_SYSCALL_CLOBBERS); + __res = __res_a0; + } + LSS_RETURN(int, __res); + } + #endif #define __NR__exit __NR_exit #define __NR__gettid __NR_gettid @@ -3403,8 +4092,10 @@ struct kernel_statfs { // fork is polyfilled below when not available. LSS_INLINE _syscall0(pid_t, fork) #endif + #if defined(__NR_fstat) LSS_INLINE _syscall2(int, fstat, int, f, struct kernel_stat*, b) + #endif LSS_INLINE _syscall2(int, fstatfs, int, f, struct kernel_statfs*, b) #if defined(__x86_64__) @@ -3426,6 +4117,8 @@ struct kernel_statfs { struct kernel_dirent64*, d, int, c) LSS_INLINE _syscall0(gid_t, getegid) LSS_INLINE _syscall0(uid_t, geteuid) + LSS_INLINE _syscall2(int, getitimer, int, w, + struct kernel_itimerval*, c) #if defined(__NR_getpgrp) LSS_INLINE _syscall0(pid_t, getpgrp) #endif @@ -3437,10 +4130,10 @@ struct kernel_statfs { gid_t *, e, gid_t *, s) LSS_INLINE _syscall3(int, getresuid, uid_t *, r, uid_t *, e, uid_t *, s) -#if !defined(__ARM_EABI__) + #if defined(__NR_getrlimit) LSS_INLINE _syscall2(int, getrlimit, int, r, struct kernel_rlimit*, l) -#endif + #endif LSS_INLINE _syscall1(pid_t, getsid, pid_t, p) LSS_INLINE _syscall0(pid_t, _gettid) LSS_INLINE _syscall2(pid_t, gettimeofday, struct kernel_timeval*, t, @@ -3547,6 +4240,9 @@ struct kernel_statfs { LSS_INLINE _syscall1(int, setfsuid, uid_t, u) LSS_INLINE _syscall1(int, setuid, uid_t, u) LSS_INLINE _syscall1(int, setgid, gid_t, g) + LSS_INLINE _syscall3(int, setitimer, int, w, + const struct kernel_itimerval*, n, + struct kernel_itimerval*, o) LSS_INLINE _syscall2(int, setpgid, pid_t, p, pid_t, g) LSS_INLINE _syscall3(int, setpriority, int, a, @@ -3555,8 +4251,10 @@ struct kernel_statfs { gid_t, e, gid_t, s) LSS_INLINE _syscall3(int, setresuid, uid_t, r, uid_t, e, uid_t, s) + #if defined(__NR_setrlimit) LSS_INLINE _syscall2(int, setrlimit, int, r, const struct kernel_rlimit*, l) + #endif LSS_INLINE _syscall0(pid_t, setsid) LSS_INLINE _syscall2(int, sigaltstack, const stack_t*, s, const stack_t*, o) @@ -3564,10 +4262,14 @@ struct kernel_statfs { LSS_INLINE _syscall1(int, sigreturn, unsigned long, u) #endif #if defined(__NR_stat) - // stat is polyfilled below when not available. + // stat and lstat are polyfilled below when not available. 
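+ // (On architectures such as aarch64, riscv, and loongarch64, which have
+ // no __NR_stat/__NR_lstat, see the newfstatat()- and statx()-based
+ // fallbacks near the end of this file.)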
LSS_INLINE _syscall2(int, stat, const char*, f, struct kernel_stat*, b) #endif + #if defined(__NR_lstat) + LSS_INLINE _syscall2(int, lstat, const char*, f, + struct kernel_stat*, b) + #endif LSS_INLINE _syscall2(int, statfs, const char*, f, struct kernel_statfs*, b) LSS_INLINE _syscall3(int, tgkill, pid_t, p, @@ -3586,23 +4288,6 @@ struct kernel_statfs { LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu, unsigned *, node, void *, unused) #endif - #if defined(__x86_64__) || \ - (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) - LSS_INLINE _syscall3(int, recvmsg, int, s, - struct kernel_msghdr*, m, int, f) - LSS_INLINE _syscall3(int, sendmsg, int, s, - const struct kernel_msghdr*, m, int, f) - LSS_INLINE _syscall6(int, sendto, int, s, - const void*, m, size_t, l, - int, f, - const struct kernel_sockaddr*, a, int, t) - LSS_INLINE _syscall2(int, shutdown, int, s, - int, h) - LSS_INLINE _syscall3(int, socket, int, d, - int, t, int, p) - LSS_INLINE _syscall4(int, socketpair, int, d, - int, t, int, p, int*, s) - #endif #if defined(__NR_fadvise64) #if defined(__x86_64__) /* Need to make sure loff_t isn't truncated to 32-bits under x32. */ @@ -3685,6 +4370,12 @@ struct kernel_statfs { const char *, p, struct kernel_stat*, b, int, f) #endif + #if defined(__NR_statx) + LSS_INLINE _syscall5(int, statx, int, d, + const char *, p, + int, f, int, m, + struct kernel_statx*, b) + #endif #if defined(__x86_64__) || defined(__s390x__) LSS_INLINE int LSS_NAME(getresgid32)(gid_t *rgid, gid_t *egid, @@ -3891,43 +4582,43 @@ struct kernel_statfs { LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set, int signum) { - if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { + if (signum < 1 || (size_t)signum > (8*sizeof(set->sig))) { LSS_ERRNO = EINVAL; return -1; } else { - set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] - |= 1UL << ((signum - 1) % (8*sizeof(set->sig[0]))); + set->sig[(size_t)(signum - 1)/(8*sizeof(set->sig[0]))] + |= 1UL << ((size_t)(signum - 1) % (8*sizeof(set->sig[0]))); return 0; } } LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set, int signum) { - if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { + if (signum < 1 || (size_t)signum > (8*sizeof(set->sig))) { LSS_ERRNO = EINVAL; return -1; } else { - set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] - &= ~(1UL << ((signum - 1) % (8*sizeof(set->sig[0])))); + set->sig[(size_t)(signum - 1)/(8*sizeof(set->sig[0]))] + &= ~(1UL << ((size_t)(signum - 1) % (8*sizeof(set->sig[0])))); return 0; } } LSS_INLINE int LSS_NAME(sigismember)(struct kernel_sigset_t *set, int signum) { - if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { + if (signum < 1 || (size_t)signum > (8*sizeof(set->sig))) { LSS_ERRNO = EINVAL; return -1; } else { - return !!(set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] & - (1UL << ((signum - 1) % (8*sizeof(set->sig[0]))))); + return !!(set->sig[(size_t)(signum - 1)/(8*sizeof(set->sig[0]))] & + (1UL << ((size_t)(signum - 1) % (8*sizeof(set->sig[0]))))); } } #if defined(__i386__) || \ defined(__ARM_ARCH_3__) || defined(__ARM_EABI__) || \ (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || \ defined(__PPC__) || \ - (defined(__s390__) && !defined(__s390x__)) + (defined(__s390__) && !defined(__s390x__)) || defined(__e2k__) #define __NR__sigaction __NR_sigaction #define __NR__sigpending __NR_sigpending #define __NR__sigsuspend __NR_sigsuspend @@ -4181,23 +4872,31 @@ struct kernel_statfs { LSS_SC_BODY(4, int, 8, d, type, protocol, sv); } #endif - #if defined(__ARM_EABI__) || defined (__aarch64__) + #if 
defined(__NR_recvmsg) LSS_INLINE _syscall3(ssize_t, recvmsg, int, s, struct kernel_msghdr*, msg, int, flags) + #endif + #if defined(__NR_sendmsg) LSS_INLINE _syscall3(ssize_t, sendmsg, int, s, const struct kernel_msghdr*, msg, int, flags) + #endif + #if defined(__NR_sendto) LSS_INLINE _syscall6(ssize_t, sendto, int, s, const void*, buf, size_t,len, int, flags, const struct kernel_sockaddr*, to, unsigned int, tolen) + #endif + #if defined(__NR_shutdown) LSS_INLINE _syscall2(int, shutdown, int, s, int, how) + #endif + #if defined(__NR_socket) LSS_INLINE _syscall3(int, socket, int, domain, int, type, int, protocol) + #endif + #if defined(__NR_socketpair) LSS_INLINE _syscall4(int, socketpair, int, d, int, type, int, protocol, int*, sv) #endif - #if defined(__i386__) || defined(__ARM_ARCH_3__) || \ - (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || \ - defined(__s390__) - #define __NR__socketcall __NR_socketcall + + #if defined(__NR_socketcall) LSS_INLINE _syscall2(int, _socketcall, int, c, va_list, a) LSS_INLINE int LSS_NAME(socketcall)(int op, ...) { @@ -4209,36 +4908,43 @@ struct kernel_statfs { return rc; } + # if !defined(__NR_recvmsg) LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg, int flags){ return (ssize_t)LSS_NAME(socketcall)(17, s, msg, flags); } - + # endif + # if !defined(__NR_sendmsg) LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s, const struct kernel_msghdr *msg, int flags) { return (ssize_t)LSS_NAME(socketcall)(16, s, msg, flags); } - + # endif + # if !defined(__NR_sendto) LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len, int flags, const struct kernel_sockaddr *to, unsigned int tolen) { return (ssize_t)LSS_NAME(socketcall)(11, s, buf, len, flags, to, tolen); } - + # endif + # if !defined(__NR_shutdown) LSS_INLINE int LSS_NAME(shutdown)(int s, int how) { return LSS_NAME(socketcall)(13, s, how); } - + # endif + # if !defined(__NR_socket) LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) { return LSS_NAME(socketcall)(1, domain, type, protocol); } - + # endif + # if !defined(__NR_socketpair) LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol, int sv[2]) { return LSS_NAME(socketcall)(8, d, type, protocol, sv); } + # endif #endif #if defined(__NR_fstatat64) LSS_INLINE _syscall4(int, fstatat64, int, d, @@ -4320,12 +5026,12 @@ struct kernel_statfs { va_start(ap, flags); new_address = va_arg(ap, void *); rc = LSS_NAME(_mremap)(old_address, old_size, new_size, - flags, new_address); + (unsigned long)flags, new_address); va_end(ap); return rc; } - LSS_INLINE int LSS_NAME(ptrace_detach)(pid_t pid) { + LSS_INLINE long LSS_NAME(ptrace_detach)(pid_t pid) { /* PTRACE_DETACH can sometimes forget to wake up the tracee and it * then sends job control signals to the real parent, rather than to * the tracer. We reduce the risk of this happening by starting a @@ -4336,7 +5042,8 @@ struct kernel_statfs { * detached. Large multi threaded apps can take a long time in the kernel * processing SIGCONT. 
*/ - int rc, err; + long rc; + int err; LSS_NAME(sched_yield)(); rc = LSS_NAME(ptrace)(PTRACE_DETACH, pid, (void *)0, (void *)0); err = LSS_ERRNO; @@ -4369,7 +5076,7 @@ struct kernel_statfs { LSS_SYSCALL_ARG(c), (uint64_t)(o)); } - LSS_INLINE int LSS_NAME(readahead)(int f, loff_t o, unsigned c) { + LSS_INLINE int LSS_NAME(readahead)(int f, loff_t o, size_t c) { LSS_BODY(3, int, readahead, LSS_SYSCALL_ARG(f), (uint64_t)(o), LSS_SYSCALL_ARG(c)); } @@ -4408,7 +5115,7 @@ struct kernel_statfs { unsigned, o2) LSS_INLINE _syscall5(ssize_t, _pwrite64, int, f, const void *, b, size_t, c, unsigned, o1, - long, o2) + unsigned, o2) LSS_INLINE _syscall4(int, _readahead, int, f, unsigned, o1, unsigned, o2, size_t, c) #endif @@ -4429,9 +5136,9 @@ struct kernel_statfs { return LSS_NAME(_pwrite64)(fd, buf, count, LSS_LLARG_PAD o.arg[0], o.arg[1]); } - LSS_INLINE int LSS_NAME(readahead)(int fd, loff_t off, int len) { + LSS_INLINE int LSS_NAME(readahead)(int fd, loff_t off, size_t count) { union { loff_t off; unsigned arg[2]; } o = { off }; - return LSS_NAME(_readahead)(fd, LSS_LLARG_PAD o.arg[0], o.arg[1], len); + return LSS_NAME(_readahead)(fd, LSS_LLARG_PAD o.arg[0], o.arg[1], count); } #endif #endif @@ -4486,10 +5193,79 @@ struct kernel_statfs { } #endif +#if defined(__NR_statx) + /* copy the contents of kernel_statx to the kernel_stat structure. */ + LSS_INLINE void LSS_NAME(cp_stat_statx)(struct kernel_stat *to, + struct kernel_statx *from) { + memset(to, 0, sizeof(struct kernel_stat)); + to->st_dev = (kernel_dev_t)((from->stx_dev_minor & 0xff) | + ((from->stx_dev_major & 0xfff) << 8) | + ((from->stx_dev_minor & ~0xffu) << 12)); + to->st_rdev = (kernel_dev_t)((from->stx_rdev_minor & 0xff) | + ((from->stx_rdev_major & 0xfff) << 8) | + ((from->stx_rdev_minor & ~0xffu) << 12)); + to->st_ino = (kernel_ino_t)from->stx_ino; + to->st_mode = (kernel_mode_t)from->stx_mode; + to->st_nlink = (kernel_nlink_t)from->stx_nlink; + to->st_uid = (kernel_uid_t)from->stx_uid; + to->st_gid = (kernel_gid_t)from->stx_gid; + to->st_atime_ = (kernel_time_t)(from->stx_atime.tv_sec); + to->st_atime_nsec_ = from->stx_atime.tv_nsec; + to->st_mtime_ = (kernel_time_t)(from->stx_mtime.tv_sec); + to->st_mtime_nsec_ = from->stx_mtime.tv_nsec; + to->st_ctime_ = (kernel_time_t)(from->stx_ctime.tv_sec); + to->st_ctime_nsec_ = from->stx_ctime.tv_nsec; + to->st_size = (kernel_off_t)(from->stx_size); + to->st_blocks = (kernel_blkcnt_t)(from->stx_blocks); + to->st_blksize = (kernel_blksize_t)from->stx_blksize; + } +#endif + +#if !defined(__NR_fstat) + LSS_INLINE int LSS_NAME(fstat)(int fd, + struct kernel_stat *buf) { + #if defined(__NR_newfstatat) + return LSS_NAME(newfstatat)(fd, "", buf, AT_EMPTY_PATH); + #elif defined(__NR_statx) + struct kernel_statx stx; + int flags = AT_NO_AUTOMOUNT | AT_EMPTY_PATH; + int mask = STATX_BASIC_STATS; + int res = LSS_NAME(statx)(fd, "", flags, mask, &stx); + LSS_NAME(cp_stat_statx)(buf, &stx); + return res; + #endif + } +#endif + #if !defined(__NR_stat) LSS_INLINE int LSS_NAME(stat)(const char *pathname, struct kernel_stat *buf) { - return LSS_NAME(newfstatat)(AT_FDCWD, pathname, buf, 0); + #if defined(__NR_newfstatat) + return LSS_NAME(newfstatat)(AT_FDCWD, pathname, buf, 0); + #elif defined(__NR_statx) + struct kernel_statx stx; + int flags = AT_NO_AUTOMOUNT | AT_STATX_SYNC_AS_STAT; + int mask = STATX_BASIC_STATS; + int res = LSS_NAME(statx)(AT_FDCWD, pathname, flags, mask, &stx); + LSS_NAME(cp_stat_statx)(buf, &stx); + return res; + #endif + } +#endif + +#if !defined(__NR_lstat) + LSS_INLINE int 
LSS_NAME(lstat)(const char *pathname, + struct kernel_stat *buf) { + #if defined(__NR_newfstatat) + return LSS_NAME(newfstatat)(AT_FDCWD, pathname, buf, AT_SYMLINK_NOFOLLOW); + #elif defined(__NR_statx) + struct kernel_statx stx; + int flags = AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW; + int mask = STATX_BASIC_STATS; + int res = LSS_NAME(statx)(AT_FDCWD, pathname, flags, mask, &stx); + LSS_NAME(cp_stat_statx)(buf, &stx); + return res; + #endif } #endif @@ -4503,7 +5279,7 @@ struct kernel_statfs { // TODO: define this in an arch-independant way instead of inlining the clone // syscall body. -# if defined(__aarch64__) +# if defined(__aarch64__) || defined(__riscv) || defined(__loongarch_lp64) LSS_INLINE pid_t LSS_NAME(fork)(void) { // No fork syscall on aarch64 - implement by means of the clone syscall. // Note that this does not reset glibc's cached view of the PID/TID, so diff --git a/third_party/lss/tests/Makefile b/third_party/lss/tests/Makefile index 60ec06e97db6..554813d8bb69 100644 --- a/third_party/lss/tests/Makefile +++ b/third_party/lss/tests/Makefile @@ -41,9 +41,13 @@ LDFLAGS += -static TESTS = \ fallocate \ - sigaction \ + getitimer \ getrandom \ + lstat \ + setitimer \ + sigaction \ sigtimedwait \ + stat \ unlink \ all: check @@ -51,22 +55,31 @@ all: check %_test: %.c test_skel.h $(top_srcdir)/linux_syscall_support.h $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o $@ $< +# Force building C as C++ code to improve compile-time coverage. +%_cc_test: %.c test_skel.h $(top_srcdir)/linux_syscall_support.h + $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) -o $@ $< + %_test: %.cc test_skel.h $(top_srcdir)/linux_syscall_support.h $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) -o $@ $< %_run: %_test @t=$(@:_run=_test); \ echo "./$$t"; \ - if ! env -i ./$$t; then \ + env -i ./$$t; \ + exit_status=$$?; \ + if [ $$exit_status = 77 ]; then \ + echo "SKIP: $$t"; \ + elif [ $$exit_status != 0 ]; then \ + echo "FAIL: $$t"; \ env -i strace -f -v ./$$t; \ echo "TRY: gdb -q -ex r -ex bt ./$$t"; \ exit 1; \ fi -ALL_TEST_TARGETS = $(TESTS:=_test) +ALL_TEST_TARGETS = $(TESTS:=_test) $(TESTS:=_cc_test) compile_tests: $(ALL_TEST_TARGETS) -ALL_RUN_TARGETS = $(TESTS:=_run) +ALL_RUN_TARGETS = $(TESTS:=_run) $(TESTS:=_cc_run) check: $(ALL_RUN_TARGETS) # The "tempfile" targets are the names we use with temp files. @@ -107,6 +120,7 @@ cross: "mips64-unknown-linux-gnu-gcc -mabi=n32" \ "s390-ibm-linux-gnu-gcc" \ "s390x-ibm-linux-gnu-gcc" \ + "loongarch64-unknown-linux-gnu-gcc" \ ; do \ cxx=`echo "$$cc" | sed 's:-gcc:-g++:'`; \ $(MAKE) --no-print-directory CC="$$cc" CXX="$$cxx" cross_compile; \ diff --git a/third_party/lss/tests/fallocate.c b/third_party/lss/tests/fallocate.c index c156e58986cb..f6589070f7f0 100644 --- a/third_party/lss/tests/fallocate.c +++ b/third_party/lss/tests/fallocate.c @@ -63,5 +63,7 @@ int main(int argc, char *argv[]) { #endif assert(st.st_size == offset + len); + sys_unlink(filename); + return 0; } diff --git a/third_party/lss/tests/getitimer.c b/third_party/lss/tests/getitimer.c new file mode 100644 index 000000000000..507948fea089 --- /dev/null +++ b/third_party/lss/tests/getitimer.c @@ -0,0 +1,84 @@ +/* Copyright 2022, Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "test_skel.h"
+
+int main(int argc, char *argv[]) {
+ // We need an invalid timer value. The assert()'s below should
+ // be static asserts, but those are not available in older C versions.
+#define kInvalidTimer 9999
+ assert(kInvalidTimer != ITIMER_REAL);
+ assert(kInvalidTimer != ITIMER_VIRTUAL);
+ assert(kInvalidTimer != ITIMER_PROF);
+
+ // This should fail with EINVAL.
+ struct kernel_itimerval curr_itimer;
+ assert(sys_getitimer(kInvalidTimer, &curr_itimer) == -1);
+ assert(errno == EINVAL);
+
+ // Create a read-only page.
+ size_t page_size = getpagesize();
+ void* read_only_page = sys_mmap(NULL, page_size, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ assert(read_only_page != MAP_FAILED);
+
+ // This should fail with EFAULT.
+ assert(sys_getitimer(ITIMER_REAL,
+ (struct kernel_itimerval*) read_only_page) == -1);
+ assert(errno == EFAULT);
+
+ // This should complete without an error.
+ assert(sys_getitimer(ITIMER_REAL, &curr_itimer) == 0);
+
+ // Set up a real time timer with a very long interval and value so that
+ // we do not need to handle SIGALRM in the test.
+ struct kernel_itimerval new_itimer;
+ const time_t kIntervalSec = 60 * 60 * 24 * 365; // One year.
+ const long kIntervalUSec = 123;
+ new_itimer.it_interval.tv_sec = kIntervalSec;
+ new_itimer.it_interval.tv_usec = kIntervalUSec;
+ new_itimer.it_value = new_itimer.it_interval;
+ assert(sys_setitimer(ITIMER_REAL, &new_itimer, NULL) == 0);
+
+ assert(sys_getitimer(ITIMER_REAL, &curr_itimer) == 0);
+ assert(kernel_timeval_eq(&curr_itimer.it_interval, &new_itimer.it_interval));
+
+ // Disable timer.
+ struct kernel_itimerval empty_itimer;
+ empty_itimer.it_interval.tv_sec = 0;
+ empty_itimer.it_interval.tv_usec = 0;
+ empty_itimer.it_value = empty_itimer.it_interval;
+ assert(sys_setitimer(ITIMER_REAL, &empty_itimer, NULL) == 0);
+
+ // We should read back an empty itimer.
+ assert(sys_getitimer(ITIMER_REAL, &curr_itimer) == 0);
+ assert(kernel_itimerval_eq(&curr_itimer, &empty_itimer));
+
+ return 0;
+}
diff --git a/third_party/lss/tests/lstat.c b/third_party/lss/tests/lstat.c
new file mode 100644
index 000000000000..33848a9817fe
--- /dev/null
+++ b/third_party/lss/tests/lstat.c
@@ -0,0 +1,97 @@
+/* Copyright 2021, Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "test_skel.h"
+
+int main(int argc, char *argv[]) {
+ int exit_status = 0;
+
+ // Get two unique paths to play with.
+ char foo[] = "tempfile.XXXXXX";
+ char bar[] = "tempfile.XXXXXX";
+ int fd_foo = mkstemp(foo);
+ int fd_bar = mkstemp(bar);
+ assert(fd_foo != -1);
+ assert(fd_bar != -1);
+
+ // Then delete foo.
+ assert(sys_unlink(foo) == 0);
+
+ // Now make foo a symlink to bar.
+ assert(symlink(bar, foo) == 0);
+
+ // Make sure the sys_stat() and sys_lstat() implementations return
+ // different information.
+
+ // We need to check our stat syscalls for EOVERFLOW, as sometimes the integer
+ // types used in the stat structures are too small to fit the actual value.
+ // E.g. on some systems st_ino is 32-bit, but some filesystems have 64-bit
+ // inodes.
+
+ struct kernel_stat lstat_info;
+ int rc = sys_lstat(foo, &lstat_info);
+ if (rc < 0 && errno == EOVERFLOW) {
+ // Bail out since we had an overflow in the stat structure.
+ exit_status = SKIP_TEST_EXIT_STATUS;
+ goto cleanup;
+ }
+ assert(rc == 0);
+
+ struct kernel_stat stat_info;
+ rc = sys_stat(foo, &stat_info);
+ if (rc < 0 && errno == EOVERFLOW) {
+ // Bail out since we had an overflow in the stat structure.
+ exit_status = SKIP_TEST_EXIT_STATUS;
+ goto cleanup;
+ }
+ assert(rc == 0);
+
+ struct kernel_stat bar_stat_info;
+ rc = sys_stat(bar, &bar_stat_info);
+ if (rc < 0 && errno == EOVERFLOW) {
+ // Bail out since we had an overflow in the stat structure.
+ exit_status = SKIP_TEST_EXIT_STATUS;
+ goto cleanup;
+ }
+ assert(rc == 0);
+
+ // lstat should produce information about a symlink.
+ assert((lstat_info.st_mode & S_IFMT) == S_IFLNK);
+
+ // stat-ing foo and bar should produce the same inode.
+ assert(stat_info.st_ino == bar_stat_info.st_ino);
+
+ // lstat-ing foo should give a different inode than stat-ing foo.
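+ // (foo is a symlink to bar, so stat() follows the link and reports
+ // bar's inode, while lstat() reports the inode of the symlink itself.)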
+  assert(stat_info.st_ino != lstat_info.st_ino);
+
+cleanup:
+  sys_unlink(foo);
+  sys_unlink(bar);
+  return exit_status;
+}
diff --git a/third_party/lss/tests/setitimer.c b/third_party/lss/tests/setitimer.c
new file mode 100644
index 000000000000..1bd71141de12
--- /dev/null
+++ b/third_party/lss/tests/setitimer.c
@@ -0,0 +1,90 @@
+/* Copyright 2022, Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "test_skel.h"
+
+int main(int argc, char *argv[]) {
+  // We need an invalid timer value. The assert()s below should be
+  // static asserts, but static_assert is not available in older C versions.
+#define kInvalidTimer 9999
+  assert(kInvalidTimer != ITIMER_REAL);
+  assert(kInvalidTimer != ITIMER_VIRTUAL);
+  assert(kInvalidTimer != ITIMER_PROF);
+
+  // An invalid timer returns EINVAL.
+  assert(sys_setitimer(kInvalidTimer, NULL, NULL) == -1);
+  assert(errno == EINVAL);
+
+  const int kSignal = SIGALRM;
+  const size_t kSigsetSize = sizeof(struct kernel_sigset_t);
+
+  // Block SIGALRM.
+  struct kernel_sigset_t sigalarm_only;
+  struct kernel_sigset_t old_sigset;
+  assert(sys_sigemptyset(&sigalarm_only) == 0);
+  assert(sys_sigaddset(&sigalarm_only, kSignal) == 0);
+  assert(sys_rt_sigprocmask(SIG_BLOCK, &sigalarm_only, &old_sigset,
+                            kSigsetSize) == 0);
+
+  // Set up a real time timer.
+  struct kernel_itimerval new_itimer = {};
+  const long kIntervalUSec = 123;
+  new_itimer.it_interval.tv_sec = 0;
+  new_itimer.it_interval.tv_usec = kIntervalUSec;
+  new_itimer.it_value = new_itimer.it_interval;
+  assert(sys_setitimer(ITIMER_REAL, &new_itimer, NULL) == 0);
+
+  // Wait for the alarm.
+  struct timespec timeout;
+  const unsigned long kNanoSecsPerSec = 1000000000;
+  const unsigned long kNanoSecsPerMicroSec = 1000;
+
+  // Use a timeout of 3 times the timer interval.
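+  // This gives the timer ample slack to fire before sigtimedwait() gives
+  // up, even on a slow or heavily loaded machine.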
+  unsigned long duration_ns = kIntervalUSec * kNanoSecsPerMicroSec * 3;
+  timeout.tv_sec = duration_ns / kNanoSecsPerSec;
+  timeout.tv_nsec = duration_ns % kNanoSecsPerSec;
+
+  int sig;
+  do {
+    sig = sys_sigtimedwait(&sigalarm_only, NULL, &timeout);
+  } while (sig == -1 && errno == EINTR);
+  assert(sig == kSignal);
+
+  // Disable the timer and check that the old timer value is saved.
+  struct kernel_itimerval empty_itimer = {};
+  struct kernel_itimerval old_itimer;
+  empty_itimer.it_interval.tv_sec = 0;
+  empty_itimer.it_interval.tv_usec = 0;
+  empty_itimer.it_value = empty_itimer.it_interval;
+
+  assert(sys_setitimer(ITIMER_REAL, &empty_itimer, &old_itimer) == 0);
+  assert(kernel_timeval_eq(&old_itimer.it_interval, &new_itimer.it_interval));
+
+  return 0;
+}
diff --git a/third_party/lss/tests/stat.c b/third_party/lss/tests/stat.c
new file mode 100644
index 000000000000..48cac620cbcd
--- /dev/null
+++ b/third_party/lss/tests/stat.c
@@ -0,0 +1,67 @@
+/* Copyright 2021, Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "test_skel.h"
+
+int main(int argc, char *argv[]) {
+  int exit_status = 0;
+
+  // Get a unique path to play with.
+  char foo[] = "tempfile.XXXXXX";
+  int fd_foo = mkstemp(foo);
+  assert(fd_foo != -1);
+
+  // Make sure it exists.
+  assert(access(foo, F_OK) == 0);
+
+  // Make sure sys_stat() and a libc stat() implementation return the same
+  // information.
+  struct stat libc_stat;
+  assert(stat(foo, &libc_stat) == 0);
+
+  struct kernel_stat raw_stat;
+  // We need to check our stat syscall for EOVERFLOW, as sometimes the integer
+  // types used in the stat structures are too small to fit the actual value.
+  // E.g. on some systems st_ino is 32-bit, but some filesystems have 64-bit
+  // inodes.
+  int rc = sys_stat(foo, &raw_stat);
+  if (rc < 0 && errno == EOVERFLOW) {
+    // Bail out since we had an overflow in the stat structure.
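+    // (The libc stat() may still succeed on the same file because it can
+    // route through a 64-bit variant of the syscall internally.)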
+ exit_status = SKIP_TEST_EXIT_STATUS; + goto cleanup; + } + assert(rc == 0); + + assert(libc_stat.st_ino == raw_stat.st_ino); + + +cleanup: + sys_unlink(foo); + return exit_status; +} diff --git a/third_party/lss/tests/test_skel.h b/third_party/lss/tests/test_skel.h index 13d8beab90f2..024906cc9b55 100644 --- a/third_party/lss/tests/test_skel.h +++ b/third_party/lss/tests/test_skel.h @@ -54,11 +54,14 @@ #include #include #include +#include #include #include "linux_syscall_support.h" +#define SKIP_TEST_EXIT_STATUS 77 + void assert_buffers_eq_len(const void *buf1, const void *buf2, size_t len) { const uint8_t *u8_1 = (const uint8_t *)buf1; const uint8_t *u8_2 = (const uint8_t *)buf2; @@ -70,3 +73,17 @@ void assert_buffers_eq_len(const void *buf1, const void *buf2, size_t len) { } } #define assert_buffers_eq(obj1, obj2) assert_buffers_eq_len(obj1, obj2, sizeof(*obj1)) + +// Returns true iff pointed timevals are equal. +static inline bool kernel_timeval_eq(const struct kernel_timeval* lhs, + const struct kernel_timeval* rhs) { + return (lhs->tv_sec == rhs->tv_sec) && (lhs->tv_usec == rhs->tv_usec); +} + +// Returns true iff pointed itimervals are equal. +static inline bool kernel_itimerval_eq(const struct kernel_itimerval* lhs, + const struct kernel_itimerval* rhs) { + return kernel_timeval_eq(&lhs->it_interval, &rhs->it_interval) && + kernel_timeval_eq(&lhs->it_value, &rhs->it_value); +} + diff --git a/third_party/ply/DIR_METADATA b/third_party/ply/DIR_METADATA new file mode 100644 index 000000000000..983ea0140f45 --- /dev/null +++ b/third_party/ply/DIR_METADATA @@ -0,0 +1,3 @@ +monorail { + component: "Tools" +} diff --git a/third_party/ply/METADATA b/third_party/ply/METADATA index 3ff16701952c..63d375c159bb 100644 --- a/third_party/ply/METADATA +++ b/third_party/ply/METADATA @@ -6,12 +6,12 @@ description: third_party { identifier { type: "ChromiumVersion" - value: "74.0.3729.169" # from https://chromereleases.googleblog.com/2019/05/stable-channel-update-for-desktop_21.html + value: "114.0.5735.358" # from https://chromereleases.googleblog.com/2024/03/long-term-support-channel-update-for_26.html } identifier { type: "Git" value: "https://chromium.googlesource.com/chromium/src.git" - version: "84108231f6e6e0772fb9a4643679ce76aa771e67" + version: "1759c6ae9316996b9f150c0ce9d0ca78a3d15c02" } identifier { type: "UpstreamSubdir" diff --git a/third_party/ply/OWNERS b/third_party/ply/OWNERS new file mode 100644 index 000000000000..93a641dd7a3e --- /dev/null +++ b/third_party/ply/OWNERS @@ -0,0 +1,3 @@ +rockot@google.com + +file://tools/idl_parser/OWNERS diff --git a/third_party/ply/README.chromium b/third_party/ply/README.chromium index 6466e5428a5e..45e10b87b6b6 100644 --- a/third_party/ply/README.chromium +++ b/third_party/ply/README.chromium @@ -1,21 +1,25 @@ Name: PLY (Python Lex-Yacc) -Current version: 3.4 -URL: http://www.dabeaz.com/ply/ply-3.4.tar.gz +Current version: 3.11 +URL: http://www.dabeaz.com/ply/ply-3.11.tar.gz License: BSD License File: LICENSE Security Critical: no -Version: 3.4 +Version: 3.11 -This directory contains a copy of these ply-3.4 components: +PLY is used by (at least) the Mojo python bindings, the PPAPI +IDL generator, and the Blink IDL generator. -README ply-3.4/README -Sources ply-3.4/ply/__init__.py - ply-3.4/ply/lex.py - ply-3.4/ply/yacc.py +This directory contains a copy of these ply-3.11 components: + +README ply-3.11/README.md +Sources ply-3.11/ply/__init__.py + ply-3.11/ply/lex.py + ply-3.11/ply/yacc.py The license is in LICENSE. 
-Modifications made with initial commit: +Modifications made: - Added the file README.chromium (this file) - Applies license.patch +- Added ply.gni to list sources diff --git a/third_party/ply/README b/third_party/ply/README.md similarity index 64% rename from third_party/ply/README rename to third_party/ply/README.md index f384d1a93853..05df32a5b9fc 100644 --- a/third_party/ply/README +++ b/third_party/ply/README.md @@ -1,6 +1,8 @@ -PLY (Python Lex-Yacc) Version 3.4 +# PLY (Python Lex-Yacc) Version 3.11 -Copyright (C) 2001-2011, +[![Build Status](https://travis-ci.org/dabeaz/ply.svg?branch=master)](https://travis-ci.org/dabeaz/ply) + +Copyright (C) 2001-2018 David M. Beazley (Dabeaz LLC) All rights reserved. @@ -96,7 +98,7 @@ A simple example is found at the end of this document Requirements ============ -PLY requires the use of Python 2.2 or greater. However, you should +PLY requires the use of Python 2.6 or greater. However, you should use the latest Python release if possible. It should work on just about any platform. PLY has been tested with both CPython and Jython. It also seems to work with IronPython. @@ -112,7 +114,11 @@ book "Compilers : Principles, Techniques, and Tools" by Aho, Sethi, and Ullman. The topics found in "Lex & Yacc" by Levine, Mason, and Brown may also be useful. -A Google group for PLY can be found at +The GitHub page for PLY can be found at: + + https://github.com/dabeaz/ply + +An old and relatively inactive discussion group for PLY is found at: http://groups.google.com/group/ply-hack @@ -130,7 +136,7 @@ and testing a revised LALR(1) implementation for PLY-2.0. Special Note for PLY-3.0 ======================== PLY-3.0 the first PLY release to support Python 3. However, backwards -compatibility with Python 2.2 is still preserved. PLY provides dual +compatibility with Python 2.6 is still preserved. PLY provides dual Python 2/3 compatibility by restricting its implementation to a common subset of basic language features. You should not convert PLY using 2to3--it is not necessary and may in fact break the implementation. @@ -141,109 +147,109 @@ Example Here is a simple example showing a PLY implementation of a calculator with variables. -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. 
-# ----------------------------------------------------------------------------- - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - t.value = int(t.value) - return t - -# Ignored characters -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex() - -# Precedence rules for the arithmetic operators -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names (for storing variables) -names = { } - -def p_statement_assign(p): - 'statement : NAME EQUALS expression' - names[p[1]] = p[3] - -def p_statement_expr(p): - 'statement : expression' - print(p[1]) - -def p_expression_binop(p): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - -def p_expression_uminus(p): - 'expression : MINUS expression %prec UMINUS' - p[0] = -p[2] - -def p_expression_group(p): - 'expression : LPAREN expression RPAREN' - p[0] = p[2] - -def p_expression_number(p): - 'expression : NUMBER' - p[0] = p[1] - -def p_expression_name(p): - 'expression : NAME' - try: - p[0] = names[p[1]] - except LookupError: - print("Undefined name '%s'" % p[1]) - p[0] = 0 - -def p_error(p): - print("Syntax error at '%s'" % p.value) - -import ply.yacc as yacc -yacc.yacc() - -while 1: - try: - s = raw_input('calc > ') # use input() on Python 3 - except EOFError: - break - yacc.parse(s) + # ----------------------------------------------------------------------------- + # calc.py + # + # A simple calculator with variables. 
+ # ----------------------------------------------------------------------------- + + tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + + # Tokens + + t_PLUS = r'\+' + t_MINUS = r'-' + t_TIMES = r'\*' + t_DIVIDE = r'/' + t_EQUALS = r'=' + t_LPAREN = r'\(' + t_RPAREN = r'\)' + t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + + def t_NUMBER(t): + r'\d+' + t.value = int(t.value) + return t + + # Ignored characters + t_ignore = " \t" + + def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + + def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + + # Build the lexer + import ply.lex as lex + lex.lex() + + # Precedence rules for the arithmetic operators + precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + + # dictionary of names (for storing variables) + names = { } + + def p_statement_assign(p): + 'statement : NAME EQUALS expression' + names[p[1]] = p[3] + + def p_statement_expr(p): + 'statement : expression' + print(p[1]) + + def p_expression_binop(p): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if p[2] == '+' : p[0] = p[1] + p[3] + elif p[2] == '-': p[0] = p[1] - p[3] + elif p[2] == '*': p[0] = p[1] * p[3] + elif p[2] == '/': p[0] = p[1] / p[3] + + def p_expression_uminus(p): + 'expression : MINUS expression %prec UMINUS' + p[0] = -p[2] + + def p_expression_group(p): + 'expression : LPAREN expression RPAREN' + p[0] = p[2] + + def p_expression_number(p): + 'expression : NUMBER' + p[0] = p[1] + + def p_expression_name(p): + 'expression : NAME' + try: + p[0] = names[p[1]] + except LookupError: + print("Undefined name '%s'" % p[1]) + p[0] = 0 + + def p_error(p): + print("Syntax error at '%s'" % p.value) + + import ply.yacc as yacc + yacc.yacc() + + while True: + try: + s = raw_input('calc > ') # use input() on Python 3 + except EOFError: + break + yacc.parse(s) Bug Reports and Patches @@ -252,12 +258,10 @@ My goal with PLY is to simply have a decent lex/yacc implementation for Python. As a general rule, I don't spend huge amounts of time working on it unless I receive very specific bug reports and/or patches to fix problems. I also try to incorporate submitted feature -requests and enhancements into each new version. To contact me about -bugs and/or new features, please send email to dave@dabeaz.com. - -In addition there is a Google group for discussing PLY related issues at - - http://groups.google.com/group/ply-hack +requests and enhancements into each new version. Please visit the PLY +github page at https://github.com/dabeaz/ply to submit issues and pull +requests. To contact me about bugs and/or new features, please send +email to dave@dabeaz.com. -- Dave diff --git a/third_party/ply/__init__.py b/third_party/ply/__init__.py index f3da03eade07..e48880f50fbc 100644 --- a/third_party/ply/__init__.py +++ b/third_party/ply/__init__.py @@ -1,9 +1,9 @@ # PLY package # Author: David Beazley (dave@dabeaz.com) # ----------------------------------------------------------------------------- -# ply: yacc.py +# ply: __init__.py # -# Copyright (C) 2001-2011, +# Copyright (C) 2001-2018 # David M. Beazley (Dabeaz LLC) # All rights reserved. # @@ -33,4 +33,5 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ----------------------------------------------------------------------------- +__version__ = '3.11' __all__ = ['lex','yacc'] diff --git a/third_party/ply/lex.py b/third_party/ply/lex.py index bd32da932762..f95bcdbf1bb5 100644 --- a/third_party/ply/lex.py +++ b/third_party/ply/lex.py @@ -1,22 +1,22 @@ # ----------------------------------------------------------------------------- # ply: lex.py # -# Copyright (C) 2001-2011, +# Copyright (C) 2001-2018 # David M. Beazley (Dabeaz LLC) # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: -# +# # * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. +# and/or other materials provided with the distribution. # * Neither the name of the David Beazley or Dabeaz LLC may be used to # endorse or promote products derived from this software without -# specific prior written permission. +# specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT @@ -31,10 +31,15 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ----------------------------------------------------------------------------- -__version__ = "3.4" -__tabversion__ = "3.2" # Version of table file used +__version__ = '3.11' +__tabversion__ = '3.10' -import re, sys, types, copy, os +import re +import sys +import types +import copy +import os +import inspect # This tuple contains known string types try: @@ -44,59 +49,55 @@ # Python 3.0 StringTypes = (str, bytes) -# Extract the code attribute of a function. Different implementations -# are for Python 2/3 compatibility. - -if sys.version_info[0] < 3: - def func_code(f): - return f.func_code -else: - def func_code(f): - return f.__code__ - # This regular expression is used to match valid token names _is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') # Exception thrown when invalid token encountered and no default error # handler is defined. - class LexError(Exception): - def __init__(self,message,s): - self.args = (message,) - self.text = s + def __init__(self, message, s): + self.args = (message,) + self.text = s + # Token class. This class is used to represent the tokens produced. class LexToken(object): def __str__(self): - return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos) + return 'LexToken(%s,%r,%d,%d)' % (self.type, self.value, self.lineno, self.lexpos) + def __repr__(self): return str(self) -# This object is a stand-in for a logging object created by the -# logging module. + +# This object is a stand-in for a logging object created by the +# logging module. 
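+# (It implements only the methods lex.py actually uses: critical, warning,
+# and error, with info and debug aliased to critical below.)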
class PlyLogger(object): - def __init__(self,f): + def __init__(self, f): self.f = f - def critical(self,msg,*args,**kwargs): - self.f.write((msg % args) + "\n") - def warning(self,msg,*args,**kwargs): - self.f.write("WARNING: "+ (msg % args) + "\n") + def critical(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') + + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') - def error(self,msg,*args,**kwargs): - self.f.write("ERROR: " + (msg % args) + "\n") + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') info = critical debug = critical + # Null logger is used when no output is generated. Does nothing. class NullLogger(object): - def __getattribute__(self,name): + def __getattribute__(self, name): return self - def __call__(self,*args,**kwargs): + + def __call__(self, *args, **kwargs): return self + # ----------------------------------------------------------------------------- # === Lexing Engine === # @@ -114,31 +115,33 @@ def __call__(self,*args,**kwargs): class Lexer: def __init__(self): self.lexre = None # Master regular expression. This is a list of - # tuples (re,findex) where re is a compiled + # tuples (re, findex) where re is a compiled # regular expression and findex is a list # mapping regex group numbers to rules self.lexretext = None # Current regular expression strings self.lexstatere = {} # Dictionary mapping lexer states to master regexs self.lexstateretext = {} # Dictionary mapping lexer states to regex strings self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names - self.lexstate = "INITIAL" # Current lexer state + self.lexstate = 'INITIAL' # Current lexer state self.lexstatestack = [] # Stack of lexer states self.lexstateinfo = None # State information self.lexstateignore = {} # Dictionary of ignored characters for each state self.lexstateerrorf = {} # Dictionary of error functions for each state + self.lexstateeoff = {} # Dictionary of eof functions for each state self.lexreflags = 0 # Optional re compile flags self.lexdata = None # Actual input data (as a string) self.lexpos = 0 # Current position in input text self.lexlen = 0 # Length of the input text self.lexerrorf = None # Error rule (if any) + self.lexeoff = None # EOF rule (if any) self.lextokens = None # List of valid tokens - self.lexignore = "" # Ignored characters - self.lexliterals = "" # Literal characters that can be passed through + self.lexignore = '' # Ignored characters + self.lexliterals = '' # Literal characters that can be passed through self.lexmodule = None # Module self.lineno = 1 # Current line number - self.lexoptimize = 0 # Optimized mode + self.lexoptimize = False # Optimized mode - def clone(self,object=None): + def clone(self, object=None): c = copy.copy(self) # If the object parameter has been supplied, it means we are attaching the @@ -146,113 +149,110 @@ def clone(self,object=None): # the lexstatere and lexstateerrorf tables. 
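+        # Bound methods captured in those tables still reference the old
+        # instance, so they are re-resolved against the new object via
+        # getattr() below.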
if object: - newtab = { } + newtab = {} for key, ritem in self.lexstatere.items(): newre = [] for cre, findex in ritem: - newfindex = [] - for f in findex: - if not f or not f[0]: - newfindex.append(f) - continue - newfindex.append((getattr(object,f[0].__name__),f[1])) - newre.append((cre,newfindex)) + newfindex = [] + for f in findex: + if not f or not f[0]: + newfindex.append(f) + continue + newfindex.append((getattr(object, f[0].__name__), f[1])) + newre.append((cre, newfindex)) newtab[key] = newre c.lexstatere = newtab - c.lexstateerrorf = { } + c.lexstateerrorf = {} for key, ef in self.lexstateerrorf.items(): - c.lexstateerrorf[key] = getattr(object,ef.__name__) + c.lexstateerrorf[key] = getattr(object, ef.__name__) c.lexmodule = object return c # ------------------------------------------------------------ # writetab() - Write lexer information to a table file # ------------------------------------------------------------ - def writetab(self,tabfile,outputdir=""): - if isinstance(tabfile,types.ModuleType): - return - basetabfilename = tabfile.split(".")[-1] - filename = os.path.join(outputdir,basetabfilename)+".py" - tf = open(filename,"w") - tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__)) - tf.write("_tabversion = %s\n" % repr(__version__)) - tf.write("_lextokens = %s\n" % repr(self.lextokens)) - tf.write("_lexreflags = %s\n" % repr(self.lexreflags)) - tf.write("_lexliterals = %s\n" % repr(self.lexliterals)) - tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo)) - - tabre = { } - # Collect all functions in the initial state - initial = self.lexstatere["INITIAL"] - initialfuncs = [] - for part in initial: - for f in part[1]: - if f and f[0]: - initialfuncs.append(f) - - for key, lre in self.lexstatere.items(): - titem = [] - for i in range(len(lre)): - titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1],self.lexstaterenames[key][i]))) - tabre[key] = titem - - tf.write("_lexstatere = %s\n" % repr(tabre)) - tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore)) - - taberr = { } - for key, ef in self.lexstateerrorf.items(): - if ef: - taberr[key] = ef.__name__ - else: - taberr[key] = None - tf.write("_lexstateerrorf = %s\n" % repr(taberr)) - tf.close() + def writetab(self, lextab, outputdir=''): + if isinstance(lextab, types.ModuleType): + raise IOError("Won't overwrite existing lextab module") + basetabmodule = lextab.split('.')[-1] + filename = os.path.join(outputdir, basetabmodule) + '.py' + with open(filename, 'w') as tf: + tf.write('# %s.py. This file automatically created by PLY (version %s). 
Don\'t edit!\n' % (basetabmodule, __version__)) + tf.write('_tabversion = %s\n' % repr(__tabversion__)) + tf.write('_lextokens = set(%s)\n' % repr(tuple(sorted(self.lextokens)))) + tf.write('_lexreflags = %s\n' % repr(int(self.lexreflags))) + tf.write('_lexliterals = %s\n' % repr(self.lexliterals)) + tf.write('_lexstateinfo = %s\n' % repr(self.lexstateinfo)) + + # Rewrite the lexstatere table, replacing function objects with function names + tabre = {} + for statename, lre in self.lexstatere.items(): + titem = [] + for (pat, func), retext, renames in zip(lre, self.lexstateretext[statename], self.lexstaterenames[statename]): + titem.append((retext, _funcs_to_names(func, renames))) + tabre[statename] = titem + + tf.write('_lexstatere = %s\n' % repr(tabre)) + tf.write('_lexstateignore = %s\n' % repr(self.lexstateignore)) + + taberr = {} + for statename, ef in self.lexstateerrorf.items(): + taberr[statename] = ef.__name__ if ef else None + tf.write('_lexstateerrorf = %s\n' % repr(taberr)) + + tabeof = {} + for statename, ef in self.lexstateeoff.items(): + tabeof[statename] = ef.__name__ if ef else None + tf.write('_lexstateeoff = %s\n' % repr(tabeof)) # ------------------------------------------------------------ # readtab() - Read lexer information from a tab file # ------------------------------------------------------------ - def readtab(self,tabfile,fdict): - if isinstance(tabfile,types.ModuleType): + def readtab(self, tabfile, fdict): + if isinstance(tabfile, types.ModuleType): lextab = tabfile else: - if sys.version_info[0] < 3: - exec("import %s as lextab" % tabfile) - else: - env = { } - exec("import %s as lextab" % tabfile, env,env) - lextab = env['lextab'] + exec('import %s' % tabfile) + lextab = sys.modules[tabfile] - if getattr(lextab,"_tabversion","0.0") != __version__: - raise ImportError("Inconsistent PLY version") + if getattr(lextab, '_tabversion', '0.0') != __tabversion__: + raise ImportError('Inconsistent PLY version') self.lextokens = lextab._lextokens self.lexreflags = lextab._lexreflags self.lexliterals = lextab._lexliterals + self.lextokens_all = self.lextokens | set(self.lexliterals) self.lexstateinfo = lextab._lexstateinfo self.lexstateignore = lextab._lexstateignore - self.lexstatere = { } - self.lexstateretext = { } - for key,lre in lextab._lexstatere.items(): - titem = [] - txtitem = [] - for i in range(len(lre)): - titem.append((re.compile(lre[i][0],lextab._lexreflags | re.VERBOSE),_names_to_funcs(lre[i][1],fdict))) - txtitem.append(lre[i][0]) - self.lexstatere[key] = titem - self.lexstateretext[key] = txtitem - self.lexstateerrorf = { } - for key,ef in lextab._lexstateerrorf.items(): - self.lexstateerrorf[key] = fdict[ef] + self.lexstatere = {} + self.lexstateretext = {} + for statename, lre in lextab._lexstatere.items(): + titem = [] + txtitem = [] + for pat, func_name in lre: + titem.append((re.compile(pat, lextab._lexreflags), _names_to_funcs(func_name, fdict))) + + self.lexstatere[statename] = titem + self.lexstateretext[statename] = txtitem + + self.lexstateerrorf = {} + for statename, ef in lextab._lexstateerrorf.items(): + self.lexstateerrorf[statename] = fdict[ef] + + self.lexstateeoff = {} + for statename, ef in lextab._lexstateeoff.items(): + self.lexstateeoff[statename] = fdict[ef] + self.begin('INITIAL') # ------------------------------------------------------------ # input() - Push a new string into the lexer # ------------------------------------------------------------ - def input(self,s): + def input(self, s): # Pull off the first character to see 
if s looks like a string c = s[:1] - if not isinstance(c,StringTypes): - raise ValueError("Expected a string") + if not isinstance(c, StringTypes): + raise ValueError('Expected a string') self.lexdata = s self.lexpos = 0 self.lexlen = len(s) @@ -260,19 +260,20 @@ def input(self,s): # ------------------------------------------------------------ # begin() - Changes the lexing state # ------------------------------------------------------------ - def begin(self,state): - if not state in self.lexstatere: - raise ValueError("Undefined state") + def begin(self, state): + if state not in self.lexstatere: + raise ValueError('Undefined state') self.lexre = self.lexstatere[state] self.lexretext = self.lexstateretext[state] - self.lexignore = self.lexstateignore.get(state,"") - self.lexerrorf = self.lexstateerrorf.get(state,None) + self.lexignore = self.lexstateignore.get(state, '') + self.lexerrorf = self.lexstateerrorf.get(state, None) + self.lexeoff = self.lexstateeoff.get(state, None) self.lexstate = state # ------------------------------------------------------------ # push_state() - Changes the lexing state and saves old on stack # ------------------------------------------------------------ - def push_state(self,state): + def push_state(self, state): self.lexstatestack.append(self.lexstate) self.begin(state) @@ -291,7 +292,7 @@ def current_state(self): # ------------------------------------------------------------ # skip() - Skip ahead n characters # ------------------------------------------------------------ - def skip(self,n): + def skip(self, n): self.lexpos += n # ------------------------------------------------------------ @@ -315,9 +316,10 @@ def token(self): continue # Look for a regular expression match - for lexre,lexindexfunc in self.lexre: - m = lexre.match(lexdata,lexpos) - if not m: continue + for lexre, lexindexfunc in self.lexre: + m = lexre.match(lexdata, lexpos) + if not m: + continue # Create a token for return tok = LexToken() @@ -326,16 +328,16 @@ def token(self): tok.lexpos = lexpos i = m.lastindex - func,tok.type = lexindexfunc[i] + func, tok.type = lexindexfunc[i] if not func: - # If no token type was set, it's an ignored token - if tok.type: - self.lexpos = m.end() - return tok - else: - lexpos = m.end() - break + # If no token type was set, it's an ignored token + if tok.type: + self.lexpos = m.end() + return tok + else: + lexpos = m.end() + break lexpos = m.end() @@ -355,10 +357,10 @@ def token(self): # Verify type of the token. If not in the token map, raise an error if not self.lexoptimize: - if not newtok.type in self.lextokens: + if newtok.type not in self.lextokens_all: raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( - func_code(func).co_filename, func_code(func).co_firstlineno, - func.__name__, newtok.type),lexdata[lexpos:]) + func.__code__.co_filename, func.__code__.co_firstlineno, + func.__name__, newtok.type), lexdata[lexpos:]) return newtok else: @@ -377,7 +379,7 @@ def token(self): tok = LexToken() tok.value = self.lexdata[lexpos:] tok.lineno = self.lineno - tok.type = "error" + tok.type = 'error' tok.lexer = self tok.lexpos = lexpos self.lexpos = lexpos @@ -386,15 +388,27 @@ def token(self): # Error method didn't change text position at all. This is an error. raise LexError("Scanning error. 
Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) lexpos = self.lexpos - if not newtok: continue + if not newtok: + continue return newtok self.lexpos = lexpos - raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:]) + raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos), lexdata[lexpos:]) + + if self.lexeoff: + tok = LexToken() + tok.type = 'eof' + tok.value = '' + tok.lineno = self.lineno + tok.lexpos = lexpos + tok.lexer = self + self.lexpos = lexpos + newtok = self.lexeoff(tok) + return newtok self.lexpos = lexpos + 1 if self.lexdata is None: - raise RuntimeError("No input string given with input()") + raise RuntimeError('No input string given with input()') return None # Iterator interface @@ -416,6 +430,15 @@ def next(self): # and build a Lexer object from it. # ----------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- +# _get_regex(func) +# +# Returns the regular expression assigned to a function either as a doc string +# or as a .regex attribute attached by the @TOKEN decorator. +# ----------------------------------------------------------------------------- +def _get_regex(func): + return getattr(func, 'regex', func.__doc__) + # ----------------------------------------------------------------------------- # get_caller_module_dict() # @@ -423,21 +446,12 @@ def next(self): # a caller further down the call stack. This is used to get the environment # associated with the yacc() call if none was provided. # ----------------------------------------------------------------------------- - def get_caller_module_dict(levels): - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - while levels > 0: - f = f.f_back - levels -= 1 - ldict = f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - - return ldict + f = sys._getframe(levels) + ldict = f.f_globals.copy() + if f.f_globals != f.f_locals: + ldict.update(f.f_locals) + return ldict # ----------------------------------------------------------------------------- # _funcs_to_names() @@ -445,14 +459,13 @@ def get_caller_module_dict(levels): # Given a list of regular expression functions, this converts it to a list # suitable for output to a table file # ----------------------------------------------------------------------------- - -def _funcs_to_names(funclist,namelist): +def _funcs_to_names(funclist, namelist): result = [] - for f,name in zip(funclist,namelist): - if f and f[0]: - result.append((name, f[1])) - else: - result.append(f) + for f, name in zip(funclist, namelist): + if f and f[0]: + result.append((name, f[1])) + else: + result.append(f) return result # ----------------------------------------------------------------------------- @@ -461,15 +474,14 @@ def _funcs_to_names(funclist,namelist): # Given a list of regular expression function names, this converts it back to # functions. 
# ----------------------------------------------------------------------------- - -def _names_to_funcs(namelist,fdict): - result = [] - for n in namelist: - if n and n[0]: - result.append((fdict[n[0]],n[1])) - else: - result.append(n) - return result +def _names_to_funcs(namelist, fdict): + result = [] + for n in namelist: + if n and n[0]: + result.append((fdict[n[0]], n[1])) + else: + result.append(n) + return result # ----------------------------------------------------------------------------- # _form_master_re() @@ -478,36 +490,37 @@ def _names_to_funcs(namelist,fdict): # form the master regular expression. Given limitations in the Python re # module, it may be necessary to break the master regex into separate expressions. # ----------------------------------------------------------------------------- - -def _form_master_re(relist,reflags,ldict,toknames): - if not relist: return [] - regex = "|".join(relist) +def _form_master_re(relist, reflags, ldict, toknames): + if not relist: + return [] + regex = '|'.join(relist) try: - lexre = re.compile(regex,re.VERBOSE | reflags) + lexre = re.compile(regex, reflags) # Build the index to function map for the matching engine - lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1) + lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) lexindexnames = lexindexfunc[:] - for f,i in lexre.groupindex.items(): - handle = ldict.get(f,None) + for f, i in lexre.groupindex.items(): + handle = ldict.get(f, None) if type(handle) in (types.FunctionType, types.MethodType): - lexindexfunc[i] = (handle,toknames[f]) + lexindexfunc[i] = (handle, toknames[f]) lexindexnames[i] = f elif handle is not None: lexindexnames[i] = f - if f.find("ignore_") > 0: - lexindexfunc[i] = (None,None) + if f.find('ignore_') > 0: + lexindexfunc[i] = (None, None) else: lexindexfunc[i] = (None, toknames[f]) - - return [(lexre,lexindexfunc)],[regex],[lexindexnames] + + return [(lexre, lexindexfunc)], [regex], [lexindexnames] except Exception: m = int(len(relist)/2) - if m == 0: m = 1 - llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames) - rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames) - return llist+rlist, lre+rre, lnames+rnames + if m == 0: + m = 1 + llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames) + rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames) + return (llist+rlist), (lre+rre), (lnames+rnames) # ----------------------------------------------------------------------------- # def _statetoken(s,names) @@ -517,22 +530,22 @@ def _form_master_re(relist,reflags,ldict,toknames): # is a tuple of state names and tokenname is the name of the token. 
For example, # calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') # ----------------------------------------------------------------------------- +def _statetoken(s, names): + parts = s.split('_') + for i, part in enumerate(parts[1:], 1): + if part not in names and part != 'ANY': + break -def _statetoken(s,names): - nonstate = 1 - parts = s.split("_") - for i in range(1,len(parts)): - if not parts[i] in names and parts[i] != 'ANY': break if i > 1: - states = tuple(parts[1:i]) + states = tuple(parts[1:i]) else: - states = ('INITIAL',) + states = ('INITIAL',) if 'ANY' in states: - states = tuple(names) + states = tuple(names) - tokenname = "_".join(parts[i:]) - return (states,tokenname) + tokenname = '_'.join(parts[i:]) + return (states, tokenname) # ----------------------------------------------------------------------------- @@ -542,19 +555,15 @@ def _statetoken(s,names): # user's input file. # ----------------------------------------------------------------------------- class LexerReflect(object): - def __init__(self,ldict,log=None,reflags=0): + def __init__(self, ldict, log=None, reflags=0): self.ldict = ldict self.error_func = None self.tokens = [] self.reflags = reflags - self.stateinfo = { 'INITIAL' : 'inclusive'} - self.files = {} - self.error = 0 - - if log is None: - self.log = PlyLogger(sys.stderr) - else: - self.log = log + self.stateinfo = {'INITIAL': 'inclusive'} + self.modules = set() + self.error = False + self.log = PlyLogger(sys.stderr) if log is None else log # Get all of the basic information def get_all(self): @@ -562,7 +571,7 @@ def get_all(self): self.get_literals() self.get_states() self.get_rules() - + # Validate all of the information def validate_all(self): self.validate_tokens() @@ -572,20 +581,20 @@ def validate_all(self): # Get the tokens map def get_tokens(self): - tokens = self.ldict.get("tokens",None) + tokens = self.ldict.get('tokens', None) if not tokens: - self.log.error("No token list is defined") - self.error = 1 + self.log.error('No token list is defined') + self.error = True return - if not isinstance(tokens,(list, tuple)): - self.log.error("tokens must be a list or tuple") - self.error = 1 + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True return - + if not tokens: - self.log.error("tokens is empty") - self.error = 1 + self.log.error('tokens is empty') + self.error = True return self.tokens = tokens @@ -595,280 +604,274 @@ def validate_tokens(self): terminals = {} for n in self.tokens: if not _is_identifier.match(n): - self.log.error("Bad token name '%s'",n) - self.error = 1 + self.log.error("Bad token name '%s'", n) + self.error = True if n in terminals: self.log.warning("Token '%s' multiply defined", n) terminals[n] = 1 # Get the literals specifier def get_literals(self): - self.literals = self.ldict.get("literals","") + self.literals = self.ldict.get('literals', '') + if not self.literals: + self.literals = '' # Validate literals def validate_literals(self): try: for c in self.literals: - if not isinstance(c,StringTypes) or len(c) > 1: - self.log.error("Invalid literal %s. Must be a single character", repr(c)) - self.error = 1 - continue + if not isinstance(c, StringTypes) or len(c) > 1: + self.log.error('Invalid literal %s. Must be a single character', repr(c)) + self.error = True except TypeError: - self.log.error("Invalid literals specification. literals must be a sequence of characters") - self.error = 1 + self.log.error('Invalid literals specification. 
literals must be a sequence of characters') + self.error = True def get_states(self): - self.states = self.ldict.get("states",None) + self.states = self.ldict.get('states', None) # Build statemap if self.states: - if not isinstance(self.states,(tuple,list)): - self.log.error("states must be defined as a tuple or list") - self.error = 1 - else: - for s in self.states: - if not isinstance(s,tuple) or len(s) != 2: - self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')",repr(s)) - self.error = 1 - continue - name, statetype = s - if not isinstance(name,StringTypes): - self.log.error("State name %s must be a string", repr(name)) - self.error = 1 - continue - if not (statetype == 'inclusive' or statetype == 'exclusive'): - self.log.error("State type for state %s must be 'inclusive' or 'exclusive'",name) - self.error = 1 - continue - if name in self.stateinfo: - self.log.error("State '%s' already defined",name) - self.error = 1 - continue - self.stateinfo[name] = statetype + if not isinstance(self.states, (tuple, list)): + self.log.error('states must be defined as a tuple or list') + self.error = True + else: + for s in self.states: + if not isinstance(s, tuple) or len(s) != 2: + self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')", repr(s)) + self.error = True + continue + name, statetype = s + if not isinstance(name, StringTypes): + self.log.error('State name %s must be a string', repr(name)) + self.error = True + continue + if not (statetype == 'inclusive' or statetype == 'exclusive'): + self.log.error("State type for state %s must be 'inclusive' or 'exclusive'", name) + self.error = True + continue + if name in self.stateinfo: + self.log.error("State '%s' already defined", name) + self.error = True + continue + self.stateinfo[name] = statetype # Get all of the symbols with a t_ prefix and sort them into various # categories (functions, strings, error functions, and ignore characters) def get_rules(self): - tsymbols = [f for f in self.ldict if f[:2] == 't_' ] + tsymbols = [f for f in self.ldict if f[:2] == 't_'] # Now build up a list of functions and a list of strings - - self.toknames = { } # Mapping of symbols to token names - self.funcsym = { } # Symbols defined as functions - self.strsym = { } # Symbols defined as strings - self.ignore = { } # Ignore strings by state - self.errorf = { } # Error functions by state + self.toknames = {} # Mapping of symbols to token names + self.funcsym = {} # Symbols defined as functions + self.strsym = {} # Symbols defined as strings + self.ignore = {} # Ignore strings by state + self.errorf = {} # Error functions by state + self.eoff = {} # EOF functions by state for s in self.stateinfo: - self.funcsym[s] = [] - self.strsym[s] = [] + self.funcsym[s] = [] + self.strsym[s] = [] if len(tsymbols) == 0: - self.log.error("No rules of the form t_rulename are defined") - self.error = 1 + self.log.error('No rules of the form t_rulename are defined') + self.error = True return for f in tsymbols: t = self.ldict[f] - states, tokname = _statetoken(f,self.stateinfo) + states, tokname = _statetoken(f, self.stateinfo) self.toknames[f] = tokname - if hasattr(t,"__call__"): + if hasattr(t, '__call__'): if tokname == 'error': for s in states: self.errorf[s] = t + elif tokname == 'eof': + for s in states: + self.eoff[s] = t elif tokname == 'ignore': - line = func_code(t).co_firstlineno - file = func_code(t).co_filename - self.log.error("%s:%d: Rule '%s' must be defined as a 
string",file,line,t.__name__) - self.error = 1 + line = t.__code__.co_firstlineno + file = t.__code__.co_filename + self.log.error("%s:%d: Rule '%s' must be defined as a string", file, line, t.__name__) + self.error = True else: - for s in states: - self.funcsym[s].append((f,t)) + for s in states: + self.funcsym[s].append((f, t)) elif isinstance(t, StringTypes): if tokname == 'ignore': for s in states: self.ignore[s] = t - if "\\" in t: - self.log.warning("%s contains a literal backslash '\\'",f) + if '\\' in t: + self.log.warning("%s contains a literal backslash '\\'", f) elif tokname == 'error': self.log.error("Rule '%s' must be defined as a function", f) - self.error = 1 + self.error = True else: - for s in states: - self.strsym[s].append((f,t)) + for s in states: + self.strsym[s].append((f, t)) else: - self.log.error("%s not defined as a function or string", f) - self.error = 1 + self.log.error('%s not defined as a function or string', f) + self.error = True # Sort the functions by line number for f in self.funcsym.values(): - if sys.version_info[0] < 3: - f.sort(lambda x,y: cmp(func_code(x[1]).co_firstlineno,func_code(y[1]).co_firstlineno)) - else: - # Python 3.0 - f.sort(key=lambda x: func_code(x[1]).co_firstlineno) + f.sort(key=lambda x: x[1].__code__.co_firstlineno) # Sort the strings by regular expression length for s in self.strsym.values(): - if sys.version_info[0] < 3: - s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1]))) - else: - # Python 3.0 - s.sort(key=lambda x: len(x[1]),reverse=True) + s.sort(key=lambda x: len(x[1]), reverse=True) - # Validate all of the t_rules collected + # Validate all of the t_rules collected def validate_rules(self): for state in self.stateinfo: # Validate all rules defined by functions - - for fname, f in self.funcsym[state]: - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - self.files[file] = 1 + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) tokname = self.toknames[fname] if isinstance(f, types.MethodType): reqargs = 2 else: reqargs = 1 - nargs = func_code(f).co_argcount + nargs = f.__code__.co_argcount if nargs > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) - self.error = 1 + self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__) + self.error = True continue if nargs < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) - self.error = 1 + self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) + self.error = True continue - if not f.__doc__: - self.log.error("%s:%d: No regular expression defined for rule '%s'",file,line,f.__name__) - self.error = 1 + if not _get_regex(f): + self.log.error("%s:%d: No regular expression defined for rule '%s'", file, line, f.__name__) + self.error = True continue try: - c = re.compile("(?P<%s>%s)" % (fname,f.__doc__), re.VERBOSE | self.reflags) - if c.match(""): - self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file,line,f.__name__) - self.error = 1 - except re.error: - _etype, e, _etrace = sys.exc_info() - self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file,line,f.__name__,e) - if '#' in f.__doc__: - self.log.error("%s:%d. 
Make sure '#' in rule '%s' is escaped with '\\#'",file,line, f.__name__) - self.error = 1 + c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags) + if c.match(''): + self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file, line, f.__name__) + self.error = True + except re.error as e: + self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) + if '#' in _get_regex(f): + self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'", file, line, f.__name__) + self.error = True # Validate all rules defined by strings - for name,r in self.strsym[state]: + for name, r in self.strsym[state]: tokname = self.toknames[name] if tokname == 'error': self.log.error("Rule '%s' must be defined as a function", name) - self.error = 1 + self.error = True continue - if not tokname in self.tokens and tokname.find("ignore_") < 0: - self.log.error("Rule '%s' defined for an unspecified token %s",name,tokname) - self.error = 1 + if tokname not in self.tokens and tokname.find('ignore_') < 0: + self.log.error("Rule '%s' defined for an unspecified token %s", name, tokname) + self.error = True continue try: - c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | self.reflags) - if (c.match("")): - self.log.error("Regular expression for rule '%s' matches empty string",name) - self.error = 1 - except re.error: - _etype, e, _etrace = sys.exc_info() - self.log.error("Invalid regular expression for rule '%s'. %s",name,e) + c = re.compile('(?P<%s>%s)' % (name, r), self.reflags) + if (c.match('')): + self.log.error("Regular expression for rule '%s' matches empty string", name) + self.error = True + except re.error as e: + self.log.error("Invalid regular expression for rule '%s'. %s", name, e) if '#' in r: - self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'",name) - self.error = 1 + self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'", name) + self.error = True if not self.funcsym[state] and not self.strsym[state]: - self.log.error("No rules defined for state '%s'",state) - self.error = 1 + self.log.error("No rules defined for state '%s'", state) + self.error = True # Validate the error function - efunc = self.errorf.get(state,None) + efunc = self.errorf.get(state, None) if efunc: f = efunc - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - self.files[file] = 1 + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) if isinstance(f, types.MethodType): reqargs = 2 else: reqargs = 1 - nargs = func_code(f).co_argcount + nargs = f.__code__.co_argcount if nargs > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) - self.error = 1 + self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__) + self.error = True if nargs < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) - self.error = 1 - - for f in self.files: - self.validate_file(f) + self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) + self.error = True + for module in self.modules: + self.validate_module(module) # ----------------------------------------------------------------------------- - # validate_file() + # validate_module() # # This checks to see if there are duplicated t_rulename() functions or strings # in the parser input file. This is done using a simple regular expression - # match on each line in the given file. 
+ # match on each line in the source code of the given module. # ----------------------------------------------------------------------------- - def validate_file(self,filename): - import os.path - base,ext = os.path.splitext(filename) - if ext != '.py': return # No idea what the file is. Return OK - + def validate_module(self, module): try: - f = open(filename) - lines = f.readlines() - f.close() + lines, linen = inspect.getsourcelines(module) except IOError: - return # Couldn't find the file. Don't worry about it + return fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') - counthash = { } - linen = 1 - for l in lines: - m = fre.match(l) + counthash = {} + linen += 1 + for line in lines: + m = fre.match(line) if not m: - m = sre.match(l) + m = sre.match(line) if m: name = m.group(1) prev = counthash.get(name) if not prev: counthash[name] = linen else: - self.log.error("%s:%d: Rule %s redefined. Previously defined on line %d",filename,linen,name,prev) - self.error = 1 + filename = inspect.getsourcefile(module) + self.log.error('%s:%d: Rule %s redefined. Previously defined on line %d', filename, linen, name, prev) + self.error = True linen += 1 - + # ----------------------------------------------------------------------------- # lex(module) # # Build all of the regular expression rules from definitions in the supplied module # ----------------------------------------------------------------------------- -def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None): +def lex(module=None, object=None, debug=False, optimize=False, lextab='lextab', + reflags=int(re.VERBOSE), nowarn=False, outputdir=None, debuglog=None, errorlog=None): + + if lextab is None: + lextab = 'lextab' + global lexer + ldict = None - stateinfo = { 'INITIAL' : 'inclusive'} + stateinfo = {'INITIAL': 'inclusive'} lexobj = Lexer() lexobj.lexoptimize = optimize - global token,input + global token, input if errorlog is None: errorlog = PlyLogger(sys.stderr) @@ -878,16 +881,28 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now debuglog = PlyLogger(sys.stderr) # Get the module dictionary used for the lexer - if object: module = object + if object: + module = object + # Get the module dictionary used for the parser if module: - _items = [(k,getattr(module,k)) for k in dir(module)] + _items = [(k, getattr(module, k)) for k in dir(module)] ldict = dict(_items) + # If no __file__ attribute is available, try to obtain it from the __module__ instead + if '__file__' not in ldict: + ldict['__file__'] = sys.modules[ldict['__module__']].__file__ else: ldict = get_caller_module_dict(2) + # Determine if the module is package of a package or not. + # If so, fix the tabmodule setting so that tables load correctly + pkg = ldict.get('__package__') + if pkg and isinstance(lextab, str): + if '.' not in lextab: + lextab = pkg + '.' 
+ lextab + # Collect parser information from the dictionary - linfo = LexerReflect(ldict,log=errorlog,reflags=reflags) + linfo = LexerReflect(ldict, log=errorlog, reflags=reflags) linfo.get_all() if not optimize: if linfo.validate_all(): @@ -895,7 +910,7 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now if optimize and lextab: try: - lexobj.readtab(lextab,ldict) + lexobj.readtab(lextab, ldict) token = lexobj.token input = lexobj.input lexer = lexobj @@ -906,93 +921,97 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now # Dump some basic debugging information if debug: - debuglog.info("lex: tokens = %r", linfo.tokens) - debuglog.info("lex: literals = %r", linfo.literals) - debuglog.info("lex: states = %r", linfo.stateinfo) + debuglog.info('lex: tokens = %r', linfo.tokens) + debuglog.info('lex: literals = %r', linfo.literals) + debuglog.info('lex: states = %r', linfo.stateinfo) # Build a dictionary of valid token names - lexobj.lextokens = { } + lexobj.lextokens = set() for n in linfo.tokens: - lexobj.lextokens[n] = 1 + lexobj.lextokens.add(n) # Get literals specification - if isinstance(linfo.literals,(list,tuple)): + if isinstance(linfo.literals, (list, tuple)): lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) else: lexobj.lexliterals = linfo.literals + lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals) + # Get the stateinfo dictionary stateinfo = linfo.stateinfo - regexs = { } + regexs = {} # Build the master regular expressions for state in stateinfo: regex_list = [] # Add rules defined by functions first for fname, f in linfo.funcsym[state]: - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - regex_list.append("(?P<%s>%s)" % (fname,f.__doc__)) + regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f))) if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state) + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state) # Now add all of the simple rules - for name,r in linfo.strsym[state]: - regex_list.append("(?P<%s>%s)" % (name,r)) + for name, r in linfo.strsym[state]: + regex_list.append('(?P<%s>%s)' % (name, r)) if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state) + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) regexs[state] = regex_list # Build the master regular expressions if debug: - debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====") + debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====') for state in regexs: - lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames) + lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames) lexobj.lexstatere[state] = lexre lexobj.lexstateretext[state] = re_text lexobj.lexstaterenames[state] = re_names if debug: - for i in range(len(re_text)): - debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i]) + for i, text in enumerate(re_text): + debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) # For inclusive states, we need to add the regular expressions from the INITIAL state - for state,stype in stateinfo.items(): - if state != "INITIAL" and stype == 'inclusive': - lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) - lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) - lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) + for state, stype in stateinfo.items(): + if 
state != 'INITIAL' and stype == 'inclusive': + lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) + lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) + lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) lexobj.lexstateinfo = stateinfo - lexobj.lexre = lexobj.lexstatere["INITIAL"] - lexobj.lexretext = lexobj.lexstateretext["INITIAL"] + lexobj.lexre = lexobj.lexstatere['INITIAL'] + lexobj.lexretext = lexobj.lexstateretext['INITIAL'] lexobj.lexreflags = reflags # Set up ignore variables lexobj.lexstateignore = linfo.ignore - lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","") + lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '') # Set up error functions lexobj.lexstateerrorf = linfo.errorf - lexobj.lexerrorf = linfo.errorf.get("INITIAL",None) + lexobj.lexerrorf = linfo.errorf.get('INITIAL', None) if not lexobj.lexerrorf: - errorlog.warning("No t_error rule is defined") + errorlog.warning('No t_error rule is defined') + + # Set up eof functions + lexobj.lexstateeoff = linfo.eoff + lexobj.lexeoff = linfo.eoff.get('INITIAL', None) # Check state information for ignore and error rules - for s,stype in stateinfo.items(): + for s, stype in stateinfo.items(): if stype == 'exclusive': - if not s in linfo.errorf: - errorlog.warning("No error rule is defined for exclusive state '%s'", s) - if not s in linfo.ignore and lexobj.lexignore: - errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) + if s not in linfo.errorf: + errorlog.warning("No error rule is defined for exclusive state '%s'", s) + if s not in linfo.ignore and lexobj.lexignore: + errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) elif stype == 'inclusive': - if not s in linfo.errorf: - linfo.errorf[s] = linfo.errorf.get("INITIAL",None) - if not s in linfo.ignore: - linfo.ignore[s] = linfo.ignore.get("INITIAL","") + if s not in linfo.errorf: + linfo.errorf[s] = linfo.errorf.get('INITIAL', None) + if s not in linfo.ignore: + linfo.ignore[s] = linfo.ignore.get('INITIAL', '') # Create global versions of the token() and input() functions token = lexobj.token @@ -1001,7 +1020,28 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now # If in optimize mode, we write the lextab if lextab and optimize: - lexobj.writetab(lextab,outputdir) + if outputdir is None: + # If no output directory is set, the location of the output files + # is determined according to the following rules: + # - If lextab specifies a package, files go into that package directory + # - Otherwise, files go in the same directory as the specifying module + if isinstance(lextab, types.ModuleType): + srcfile = lextab.__file__ + else: + if '.' not in lextab: + srcfile = ldict['__file__'] + else: + parts = lextab.split('.') + pkgname = '.'.join(parts[:-1]) + exec('import %s' % pkgname) + srcfile = getattr(sys.modules[pkgname], '__file__', '') + outputdir = os.path.dirname(srcfile) + try: + lexobj.writetab(lextab, outputdir) + if lextab in sys.modules: + del sys.modules[lextab] + except IOError as e: + errorlog.warning("Couldn't write lextab module %r. 
%s" % (lextab, e)) return lexobj @@ -1011,7 +1051,7 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now # This runs the lexer as a main program # ----------------------------------------------------------------------------- -def runmain(lexer=None,data=None): +def runmain(lexer=None, data=None): if not data: try: filename = sys.argv[1] @@ -1019,7 +1059,7 @@ def runmain(lexer=None,data=None): data = f.read() f.close() except IndexError: - sys.stdout.write("Reading from standard input (type EOF to end):\n") + sys.stdout.write('Reading from standard input (type EOF to end):\n') data = sys.stdin.read() if lexer: @@ -1032,10 +1072,11 @@ def runmain(lexer=None,data=None): else: _token = token - while 1: + while True: tok = _token() - if not tok: break - sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos)) + if not tok: + break + sys.stdout.write('(%s,%r,%d,%d)\n' % (tok.type, tok.value, tok.lineno, tok.lexpos)) # ----------------------------------------------------------------------------- # @TOKEN(regex) @@ -1045,14 +1086,13 @@ def runmain(lexer=None,data=None): # ----------------------------------------------------------------------------- def TOKEN(r): - def set_doc(f): - if hasattr(r,"__call__"): - f.__doc__ = r.__doc__ + def set_regex(f): + if hasattr(r, '__call__'): + f.regex = _get_regex(r) else: - f.__doc__ = r + f.regex = r return f - return set_doc + return set_regex # Alternative spelling of the TOKEN decorator Token = TOKEN - diff --git a/third_party/ply/license.patch b/third_party/ply/license.patch index 7b2621fc46b0..b6cdff196c4b 100644 --- a/third_party/ply/license.patch +++ b/third_party/ply/license.patch @@ -6,9 +6,9 @@ index 853a985..f3da03e 100644 # PLY package # Author: David Beazley (dave@dabeaz.com) +# ----------------------------------------------------------------------------- -+# ply: yacc.py ++# ply: __init__.py +# -+# Copyright (C) 2001-2011, ++# Copyright (C) 2001-2018 +# David M. Beazley (Dabeaz LLC) +# All rights reserved. +# @@ -38,4 +38,5 @@ index 853a985..f3da03e 100644 +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ----------------------------------------------------------------------------- + __version__ = '3.11' __all__ = ['lex','yacc'] diff --git a/third_party/ply/ply.gni b/third_party/ply/ply.gni new file mode 100644 index 000000000000..6a2db436f125 --- /dev/null +++ b/third_party/ply/ply.gni @@ -0,0 +1,5 @@ +ply_sources = [ + "//third_party/ply/__init__.py", + "//third_party/ply/lex.py", + "//third_party/ply/yacc.py", +] diff --git a/third_party/ply/yacc.py b/third_party/ply/yacc.py index f70439ea5e1c..88188a1e8ead 100644 --- a/third_party/ply/yacc.py +++ b/third_party/ply/yacc.py @@ -1,22 +1,22 @@ # ----------------------------------------------------------------------------- # ply: yacc.py # -# Copyright (C) 2001-2011, +# Copyright (C) 2001-2018 # David M. Beazley (Dabeaz LLC) # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: -# +# # * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. +# and/or other materials provided with the distribution. # * Neither the name of the David Beazley or Dabeaz LLC may be used to # endorse or promote products derived from this software without -# specific prior written permission. +# specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT @@ -32,7 +32,7 @@ # ----------------------------------------------------------------------------- # # This implements an LR parser that is constructed from grammar rules defined -# as Python functions. The grammer is specified by supplying the BNF inside +# as Python functions. The grammar is specified by supplying the BNF inside # Python documentation strings. The inspiration for this technique was borrowed # from John Aycock's Spark parsing system. PLY might be viewed as cross between # Spark and the GNU bison utility. @@ -59,8 +59,15 @@ # own risk! # ---------------------------------------------------------------------------- -__version__ = "3.4" -__tabversion__ = "3.2" # Table version +import re +import types +import sys +import os.path +import inspect +import warnings + +__version__ = '3.11' +__tabversion__ = '3.10' #----------------------------------------------------------------------------- # === User configurable parameters === @@ -68,7 +75,7 @@ # Change these to modify the default behavior of yacc (if you wish) #----------------------------------------------------------------------------- -yaccdebug = 1 # Debugging mode. If set, yacc generates a +yaccdebug = True # Debugging mode. If set, yacc generates a # a 'parser.out' file in the current directory debug_file = 'parser.out' # Default name of the debugging file @@ -77,86 +84,117 @@ error_count = 3 # Number of symbols that must be shifted to leave recovery mode -yaccdevel = 0 # Set to True if developing yacc. This turns off optimized +yaccdevel = False # Set to True if developing yacc. This turns off optimized # implementations of certain functions. resultlimit = 40 # Size limit of results when running in debug mode. pickle_protocol = 0 # Protocol to use when writing pickle files -import re, types, sys, os.path - -# Compatibility function for python 2.6/3.0 +# String type-checking compatibility if sys.version_info[0] < 3: - def func_code(f): - return f.func_code + string_types = basestring else: - def func_code(f): - return f.__code__ - -# Compatibility -try: - MAXINT = sys.maxint -except AttributeError: - MAXINT = sys.maxsize - -# Python 2.x/3.0 compatibility. -def load_ply_lex(): - if sys.version_info[0] < 3: - import lex - else: - import ply.lex as lex - return lex + string_types = str + +MAXINT = sys.maxsize -# This object is a stand-in for a logging object created by the +# This object is a stand-in for a logging object created by the # logging module. PLY will use this by default to create things # such as the parser.out file. If a user wants more detailed # information, they can create their own logging object and pass # it into PLY. 
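On the logger stand-ins above and below: PLY only duck-types these objects, so a caller who wants richer build diagnostics can pass a standard-library logger through the public debuglog/errorlog keyword arguments of yacc() rather than falling back on PlyLogger(sys.stderr). A minimal sketch, assuming a hypothetical grammar module calcgrammar that supplies the usual tokens list and p_* rules:

    import logging
    import ply.yacc as yacc
    import calcgrammar   # hypothetical module: defines tokens and p_* rules

    logging.basicConfig(level=logging.DEBUG, filename='parser_build.log')
    log = logging.getLogger('ply')

    # Any object exposing debug/info/warning/error/critical methods is
    # accepted; that duck typing is why PlyLogger needs no logging base class.
    parser = yacc.yacc(module=calcgrammar, debug=True, debuglog=log, errorlog=log)
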
class PlyLogger(object): - def __init__(self,f): + def __init__(self, f): self.f = f - def debug(self,msg,*args,**kwargs): - self.f.write((msg % args) + "\n") - info = debug - def warning(self,msg,*args,**kwargs): - self.f.write("WARNING: "+ (msg % args) + "\n") + def debug(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') + + info = debug - def error(self,msg,*args,**kwargs): - self.f.write("ERROR: " + (msg % args) + "\n") + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') + + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') critical = debug # Null logger is used when no output is generated. Does nothing. class NullLogger(object): - def __getattribute__(self,name): + def __getattribute__(self, name): return self - def __call__(self,*args,**kwargs): + + def __call__(self, *args, **kwargs): return self - + # Exception raised for yacc-related errors -class YaccError(Exception): pass +class YaccError(Exception): + pass # Format the result message that the parser produces when running in debug mode. def format_result(r): repr_str = repr(r) - if '\n' in repr_str: repr_str = repr(repr_str) + if '\n' in repr_str: + repr_str = repr(repr_str) if len(repr_str) > resultlimit: - repr_str = repr_str[:resultlimit]+" ..." - result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str) + repr_str = repr_str[:resultlimit] + ' ...' + result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) return result - # Format stack entries when the parser is running in debug mode def format_stack_entry(r): repr_str = repr(r) - if '\n' in repr_str: repr_str = repr(repr_str) + if '\n' in repr_str: + repr_str = repr(repr_str) if len(repr_str) < 16: return repr_str else: - return "<%s @ 0x%x>" % (type(r).__name__,id(r)) + return '<%s @ 0x%x>' % (type(r).__name__, id(r)) + +# Panic mode error recovery support. This feature is being reworked--much of the +# code here is to offer a deprecation/backwards compatible transition + +_errok = None +_token = None +_restart = None +_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error(). +Instead, invoke the methods on the associated parser instance: + + def p_error(p): + ... + # Use parser.errok(), parser.token(), parser.restart() + ... + + parser = yacc.yacc() +''' + +def errok(): + warnings.warn(_warnmsg) + return _errok() + +def restart(): + warnings.warn(_warnmsg) + return _restart() + +def token(): + warnings.warn(_warnmsg) + return _token() + +# Utility function to call the p_error() function with some deprecation hacks +def call_errorfunc(errorfunc, token, parser): + global _errok, _token, _restart + _errok = parser.errok + _token = parser.token + _restart = parser.restart + r = errorfunc(token) + try: + del _errok, _token, _restart + except NameError: + pass + return r #----------------------------------------------------------------------------- # === LR Parsing Engine === @@ -176,8 +214,11 @@ def format_stack_entry(r): # .endlexpos = Ending lex position (optional, set automatically) class YaccSymbol: - def __str__(self): return self.type - def __repr__(self): return str(self) + def __str__(self): + return self.type + + def __repr__(self): + return str(self) # This class is a wrapper around the objects actually passed to each # grammar rule. Index lookup and assignment actually assign the @@ -189,46 +230,53 @@ def __repr__(self): return str(self) # representing the range of positional information for a symbol. 
class YaccProduction: - def __init__(self,s,stack=None): + def __init__(self, s, stack=None): self.slice = s self.stack = stack self.lexer = None - self.parser= None - def __getitem__(self,n): - if n >= 0: return self.slice[n].value - else: return self.stack[n].value + self.parser = None - def __setitem__(self,n,v): + def __getitem__(self, n): + if isinstance(n, slice): + return [s.value for s in self.slice[n]] + elif n >= 0: + return self.slice[n].value + else: + return self.stack[n].value + + def __setitem__(self, n, v): self.slice[n].value = v - def __getslice__(self,i,j): + def __getslice__(self, i, j): return [s.value for s in self.slice[i:j]] def __len__(self): return len(self.slice) - def lineno(self,n): - return getattr(self.slice[n],"lineno",0) + def lineno(self, n): + return getattr(self.slice[n], 'lineno', 0) - def set_lineno(self,n,lineno): + def set_lineno(self, n, lineno): self.slice[n].lineno = lineno - def linespan(self,n): - startline = getattr(self.slice[n],"lineno",0) - endline = getattr(self.slice[n],"endlineno",startline) - return startline,endline + def linespan(self, n): + startline = getattr(self.slice[n], 'lineno', 0) + endline = getattr(self.slice[n], 'endlineno', startline) + return startline, endline - def lexpos(self,n): - return getattr(self.slice[n],"lexpos",0) + def lexpos(self, n): + return getattr(self.slice[n], 'lexpos', 0) - def lexspan(self,n): - startpos = getattr(self.slice[n],"lexpos",0) - endpos = getattr(self.slice[n],"endlexpos",startpos) - return startpos,endpos + def set_lexpos(self, n, lexpos): + self.slice[n].lexpos = lexpos - def error(self): - raise SyntaxError + def lexspan(self, n): + startpos = getattr(self.slice[n], 'lexpos', 0) + endpos = getattr(self.slice[n], 'endlexpos', startpos) + return startpos, endpos + def error(self): + raise SyntaxError # ----------------------------------------------------------------------------- # == LRParser == @@ -237,14 +285,16 @@ def error(self): # ----------------------------------------------------------------------------- class LRParser: - def __init__(self,lrtab,errorf): + def __init__(self, lrtab, errorf): self.productions = lrtab.lr_productions - self.action = lrtab.lr_action - self.goto = lrtab.lr_goto - self.errorfunc = errorf + self.action = lrtab.lr_action + self.goto = lrtab.lr_goto + self.errorfunc = errorf + self.set_defaulted_states() + self.errorok = True def errok(self): - self.errorok = 1 + self.errorok = True def restart(self): del self.statestack[:] @@ -254,24 +304,42 @@ def restart(self): self.symstack.append(sym) self.statestack.append(0) - def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): + # Defaulted state support. + # This method identifies parser states where there is only one possible reduction action. + # For such states, the parser can make a choose to make a rule reduction without consuming + # the next look-ahead token. This delayed invocation of the tokenizer can be useful in + # certain kinds of advanced parsing situations where the lexer and parser interact with + # each other or change states (i.e., manipulation of scope, lexer states, etc.). 
+ # + # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions + def set_defaulted_states(self): + self.defaulted_states = {} + for state, actions in self.action.items(): + rules = list(actions.values()) + if len(rules) == 1 and rules[0] < 0: + self.defaulted_states[state] = rules[0] + + def disable_defaulted_states(self): + self.defaulted_states = {} + + def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): if debug or yaccdevel: - if isinstance(debug,int): + if isinstance(debug, int): debug = PlyLogger(sys.stderr) - return self.parsedebug(input,lexer,debug,tracking,tokenfunc) + return self.parsedebug(input, lexer, debug, tracking, tokenfunc) elif tracking: - return self.parseopt(input,lexer,debug,tracking,tokenfunc) + return self.parseopt(input, lexer, debug, tracking, tokenfunc) else: - return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc) - + return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc) + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # parsedebug(). # # This is the debugging enabled version of parse(). All changes made to the - # parsing engine should be made here. For the non-debugging version, - # copy this code to a method parseopt() and delete all of the sections - # enclosed in: + # parsing engine should be made here. Optimized versions of this function + # are automatically created by the ply/ygen.py script. This script cuts out + # sections enclosed in markers such as this: # # #--! DEBUG # statements @@ -279,22 +347,24 @@ def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): # # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery - - # --! DEBUG - debug.info("PLY: PARSE DEBUG START") - # --! DEBUG + def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + #--! parsedebug-start + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery + + #--! DEBUG + debug.info('PLY: PARSE DEBUG START') + #--! DEBUG # If no lexer was given, we will try to use the lex module if not lexer: - lex = load_ply_lex() + from . 
import lex lexer = lex.lexer # Set up the lexer and parser objects on pslice @@ -306,16 +376,19 @@ def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): lexer.input(input) if tokenfunc is None: - # Tokenize function - get_token = lexer.token + # Tokenize function + get_token = lexer.token else: - get_token = tokenfunc + get_token = tokenfunc + + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token # Set up the state and symbol stacks - statestack = [ ] # Stack of parsing states + statestack = [] # Stack of parsing states self.statestack = statestack - symstack = [ ] # Stack of grammar symbols + symstack = [] # Stack of grammar symbols self.symstack = symstack pslice.stack = symstack # Put in the production @@ -325,52 +398,59 @@ def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): statestack.append(0) sym = YaccSymbol() - sym.type = "$end" + sym.type = '$end' symstack.append(sym) state = 0 - while 1: + while True: # Get the next symbol on the input. If a lookahead symbol # is already set, we just use that. Otherwise, we'll pull # the next token off of the lookaheadstack or from the lexer - # --! DEBUG + #--! DEBUG debug.debug('') debug.debug('State : %s', state) - # --! DEBUG + #--! DEBUG - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() + if state not in defaulted_states: if not lookahead: - lookahead = YaccSymbol() - lookahead.type = "$end" + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + #--! DEBUG + debug.debug('Defaulted state %s: Reduce using %d', state, -t) + #--! DEBUG - # --! DEBUG + #--! DEBUG debug.debug('Stack : %s', - ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - # --! DEBUG - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + #--! DEBUG if t is not None: if t > 0: # shift a symbol on the stack statestack.append(t) state = t - - # --! DEBUG - debug.debug("Action : Shift and goto state %s", t) - # --! DEBUG + + #--! DEBUG + debug.debug('Action : Shift and goto state %s', t) + #--! DEBUG symstack.append(lookahead) lookahead = None # Decrease error count on successful shift - if errorcount: errorcount -=1 + if errorcount: + errorcount -= 1 continue if t < 0: @@ -384,72 +464,77 @@ def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): sym.type = pname # Production name sym.value = None - # --! DEBUG + #--! DEBUG if plen: - debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t) + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, + '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']', + goto[statestack[-1-plen]][pname]) else: - debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t) - - # --! DEBUG + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], + goto[statestack[-1]][pname]) + + #--! DEBUG if plen: targ = symstack[-plen-1:] targ[0] = sym - # --! TRACKING + #--! 
TRACKING if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1,"endlineno",t1.lineno) - sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) - - # --! TRACKING + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1, 'endlineno', t1.lineno) + sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) + #--! TRACKING # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # below as a performance optimization. Make sure # changes get made in both locations. pslice.slice = targ - + try: # Call the grammar rule with our special slice object del symstack[-plen:] - del statestack[-plen:] + self.state = state p.callable(pslice) - # --! DEBUG - debug.info("Result : %s", format_result(pslice[0])) - # --! DEBUG + del statestack[-plen:] + #--! DEBUG + debug.info('Result : %s', format_result(pslice[0])) + #--! DEBUG symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - + else: - # --! TRACKING + #--! TRACKING if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - # --! TRACKING + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos + #--! TRACKING - targ = [ sym ] + targ = [sym] # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # above as a performance optimization. Make sure # changes get made in both locations. @@ -457,41 +542,43 @@ def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): try: # Call the grammar rule with our special slice object + self.state = state p.callable(pslice) - # --! DEBUG - debug.info("Result : %s", format_result(pslice[0])) - # --! DEBUG + #--! DEBUG + debug.info('Result : %s', format_result(pslice[0])) + #--! DEBUG symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! if t == 0: n = symstack[-1] - result = getattr(n,"value",None) - # --! DEBUG - debug.info("Done : Returning %s", format_result(result)) - debug.info("PLY: PARSE DEBUG END") - # --! DEBUG + result = getattr(n, 'value', None) + #--! DEBUG + debug.info('Done : Returning %s', format_result(result)) + debug.info('PLY: PARSE DEBUG END') + #--! DEBUG return result - if t == None: + if t is None: - # --! DEBUG + #--! DEBUG debug.error('Error : %s', - ("%s . 
%s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - # --! DEBUG + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + #--! DEBUG # We have some kind of parsing error here. To handle # this, we are going to push the current token onto @@ -505,20 +592,15 @@ def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): # errorcount == 0. if errorcount == 0 or self.errorok: errorcount = error_count - self.errorok = 0 + self.errorok = False errtoken = lookahead - if errtoken.type == "$end": + if errtoken.type == '$end': errtoken = None # End of file! if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): + if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - + self.state = state + tok = call_errorfunc(self.errorfunc, errtoken, self) if self.errorok: # User must have done some kind of panic # mode recovery on their own. The @@ -528,14 +610,16 @@ def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): continue else: if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno + else: + lineno = 0 if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) else: - sys.stderr.write("yacc: Parse error in input. EOF\n") + sys.stderr.write('yacc: Parse error in input. EOF\n') return else: @@ -545,7 +629,7 @@ def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): # entire parse has been rolled back and we're completely hosed. The token is # discarded and we just keep going. - if len(statestack) <= 1 and lookahead.type != "$end": + if len(statestack) <= 1 and lookahead.type != '$end': lookahead = None errtoken = None state = 0 @@ -557,7 +641,7 @@ def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): # at the end of the file. nuke the top entry and generate an error token # Start nuking entries on the stack - if lookahead.type == "$end": + if lookahead.type == '$end': # Whoa. We're really hosed here. Bail out return @@ -566,48 +650,67 @@ def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): if sym.type == 'error': # Hmmm. Error is on top of stack, we'll just nuke input # symbol and continue + #--! TRACKING + if tracking: + sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) + sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) + #--! TRACKING lookahead = None continue + + # Create the error symbol for the first time and make it the new lookahead symbol t = YaccSymbol() t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos t.value = lookahead lookaheadstack.append(lookahead) lookahead = t else: - symstack.pop() + sym = symstack.pop() + #--! 
TRACKING + if tracking: + lookahead.lineno = sym.lineno + lookahead.lexpos = sym.lexpos + #--! TRACKING statestack.pop() - state = statestack[-1] # Potential bug fix + state = statestack[-1] continue # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") + raise RuntimeError('yacc: internal parser error!!!\n') + + #--! parsedebug-end # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # parseopt(). # - # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY. - # Edit the debug version above, then copy any modifications to the method - # below while removing #--! DEBUG sections. + # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY! + # This code is automatically generated by the ply/ygen.py script. Make + # changes to the parsedebug() method instead. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + #--! parseopt-start + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery - def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery # If no lexer was given, we will try to use the lex module if not lexer: - lex = load_ply_lex() + from . import lex lexer = lex.lexer - + # Set up the lexer and parser objects on pslice pslice.lexer = lexer pslice.parser = self @@ -617,16 +720,19 @@ def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): lexer.input(input) if tokenfunc is None: - # Tokenize function - get_token = lexer.token + # Tokenize function + get_token = lexer.token else: - get_token = tokenfunc + get_token = tokenfunc + + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token # Set up the state and symbol stacks - statestack = [ ] # Stack of parsing states + statestack = [] # Stack of parsing states self.statestack = statestack - symstack = [ ] # Stack of grammar symbols + symstack = [] # Stack of grammar symbols self.symstack = symstack pslice.stack = symstack # Put in the production @@ -639,23 +745,28 @@ def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): sym.type = '$end' symstack.append(sym) state = 0 - while 1: + while True: # Get the next symbol on the input. If a lookahead symbol # is already set, we just use that. 
Otherwise, we'll pull # the next token off of the lookaheadstack or from the lexer - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() + + if state not in defaulted_states: if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) if t is not None: if t > 0: @@ -663,11 +774,13 @@ def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): statestack.append(t) state = t + symstack.append(lookahead) lookahead = None # Decrease error count on successful shift - if errorcount: errorcount -=1 + if errorcount: + errorcount -= 1 continue if t < 0: @@ -681,61 +794,64 @@ def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): sym.type = pname # Production name sym.value = None + if plen: targ = symstack[-plen-1:] targ[0] = sym - # --! TRACKING + #--! TRACKING if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1,"endlineno",t1.lineno) - sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) - - # --! TRACKING + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1, 'endlineno', t1.lineno) + sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) + #--! TRACKING # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # below as a performance optimization. Make sure # changes get made in both locations. pslice.slice = targ - + try: # Call the grammar rule with our special slice object del symstack[-plen:] - del statestack[-plen:] + self.state = state p.callable(pslice) + del statestack[-plen:] symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - + else: - # --! TRACKING + #--! TRACKING if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - # --! TRACKING + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos + #--! TRACKING - targ = [ sym ] + targ = [sym] # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # above as a performance optimization. Make sure # changes get made in both locations. 
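For context on the engines being rewritten here: the parse() dispatch shown earlier selects parsedebug(), parseopt(), or parseopt_notrack() purely from the debug and tracking arguments. A self-contained sketch that exercises all three paths (the toy grammar is hypothetical, not part of this patch):

    import sys
    import ply.lex as lex
    import ply.yacc as yacc

    tokens = ('NUMBER', 'PLUS')
    t_PLUS = r'\+'
    t_ignore = ' '

    def t_NUMBER(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_error(t):
        t.lexer.skip(1)

    def p_expr_plus(p):
        'expr : expr PLUS NUMBER'
        p[0] = p[1] + p[3]

    def p_expr_number(p):
        'expr : NUMBER'
        p[0] = p[1]

    def p_error(p):
        # Per the deprecation shim above, recovery should use
        # parser.errok()/parser.token()/parser.restart(), not the old globals.
        sys.stderr.write('syntax error at %r\n' % (p,))

    lexer = lex.lex()
    parser = yacc.yacc()

    print(parser.parse('1 + 2 + 3'))                  # parseopt_notrack()
    print(parser.parse('1 + 2 + 3', tracking=True))   # parseopt(), sets end positions
    print(parser.parse('1 + 2 + 3', debug=1))         # parsedebug(); the int is
                                                      # wrapped in PlyLogger(sys.stderr)

Since the defaulted-states table is shared by all three engines, a caller whose tokenfunc must be consulted for every lookahead can call parser.disable_defaulted_states() before parsing.
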
@@ -743,28 +859,32 @@ def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): try: # Call the grammar rule with our special slice object + self.state = state p.callable(pslice) symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! if t == 0: n = symstack[-1] - return getattr(n,"value",None) + result = getattr(n, 'value', None) + return result + + if t is None: - if t == None: # We have some kind of parsing error here. To handle # this, we are going to push the current token onto @@ -778,20 +898,15 @@ def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): # errorcount == 0. if errorcount == 0 or self.errorok: errorcount = error_count - self.errorok = 0 + self.errorok = False errtoken = lookahead if errtoken.type == '$end': errtoken = None # End of file! if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): + if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - + self.state = state + tok = call_errorfunc(self.errorfunc, errtoken, self) if self.errorok: # User must have done some kind of panic # mode recovery on their own. The @@ -801,14 +916,16 @@ def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): continue else: if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno + else: + lineno = 0 if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) else: - sys.stderr.write("yacc: Parse error in input. EOF\n") + sys.stderr.write('yacc: Parse error in input. EOF\n') return else: @@ -839,47 +956,67 @@ def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): if sym.type == 'error': # Hmmm. Error is on top of stack, we'll just nuke input # symbol and continue + #--! TRACKING + if tracking: + sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) + sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) + #--! TRACKING lookahead = None continue + + # Create the error symbol for the first time and make it the new lookahead symbol t = YaccSymbol() t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos t.value = lookahead lookaheadstack.append(lookahead) lookahead = t else: - symstack.pop() + sym = symstack.pop() + #--! 
TRACKING + if tracking: + lookahead.lineno = sym.lineno + lookahead.lexpos = sym.lexpos + #--! TRACKING statestack.pop() - state = statestack[-1] # Potential bug fix + state = statestack[-1] continue # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") + raise RuntimeError('yacc: internal parser error!!!\n') + + #--! parseopt-end # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # parseopt_notrack(). # - # Optimized version of parseopt() with line number tracking removed. - # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove - # code in the #--! TRACKING sections + # Optimized version of parseopt() with line number tracking removed. + # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated + # by the ply/ygen.py script. Make changes to the parsedebug() method instead. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery + def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + #--! parseopt-notrack-start + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery + # If no lexer was given, we will try to use the lex module if not lexer: - lex = load_ply_lex() + from . import lex lexer = lex.lexer - + # Set up the lexer and parser objects on pslice pslice.lexer = lexer pslice.parser = self @@ -889,16 +1026,19 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non lexer.input(input) if tokenfunc is None: - # Tokenize function - get_token = lexer.token + # Tokenize function + get_token = lexer.token else: - get_token = tokenfunc + get_token = tokenfunc + + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token # Set up the state and symbol stacks - statestack = [ ] # Stack of parsing states + statestack = [] # Stack of parsing states self.statestack = statestack - symstack = [ ] # Stack of grammar symbols + symstack = [] # Stack of grammar symbols self.symstack = symstack pslice.stack = symstack # Put in the production @@ -911,23 +1051,28 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non sym.type = '$end' symstack.append(sym) state = 0 - while 1: + while True: # Get the next symbol on the input. If a lookahead symbol # is already set, we just use that. 
Otherwise, we'll pull # the next token off of the lookaheadstack or from the lexer - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() + + if state not in defaulted_states: if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) if t is not None: if t > 0: @@ -935,11 +1080,13 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non statestack.append(t) state = t + symstack.append(lookahead) lookahead = None # Decrease error count on successful shift - if errorcount: errorcount -=1 + if errorcount: + errorcount -= 1 continue if t < 0: @@ -953,44 +1100,50 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non sym.type = pname # Production name sym.value = None + if plen: targ = symstack[-plen-1:] targ[0] = sym + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # below as a performance optimization. Make sure # changes get made in both locations. pslice.slice = targ - + try: # Call the grammar rule with our special slice object del symstack[-plen:] - del statestack[-plen:] + self.state = state p.callable(pslice) + del statestack[-plen:] symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - + else: - targ = [ sym ] + + targ = [sym] # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # above as a performance optimization. Make sure # changes get made in both locations. @@ -998,28 +1151,32 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non try: # Call the grammar rule with our special slice object + self.state = state p.callable(pslice) symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
if t == 0: n = symstack[-1] - return getattr(n,"value",None) + result = getattr(n, 'value', None) + return result + + if t is None: - if t == None: # We have some kind of parsing error here. To handle # this, we are going to push the current token onto @@ -1033,20 +1190,15 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non # errorcount == 0. if errorcount == 0 or self.errorok: errorcount = error_count - self.errorok = 0 + self.errorok = False errtoken = lookahead if errtoken.type == '$end': errtoken = None # End of file! if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): + if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - + self.state = state + tok = call_errorfunc(self.errorfunc, errtoken, self) if self.errorok: # User must have done some kind of panic # mode recovery on their own. The @@ -1056,14 +1208,16 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non continue else: if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno + else: + lineno = 0 if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) else: - sys.stderr.write("yacc: Parse error in input. EOF\n") + sys.stderr.write('yacc: Parse error in input. EOF\n') return else: @@ -1096,32 +1250,37 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non # symbol and continue lookahead = None continue + + # Create the error symbol for the first time and make it the new lookahead symbol t = YaccSymbol() t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos t.value = lookahead lookaheadstack.append(lookahead) lookahead = t else: - symstack.pop() + sym = symstack.pop() statestack.pop() - state = statestack[-1] # Potential bug fix + state = statestack[-1] continue # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") + raise RuntimeError('yacc: internal parser error!!!\n') + + #--! parseopt-notrack-end # ----------------------------------------------------------------------------- # === Grammar Representation === # # The following functions, classes, and variables are used to represent and -# manipulate the rules that make up a grammar. +# manipulate the rules that make up a grammar. # ----------------------------------------------------------------------------- -import re - # regex matching identifiers _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') @@ -1131,7 +1290,7 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non # This class stores the raw information about a single production or grammar rule. 
# A grammar rule refers to a specification such as this: # -# expr : expr PLUS term +# expr : expr PLUS term # # Here are the basic attributes defined on all productions # @@ -1151,7 +1310,7 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non class Production(object): reduced = 0 - def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0): + def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): self.name = name self.prod = tuple(prod) self.number = number @@ -1162,11 +1321,11 @@ def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line self.prec = precedence # Internal settings used during table construction - + self.len = len(self.prod) # Length of the production # Create a list of unique production symbols used in the production - self.usyms = [ ] + self.usyms = [] for s in self.prod: if s not in self.usyms: self.usyms.append(s) @@ -1177,15 +1336,15 @@ def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line # Create a string representation if self.prod: - self.str = "%s -> %s" % (self.name," ".join(self.prod)) + self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) else: - self.str = "%s -> " % self.name + self.str = '%s -> ' % self.name def __str__(self): return self.str def __repr__(self): - return "Production("+str(self)+")" + return 'Production(' + str(self) + ')' def __len__(self): return len(self.prod) @@ -1193,28 +1352,27 @@ def __len__(self): def __nonzero__(self): return 1 - def __getitem__(self,index): + def __getitem__(self, index): return self.prod[index] - - # Return the nth lr_item from the production (or None if at the end) - def lr_item(self,n): - if n > len(self.prod): return None - p = LRItem(self,n) - # Precompute the list of productions immediately following. Hack. Remove later + # Return the nth lr_item from the production (or None if at the end) + def lr_item(self, n): + if n > len(self.prod): + return None + p = LRItem(self, n) + # Precompute the list of productions immediately following. try: - p.lr_after = Prodnames[p.prod[n+1]] - except (IndexError,KeyError): + p.lr_after = self.Prodnames[p.prod[n+1]] + except (IndexError, KeyError): p.lr_after = [] try: p.lr_before = p.prod[n-1] except IndexError: p.lr_before = None - return p - + # Bind the production function name to a callable - def bind(self,pdict): + def bind(self, pdict): if self.func: self.callable = pdict[self.func] @@ -1223,7 +1381,7 @@ def bind(self,pdict): # actually used by the LR parsing engine, plus some additional # debugging information. class MiniProduction(object): - def __init__(self,str,name,len,func,file,line): + def __init__(self, str, name, len, func, file, line): self.name = name self.len = len self.func = func @@ -1231,13 +1389,15 @@ def __init__(self,str,name,len,func,file,line): self.file = file self.line = line self.str = str + def __str__(self): return self.str + def __repr__(self): - return "MiniProduction(%s)" % self.str + return 'MiniProduction(%s)' % self.str # Bind the production function name to a callable - def bind(self,pdict): + def bind(self, pdict): if self.func: self.callable = pdict[self.func] @@ -1246,9 +1406,9 @@ def bind(self,pdict): # class LRItem # # This class represents a specific stage of parsing a production rule. For -# example: +# example: # -# expr : expr . PLUS term +# expr : expr . PLUS term # # In the above, the "." represents the current location of the parse. 
Here # basic attributes: @@ -1267,26 +1427,26 @@ def bind(self,pdict): # ----------------------------------------------------------------------------- class LRItem(object): - def __init__(self,p,n): + def __init__(self, p, n): self.name = p.name self.prod = list(p.prod) self.number = p.number self.lr_index = n - self.lookaheads = { } - self.prod.insert(n,".") + self.lookaheads = {} + self.prod.insert(n, '.') self.prod = tuple(self.prod) self.len = len(self.prod) self.usyms = p.usyms def __str__(self): if self.prod: - s = "%s -> %s" % (self.name," ".join(self.prod)) + s = '%s -> %s' % (self.name, ' '.join(self.prod)) else: - s = "%s -> " % self.name + s = '%s -> ' % self.name return s def __repr__(self): - return "LRItem("+str(self)+")" + return 'LRItem(' + str(self) + ')' # ----------------------------------------------------------------------------- # rightmost_terminal() @@ -1309,21 +1469,22 @@ def rightmost_terminal(symbols, terminals): # This data is used for critical parts of the table generation process later. # ----------------------------------------------------------------------------- -class GrammarError(YaccError): pass +class GrammarError(YaccError): + pass class Grammar(object): - def __init__(self,terminals): + def __init__(self, terminals): self.Productions = [None] # A list of all of the productions. The first # entry is always reserved for the purpose of # building an augmented grammar - self.Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all + self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all # productions of that nonterminal. - self.Prodmap = { } # A dictionary that is only used to detect duplicate + self.Prodmap = {} # A dictionary that is only used to detect duplicate # productions. - self.Terminals = { } # A dictionary mapping the names of terminal symbols to a + self.Terminals = {} # A dictionary mapping the names of terminal symbols to a # list of the rules where they are used. for term in terminals: @@ -1331,17 +1492,17 @@ def __init__(self,terminals): self.Terminals['error'] = [] - self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list + self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list # of rule numbers where they are used. - self.First = { } # A dictionary of precomputed FIRST(x) symbols + self.First = {} # A dictionary of precomputed FIRST(x) symbols - self.Follow = { } # A dictionary of precomputed FOLLOW(x) symbols + self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols - self.Precedence = { } # Precedence rules for each terminal. Contains tuples of the + self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the # form ('right',level) or ('nonassoc', level) or ('left',level) - self.UsedPrecedence = { } # Precedence rules that were actually used by the grammer. + self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer. # This is only used to provide error checking and to generate # a warning about unused precedence rules. 
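Before the set_precedence()/add_production() hunks that follow, a minimal sketch of how the Precedence and UsedPrecedence structures get populated. Terminal names are hypothetical, and the loop only approximates what yacc() does with the user-level precedence table; note that set_precedence() must run before any production is added, as the assert below enforces:

    from ply.yacc import Grammar

    # As a user would write it in a grammar module:
    precedence = (
        ('left', 'PLUS', 'MINUS'),      # level 1: binds loosest
        ('left', 'TIMES', 'DIVIDE'),    # level 2
        ('right', 'UMINUS'),            # level 3: binds tightest
    )

    g = Grammar(['PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'UMINUS', 'NUMBER'])
    for level, spec in enumerate(precedence, start=1):
        assoc, terms = spec[0], spec[1:]
        for term in terms:
            g.set_precedence(term, assoc, level)
    print(g.Precedence['TIMES'])        # ('left', 2)

    # A %prec override: the rule takes the UMINUS level instead of the
    # precedence of its rightmost terminal, and add_production() records
    # the name in the UsedPrecedence set for later unused-rule warnings.
    g.add_production('expr', ['MINUS', 'expr', '%prec', 'UMINUS'])
    print('UMINUS' in g.UsedPrecedence)  # True
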
@@ -1351,7 +1512,7 @@ def __init__(self,terminals): def __len__(self): return len(self.Productions) - def __getitem__(self,index): + def __getitem__(self, index): return self.Productions[index] # ----------------------------------------------------------------------------- @@ -1362,14 +1523,14 @@ def __getitem__(self,index): # # ----------------------------------------------------------------------------- - def set_precedence(self,term,assoc,level): - assert self.Productions == [None],"Must call set_precedence() before add_production()" + def set_precedence(self, term, assoc, level): + assert self.Productions == [None], 'Must call set_precedence() before add_production()' if term in self.Precedence: - raise GrammarError("Precedence already specified for terminal '%s'" % term) - if assoc not in ['left','right','nonassoc']: + raise GrammarError('Precedence already specified for terminal %r' % term) + if assoc not in ['left', 'right', 'nonassoc']: raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") - self.Precedence[term] = (assoc,level) - + self.Precedence[term] = (assoc, level) + # ----------------------------------------------------------------------------- # add_production() # @@ -1387,72 +1548,74 @@ def set_precedence(self,term,assoc,level): # are valid and that %prec is used correctly. # ----------------------------------------------------------------------------- - def add_production(self,prodname,syms,func=None,file='',line=0): + def add_production(self, prodname, syms, func=None, file='', line=0): if prodname in self.Terminals: - raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname)) + raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname)) if prodname == 'error': - raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname)) + raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname)) if not _is_identifier.match(prodname): - raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname)) + raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname)) - # Look for literal tokens - for n,s in enumerate(syms): + # Look for literal tokens + for n, s in enumerate(syms): if s[0] in "'\"": - try: - c = eval(s) - if (len(c) > 1): - raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname)) - if not c in self.Terminals: - self.Terminals[c] = [] - syms[n] = c - continue - except SyntaxError: - pass + try: + c = eval(s) + if (len(c) > 1): + raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' % + (file, line, s, prodname)) + if c not in self.Terminals: + self.Terminals[c] = [] + syms[n] = c + continue + except SyntaxError: + pass if not _is_identifier.match(s) and s != '%prec': - raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname)) - + raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname)) + # Determine the precedence level if '%prec' in syms: if syms[-1] == '%prec': - raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line)) + raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line)) if syms[-2] != '%prec': - raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line)) + raise GrammarError('%s:%d: Syntax error. 
%%prec can only appear at the end of a grammar rule' % + (file, line)) precname = syms[-1] - prodprec = self.Precedence.get(precname,None) + prodprec = self.Precedence.get(precname) if not prodprec: - raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname)) + raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname)) else: - self.UsedPrecedence[precname] = 1 + self.UsedPrecedence.add(precname) del syms[-2:] # Drop %prec from the rule else: # If no %prec, precedence is determined by the rightmost terminal symbol - precname = rightmost_terminal(syms,self.Terminals) - prodprec = self.Precedence.get(precname,('right',0)) - + precname = rightmost_terminal(syms, self.Terminals) + prodprec = self.Precedence.get(precname, ('right', 0)) + # See if the rule is already in the rulemap - map = "%s -> %s" % (prodname,syms) + map = '%s -> %s' % (prodname, syms) if map in self.Prodmap: m = self.Prodmap[map] - raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) + - "Previous definition at %s:%d" % (m.file, m.line)) + raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) + + 'Previous definition at %s:%d' % (m.file, m.line)) # From this point on, everything is valid. Create a new Production instance pnumber = len(self.Productions) - if not prodname in self.Nonterminals: - self.Nonterminals[prodname] = [ ] + if prodname not in self.Nonterminals: + self.Nonterminals[prodname] = [] # Add the production number to Terminals and Nonterminals for t in syms: if t in self.Terminals: self.Terminals[t].append(pnumber) else: - if not t in self.Nonterminals: - self.Nonterminals[t] = [ ] + if t not in self.Nonterminals: + self.Nonterminals[t] = [] self.Nonterminals[t].append(pnumber) # Create a production and add it to the list of productions - p = Production(pnumber,prodname,syms,prodprec,func,file,line) + p = Production(pnumber, prodname, syms, prodprec, func, file, line) self.Productions.append(p) self.Prodmap[map] = p @@ -1460,22 +1623,21 @@ def add_production(self,prodname,syms,func=None,file='',line=0): try: self.Prodnames[prodname].append(p) except KeyError: - self.Prodnames[prodname] = [ p ] - return 0 + self.Prodnames[prodname] = [p] # ----------------------------------------------------------------------------- # set_start() # - # Sets the starting symbol and creates the augmented grammar. Production + # Sets the starting symbol and creates the augmented grammar. Production # rule 0 is S' -> start where start is the start symbol. # ----------------------------------------------------------------------------- - def set_start(self,start=None): + def set_start(self, start=None): if not start: start = self.Productions[1].name if start not in self.Nonterminals: - raise GrammarError("start symbol %s undefined" % start) - self.Productions[0] = Production(0,"S'",[start]) + raise GrammarError('start symbol %s undefined' % start) + self.Productions[0] = Production(0, "S'", [start]) self.Nonterminals[start].append(0) self.Start = start @@ -1487,26 +1649,20 @@ def set_start(self,start=None): # ----------------------------------------------------------------------------- def find_unreachable(self): - + # Mark all symbols that are reachable from a symbol s def mark_reachable_from(s): - if reachable[s]: - # We've already reached symbol s. 
+ if s in reachable: return - reachable[s] = 1 - for p in self.Prodnames.get(s,[]): + reachable.add(s) + for p in self.Prodnames.get(s, []): for r in p.prod: mark_reachable_from(r) - reachable = { } - for s in list(self.Terminals) + list(self.Nonterminals): - reachable[s] = 0 - - mark_reachable_from( self.Productions[0].prod[0] ) + reachable = set() + mark_reachable_from(self.Productions[0].prod[0]) + return [s for s in self.Nonterminals if s not in reachable] - return [s for s in list(self.Nonterminals) - if not reachable[s]] - # ----------------------------------------------------------------------------- # infinite_cycles() # @@ -1520,20 +1676,20 @@ def infinite_cycles(self): # Terminals: for t in self.Terminals: - terminates[t] = 1 + terminates[t] = True - terminates['$end'] = 1 + terminates['$end'] = True # Nonterminals: # Initialize to false: for n in self.Nonterminals: - terminates[n] = 0 + terminates[n] = False # Then propagate termination until no change: - while 1: - some_change = 0 - for (n,pl) in self.Prodnames.items(): + while True: + some_change = False + for (n, pl) in self.Prodnames.items(): # Nonterminal n terminates iff any of its productions terminates. for p in pl: # Production p terminates iff all of its rhs symbols terminate. @@ -1541,19 +1697,19 @@ def infinite_cycles(self): if not terminates[s]: # The symbol s does not terminate, # so production p does not terminate. - p_terminates = 0 + p_terminates = False break else: # didn't break from the loop, # so every symbol s terminates # so production p terminates. - p_terminates = 1 + p_terminates = True if p_terminates: # symbol n terminates! if not terminates[n]: - terminates[n] = 1 - some_change = 1 + terminates[n] = True + some_change = True # Don't need to consider any more productions for this n. break @@ -1561,9 +1717,9 @@ def infinite_cycles(self): break infinite = [] - for (s,term) in terminates.items(): + for (s, term) in terminates.items(): if not term: - if not s in self.Prodnames and not s in self.Terminals and s != 'error': + if s not in self.Prodnames and s not in self.Terminals and s != 'error': # s is used-but-not-defined, and we've already warned of that, # so it would be overkill to say that it's also non-terminating. pass @@ -1572,22 +1728,22 @@ def infinite_cycles(self): return infinite - # ----------------------------------------------------------------------------- # undefined_symbols() # # Find all symbols that were used in the grammar, but not defined as tokens or # grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol - # and prod is the production where the symbol was used. + # and prod is the production where the symbol was used. 
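
Backing up to find_unreachable() at the top of this hunk: the rewrite replaces a 0/1 dictionary with a set and a recursive walk from the start symbol. A standalone sketch of the same walk, assuming a simplified Prodnames-style mapping whose values are plain symbol lists rather than ply Production objects:

prodnames = {
    'expr': [['expr', 'PLUS', 'term'], ['term']],
    'term': [['NUMBER']],
    'orphan': [['NUMBER']],              # defined but never referenced
}

def mark_reachable_from(s, reachable):
    if s in reachable:
        return
    reachable.add(s)
    for rhs in prodnames.get(s, []):     # terminals have no entry, so they are leaves
        for r in rhs:
            mark_reachable_from(r, reachable)

reachable = set()
mark_reachable_from('expr', reachable)
print([s for s in prodnames if s not in reachable])   # ['orphan']
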
# ----------------------------------------------------------------------------- def undefined_symbols(self): result = [] for p in self.Productions: - if not p: continue + if not p: + continue for s in p.prod: - if not s in self.Prodnames and not s in self.Terminals and s != 'error': - result.append((s,p)) + if s not in self.Prodnames and s not in self.Terminals and s != 'error': + result.append((s, p)) return result # ----------------------------------------------------------------------------- @@ -1598,7 +1754,7 @@ def undefined_symbols(self): # ----------------------------------------------------------------------------- def unused_terminals(self): unused_tok = [] - for s,v in self.Terminals.items(): + for s, v in self.Terminals.items(): if s != 'error' and not v: unused_tok.append(s) @@ -1613,7 +1769,7 @@ def unused_terminals(self): def unused_rules(self): unused_prod = [] - for s,v in self.Nonterminals.items(): + for s, v in self.Nonterminals.items(): if not v: p = self.Prodnames[s][0] unused_prod.append(p) @@ -1625,15 +1781,15 @@ def unused_rules(self): # Returns a list of tuples (term,precedence) corresponding to precedence # rules that were never used by the grammar. term is the name of the terminal # on which precedence was applied and precedence is a string such as 'left' or - # 'right' corresponding to the type of precedence. + # 'right' corresponding to the type of precedence. # ----------------------------------------------------------------------------- def unused_precedence(self): unused = [] for termname in self.Precedence: if not (termname in self.Terminals or termname in self.UsedPrecedence): - unused.append((termname,self.Precedence[termname][0])) - + unused.append((termname, self.Precedence[termname][0])) + return unused # ------------------------------------------------------------------------- @@ -1644,19 +1800,20 @@ def unused_precedence(self): # During execution of compute_first1, the result may be incomplete. # Afterward (e.g., when called from compute_follow()), it will be complete. # ------------------------------------------------------------------------- - def _first(self,beta): + def _first(self, beta): # We are computing First(x1,x2,x3,...,xn) - result = [ ] + result = [] for x in beta: - x_produces_empty = 0 + x_produces_empty = False # Add all the non-<empty> symbols of First[x] to the result. for f in self.First[x]: if f == '<empty>': - x_produces_empty = 1 + x_produces_empty = True else: - if f not in result: result.append(f) + if f not in result: + result.append(f) if x_produces_empty: # We have to consider the next x in beta, @@ -1695,17 +1852,17 @@ def compute_first(self): self.First[n] = [] # Then propagate symbols until no change: - while 1: - some_change = 0 + while True: + some_change = False for n in self.Nonterminals: for p in self.Prodnames[n]: for f in self._first(p.prod): if f not in self.First[n]: - self.First[n].append( f ) - some_change = 1 + self.First[n].append(f) + some_change = True if not some_change: break - + return self.First # --------------------------------------------------------------------- @@ -1715,7 +1872,7 @@ def compute_first(self): # follow set is the set of all symbols that might follow a given # non-terminal. See the Dragon book, 2nd Ed. p. 189. 
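
The _first()/compute_first() pair above iterates to a fixed point. A self-contained sketch of that computation on a toy grammar (the grammar and dict layout are illustrative; like ply, it uses the sentinel '<empty>' for a nullable derivation):

EMPTY = '<empty>'                  # same sentinel ply uses for epsilon

grammar = {                        # toy grammar, illustrative only
    'E':  [['T', 'Ep']],
    'Ep': [['PLUS', 'T', 'Ep'], []],   # [] is an empty production
    'T':  [['NUMBER']],
}
terminals = {'PLUS', 'NUMBER'}

first = {t: {t} for t in terminals}    # FIRST(t) = {t} for terminals
for n in grammar:
    first[n] = set()

changed = True
while changed:                     # fixed point, like compute_first()
    changed = False
    for n, prods in grammar.items():
        for prod in prods:
            before = len(first[n])
            for x in prod:
                first[n] |= (first[x] - {EMPTY})
                if EMPTY not in first[x]:
                    break
            else:                  # every symbol was nullable (or prod == [])
                first[n].add(EMPTY)
            if len(first[n]) != before:
                changed = True

print(first['E'], first['Ep'])     # {'NUMBER'} and {'PLUS', '<empty>'}
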
# --------------------------------------------------------------------- - def compute_follow(self,start=None): + def compute_follow(self, start=None): # If already computed, return the result if self.Follow: return self.Follow @@ -1726,36 +1883,36 @@ def compute_follow(self,start=None): # Add '$end' to the follow list of the start symbol for k in self.Nonterminals: - self.Follow[k] = [ ] + self.Follow[k] = [] if not start: start = self.Productions[1].name - self.Follow[start] = [ '$end' ] + self.Follow[start] = ['$end'] - while 1: - didadd = 0 + while True: + didadd = False for p in self.Productions[1:]: # Here is the production set - for i in range(len(p.prod)): - B = p.prod[i] + for i, B in enumerate(p.prod): if B in self.Nonterminals: # Okay. We got a non-terminal in a production fst = self._first(p.prod[i+1:]) - hasempty = 0 + hasempty = False for f in fst: if f != '<empty>' and f not in self.Follow[B]: self.Follow[B].append(f) - didadd = 1 + didadd = True if f == '<empty>': - hasempty = 1 + hasempty = True if hasempty or i == (len(p.prod)-1): # Add elements of follow(a) to follow(b) for f in self.Follow[p.name]: if f not in self.Follow[B]: self.Follow[B].append(f) - didadd = 1 - if not didadd: break + didadd = True + if not didadd: + break return self.Follow @@ -1779,15 +1936,15 @@ def build_lritems(self): lastlri = p i = 0 lr_items = [] - while 1: + while True: if i > len(p): lri = None else: - lri = LRItem(p,i) + lri = LRItem(p, i) # Precompute the list of productions immediately following try: lri.lr_after = self.Prodnames[lri.prod[i+1]] - except (IndexError,KeyError): + except (IndexError, KeyError): lri.lr_after = [] try: lri.lr_before = lri.prod[i-1] @@ -1795,7 +1952,8 @@ def build_lritems(self): lri.lr_before = None lastlri.lr_next = lri - if not lri: break + if not lri: + break lr_items.append(lri) lastlri = lri i += 1 @@ -1804,12 +1962,13 @@ def build_lritems(self): # ----------------------------------------------------------------------------- # == Class LRTable == # -# This basic class represents a basic table of LR parsing information. +# This basic class represents a basic table of LR parsing information. # Methods for generating the tables are not defined here. They are defined # in the derived class LRGeneratedTable. 
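
To make compute_follow() above concrete, here is a standalone FOLLOW propagation for the same toy grammar as the FIRST sketch earlier, with the FIRST sets supplied by hand (all names illustrative):

EMPTY = '<empty>'
first = {'PLUS': {'PLUS'}, 'NUMBER': {'NUMBER'},
         'E': {'NUMBER'}, 'Ep': {'PLUS', EMPTY}, 'T': {'NUMBER'}}
productions = [('E', ['T', 'Ep']),
               ('Ep', ['PLUS', 'T', 'Ep']),
               ('Ep', []),
               ('T', ['NUMBER'])]

follow = {n: set() for n in ('E', 'Ep', 'T')}
follow['E'].add('$end')                # start symbol is followed by the end marker

changed = True
while changed:
    changed = False
    for name, prod in productions:
        for i, B in enumerate(prod):
            if B not in follow:        # only nonterminals carry FOLLOW sets
                continue
            fst, nullable = set(), True
            for x in prod[i+1:]:       # FIRST of everything after B
                fst |= (first[x] - {EMPTY})
                if EMPTY not in first[x]:
                    nullable = False
                    break
            added = fst | (follow[name] if nullable else set())
            if not added <= follow[B]:
                follow[B] |= added
                changed = True

print(follow)                          # FOLLOW(T) ends up containing PLUS and $end
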
# ----------------------------------------------------------------------------- -class VersionError(YaccError): pass +class VersionError(YaccError): + pass class LRTable(object): def __init__(self): @@ -1818,19 +1977,15 @@ def __init__(self): self.lr_productions = None self.lr_method = None - def read_table(self,module): - if isinstance(module,types.ModuleType): + def read_table(self, module): + if isinstance(module, types.ModuleType): parsetab = module else: - if sys.version_info[0] < 3: - exec("import %s as parsetab" % module) - else: - env = { } - exec("import %s as parsetab" % module, env, env) - parsetab = env['parsetab'] + exec('import %s' % module) + parsetab = sys.modules[module] if parsetab._tabversion != __tabversion__: - raise VersionError("yacc table file version is out of date") + raise VersionError('yacc table file version is out of date') self.lr_action = parsetab._lr_action self.lr_goto = parsetab._lr_goto @@ -1842,17 +1997,20 @@ def read_table(self,module): self.lr_method = parsetab._lr_method return parsetab._lr_signature - def read_pickle(self,filename): + def read_pickle(self, filename): try: import cPickle as pickle except ImportError: import pickle - in_f = open(filename,"rb") + if not os.path.exists(filename): + raise ImportError + + in_f = open(filename, 'rb') tabversion = pickle.load(in_f) if tabversion != __tabversion__: - raise VersionError("yacc table file version is out of date") + raise VersionError('yacc table file version is out of date') self.lr_method = pickle.load(in_f) signature = pickle.load(in_f) self.lr_action = pickle.load(in_f) @@ -1867,14 +2025,15 @@ def read_pickle(self,filename): return signature # Bind all production function names to callable objects in pdict - def bind_callables(self,pdict): + def bind_callables(self, pdict): for p in self.lr_productions: p.bind(pdict) - + + # ----------------------------------------------------------------------------- # === LR Generator === # -# The following classes and functions are used to generate LR parsing tables on +# The following classes and functions are used to generate LR parsing tables on # a grammar. 
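
The new read_table() body above replaces the Python 2/3 branching with a single exec plus a sys.modules lookup. A minimal sketch of that pattern, using the stdlib json module as a stand-in for a generated parsetab module (importlib.import_module would be the modern spelling):

import sys

def load_by_name(modname):
    # Same dynamic-import trick read_table() uses: the exec performs the
    # import, and sys.modules hands back the module object by name.
    exec('import %s' % modname)
    return sys.modules[modname]

mod = load_by_name('json')   # 'json' stands in for a generated table module
print(mod.__name__)
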
# ----------------------------------------------------------------------------- @@ -1895,17 +2054,18 @@ def bind_callables(self,pdict): # FP - Set-valued function # ------------------------------------------------------------------------------ -def digraph(X,R,FP): - N = { } +def digraph(X, R, FP): + N = {} for x in X: - N[x] = 0 + N[x] = 0 stack = [] - F = { } + F = {} for x in X: - if N[x] == 0: traverse(x,N,stack,F,X,R,FP) + if N[x] == 0: + traverse(x, N, stack, F, X, R, FP) return F -def traverse(x,N,stack,F,X,R,FP): +def traverse(x, N, stack, F, X, R, FP): stack.append(x) d = len(stack) N[x] = d @@ -1914,20 +2074,22 @@ def traverse(x,N,stack,F,X,R,FP): rel = R(x) # Get y's related to x for y in rel: if N[y] == 0: - traverse(y,N,stack,F,X,R,FP) - N[x] = min(N[x],N[y]) - for a in F.get(y,[]): - if a not in F[x]: F[x].append(a) + traverse(y, N, stack, F, X, R, FP) + N[x] = min(N[x], N[y]) + for a in F.get(y, []): + if a not in F[x]: + F[x].append(a) if N[x] == d: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() - while element != x: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() + while element != x: + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() -class LALRError(YaccError): pass +class LALRError(YaccError): + pass # ----------------------------------------------------------------------------- # == LRGeneratedTable == @@ -1937,9 +2099,9 @@ class LALRError(YaccError): pass # ----------------------------------------------------------------------------- class LRGeneratedTable(LRTable): - def __init__(self,grammar,method='LALR',log=None): - if method not in ['SLR','LALR']: - raise LALRError("Unsupported method %s" % method) + def __init__(self, grammar, method='LALR', log=None): + if method not in ['SLR', 'LALR']: + raise LALRError('Unsupported method %s' % method) self.grammar = grammar self.lr_method = method @@ -1974,21 +2136,22 @@ def __init__(self,grammar,method='LALR',log=None): # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. - def lr0_closure(self,I): + def lr0_closure(self, I): self._add_count += 1 # Add everything in I to J J = I[:] - didadd = 1 + didadd = True while didadd: - didadd = 0 + didadd = False for j in J: for x in j.lr_after: - if getattr(x,"lr0_added",0) == self._add_count: continue + if getattr(x, 'lr0_added', 0) == self._add_count: + continue # Add B --> .G to J J.append(x.lr_next) x.lr0_added = self._add_count - didadd = 1 + didadd = True return J @@ -1999,43 +2162,43 @@ def lr0_closure(self,I): # objects). With uniqueness, we can later do fast set comparisons using # id(obj) instead of element-wise comparison. 
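
digraph()/traverse() above compute a set-valued function F over a relation R, seeding each node with FP(x) and unioning in everything reachable through R; nodes in a cycle end up sharing one result. This is the machinery later used for the Read and Follow sets. A lightly restructured, runnable copy on a toy relation (node names and sets are illustrative):

MAXINT = 2**31 - 1

def digraph(X, R, FP):
    N = {x: 0 for x in X}
    stack, F = [], {}
    for x in X:
        if N[x] == 0:
            traverse(x, N, stack, F, X, R, FP)
    return F

def traverse(x, N, stack, F, X, R, FP):
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)                      # seed with the node's own set
    for y in R(x):
        if N[y] == 0:
            traverse(y, N, stack, F, X, R, FP)
        N[x] = min(N[x], N[y])
        for a in F.get(y, []):
            if a not in F[x]:
                F[x].append(a)        # union in everything reachable
    if N[x] == d:                     # x is the root of its strongly connected component
        while True:
            element = stack.pop()
            N[element] = MAXINT
            F[element] = F[x]         # the whole component shares one result
            if element == x:
                break

X = ['a', 'b', 'c']
R = lambda x: {'a': ['b'], 'b': ['c'], 'c': ['b']}[x]   # b <-> c form a cycle
FP = lambda x: {'a': ['A'], 'b': ['B'], 'c': ['C']}[x]
print(digraph(X, R, FP))   # a sees A,B,C; b and c share B,C
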
- def lr0_goto(self,I,x): + def lr0_goto(self, I, x): # First we look for a previously cached entry - g = self.lr_goto_cache.get((id(I),x),None) - if g: return g + g = self.lr_goto_cache.get((id(I), x)) + if g: + return g # Now we generate the goto set in a way that guarantees uniqueness # of the result - s = self.lr_goto_cache.get(x,None) + s = self.lr_goto_cache.get(x) if not s: - s = { } + s = {} self.lr_goto_cache[x] = s - gs = [ ] + gs = [] for p in I: n = p.lr_next if n and n.lr_before == x: - s1 = s.get(id(n),None) + s1 = s.get(id(n)) if not s1: - s1 = { } + s1 = {} s[id(n)] = s1 gs.append(n) s = s1 - g = s.get('$end',None) + g = s.get('$end') if not g: if gs: g = self.lr0_closure(gs) s['$end'] = g else: s['$end'] = gs - self.lr_goto_cache[(id(I),x)] = g + self.lr_goto_cache[(id(I), x)] = g return g # Compute the LR(0) sets of item function def lr0_items(self): - - C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ] + C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] i = 0 for I in C: self.lr0_cidhash[id(I)] = i @@ -2048,15 +2211,15 @@ def lr0_items(self): i += 1 # Collect all of the symbols that could possibly be in the goto(I,X) sets - asyms = { } + asyms = {} for ii in I: for s in ii.usyms: asyms[s] = None for x in asyms: - g = self.lr0_goto(I,x) - if not g: continue - if id(g) in self.lr0_cidhash: continue + g = self.lr0_goto(I, x) + if not g or id(g) in self.lr0_cidhash: + continue self.lr0_cidhash[id(g)] = len(C) C.append(g) @@ -2091,19 +2254,21 @@ def lr0_items(self): # ----------------------------------------------------------------------------- def compute_nullable_nonterminals(self): - nullable = {} + nullable = set() num_nullable = 0 - while 1: - for p in self.grammar.Productions[1:]: - if p.len == 0: - nullable[p.name] = 1 + while True: + for p in self.grammar.Productions[1:]: + if p.len == 0: + nullable.add(p.name) continue - for t in p.prod: - if not t in nullable: break - else: - nullable[p.name] = 1 - if len(nullable) == num_nullable: break - num_nullable = len(nullable) + for t in p.prod: + if t not in nullable: + break + else: + nullable.add(p.name) + if len(nullable) == num_nullable: + break + num_nullable = len(nullable) return nullable # ----------------------------------------------------------------------------- @@ -2117,16 +2282,16 @@ def compute_nullable_nonterminals(self): # The input C is the set of LR(0) items. # ----------------------------------------------------------------------------- - def find_nonterminal_transitions(self,C): - trans = [] - for state in range(len(C)): - for p in C[state]: - if p.lr_index < p.len - 1: - t = (state,p.prod[p.lr_index+1]) - if t[1] in self.grammar.Nonterminals: - if t not in trans: trans.append(t) - state = state + 1 - return trans + def find_nonterminal_transitions(self, C): + trans = [] + for stateno, state in enumerate(C): + for p in state: + if p.lr_index < p.len - 1: + t = (stateno, p.prod[p.lr_index+1]) + if t[1] in self.grammar.Nonterminals: + if t not in trans: + trans.append(t) + return trans # ----------------------------------------------------------------------------- # dr_relation() @@ -2137,21 +2302,21 @@ def find_nonterminal_transitions(self,C): # Returns a list of terminals. 
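
compute_nullable_nonterminals() above is another fixed point, now accumulating a set. The same idea standalone, on a toy (name, rhs) production list:

productions = [('opt_sign', ['MINUS']),
               ('opt_sign', []),                    # directly nullable
               ('num', ['opt_sign', 'DIGITS']),
               ('pair', ['opt_sign', 'opt_sign'])]  # nullable via its parts

nullable = set()
while True:
    before = len(nullable)
    for name, rhs in productions:
        if all(sym in nullable for sym in rhs):     # vacuously true for []
            nullable.add(name)
    if len(nullable) == before:                     # no change: fixed point
        break

print(sorted(nullable))   # ['opt_sign', 'pair']
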
# ----------------------------------------------------------------------------- - def dr_relation(self,C,trans,nullable): - dr_set = { } - state,N = trans + def dr_relation(self, C, trans, nullable): + state, N = trans terms = [] - g = self.lr0_goto(C[state],N) + g = self.lr0_goto(C[state], N) for p in g: - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index+1] - if a in self.grammar.Terminals: - if a not in terms: terms.append(a) + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index+1] + if a in self.grammar.Terminals: + if a not in terms: + terms.append(a) # This extra bit is to handle the start state if state == 0 and N == self.grammar.Productions[0].prod[0]: - terms.append('$end') + terms.append('$end') return terms @@ -2161,18 +2326,18 @@ def dr_relation(self,C,trans,nullable): # Computes the READS() relation (p,A) READS (t,C). # ----------------------------------------------------------------------------- - def reads_relation(self,C, trans, empty): + def reads_relation(self, C, trans, empty): # Look for empty transitions rel = [] state, N = trans - g = self.lr0_goto(C[state],N) - j = self.lr0_cidhash.get(id(g),-1) + g = self.lr0_goto(C[state], N) + j = self.lr0_cidhash.get(id(g), -1) for p in g: if p.lr_index < p.len - 1: - a = p.prod[p.lr_index + 1] - if a in empty: - rel.append((j,a)) + a = p.prod[p.lr_index + 1] + if a in empty: + rel.append((j, a)) return rel @@ -2204,8 +2369,7 @@ def reads_relation(self,C, trans, empty): # # ----------------------------------------------------------------------------- - def compute_lookback_includes(self,C,trans,nullable): - + def compute_lookback_includes(self, C, trans, nullable): lookdict = {} # Dictionary of lookback relations includedict = {} # Dictionary of include relations @@ -2215,11 +2379,12 @@ def compute_lookback_includes(self,C,trans,nullable): dtrans[t] = 1 # Loop over all transitions and compute lookbacks and includes - for state,N in trans: + for state, N in trans: lookb = [] includes = [] for p in C[state]: - if p.name != N: continue + if p.name != N: + continue # Okay, we have a name match. We now follow the production all the way # through the state machine until we get the . on the right hand side @@ -2227,44 +2392,50 @@ def compute_lookback_includes(self,C,trans,nullable): lr_index = p.lr_index j = state while lr_index < p.len - 1: - lr_index = lr_index + 1 - t = p.prod[lr_index] - - # Check to see if this symbol and state are a non-terminal transition - if (j,t) in dtrans: - # Yes. Okay, there is some chance that this is an includes relation - # the only way to know for certain is whether the rest of the - # production derives empty - - li = lr_index + 1 - while li < p.len: - if p.prod[li] in self.grammar.Terminals: break # No forget it - if not p.prod[li] in nullable: break - li = li + 1 - else: - # Appears to be a relation between (j,t) and (state,N) - includes.append((j,t)) - - g = self.lr0_goto(C[j],t) # Go to next set - j = self.lr0_cidhash.get(id(g),-1) # Go to next state + lr_index = lr_index + 1 + t = p.prod[lr_index] + + # Check to see if this symbol and state are a non-terminal transition + if (j, t) in dtrans: + # Yes. 
Okay, there is some chance that this is an includes relation + # the only way to know for certain is whether the rest of the + # production derives empty + + li = lr_index + 1 + while li < p.len: + if p.prod[li] in self.grammar.Terminals: + break # No, forget it + if p.prod[li] not in nullable: + break + li = li + 1 + else: + # Appears to be a relation between (j,t) and (state,N) + includes.append((j, t)) + + g = self.lr0_goto(C[j], t) # Go to next set + j = self.lr0_cidhash.get(id(g), -1) # Go to next state # When we get here, j is the final state, now we have to locate the production for r in C[j]: - if r.name != p.name: continue - if r.len != p.len: continue - i = 0 - # This look is comparing a production ". A B C" with "A B C ." - while i < r.lr_index: - if r.prod[i] != p.prod[i+1]: break - i = i + 1 - else: - lookb.append((j,r)) + if r.name != p.name: + continue + if r.len != p.len: + continue + i = 0 + # This loop is comparing a production ". A B C" with "A B C ." + while i < r.lr_index: + if r.prod[i] != p.prod[i+1]: + break + i = i + 1 + else: + lookb.append((j, r)) for i in includes: - if not i in includedict: includedict[i] = [] - includedict[i].append((state,N)) - lookdict[(state,N)] = lookb + if i not in includedict: + includedict[i] = [] + includedict[i].append((state, N)) + lookdict[(state, N)] = lookb - return lookdict,includedict + return lookdict, includedict # ----------------------------------------------------------------------------- # compute_read_sets() @@ -2278,10 +2449,10 @@ def compute_lookback_includes(self,C,trans,nullable): # Returns a set containing the read sets # ----------------------------------------------------------------------------- - def compute_read_sets(self,C, ntrans, nullable): - FP = lambda x: self.dr_relation(C,x,nullable) - R = lambda x: self.reads_relation(C,x,nullable) - F = digraph(ntrans,R,FP) + def compute_read_sets(self, C, ntrans, nullable): + FP = lambda x: self.dr_relation(C, x, nullable) + R = lambda x: self.reads_relation(C, x, nullable) + F = digraph(ntrans, R, FP) return F # ----------------------------------------------------------------------------- @@ -2300,11 +2471,11 @@ def compute_read_sets(self,C, ntrans, nullable): # Returns a set containing the follow sets # ----------------------------------------------------------------------------- - def compute_follow_sets(self,ntrans,readsets,inclsets): - FP = lambda x: readsets[x] - R = lambda x: inclsets.get(x,[]) - F = digraph(ntrans,R,FP) - return F + def compute_follow_sets(self, ntrans, readsets, inclsets): + FP = lambda x: readsets[x] + R = lambda x: inclsets.get(x, []) + F = digraph(ntrans, R, FP) + return F # ----------------------------------------------------------------------------- # add_lookaheads() @@ -2318,15 +2489,16 @@ def compute_follow_sets(self,ntrans,readsets,inclsets): # in the lookbacks set # ----------------------------------------------------------------------------- - def add_lookaheads(self,lookbacks,followset): - for trans,lb in lookbacks.items(): + def add_lookaheads(self, lookbacks, followset): + for trans, lb in lookbacks.items(): # Loop over productions in lookback - for state,p in lb: - if not state in p.lookaheads: - p.lookaheads[state] = [] - f = followset.get(trans,[]) - for a in f: - if a not in p.lookaheads[state]: p.lookaheads[state].append(a) + for state, p in lb: + if state not in p.lookaheads: + p.lookaheads[state] = [] + f = followset.get(trans, []) + for a in f: + if a not in p.lookaheads[state]: + p.lookaheads[state].append(a) # 
----------------------------------------------------------------------------- # add_lalr_lookaheads() @@ -2335,7 +2507,7 @@ def add_lookaheads(self,lookbacks,followset): # with LALR parsing # ----------------------------------------------------------------------------- - def add_lalr_lookaheads(self,C): + def add_lalr_lookaheads(self, C): # Determine all of the nullable nonterminals nullable = self.compute_nullable_nonterminals() @@ -2343,16 +2515,16 @@ def add_lalr_lookaheads(self,C): trans = self.find_nonterminal_transitions(C) # Compute read sets - readsets = self.compute_read_sets(C,trans,nullable) + readsets = self.compute_read_sets(C, trans, nullable) # Compute lookback/includes relations - lookd, included = self.compute_lookback_includes(C,trans,nullable) + lookd, included = self.compute_lookback_includes(C, trans, nullable) # Compute LALR FOLLOW sets - followsets = self.compute_follow_sets(trans,readsets,included) + followsets = self.compute_follow_sets(trans, readsets, included) # Add all of the lookaheads - self.add_lookaheads(lookd,followsets) + self.add_lookaheads(lookd, followsets) # ----------------------------------------------------------------------------- # lr_parse_table() @@ -2366,9 +2538,9 @@ def lr_parse_table(self): action = self.lr_action # Action array log = self.log # Logger for output - actionp = { } # Action production array (temporary) - - log.info("Parsing method: %s", self.lr_method) + actionp = {} # Action production array (temporary) + + log.info('Parsing method: %s', self.lr_method) # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items # This determines the number of states @@ -2382,23 +2554,23 @@ def lr_parse_table(self): st = 0 for I in C: # Loop over each production in I - actlist = [ ] # List of actions - st_action = { } - st_actionp = { } - st_goto = { } - log.info("") - log.info("state %d", st) - log.info("") + actlist = [] # List of actions + st_action = {} + st_actionp = {} + st_goto = {} + log.info('') + log.info('state %d', st) + log.info('') for p in I: - log.info(" (%d) %s", p.number, str(p)) - log.info("") + log.info(' (%d) %s', p.number, p) + log.info('') for p in I: if p.len == p.lr_index + 1: if p.name == "S'": # Start symbol. Accept! - st_action["$end"] = 0 - st_actionp["$end"] = p + st_action['$end'] = 0 + st_actionp['$end'] = p else: # We are at the end of a production. Reduce! if self.lr_method == 'LALR': @@ -2406,31 +2578,36 @@ def lr_parse_table(self): else: laheads = self.grammar.Follow[p.name] for a in laheads: - actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) - r = st_action.get(a,None) + actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p))) + r = st_action.get(a) if r is not None: # Whoa. Have a shift/reduce or reduce/reduce conflict if r > 0: # Need to decide on shift or reduce here # By default we favor shifting. Need to add # some precedence rules here. - sprec,slevel = Productions[st_actionp[a].number].prec - rprec,rlevel = Precedence.get(a,('right',0)) + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from rule being reduced (p) + rprec, rlevel = Productions[p.number].prec + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): # We really need to reduce here. st_action[a] = -p.number st_actionp[a] = p if not slevel and not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as reduce",a) - self.sr_conflicts.append((st,a,'reduce')) + log.info(' ! 
shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) Productions[p.number].reduced += 1 elif (slevel == rlevel) and (rprec == 'nonassoc'): st_action[a] = None else: # Hmmm. Guess we'll keep the shift if not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as shift",a) - self.sr_conflicts.append((st,a,'shift')) + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) elif r < 0: # Reduce/reduce conflict. In this case, we favor the rule # that was defined first in the grammar file @@ -2439,15 +2616,16 @@ def lr_parse_table(self): if oldp.line > pp.line: st_action[a] = -p.number st_actionp[a] = p - chosenp,rejectp = pp,oldp + chosenp, rejectp = pp, oldp Productions[p.number].reduced += 1 Productions[oldp.number].reduced -= 1 else: - chosenp,rejectp = oldp,pp - self.rr_conflicts.append((st,chosenp,rejectp)) - log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a]) + chosenp, rejectp = oldp, pp + self.rr_conflicts.append((st, chosenp, rejectp)) + log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)', + a, st_actionp[a].number, st_actionp[a]) else: - raise LALRError("Unknown conflict in state %d" % st) + raise LALRError('Unknown conflict in state %d' % st) else: st_action[a] = -p.number st_actionp[a] = p @@ -2456,205 +2634,211 @@ def lr_parse_table(self): i = p.lr_index a = p.prod[i+1] # Get symbol right after the "." if a in self.grammar.Terminals: - g = self.lr0_goto(I,a) - j = self.lr0_cidhash.get(id(g),-1) + g = self.lr0_goto(I, a) + j = self.lr0_cidhash.get(id(g), -1) if j >= 0: # We are in a shift state - actlist.append((a,p,"shift and go to state %d" % j)) - r = st_action.get(a,None) + actlist.append((a, p, 'shift and go to state %d' % j)) + r = st_action.get(a) if r is not None: # Whoa have a shift/reduce or shift/shift conflict if r > 0: if r != j: - raise LALRError("Shift/shift conflict in state %d" % st) + raise LALRError('Shift/shift conflict in state %d' % st) elif r < 0: # Do a precedence check. # - if precedence of reduce rule is higher, we reduce. # - if precedence of reduce is same and left assoc, we reduce. # - otherwise we shift - rprec,rlevel = Productions[st_actionp[a].number].prec - sprec,slevel = Precedence.get(a,('right',0)) + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from the rule that could have been reduced + rprec, rlevel = Productions[st_actionp[a].number].prec + if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): # We decide to shift here... highest precedence to shift Productions[st_actionp[a].number].reduced -= 1 st_action[a] = j st_actionp[a] = p if not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as shift",a) - self.sr_conflicts.append((st,a,'shift')) + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) elif (slevel == rlevel) and (rprec == 'nonassoc'): st_action[a] = None else: # Hmmm. Guess we'll keep the reduce if not slevel and not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as reduce",a) - self.sr_conflicts.append((st,a,'reduce')) + log.info(' ! 
shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) else: - raise LALRError("Unknown conflict in state %d" % st) + raise LALRError('Unknown conflict in state %d' % st) else: st_action[a] = j st_actionp[a] = p # Print the actions associated with each terminal - _actprint = { } - for a,p,m in actlist: + _actprint = {} + for a, p, m in actlist: if a in st_action: if p is st_actionp[a]: - log.info(" %-15s %s",a,m) - _actprint[(a,m)] = 1 - log.info("") + log.info(' %-15s %s', a, m) + _actprint[(a, m)] = 1 + log.info('') # Print the actions that were not used. (debugging) not_used = 0 - for a,p,m in actlist: + for a, p, m in actlist: if a in st_action: if p is not st_actionp[a]: - if not (a,m) in _actprint: - log.debug(" ! %-15s [ %s ]",a,m) + if not (a, m) in _actprint: + log.debug(' ! %-15s [ %s ]', a, m) not_used = 1 - _actprint[(a,m)] = 1 + _actprint[(a, m)] = 1 if not_used: - log.debug("") + log.debug('') # Construct the goto table for this state - nkeys = { } + nkeys = {} for ii in I: for s in ii.usyms: if s in self.grammar.Nonterminals: nkeys[s] = None for n in nkeys: - g = self.lr0_goto(I,n) - j = self.lr0_cidhash.get(id(g),-1) + g = self.lr0_goto(I, n) + j = self.lr0_cidhash.get(id(g), -1) if j >= 0: st_goto[n] = j - log.info(" %-30s shift and go to state %d",n,j) + log.info(' %-30s shift and go to state %d', n, j) action[st] = st_action actionp[st] = st_actionp goto[st] = st_goto st += 1 - # ----------------------------------------------------------------------------- # write() # # This function writes the LR parsing tables to a file # ----------------------------------------------------------------------------- - def write_table(self,modulename,outputdir='',signature=""): - basemodulename = modulename.split(".")[-1] - filename = os.path.join(outputdir,basemodulename) + ".py" + def write_table(self, tabmodule, outputdir='', signature=''): + if isinstance(tabmodule, types.ModuleType): + raise IOError("Won't overwrite existing tabmodule") + + basemodulename = tabmodule.split('.')[-1] + filename = os.path.join(outputdir, basemodulename) + '.py' try: - f = open(filename,"w") + f = open(filename, 'w') - f.write(""" + f.write(''' # %s # This file is automatically generated. Do not edit. 
+# pylint: disable=W,C,R _tabversion = %r _lr_method = %r _lr_signature = %r - """ % (filename, __tabversion__, self.lr_method, signature)) + ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature)) # Change smaller to 0 to go back to original tables smaller = 1 # Factor out names to try and make smaller if smaller: - items = { } - - for s,nd in self.lr_action.items(): - for name,v in nd.items(): - i = items.get(name) - if not i: - i = ([],[]) - items[name] = i - i[0].append(s) - i[1].append(v) - - f.write("\n_lr_action_items = {") - for k,v in items.items(): - f.write("%r:([" % k) + items = {} + + for s, nd in self.lr_action.items(): + for name, v in nd.items(): + i = items.get(name) + if not i: + i = ([], []) + items[name] = i + i[0].append(s) + i[1].append(v) + + f.write('\n_lr_action_items = {') + for k, v in items.items(): + f.write('%r:([' % k) for i in v[0]: - f.write("%r," % i) - f.write("],[") + f.write('%r,' % i) + f.write('],[') for i in v[1]: - f.write("%r," % i) + f.write('%r,' % i) - f.write("]),") - f.write("}\n") + f.write(']),') + f.write('}\n') - f.write(""" -_lr_action = { } + f.write(''' +_lr_action = {} for _k, _v in _lr_action_items.items(): for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_action: _lr_action[_x] = { } + if not _x in _lr_action: _lr_action[_x] = {} _lr_action[_x][_k] = _y del _lr_action_items -""") +''') else: - f.write("\n_lr_action = { "); - for k,v in self.lr_action.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); + f.write('\n_lr_action = { ') + for k, v in self.lr_action.items(): + f.write('(%r,%r):%r,' % (k[0], k[1], v)) + f.write('}\n') if smaller: # Factor out names to try and make smaller - items = { } - - for s,nd in self.lr_goto.items(): - for name,v in nd.items(): - i = items.get(name) - if not i: - i = ([],[]) - items[name] = i - i[0].append(s) - i[1].append(v) - - f.write("\n_lr_goto_items = {") - for k,v in items.items(): - f.write("%r:([" % k) + items = {} + + for s, nd in self.lr_goto.items(): + for name, v in nd.items(): + i = items.get(name) + if not i: + i = ([], []) + items[name] = i + i[0].append(s) + i[1].append(v) + + f.write('\n_lr_goto_items = {') + for k, v in items.items(): + f.write('%r:([' % k) for i in v[0]: - f.write("%r," % i) - f.write("],[") + f.write('%r,' % i) + f.write('],[') for i in v[1]: - f.write("%r," % i) + f.write('%r,' % i) - f.write("]),") - f.write("}\n") + f.write(']),') + f.write('}\n') - f.write(""" -_lr_goto = { } + f.write(''' +_lr_goto = {} for _k, _v in _lr_goto_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_goto: _lr_goto[_x] = { } + for _x, _y in zip(_v[0], _v[1]): + if not _x in _lr_goto: _lr_goto[_x] = {} _lr_goto[_x][_k] = _y del _lr_goto_items -""") +''') else: - f.write("\n_lr_goto = { "); - for k,v in self.lr_goto.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); + f.write('\n_lr_goto = { ') + for k, v in self.lr_goto.items(): + f.write('(%r,%r):%r,' % (k[0], k[1], v)) + f.write('}\n') # Write production table - f.write("_lr_productions = [\n") + f.write('_lr_productions = [\n') for p in self.lr_productions: if p.func: - f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line)) + f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len, + p.func, os.path.basename(p.file), p.line)) else: - f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len)) - f.write("]\n") + f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len)) + f.write(']\n') f.close() - except IOError: - e = 
sys.exc_info()[1] - sys.stderr.write("Unable to create '%s'\n" % filename) - sys.stderr.write(str(e)+"\n") - return + except IOError as e: + raise # ----------------------------------------------------------------------------- @@ -2663,26 +2847,25 @@ def write_table(self,modulename,outputdir='',signature=""): # This function pickles the LR parsing tables to a supplied file object # ----------------------------------------------------------------------------- - def pickle_table(self,filename,signature=""): + def pickle_table(self, filename, signature=''): try: import cPickle as pickle except ImportError: import pickle - outf = open(filename,"wb") - pickle.dump(__tabversion__,outf,pickle_protocol) - pickle.dump(self.lr_method,outf,pickle_protocol) - pickle.dump(signature,outf,pickle_protocol) - pickle.dump(self.lr_action,outf,pickle_protocol) - pickle.dump(self.lr_goto,outf,pickle_protocol) - - outp = [] - for p in self.lr_productions: - if p.func: - outp.append((p.str,p.name, p.len, p.func,p.file,p.line)) - else: - outp.append((str(p),p.name,p.len,None,None,None)) - pickle.dump(outp,outf,pickle_protocol) - outf.close() + with open(filename, 'wb') as outf: + pickle.dump(__tabversion__, outf, pickle_protocol) + pickle.dump(self.lr_method, outf, pickle_protocol) + pickle.dump(signature, outf, pickle_protocol) + pickle.dump(self.lr_action, outf, pickle_protocol) + pickle.dump(self.lr_goto, outf, pickle_protocol) + + outp = [] + for p in self.lr_productions: + if p.func: + outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line)) + else: + outp.append((str(p), p.name, p.len, None, None, None)) + pickle.dump(outp, outf, pickle_protocol) # ----------------------------------------------------------------------------- # === INTROSPECTION === @@ -2700,26 +2883,18 @@ def pickle_table(self,filename,signature=""): # ----------------------------------------------------------------------------- def get_caller_module_dict(levels): - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - while levels > 0: - f = f.f_back - levels -= 1 - ldict = f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - - return ldict + f = sys._getframe(levels) + ldict = f.f_globals.copy() + if f.f_globals != f.f_locals: + ldict.update(f.f_locals) + return ldict # ----------------------------------------------------------------------------- # parse_grammar() # # This takes a raw grammar rule string and parses it into production data # ----------------------------------------------------------------------------- -def parse_grammar(doc,file,line): +def parse_grammar(doc, file, line): grammar = [] # Split the doc string into lines pstrings = doc.splitlines() @@ -2728,12 +2903,13 @@ def parse_grammar(doc,file,line): for ps in pstrings: dline += 1 p = ps.split() - if not p: continue + if not p: + continue try: if p[0] == '|': # This is a continuation of a previous rule if not lastp: - raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline)) + raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) prodname = lastp syms = p[1:] else: @@ -2742,13 +2918,13 @@ def parse_grammar(doc,file,line): syms = p[2:] assign = p[1] if assign != ':' and assign != '::=': - raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline)) + raise SyntaxError("%s:%d: Syntax error. 
Expected ':'" % (file, dline)) - grammar.append((file,dline,prodname,syms)) + grammar.append((file, dline, prodname, syms)) except SyntaxError: raise except Exception: - raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip())) + raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) return grammar @@ -2760,14 +2936,14 @@ def parse_grammar(doc,file,line): # etc. # ----------------------------------------------------------------------------- class ParserReflect(object): - def __init__(self,pdict,log=None): + def __init__(self, pdict, log=None): self.pdict = pdict self.start = None self.error_func = None self.tokens = None - self.files = {} + self.modules = set() self.grammar = [] - self.error = 0 + self.error = False if log is None: self.log = PlyLogger(sys.stderr) @@ -2781,7 +2957,7 @@ def get_all(self): self.get_tokens() self.get_precedence() self.get_pfunctions() - + # Validate all of the information def validate_all(self): self.validate_start() @@ -2789,32 +2965,28 @@ def validate_all(self): self.validate_tokens() self.validate_precedence() self.validate_pfunctions() - self.validate_files() + self.validate_modules() return self.error # Compute a signature over the grammar def signature(self): + parts = [] try: - from hashlib import md5 - except ImportError: - from md5 import md5 - try: - sig = md5() if self.start: - sig.update(self.start.encode('latin-1')) + parts.append(self.start) if self.prec: - sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1')) + parts.append(''.join([''.join(p) for p in self.prec])) if self.tokens: - sig.update(" ".join(self.tokens).encode('latin-1')) + parts.append(' '.join(self.tokens)) for f in self.pfuncs: if f[3]: - sig.update(f[3].encode('latin-1')) - except (TypeError,ValueError): + parts.append(f[3]) + except (TypeError, ValueError): pass - return sig.digest() + return ''.join(parts) # ----------------------------------------------------------------------------- - # validate_file() + # validate_modules() # # This method checks to see if there are duplicated p_rulename() functions # in the parser module file. Without this function, it is really easy for @@ -2824,32 +2996,29 @@ def signature(self): # to try and detect duplicates. # ----------------------------------------------------------------------------- - def validate_files(self): + def validate_modules(self): # Match def p_funcname( fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') - for filename in self.files.keys(): - base,ext = os.path.splitext(filename) - if ext != '.py': return 1 # No idea. Assume it's okay. - + for module in self.modules: try: - f = open(filename) - lines = f.readlines() - f.close() + lines, linen = inspect.getsourcelines(module) except IOError: continue - counthash = { } - for linen,l in enumerate(lines): + counthash = {} + for linen, line in enumerate(lines): linen += 1 - m = fre.match(l) + m = fre.match(line) if m: name = m.group(1) prev = counthash.get(name) if not prev: counthash[name] = linen else: - self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev) + filename = inspect.getsourcefile(module) + self.log.warning('%s:%d: Function %s redefined. 
Previously defined on line %d', + filename, linen, name, prev) # Get the start symbol def get_start(self): @@ -2858,7 +3027,7 @@ def get_start(self): # Validate the start symbol def validate_start(self): if self.start is not None: - if not isinstance(self.start,str): + if not isinstance(self.start, string_types): self.log.error("'start' must be a string") # Look for error handler @@ -2868,162 +3037,173 @@ def get_error_func(self): # Validate the error function def validate_error_func(self): if self.error_func: - if isinstance(self.error_func,types.FunctionType): + if isinstance(self.error_func, types.FunctionType): ismethod = 0 elif isinstance(self.error_func, types.MethodType): ismethod = 1 else: self.log.error("'p_error' defined, but is not a function or method") - self.error = 1 + self.error = True return - eline = func_code(self.error_func).co_firstlineno - efile = func_code(self.error_func).co_filename - self.files[efile] = 1 + eline = self.error_func.__code__.co_firstlineno + efile = self.error_func.__code__.co_filename + module = inspect.getmodule(self.error_func) + self.modules.add(module) - if (func_code(self.error_func).co_argcount != 1+ismethod): - self.log.error("%s:%d: p_error() requires 1 argument",efile,eline) - self.error = 1 + argcount = self.error_func.__code__.co_argcount - ismethod + if argcount != 1: + self.log.error('%s:%d: p_error() requires 1 argument', efile, eline) + self.error = True # Get the tokens map def get_tokens(self): - tokens = self.pdict.get("tokens",None) + tokens = self.pdict.get('tokens') if not tokens: - self.log.error("No token list is defined") - self.error = 1 + self.log.error('No token list is defined') + self.error = True return - if not isinstance(tokens,(list, tuple)): - self.log.error("tokens must be a list or tuple") - self.error = 1 + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True return - + if not tokens: - self.log.error("tokens is empty") - self.error = 1 + self.log.error('tokens is empty') + self.error = True return - self.tokens = tokens + self.tokens = sorted(tokens) # Validate the tokens def validate_tokens(self): # Validate the tokens. if 'error' in self.tokens: self.log.error("Illegal token name 'error'. Is a reserved word") - self.error = 1 + self.error = True return - terminals = {} + terminals = set() for n in self.tokens: if n in terminals: - self.log.warning("Token '%s' multiply defined", n) - terminals[n] = 1 + self.log.warning('Token %r multiply defined', n) + terminals.add(n) # Get the precedence map (if any) def get_precedence(self): - self.prec = self.pdict.get("precedence",None) + self.prec = self.pdict.get('precedence') # Validate and parse the precedence map def validate_precedence(self): preclist = [] if self.prec: - if not isinstance(self.prec,(list,tuple)): - self.log.error("precedence must be a list or tuple") - self.error = 1 + if not isinstance(self.prec, (list, tuple)): + self.log.error('precedence must be a list or tuple') + self.error = True return - for level,p in enumerate(self.prec): - if not isinstance(p,(list,tuple)): - self.log.error("Bad precedence table") - self.error = 1 + for level, p in enumerate(self.prec): + if not isinstance(p, (list, tuple)): + self.log.error('Bad precedence table') + self.error = True return if len(p) < 2: - self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p) - self.error = 1 + self.log.error('Malformed precedence entry %s. 
Must be (assoc, term, ..., term)', p) + self.error = True return assoc = p[0] - if not isinstance(assoc,str): - self.log.error("precedence associativity must be a string") - self.error = 1 + if not isinstance(assoc, string_types): + self.log.error('precedence associativity must be a string') + self.error = True return for term in p[1:]: - if not isinstance(term,str): - self.log.error("precedence items must be strings") - self.error = 1 + if not isinstance(term, string_types): + self.log.error('precedence items must be strings') + self.error = True return - preclist.append((term,assoc,level+1)) + preclist.append((term, assoc, level+1)) self.preclist = preclist # Get all p_functions from the grammar def get_pfunctions(self): p_functions = [] for name, item in self.pdict.items(): - if name[:2] != 'p_': continue - if name == 'p_error': continue - if isinstance(item,(types.FunctionType,types.MethodType)): - line = func_code(item).co_firstlineno - file = func_code(item).co_filename - p_functions.append((line,file,name,item.__doc__)) - - # Sort all of the actions by line number - p_functions.sort() + if not name.startswith('p_') or name == 'p_error': + continue + if isinstance(item, (types.FunctionType, types.MethodType)): + line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno) + module = inspect.getmodule(item) + p_functions.append((line, module, name, item.__doc__)) + + # Sort all of the actions by line number; make sure to stringify + # modules to make them sortable, since `line` may not uniquely sort all + # p functions + p_functions.sort(key=lambda p_function: ( + p_function[0], + str(p_function[1]), + p_function[2], + p_function[3])) self.pfuncs = p_functions - # Validate all of the p_functions def validate_pfunctions(self): grammar = [] # Check for non-empty symbols if len(self.pfuncs) == 0: - self.log.error("no rules of the form p_rulename are defined") - self.error = 1 - return - - for line, file, name, doc in self.pfuncs: + self.log.error('no rules of the form p_rulename are defined') + self.error = True + return + + for line, module, name, doc in self.pfuncs: + file = inspect.getsourcefile(module) func = self.pdict[name] if isinstance(func, types.MethodType): reqargs = 2 else: reqargs = 1 - if func_code(func).co_argcount > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__) - self.error = 1 - elif func_code(func).co_argcount < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__) - self.error = 1 + if func.__code__.co_argcount > reqargs: + self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__) + self.error = True + elif func.__code__.co_argcount < reqargs: + self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__) + self.error = True elif not func.__doc__: - self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__) + self.log.warning('%s:%d: No documentation string specified in function %r (ignored)', + file, line, func.__name__) else: try: - parsed_g = parse_grammar(doc,file,line) + parsed_g = parse_grammar(doc, file, line) for g in parsed_g: grammar.append((name, g)) - except SyntaxError: - e = sys.exc_info()[1] + except SyntaxError as e: self.log.error(str(e)) - self.error = 1 + self.error = True # Looks like a valid grammar rule # Mark the file in which defined. 
- self.files[file] = 1 + self.modules.add(module) # Secondary validation step that looks for p_ definitions that are not functions # or functions that look like they might be grammar rules. - for n,v in self.pdict.items(): - if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue - if n[0:2] == 't_': continue - if n[0:2] == 'p_' and n != 'p_error': - self.log.warning("'%s' not defined as a function", n) - if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or - (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)): - try: - doc = v.__doc__.split(" ") - if doc[1] == ':': - self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix", - func_code(v).co_filename, func_code(v).co_firstlineno,n) - except Exception: - pass + for n, v in self.pdict.items(): + if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): + continue + if n.startswith('t_'): + continue + if n.startswith('p_') and n != 'p_error': + self.log.warning('%r not defined as a function', n) + if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or + (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): + if v.__doc__: + try: + doc = v.__doc__.split(' ') + if doc[1] == ':': + self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix', + v.__code__.co_filename, v.__code__.co_firstlineno, n) + except IndexError: + pass self.grammar = grammar @@ -3033,14 +3213,17 @@ def validate_pfunctions(self): # Build a parser # ----------------------------------------------------------------------------- -def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, - check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='', - debuglog=None, errorlog = None, picklefile=None): +def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, + check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file, + outputdir=None, debuglog=None, errorlog=None, picklefile=None): - global parse # Reference to the parsing method of the last built parser + if tabmodule is None: + tabmodule = tab_module - # If pickling is enabled, table files are not created + # Reference to the parsing method of the last built parser + global parse + # If pickling is enabled, table files are not created if picklefile: write_tables = 0 @@ -3049,17 +3232,54 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star # Get the module dictionary used for the parser if module: - _items = [(k,getattr(module,k)) for k in dir(module)] + _items = [(k, getattr(module, k)) for k in dir(module)] pdict = dict(_items) + # If no __file__ or __package__ attributes are available, try to obtain them + # from the __module__ instead + if '__file__' not in pdict: + pdict['__file__'] = sys.modules[pdict['__module__']].__file__ + if '__package__' not in pdict and '__module__' in pdict: + if hasattr(sys.modules[pdict['__module__']], '__package__'): + pdict['__package__'] = sys.modules[pdict['__module__']].__package__ else: pdict = get_caller_module_dict(2) + if outputdir is None: + # If no output directory is set, the location of the output files + # is determined according to the following rules: + # - If tabmodule specifies a package, files go into that package directory + # - Otherwise, files go in the same directory as the specifying module + if isinstance(tabmodule, types.ModuleType): + srcfile = tabmodule.__file__ + 
+        else:
+            if '.' not in tabmodule:
+                srcfile = pdict['__file__']
+            else:
+                parts = tabmodule.split('.')
+                pkgname = '.'.join(parts[:-1])
+                exec('import %s' % pkgname)
+                srcfile = getattr(sys.modules[pkgname], '__file__', '')
+        outputdir = os.path.dirname(srcfile)
+
+    # Determine if the module providing the grammar is part of a package.
+    # If so, fix the tabmodule setting so that tables load correctly
+    pkg = pdict.get('__package__')
+    if pkg and isinstance(tabmodule, str):
+        if '.' not in tabmodule:
+            tabmodule = pkg + '.' + tabmodule
+
+
+
+    # Set start symbol if it's specified directly using an argument
+    if start is not None:
+        pdict['start'] = start
+
     # Collect parser information from the dictionary
-    pinfo = ParserReflect(pdict,log=errorlog)
+    pinfo = ParserReflect(pdict, log=errorlog)
     pinfo.get_all()
 
     if pinfo.error:
-        raise YaccError("Unable to build parser")
+        raise YaccError('Unable to build parser')
 
     # Check signature against table files (if any)
     signature = pinfo.signature()
 
@@ -3074,35 +3294,36 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star
         if optimize or (read_signature == signature):
             try:
                 lr.bind_callables(pinfo.pdict)
-                parser = LRParser(lr,pinfo.error_func)
+                parser = LRParser(lr, pinfo.error_func)
                 parse = parser.parse
                 return parser
-            except Exception:
-                e = sys.exc_info()[1]
-                errorlog.warning("There was a problem loading the table file: %s", repr(e))
-    except VersionError:
-        e = sys.exc_info()
+            except Exception as e:
+                errorlog.warning('There was a problem loading the table file: %r', e)
+    except VersionError as e:
         errorlog.warning(str(e))
-    except Exception:
+    except ImportError:
         pass
 
     if debuglog is None:
         if debug:
-            debuglog = PlyLogger(open(debugfile,"w"))
+            try:
+                debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
+            except IOError as e:
+                errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
+                debuglog = NullLogger()
         else:
            debuglog = NullLogger()
 
-    debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)
-
+    debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)
 
-    errors = 0
+    errors = False
 
     # Validate the parser information
     if pinfo.validate_all():
-        raise YaccError("Unable to build parser")
-
+        raise YaccError('Unable to build parser')
+
     if not pinfo.error_func:
-        errorlog.warning("no p_error() function is defined")
+        errorlog.warning('no p_error() function is defined')
 
     # Create a grammar object
     grammar = Grammar(pinfo.tokens)
 
@@ -3110,20 +3331,18 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star
 
     # Set precedence level for terminals
     for term, assoc, level in pinfo.preclist:
         try:
-            grammar.set_precedence(term,assoc,level)
-        except GrammarError:
-            e = sys.exc_info()[1]
-            errorlog.warning("%s",str(e))
+            grammar.set_precedence(term, assoc, level)
+        except GrammarError as e:
+            errorlog.warning('%s', e)
 
     # Add productions to the grammar
     for funcname, gram in pinfo.grammar:
         file, line, prodname, syms = gram
         try:
-            grammar.add_production(prodname,syms,funcname,file,line)
-        except GrammarError:
-            e = sys.exc_info()[1]
-            errorlog.error("%s",str(e))
-            errors = 1
+            grammar.add_production(prodname, syms, funcname, file, line)
+        except GrammarError as e:
+            errorlog.error('%s', e)
+            errors = True
 
     # Set the grammar start symbols
     try:
@@ -3131,146 +3350,153 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star
             grammar.set_start(pinfo.start)
         else:
             grammar.set_start(start)
-    except GrammarError:
-        e = sys.exc_info()[1]
+    except GrammarError as e:
         errorlog.error(str(e))
-        errors = 1
+        errors = True
 
     if errors:
-        raise YaccError("Unable to build parser")
+        raise YaccError('Unable to build parser')
 
     # Verify the grammar structure
     undefined_symbols = grammar.undefined_symbols()
     for sym, prod in undefined_symbols:
-        errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)
-        errors = 1
+        errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
+        errors = True
 
     unused_terminals = grammar.unused_terminals()
     if unused_terminals:
-        debuglog.info("")
-        debuglog.info("Unused terminals:")
-        debuglog.info("")
+        debuglog.info('')
+        debuglog.info('Unused terminals:')
+        debuglog.info('')
         for term in unused_terminals:
-            errorlog.warning("Token '%s' defined, but not used", term)
-            debuglog.info("    %s", term)
+            errorlog.warning('Token %r defined, but not used', term)
+            debuglog.info('    %s', term)
 
     # Print out all productions to the debug log
     if debug:
-        debuglog.info("")
-        debuglog.info("Grammar")
-        debuglog.info("")
-        for n,p in enumerate(grammar.Productions):
-            debuglog.info("Rule %-5d %s", n, p)
+        debuglog.info('')
+        debuglog.info('Grammar')
+        debuglog.info('')
+        for n, p in enumerate(grammar.Productions):
+            debuglog.info('Rule %-5d %s', n, p)
 
     # Find unused non-terminals
     unused_rules = grammar.unused_rules()
     for prod in unused_rules:
-        errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name)
+        errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)
 
     if len(unused_terminals) == 1:
-        errorlog.warning("There is 1 unused token")
+        errorlog.warning('There is 1 unused token')
     if len(unused_terminals) > 1:
-        errorlog.warning("There are %d unused tokens", len(unused_terminals))
+        errorlog.warning('There are %d unused tokens', len(unused_terminals))
 
     if len(unused_rules) == 1:
-        errorlog.warning("There is 1 unused rule")
+        errorlog.warning('There is 1 unused rule')
    if len(unused_rules) > 1:
-        errorlog.warning("There are %d unused rules", len(unused_rules))
+        errorlog.warning('There are %d unused rules', len(unused_rules))
 
     if debug:
-        debuglog.info("")
-        debuglog.info("Terminals, with rules where they appear")
-        debuglog.info("")
+        debuglog.info('')
+        debuglog.info('Terminals, with rules where they appear')
+        debuglog.info('')
         terms = list(grammar.Terminals)
         terms.sort()
         for term in terms:
-            debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))
-
-        debuglog.info("")
-        debuglog.info("Nonterminals, with rules where they appear")
-        debuglog.info("")
+            debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))
+
+        debuglog.info('')
+        debuglog.info('Nonterminals, with rules where they appear')
+        debuglog.info('')
         nonterms = list(grammar.Nonterminals)
         nonterms.sort()
         for nonterm in nonterms:
-            debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
-        debuglog.info("")
+            debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
+        debuglog.info('')
 
     if check_recursion:
         unreachable = grammar.find_unreachable()
         for u in unreachable:
-            errorlog.warning("Symbol '%s' is unreachable",u)
+            errorlog.warning('Symbol %r is unreachable', u)
 
         infinite = grammar.infinite_cycles()
         for inf in infinite:
-            errorlog.error("Infinite recursion detected for symbol '%s'", inf)
-            errors = 1
-
+            errorlog.error('Infinite recursion detected for symbol %r', inf)
+            errors = True
+
     unused_prec = grammar.unused_precedence()
     for term, assoc in unused_prec:
-        errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
-        errors = 1
+        errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
+        errors = True
 
     if errors:
-        raise YaccError("Unable to build parser")
-
+        raise YaccError('Unable to build parser')
+
     # Run the LRGeneratedTable on the grammar
     if debug:
-        errorlog.debug("Generating %s tables", method)
-
-    lr = LRGeneratedTable(grammar,method,debuglog)
+        errorlog.debug('Generating %s tables', method)
+
+    lr = LRGeneratedTable(grammar, method, debuglog)
 
     if debug:
         num_sr = len(lr.sr_conflicts)
 
         # Report shift/reduce and reduce/reduce conflicts
         if num_sr == 1:
-            errorlog.warning("1 shift/reduce conflict")
+            errorlog.warning('1 shift/reduce conflict')
         elif num_sr > 1:
-            errorlog.warning("%d shift/reduce conflicts", num_sr)
+            errorlog.warning('%d shift/reduce conflicts', num_sr)
 
         num_rr = len(lr.rr_conflicts)
         if num_rr == 1:
-            errorlog.warning("1 reduce/reduce conflict")
+            errorlog.warning('1 reduce/reduce conflict')
         elif num_rr > 1:
-            errorlog.warning("%d reduce/reduce conflicts", num_rr)
+            errorlog.warning('%d reduce/reduce conflicts', num_rr)
 
     # Write out conflicts to the output file
     if debug and (lr.sr_conflicts or lr.rr_conflicts):
-        debuglog.warning("")
-        debuglog.warning("Conflicts:")
-        debuglog.warning("")
+        debuglog.warning('')
+        debuglog.warning('Conflicts:')
+        debuglog.warning('')
 
         for state, tok, resolution in lr.sr_conflicts:
-            debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution)
-
-        already_reported = {}
+            debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution)
+
+        already_reported = set()
         for state, rule, rejected in lr.rr_conflicts:
-            if (state,id(rule),id(rejected)) in already_reported:
+            if (state, id(rule), id(rejected)) in already_reported:
                 continue
-            debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
-            debuglog.warning("rejected rule (%s) in state %d", rejected,state)
-            errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
-            errorlog.warning("rejected rule (%s) in state %d", rejected, state)
-            already_reported[state,id(rule),id(rejected)] = 1
-
+            debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
+            debuglog.warning('rejected rule (%s) in state %d', rejected, state)
+            errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
+            errorlog.warning('rejected rule (%s) in state %d', rejected, state)
+            already_reported.add((state, id(rule), id(rejected)))
+
         warned_never = []
         for state, rule, rejected in lr.rr_conflicts:
             if not rejected.reduced and (rejected not in warned_never):
-                debuglog.warning("Rule (%s) is never reduced", rejected)
-                errorlog.warning("Rule (%s) is never reduced", rejected)
+                debuglog.warning('Rule (%s) is never reduced', rejected)
+                errorlog.warning('Rule (%s) is never reduced', rejected)
                 warned_never.append(rejected)
 
     # Write the table file if requested
     if write_tables:
-        lr.write_table(tabmodule,outputdir,signature)
+        try:
+            lr.write_table(tabmodule, outputdir, signature)
+            if tabmodule in sys.modules:
+                del sys.modules[tabmodule]
+        except IOError as e:
+            errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))
 
     # Write a pickled version of the tables
     if picklefile:
-        lr.pickle_table(picklefile,signature)
+        try:
+            lr.pickle_table(picklefile, signature)
+        except IOError as e:
+            errorlog.warning("Couldn't create %r. %s" % (picklefile, e))
 
     # Build the parser
     lr.bind_callables(pinfo.pdict)
-    parser = LRParser(lr,pinfo.error_func)
+    parser = LRParser(lr, pinfo.error_func)
     parse = parser.parse
     return parser
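
For context, here is a minimal sketch of a grammar module driving the updated `yacc()` entry point shown above. It is illustrative only and not part of this patch; the token and rule names are hypothetical.

    # calc.py -- illustrative sketch, not part of this patch.
    # Assumes ply is importable; token/rule names are hypothetical.
    import ply.yacc as yacc

    tokens = ('NUMBER', 'PLUS')

    def p_expr_plus(p):
        'expr : expr PLUS NUMBER'
        p[0] = p[1] + p[3]

    def p_expr_number(p):
        'expr : NUMBER'
        p[0] = p[1]

    def p_error(p):
        print('Syntax error at %r' % (p,))

    # With the new defaults (write_tables=True, outputdir=None), parser.out and
    # the parsetab module are written next to this file; if this module lives
    # inside a package, tabmodule is prefixed with the package name so the
    # cached tables re-import cleanly on later builds.
    parser = yacc.yacc(debug=True)

Note that flags such as `errors` are now real booleans rather than `0`/`1` sentinels; since `True == 1` and `False == 0` in Python, callers comparing against the old integer values keep working.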