From 24f3e8a33f6060ef4324ed796eef142167cd6f64 Mon Sep 17 00:00:00 2001 From: Pascal Vizeli Date: Fri, 10 Jul 2020 18:52:44 +0200 Subject: [PATCH] Optimize python with LTO & no-semantic (#82) * Optimize python with LTO & no-semantic * Change optimize * align patch also to 3.7 --- python/3.6/Dockerfile | 6 ++++-- python/3.7/Dockerfile | 6 ++++-- python/3.7/arm-alignment.patch | 17 +++++++++++++++++ python/3.8/Dockerfile | 6 ++++-- python/3.8/arm-alignment.patch | 17 +++++++++++++++++ 5 files changed, 46 insertions(+), 6 deletions(-) create mode 100644 python/3.7/arm-alignment.patch create mode 100644 python/3.8/arm-alignment.patch diff --git a/python/3.6/Dockerfile b/python/3.6/Dockerfile index a881350..db90e4e 100644 --- a/python/3.6/Dockerfile +++ b/python/3.6/Dockerfile @@ -41,7 +41,7 @@ RUN set -ex \ dpkg-dev dpkg \ expat-dev \ findutils \ - gcc \ + build-base \ gdbm-dev \ libc-dev \ libffi-dev \ @@ -73,14 +73,16 @@ RUN set -ex \ --enable-loadable-sqlite-extensions \ --enable-optimizations \ --enable-shared \ + --with-lto \ --with-system-expat \ --with-system-ffi \ --without-ensurepip \ && make -j "$(nproc)" \ + LDFLAGS="-Wl,--strip-all" \ + CFLAGS="-fno-semantic-interposition -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free -ljemalloc" \ # set thread stack size to 1MB so we don't segfault before we hit sys.getrecursionlimit() # https://github.com/alpinelinux/aports/commit/2026e1259422d4e0cf92391ca2d3844356c649d0 EXTRA_CFLAGS="-DTHREAD_STACK_SIZE=0x100000" \ - LDFLAGS="-Wl,--strip-all" \ # setting PROFILE_TASK makes "--enable-optimizations" reasonable: https://bugs.python.org/issue36044 PROFILE_TASK='-m test.regrtest --pgo \ test_asyncio \ diff --git a/python/3.7/Dockerfile b/python/3.7/Dockerfile index cd02b5e..ffa548d 100644 --- a/python/3.7/Dockerfile +++ b/python/3.7/Dockerfile @@ -41,7 +41,7 @@ RUN set -ex \ dpkg-dev dpkg \ expat-dev \ findutils \ - gcc \ + build-base \ gdbm-dev \ libc-dev \ libffi-dev \ @@ -73,14 +73,16 @@ RUN set -ex \ --enable-loadable-sqlite-extensions \ --enable-optimizations \ --enable-shared \ + --with-lto \ --with-system-expat \ --with-system-ffi \ --without-ensurepip \ && make -j "$(nproc)" \ + LDFLAGS="-Wl,--strip-all" \ + CFLAGS="-fno-semantic-interposition -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free -ljemalloc" \ # set thread stack size to 1MB so we don't segfault before we hit sys.getrecursionlimit() # https://github.com/alpinelinux/aports/commit/2026e1259422d4e0cf92391ca2d3844356c649d0 EXTRA_CFLAGS="-DTHREAD_STACK_SIZE=0x100000" \ - LDFLAGS="-Wl,--strip-all" \ # setting PROFILE_TASK makes "--enable-optimizations" reasonable: https://bugs.python.org/issue36044 PROFILE_TASK='-m test.regrtest --pgo \ test_asyncio \ diff --git a/python/3.7/arm-alignment.patch b/python/3.7/arm-alignment.patch new file mode 100644 index 0000000..a7a4b39 --- /dev/null +++ b/python/3.7/arm-alignment.patch @@ -0,0 +1,17 @@ +Author: Dave Jones +Description: Use aligned access for _sha3 module on ARM. +--- a/Modules/_sha3/sha3module.c ++++ b/Modules/_sha3/sha3module.c +@@ -64,6 +64,12 @@ + #define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + #endif + ++/* Bus error on 32-bit ARM due to un-aligned memory accesses; 64-bit ARM ++ * doesn't complain but un-aligned memory accesses are sub-optimal */ ++#if defined(__arm__) || defined(__aarch64__) ++#define NO_MISALIGNED_ACCESSES ++#endif ++ + /* mangle names */ + #define KeccakF1600_FastLoop_Absorb _PySHA3_KeccakF1600_FastLoop_Absorb + #define Keccak_HashFinal _PySHA3_Keccak_HashFinal diff --git a/python/3.8/Dockerfile b/python/3.8/Dockerfile index 56abaa7..4b5663b 100644 --- a/python/3.8/Dockerfile +++ b/python/3.8/Dockerfile @@ -41,7 +41,7 @@ RUN set -ex \ dpkg-dev dpkg \ expat-dev \ findutils \ - gcc \ + build-base \ gdbm-dev \ libc-dev \ libffi-dev \ @@ -73,14 +73,16 @@ RUN set -ex \ --enable-loadable-sqlite-extensions \ --enable-optimizations \ --enable-shared \ + --with-lto \ --with-system-expat \ --with-system-ffi \ --without-ensurepip \ && make -j "$(nproc)" \ + LDFLAGS="-Wl,--strip-all" \ + CFLAGS="-fno-semantic-interposition -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free -ljemalloc" \ # set thread stack size to 1MB so we don't segfault before we hit sys.getrecursionlimit() # https://github.com/alpinelinux/aports/commit/2026e1259422d4e0cf92391ca2d3844356c649d0 EXTRA_CFLAGS="-DTHREAD_STACK_SIZE=0x100000" \ - LDFLAGS="-Wl,--strip-all" \ && make install \ \ && find /usr/local -type f -executable -not \( -name '*tkinter*' \) -exec scanelf --needed --nobanner --format '%n#p' '{}' ';' \ diff --git a/python/3.8/arm-alignment.patch b/python/3.8/arm-alignment.patch new file mode 100644 index 0000000..a7a4b39 --- /dev/null +++ b/python/3.8/arm-alignment.patch @@ -0,0 +1,17 @@ +Author: Dave Jones +Description: Use aligned access for _sha3 module on ARM. +--- a/Modules/_sha3/sha3module.c ++++ b/Modules/_sha3/sha3module.c +@@ -64,6 +64,12 @@ + #define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + #endif + ++/* Bus error on 32-bit ARM due to un-aligned memory accesses; 64-bit ARM ++ * doesn't complain but un-aligned memory accesses are sub-optimal */ ++#if defined(__arm__) || defined(__aarch64__) ++#define NO_MISALIGNED_ACCESSES ++#endif ++ + /* mangle names */ + #define KeccakF1600_FastLoop_Absorb _PySHA3_KeccakF1600_FastLoop_Absorb + #define Keccak_HashFinal _PySHA3_Keccak_HashFinal