From 783c2335f8b2171ecb5c2bfbff39a65076b260d9 Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Thu, 26 Sep 2024 14:51:23 -0700 Subject: [PATCH] Add lib stemmer in setup scripts (#10984) Summary: The earlier merged PR (https://github.com/facebookincubator/velox/commit/38f9a1f5db0e4d07246daa105b769a43eb653a99) was reverted by https://github.com/facebookincubator/velox/pull/10965 due to this issue: https://github.com/facebookincubator/velox/issues/10963. The original implementation expected a patch file to be applied to add `-fPIC` before building lib stemmer, but when building the Docker image (ghcr.io/facebookincubator/velox-dev:centos9), that patch file is not available because the setup script is copied out of the Velox repo and executed from that copy. See code: https://github.com/facebookincubator/velox/blob/7f2d7adaa1544c72129fd1c8d0766755ff354455/scripts/centos.dockerfile#L22 This PR re-adds the installation of lib stemmer with the above issue fixed: `-fPIC` is now appended to the Makefile's CPPFLAGS by an inline `sed` edit in each setup script, so no separate patch file is needed. Pull Request resolved: https://github.com/facebookincubator/velox/pull/10984 Reviewed By: DanielHunte Differential Revision: D63344087 Pulled By: Yuhta fbshipit-source-id: a1467f5392ef828efdd219d0b7f1dfe0003d00ed --- .github/workflows/macos.yml | 2 ++ scripts/setup-centos9.sh | 13 +++++++++++++ scripts/setup-macos.sh | 13 +++++++++++++ scripts/setup-ubuntu.sh | 13 +++++++++++++ 4 files changed, 41 insertions(+) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 63672ec7aa87..c56baff117c7 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -22,6 +22,7 @@ on: - "CMakeLists.txt" - "CMake/**" - "third_party/**" + - "scripts/setup-macos.sh" - ".github/workflows/macos.yml" pull_request: @@ -31,6 +32,7 @@ on: - "CMakeLists.txt" - "CMake/**" - "third_party/**" + - "scripts/setup-macos.sh" - ".github/workflows/macos.yml" permissions: diff --git a/scripts/setup-centos9.sh b/scripts/setup-centos9.sh index 1efb8e53ec34..723754915b2c 100755 --- a/scripts/setup-centos9.sh +++ b/scripts/setup-centos9.sh @@ 
-44,6 +44,7 @@ FMT_VERSION="10.1.1" BOOST_VERSION="boost-1.84.0" ARROW_VERSION="15.0.0" FAST_FLOAT_VERSION="v6.1.6" +STEMMER_VERSION="2.2.0" function dnf_install { dnf install -y -q --setopt=install_weak_deps=False "$@" @@ -178,6 +179,17 @@ function install_duckdb { fi } +function install_stemmer { + wget_and_untar https://snowballstem.org/dist/libstemmer_c-${STEMMER_VERSION}.tar.gz stemmer + ( + cd ${DEPENDENCY_DIR}/stemmer + sed -i '/CPPFLAGS=-Iinclude/ s/$/ -fPIC/' Makefile + make clean && make "-j${NPROC}" + ${SUDO} cp libstemmer.a ${INSTALL_PREFIX}/lib/ + ${SUDO} cp include/libstemmer.h ${INSTALL_PREFIX}/include/ + ) +} + function install_arrow { wget_and_untar https://archive.apache.org/dist/arrow/arrow-${ARROW_VERSION}/apache-arrow-${ARROW_VERSION}.tar.gz arrow cmake_install_dir arrow/cpp \ @@ -233,6 +245,7 @@ function install_velox_deps { run_and_time install_mvfst run_and_time install_fbthrift run_and_time install_duckdb + run_and_time install_stemmer run_and_time install_arrow } diff --git a/scripts/setup-macos.sh b/scripts/setup-macos.sh index b394e8ce4755..ed8b6c2a21fe 100755 --- a/scripts/setup-macos.sh +++ b/scripts/setup-macos.sh @@ -45,6 +45,7 @@ MACOS_BUILD_DEPS="ninja cmake" FB_OS_VERSION="v2024.09.16.00" FMT_VERSION="10.1.1" FAST_FLOAT_VERSION="v6.1.6" +STEMMER_VERSION="2.2.0" function update_brew { DEFAULT_BREW_PATH=/usr/local/bin/brew @@ -157,6 +158,17 @@ function install_duckdb { fi } +function install_stemmer { + wget_and_untar https://snowballstem.org/dist/libstemmer_c-${STEMMER_VERSION}.tar.gz stemmer + ( + cd ${DEPENDENCY_DIR}/stemmer + sed -i.bak '/CPPFLAGS=-Iinclude/ s/$/ -fPIC/' Makefile # attached backup suffix: accepted by both BSD (macOS) and GNU sed + make clean && make "-j${NPROC}" + ${SUDO} cp libstemmer.a ${INSTALL_PREFIX}/lib/ + ${SUDO} cp include/libstemmer.h ${INSTALL_PREFIX}/include/ + ) +} + function install_velox_deps { run_and_time install_velox_deps_from_brew run_and_time install_ranges_v3 @@ -170,6 +182,7 @@ function install_velox_deps { run_and_time install_mvfst run_and_time 
install_fbthrift run_and_time install_duckdb + run_and_time install_stemmer } (return 2> /dev/null) && return # If script was sourced, don't run commands. diff --git a/scripts/setup-ubuntu.sh b/scripts/setup-ubuntu.sh index c4785af00fe5..ab82ea9da410 100755 --- a/scripts/setup-ubuntu.sh +++ b/scripts/setup-ubuntu.sh @@ -59,6 +59,7 @@ FMT_VERSION="10.1.1" BOOST_VERSION="boost-1.84.0" ARROW_VERSION="15.0.0" FAST_FLOAT_VERSION="v6.1.6" +STEMMER_VERSION="2.2.0" # Install packages required for build. function install_build_prerequisites { @@ -186,6 +187,17 @@ function install_duckdb { fi } +function install_stemmer { + wget_and_untar https://snowballstem.org/dist/libstemmer_c-${STEMMER_VERSION}.tar.gz stemmer + ( + cd ${DEPENDENCY_DIR}/stemmer + sed -i '/CPPFLAGS=-Iinclude/ s/$/ -fPIC/' Makefile + make clean && make "-j${NPROC}" + ${SUDO} cp libstemmer.a ${INSTALL_PREFIX}/lib/ + ${SUDO} cp include/libstemmer.h ${INSTALL_PREFIX}/include/ + ) +} + function install_arrow { wget_and_untar https://archive.apache.org/dist/arrow/arrow-${ARROW_VERSION}/apache-arrow-${ARROW_VERSION}.tar.gz arrow cmake_install_dir arrow/cpp \ @@ -241,6 +253,7 @@ function install_velox_deps { run_and_time install_fbthrift run_and_time install_conda run_and_time install_duckdb + run_and_time install_stemmer run_and_time install_arrow }