Skip to content

GH-45196: [C++][Acero] Small refinement to hash join #41854

GH-45196: [C++][Acero] Small refinement to hash join

GH-45196: [C++][Acero] Small refinement to hash join #41854

Workflow file for this run

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
name: C GLib & Ruby
on:
push:
branches:
- '**'
- '!dependabot/**'
tags:
- '**'
paths:
- '.dockerignore'
- '.github/workflows/ruby.yml'
- 'ci/docker/**'
- 'ci/scripts/c_glib_*'
- 'ci/scripts/cpp_*'
- 'ci/scripts/msys2_*'
- 'ci/scripts/ruby_*'
- 'ci/scripts/util_*'
- 'c_glib/**'
- 'cpp/**'
- 'docker-compose.yml'
- 'ruby/**'
pull_request:
paths:
- '.dockerignore'
- '.github/workflows/ruby.yml'
- 'ci/docker/**'
- 'ci/scripts/c_glib_*'
- 'ci/scripts/cpp_*'
- 'ci/scripts/msys2_*'
- 'ci/scripts/ruby_*'
- 'ci/scripts/util_*'
- 'c_glib/**'
- 'cpp/**'
- 'docker-compose.yml'
- 'ruby/**'
concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true
permissions:
contents: read
env:
ARCHERY_DEBUG: 1
DOCKER_VOLUME_PREFIX: ".docker/"
jobs:
ubuntu:
name: AMD64 Ubuntu ${{ matrix.ubuntu }} GLib & Ruby
runs-on: ubuntu-latest
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
ubuntu:
- 22.04
env:
UBUNTU: ${{ matrix.ubuntu }}
steps:
- name: Checkout Arrow
uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
with:
fetch-depth: 0
submodules: recursive
- name: Cache Docker Volumes
uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0
with:
path: .docker
key: ubuntu-${{ matrix.ubuntu }}-ruby-${{ hashFiles('cpp/**') }}
restore-keys: ubuntu-${{ matrix.ubuntu }}-ruby-
- name: Setup Python
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: 3.12
- name: Setup Archery
run: pip install -e dev/archery[docker]
- name: Execute Docker Build
env:
ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
run: |
source ci/scripts/util_enable_core_dumps.sh
archery docker run \
-e ARROW_FLIGHT=ON \
-e ARROW_FLIGHT_SQL=ON \
-e ARROW_GCS=ON \
-e Protobuf_SOURCE=BUNDLED \
-e gRPC_SOURCE=BUNDLED \
ubuntu-ruby
- name: Docker Push
if: >-
success() &&
github.event_name == 'push' &&
github.repository == 'apache/arrow' &&
github.ref_name == 'main'
env:
ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
continue-on-error: true
shell: bash
run: archery docker push ubuntu-ruby
macos:
name: ARM64 macOS 14 GLib & Ruby
runs-on: macos-latest
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 60
env:
ARROW_BUILD_STATIC: OFF
ARROW_BUILD_TESTS: OFF
ARROW_BUILD_UTILITIES: OFF
ARROW_DATASET: ON
ARROW_FLIGHT: ON
ARROW_FLIGHT_SQL: ON
ARROW_GANDIVA: ON
ARROW_GCS: ON
ARROW_GLIB_GTK_DOC: true
ARROW_GLIB_WERROR: true
ARROW_HOME: /tmp/local
ARROW_JEMALLOC: OFF
ARROW_ORC: OFF
ARROW_PARQUET: ON
ARROW_WITH_BROTLI: ON
ARROW_WITH_LZ4: ON
ARROW_WITH_SNAPPY: ON
ARROW_WITH_ZLIB: ON
ARROW_WITH_ZSTD: ON
steps:
- name: Checkout Arrow
uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: recursive
- name: Install Homebrew Dependencies
shell: bash
run: |
# pkg-config formula is deprecated but it's still installed
# in GitHub Actions runner now. We can remove this once
# pkg-config formula is removed from GitHub Actions runner.
brew uninstall pkg-config || :
brew uninstall [email protected] || :
brew bundle --file=cpp/Brewfile
brew bundle --file=c_glib/Brewfile
# For Meson.
# See also: https://github.com/mesonbuild/meson/issues/7701
echo "PKG_CONFIG=$(brew --prefix pkgconf)/bin/pkgconf" >> $GITHUB_ENV
- name: Install Ruby Dependencies
run: |
export MAKEFLAGS="-j$(sysctl -n hw.ncpu)"
bundle install --gemfile c_glib/Gemfile
bundle install --gemfile ruby/Gemfile
for ruby_package_gemfile in ruby/*/Gemfile; do \
bundle install --gemfile ${ruby_package_gemfile}
done
- name: Setup ccache
run: |
ci/scripts/ccache_setup.sh
- name: ccache info
id: ccache-info
run: |
echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT
- name: Cache ccache
uses: actions/cache@v4
with:
path: ${{ steps.ccache-info.outputs.cache-dir }}
key: ruby-ccache-macos-${{ hashFiles('cpp/**') }}
restore-keys: ruby-ccache-macos-
- name: Build C++
run: |
ci/scripts/cpp_build.sh $(pwd) $(pwd)/build
- name: Build GLib
run: |
ci/scripts/c_glib_build.sh $(pwd) $(pwd)/build
- name: Test GLib
shell: bash
run: ci/scripts/c_glib_test.sh $(pwd) $(pwd)/build
- name: Test Ruby
shell: bash
run: ci/scripts/ruby_test.sh $(pwd) $(pwd)/build
windows-mingw:
name: AMD64 Windows MinGW ${{ matrix.mingw-n-bits }} GLib & Ruby
runs-on: windows-2019
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 90
strategy:
fail-fast: false
matrix:
mingw-n-bits:
- 64
ruby-version:
- ruby
env:
ARROW_BUILD_STATIC: OFF
ARROW_BUILD_TESTS: OFF
ARROW_BUILD_UTILITIES: OFF
ARROW_BUILD_TYPE: release
ARROW_DATASET: ON
ARROW_FLIGHT: ON
ARROW_FLIGHT_SQL: ON
ARROW_GANDIVA: ON
ARROW_GCS: ON
ARROW_HDFS: OFF
ARROW_HOME: /ucrt${{ matrix.mingw-n-bits }}
ARROW_JEMALLOC: OFF
ARROW_PARQUET: ON
ARROW_PYTHON: OFF
ARROW_S3: ON
ARROW_USE_GLOG: OFF
ARROW_WITH_BROTLI: ON
ARROW_WITH_BZ2: ON
ARROW_WITH_LZ4: ON
ARROW_WITH_SNAPPY: ON
ARROW_WITH_ZLIB: ON
ARROW_WITH_ZSTD: ON
# Don't use preinstalled Boost by empty BOOST_ROOT and
# -DBoost_NO_BOOST_CMAKE=ON
BOOST_ROOT: ""
ARROW_CMAKE_ARGS: >-
-DARROW_PACKAGE_PREFIX=/ucrt${{ matrix.mingw-n-bits }}
-DBoost_NO_BOOST_CMAKE=ON
-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON
CMAKE_UNITY_BUILD: ON
steps:
- name: Disable Crash Dialogs
run: |
reg add `
"HKCU\SOFTWARE\Microsoft\Windows\Windows Error Reporting" `
/v DontShowUI `
/t REG_DWORD `
/d 1 `
/f
- name: Checkout Arrow
uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: recursive
- name: Setup Ruby
uses: ruby/setup-ruby@v1
with:
ruby-version: ${{ matrix.ruby-version }}
- name: Setup MSYS2
run: |
ridk exec bash ci\scripts\msys2_setup.sh ruby
- name: Cache ccache
uses: actions/cache@v4
with:
path: ccache
key: ruby-ccache-ucrt${{ matrix.mingw-n-bits }}-${{ hashFiles('cpp/**') }}
restore-keys: ruby-ccache-ucrt${{ matrix.mingw-n-bits }}-
- name: Build C++
run: |
$Env:CMAKE_BUILD_PARALLEL_LEVEL = $Env:NUMBER_OF_PROCESSORS
$source_dir = "$(ridk exec cygpath --unix "$(Get-Location)")"
$build_dir = "$(ridk exec cygpath --unix "$(Get-Location)\build")"
$ErrorActionPreference = "Continue"
ridk exec bash ci\scripts\cpp_build.sh "${source_dir}" "${build_dir}"
- name: Build GLib
run: |
$source_dir = "$(ridk exec cygpath --unix "$(Get-Location)")"
$build_dir = "$(ridk exec cygpath --unix "$(Get-Location)\build")"
$ErrorActionPreference = "Continue"
ridk exec bash ci\scripts\c_glib_build.sh "${source_dir}" "${build_dir}"
- name: RubyGems info
id: rubygems-info
run: |
Write-Output "gem-dir=$(ridk exec gem env gemdir)" | `
Out-File -FilePath $env:GITHUB_OUTPUT -Encoding utf8 -Append
- name: Cache RubyGems
uses: actions/cache@v4
with:
path: ${{ steps.rubygems-info.outputs.gem-dir }}
key: ruby-rubygems-ucrt${{ matrix.mingw-n-bits }}-${{ hashFiles('**/Gemfile', 'ruby/*/*.gemspec') }}
restore-keys: ruby-rubygems-ucrt${{ matrix.mingw-n-bits }}-
- name: Install test dependencies
run: |
bundle install --gemfile c_glib\Gemfile
bundle install --gemfile ruby\Gemfile
Get-ChildItem ruby\*\Gemfile | `
ForEach-Object {bundle install --gemfile $_}
- name: Test GLib
run: |
$source_dir = "$(ridk exec cygpath --unix "$(Get-Location)")"
$build_dir = "$(ridk exec cygpath --unix "$(Get-Location)\build")"
$ErrorActionPreference = "Continue"
ridk exec bash ci\scripts\c_glib_test.sh "${source_dir}" "${build_dir}"
- name: Test Ruby
run: |
$Env:PKG_CONFIG_PATH = `
"$(ridk exec cygpath --absolute --windows "${Env:ARROW_HOME}/lib/pkgconfig")"
$Env:GI_TYPELIB_PATH = `
"$(ridk exec cygpath --absolute --windows "${Env:ARROW_HOME}/lib/girepository-1.0")"
$Env:RUBYOPTS = "-rdevkit"
$Env:MAKE = "ridk exec make"
$ErrorActionPreference = "Continue"
rake -f ruby\Rakefile
windows-msvc:
name: AMD64 Windows MSVC GLib
runs-on: windows-2019
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 90
strategy:
fail-fast: false
env:
ARROW_ACERO: ON
ARROW_BOOST_USE_SHARED: OFF
ARROW_BUILD_BENCHMARKS: OFF
ARROW_BUILD_SHARED: ON
ARROW_BUILD_STATIC: OFF
ARROW_BUILD_TESTS: OFF
ARROW_DATASET: ON
ARROW_DEPENDENCY_SOURCE: VCPKG
ARROW_DEPENDENCY_USE_SHARED: OFF
ARROW_FLIGHT: ON
ARROW_FLIGHT_SQL: ON
ARROW_GANDIVA: OFF
ARROW_GLIB_VAPI: "false"
ARROW_HDFS: OFF
ARROW_HOME: "${{ github.workspace }}/dist"
ARROW_JEMALLOC: OFF
ARROW_MIMALLOC: ON
ARROW_ORC: OFF
ARROW_PARQUET: ON
ARROW_SUBSTRAIT: OFF
ARROW_USE_GLOG: OFF
ARROW_VERBOSE_THIRDPARTY_BUILD: OFF
ARROW_WITH_BROTLI: OFF
ARROW_WITH_BZ2: OFF
ARROW_WITH_LZ4: OFF
ARROW_WITH_OPENTELEMETRY: OFF
ARROW_WITH_SNAPPY: ON
ARROW_WITH_ZLIB: ON
ARROW_WITH_ZSTD: ON
CMAKE_CXX_STANDARD: "17"
CMAKE_GENERATOR: Ninja
CMAKE_INSTALL_PREFIX: "${{ github.workspace }}/dist"
CMAKE_UNITY_BUILD: ON
VCPKG_BINARY_SOURCES: 'clear;nuget,GitHub,readwrite'
VCPKG_ROOT: "${{ github.workspace }}/vcpkg"
VCPKG_TRIPLET: x64-windows
permissions:
packages: write
steps:
- name: Disable Crash Dialogs
run: |
reg add `
"HKCU\SOFTWARE\Microsoft\Windows\Windows Error Reporting" `
/v DontShowUI `
/t REG_DWORD `
/d 1 `
/f
- name: Checkout Arrow
uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: recursive
- name: Install vcpkg
shell: bash
run: |
ci/scripts/install_vcpkg.sh "${VCPKG_ROOT}"
- name: Install meson
run: |
python -m pip install meson
- name: Install ccache
shell: bash
run: |
ci/scripts/install_ccache.sh 4.6.3 /usr
- name: Setup ccache
shell: bash
run: |
ci/scripts/ccache_setup.sh
- name: ccache info
id: ccache-info
shell: bash
run: |
echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT
- name: Cache ccache
uses: actions/cache@v4
with:
path: ${{ steps.ccache-info.outputs.cache-dir }}
key: glib-ccache-msvc-${{ env.CACHE_VERSION }}-${{ hashFiles('cpp/**') }}
restore-keys: glib-ccache-msvc-${{ env.CACHE_VERSION }}-
env:
# We can invalidate the current cache by updating this.
CACHE_VERSION: "2024-05-09"
- name: Setup NuGet credentials for vcpkg caching
shell: bash
run: |
$(vcpkg/vcpkg.exe fetch nuget | tail -n 1) \
sources add \
-source "https://nuget.pkg.github.com/$GITHUB_REPOSITORY_OWNER/index.json" \
-storepasswordincleartext \
-name "GitHub" \
-username "$GITHUB_REPOSITORY_OWNER" \
-password "${{ secrets.GITHUB_TOKEN }}"
$(vcpkg/vcpkg.exe fetch nuget | tail -n 1) \
setapikey "${{ secrets.GITHUB_TOKEN }}" \
-source "https://nuget.pkg.github.com/$GITHUB_REPOSITORY_OWNER/index.json"
- name: Build C++ vcpkg dependencies
run: |
vcpkg\vcpkg.exe install `
--triplet $env:VCPKG_TRIPLET `
--x-manifest-root cpp `
--x-install-root build\cpp\vcpkg_installed
- name: Build C++
shell: cmd
run: |
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
bash -c "ci/scripts/cpp_build.sh $(pwd) $(pwd)/build"
- name: Build GLib
shell: cmd
run: |
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
bash -c "ci/scripts/c_glib_build.sh $(pwd) $(pwd)/build"