Skip to content

Commit

Permalink
Merge branch 'main' into normalize-dict-encoding-handling
Browse files Browse the repository at this point in the history
  • Loading branch information
mapleFU committed Jun 10, 2024
2 parents 36373f7 + 7179511 commit a0ae77c
Show file tree
Hide file tree
Showing 657 changed files with 27,559 additions and 18,027 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ on:
push:
paths:
- '.github/workflows/cpp.yml'
- 'ci/conda_env_*'
- 'ci/docker/**'
- 'ci/scripts/cpp_*'
- 'ci/scripts/install_azurite.sh'
Expand All @@ -35,6 +36,7 @@ on:
pull_request:
paths:
- '.github/workflows/cpp.yml'
- 'ci/conda_env_*'
- 'ci/docker/**'
- 'ci/scripts/cpp_*'
- 'ci/scripts/install_azurite.sh'
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/csharp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ jobs:
run: ci/scripts/csharp_test.sh $(pwd)

macos:
name: ARM64 macOS 14 C# ${{ matrix.dotnet }}
runs-on: macos-latest
name: AMD64 macOS 13 C# ${{ matrix.dotnet }}
runs-on: macos-13 # Pending https://github.com/pythonnet/pythonnet/issues/2396
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 15
strategy:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ jobs:
shell: bash
run: |
gem install test-unit
pip install "cython>=0.29.31" setuptools six pytest jira
pip install "cython>=0.29.31" setuptools six pytest jira setuptools-scm
- name: Run Release Test
env:
ARROW_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/issue_bot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ on:
issues:
types:
- opened
- edited

permissions:
contents: read
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/java.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,11 @@ jobs:
env:
ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
run: |
archery docker run \
-e CI=true \
-e "GRADLE_ENTERPRISE_ACCESS_KEY=$GRADLE_ENTERPRISE_ACCESS_KEY" \
-e "DEVELOCITY_ACCESS_KEY=$DEVELOCITY_ACCESS_KEY" \
${{ matrix.image }}
- name: Docker Push
if: >-
Expand Down Expand Up @@ -127,12 +127,12 @@ jobs:
- name: Build
shell: bash
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
run: ci/scripts/java_build.sh $(pwd) $(pwd)/build
- name: Test
shell: bash
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
run: ci/scripts/java_test.sh $(pwd) $(pwd)/build

windows:
Expand All @@ -158,10 +158,10 @@ jobs:
- name: Build
shell: bash
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
run: ci/scripts/java_build.sh $(pwd) $(pwd)/build
- name: Test
shell: bash
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
run: ci/scripts/java_test.sh $(pwd) $(pwd)/build
4 changes: 2 additions & 2 deletions .github/workflows/java_jni.yml
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,11 @@ jobs:
env:
ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
run: |
archery docker run \
-e CI=true \
-e "GRADLE_ENTERPRISE_ACCESS_KEY=$GRADLE_ENTERPRISE_ACCESS_KEY" \
-e "DEVELOCITY_ACCESS_KEY=$DEVELOCITY_ACCESS_KEY" \
conda-python-java-integration
- name: Docker Push
if: >-
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/js.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,10 @@ jobs:
node-version: ${{ matrix.node }}
- name: Build
shell: bash
run: ci/scripts/js_build.sh $(pwd)
run: ci/scripts/js_build.sh $(pwd) build
- name: Test
shell: bash
run: ci/scripts/js_test.sh $(pwd)
run: ci/scripts/js_test.sh $(pwd) build

windows:
name: AMD64 Windows NodeJS ${{ matrix.node }}
Expand All @@ -136,7 +136,7 @@ jobs:
node-version: ${{ matrix.node }}
- name: Build
shell: bash
run: ci/scripts/js_build.sh $(pwd)
run: ci/scripts/js_build.sh $(pwd) build
- name: Test
shell: bash
run: ci/scripts/js_test.sh $(pwd)
run: ci/scripts/js_test.sh $(pwd) build
5 changes: 3 additions & 2 deletions .github/workflows/r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -370,11 +370,12 @@ jobs:
MAKEFLAGS = paste0("-j", parallel::detectCores()),
ARROW_R_DEV = TRUE,
"_R_CHECK_FORCE_SUGGESTS_" = FALSE,
"_R_CHECK_STOP_ON_INVALID_NUMERIC_VERSION_INPUTS_" = TRUE
"_R_CHECK_STOP_ON_INVALID_NUMERIC_VERSION_INPUTS_" = TRUE,
"_R_CHECK_DONTTEST_EXAMPLES_" = TRUE
)
rcmdcheck::rcmdcheck(".",
build_args = '--no-build-vignettes',
args = c('--no-manual', '--as-cran', '--ignore-vignettes', '--run-donttest'),
args = c('--no-manual', '--as-cran', '--ignore-vignettes'),
error_on = 'warning',
check_dir = 'check',
timeout = 3600
Expand Down
39 changes: 29 additions & 10 deletions .github/workflows/ruby.yml
Original file line number Diff line number Diff line change
Expand Up @@ -197,9 +197,7 @@ jobs:
mingw-n-bits:
- 64
ruby-version:
# TODO: Use the latest Ruby again when we fix GH-39130.
# - ruby
- "3.1"
- ruby
env:
ARROW_BUILD_STATIC: OFF
ARROW_BUILD_TESTS: OFF
Expand Down Expand Up @@ -313,15 +311,17 @@ jobs:
strategy:
fail-fast: false
env:
ARROW_ACERO: ON
ARROW_BOOST_USE_SHARED: OFF
ARROW_BUILD_BENCHMARKS: OFF
ARROW_BUILD_SHARED: ON
ARROW_BUILD_STATIC: OFF
ARROW_BUILD_TESTS: OFF
ARROW_ACERO: ON
ARROW_DATASET: ON
ARROW_FLIGHT: OFF
ARROW_FLIGHT_SQL: OFF
ARROW_DEPENDENCY_SOURCE: VCPKG
ARROW_DEPENDENCY_USE_SHARED: OFF
ARROW_FLIGHT: ON
ARROW_FLIGHT_SQL: ON
ARROW_GANDIVA: OFF
ARROW_HDFS: OFF
ARROW_HOME: "${{ github.workspace }}/dist"
Expand All @@ -337,13 +337,16 @@ jobs:
ARROW_WITH_LZ4: OFF
ARROW_WITH_OPENTELEMETRY: OFF
ARROW_WITH_SNAPPY: ON
ARROW_WITH_ZLIB: OFF
ARROW_WITH_ZLIB: ON
ARROW_WITH_ZSTD: ON
BOOST_SOURCE: BUNDLED
CMAKE_CXX_STANDARD: "17"
CMAKE_GENERATOR: Ninja
CMAKE_INSTALL_PREFIX: "${{ github.workspace }}/dist"
CMAKE_UNITY_BUILD: ON
VCPKG_BINARY_SOURCES: 'clear;nuget,GitHub,readwrite'
VCPKG_ROOT: "${{ github.workspace }}/vcpkg"
permissions:
packages: write
steps:
- name: Disable Crash Dialogs
run: |
Expand All @@ -361,7 +364,7 @@ jobs:
- name: Install vcpkg
shell: bash
run: |
ci/scripts/install_vcpkg.sh ./vcpkg
ci/scripts/install_vcpkg.sh "${VCPKG_ROOT}"
- name: Install meson
run: |
python -m pip install meson
Expand All @@ -387,6 +390,22 @@ jobs:
env:
# We can invalidate the current cache by updating this.
CACHE_VERSION: "2024-05-09"
- name: Setup NuGet credentials for vcpkg caching
shell: bash
run: |
$(vcpkg/vcpkg.exe fetch nuget | tail -n 1) \
sources add \
-source "https://nuget.pkg.github.com/$GITHUB_REPOSITORY_OWNER/index.json" \
-storepasswordincleartext \
-name "GitHub" \
-username "$GITHUB_REPOSITORY_OWNER" \
-password "${{ secrets.GITHUB_TOKEN }}"
$(vcpkg/vcpkg.exe fetch nuget | tail -n 1) \
setapikey "${{ secrets.GITHUB_TOKEN }}" \
-source "https://nuget.pkg.github.com/$GITHUB_REPOSITORY_OWNER/index.json"
- name: Build C++ vcpkg dependencies
run: |
vcpkg\vcpkg.exe install --triplet x64-windows --x-manifest-root cpp --x-install-root build\cpp\vcpkg_installed
- name: Build C++
shell: cmd
run: |
Expand All @@ -396,4 +415,4 @@ jobs:
shell: cmd
run: |
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
bash -c "VCPKG_ROOT=\"$(pwd)/vcpkg\" ci/scripts/c_glib_build.sh $(pwd) $(pwd)/build"
bash -c "ci/scripts/c_glib_build.sh $(pwd) $(pwd)/build"
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,4 +102,9 @@ __debug_bin
.envrc

# Develocity
.mvn/.develocity.xml
java/.mvn/.gradle-enterprise/
java/.mvn/.develocity/

# rat
filtered_rat.txt
rat.txt
29 changes: 29 additions & 0 deletions .golangci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Linter selection: everything is switched off first, then the linters
# listed under `enable` are opted in explicitly.
linters:
# Disable all linters.
# Default: false
disable-all: true
# Enable specific linter
# https://golangci-lint.run/usage/linters/#enabled-by-default
enable:
# Only these two linters are active for this repository.
- gofmt
- goimports

# Issue handling options.
issues:
# NOTE(review): presumably asks golangci-lint to apply fixes automatically
# where the enabled linters support it - confirm against the golangci-lint
# configuration reference for `issues.fix`.
fix: true
14 changes: 14 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -141,3 +141,17 @@ repos:
'--disable',
'dangling-hyphen,line-too-long',
]
- repo: https://github.com/golangci/golangci-lint
rev: v1.59.0
hooks:
# no built-in support for multiple go.mod
# https://github.com/golangci/golangci-lint/issues/828
- id: golangci-lint-full
name: golangci-lint-full-arrow
entry: bash -c 'cd go/arrow && golangci-lint run'
- id: golangci-lint-full
name: golangci-lint-full-parquet
entry: bash -c 'cd go/parquet && golangci-lint run'
- id: golangci-lint-full
name: golangci-lint-full-internal
entry: bash -c 'cd go/internal && golangci-lint run'
37 changes: 36 additions & 1 deletion c_glib/arrow-dataset-glib/dataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include <arrow-glib/error.hpp>
#include <arrow-glib/file-system.hpp>
#include <arrow-glib/reader.hpp>
#include <arrow-glib/table.hpp>

#include <arrow-dataset-glib/dataset-factory.hpp>
Expand Down Expand Up @@ -152,12 +153,46 @@ gadataset_dataset_to_table(GADatasetDataset *dataset, GError **error)
}
auto arrow_scanner = *arrow_scanner_result;
auto arrow_table_result = arrow_scanner->ToTable();
if (!garrow::check(error, arrow_scanner_result, "[dataset][to-table]")) {
if (!garrow::check(error, arrow_table_result, "[dataset][to-table]")) {
return NULL;
}
return garrow_table_new_raw(&(*arrow_table_result));
}

/**
 * gadataset_dataset_to_record_batch_reader:
 * @dataset: A #GADatasetDataset.
 * @error: (nullable): Return location for a #GError or %NULL.
 *
 * Creates a new scan of @dataset, finishes it into a scanner and
 * converts that scanner to a record batch reader. If any of these
 * steps fails, the failure is reported through @error.
 *
 * Returns: (transfer full) (nullable):
 *   A #GArrowRecordBatchReader on success, %NULL on error.
 *
 * Since: 17.0.0
 */
GArrowRecordBatchReader *
gadataset_dataset_to_record_batch_reader(GADatasetDataset *dataset, GError **error)
{
  auto raw_dataset = gadataset_dataset_get_raw(dataset);

  // Step 1: start a new scan on the dataset.
  auto builder_result = raw_dataset->NewScan();
  if (!garrow::check(error, builder_result, "[dataset][to-record-batch-reader]")) {
    return nullptr;
  }

  // Step 2: finish the scanner builder into a scanner.
  auto raw_builder = *builder_result;
  auto scanner_result = raw_builder->Finish();
  if (!garrow::check(error, scanner_result, "[dataset][to-record-batch-reader]")) {
    return nullptr;
  }

  // Step 3: convert the scanner to a record batch reader.
  auto raw_scanner = *scanner_result;
  auto reader_result = raw_scanner->ToRecordBatchReader();
  if (!garrow::check(error, reader_result, "[dataset][to-record-batch-reader]")) {
    return nullptr;
  }

  // The GLib dataset is handed over as the reader's only "source".
  // NOTE(review): presumably this keeps @dataset referenced for the lifetime
  // of the reader; ownership of the list itself depends on
  // garrow_record_batch_reader_new_raw() - confirm there.
  auto reader_sources = g_list_prepend(nullptr, dataset);
  return garrow_record_batch_reader_new_raw(&(*reader_result), reader_sources);
}

/**
* gadataset_dataset_get_type_name:
* @dataset: A #GADatasetDataset.
Expand Down
3 changes: 3 additions & 0 deletions c_glib/arrow-dataset-glib/dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ gadataset_dataset_to_table(GADatasetDataset *dataset, GError **error);
GADATASET_AVAILABLE_IN_5_0
gchar *
gadataset_dataset_get_type_name(GADatasetDataset *dataset);
GADATASET_AVAILABLE_IN_17_0
GArrowRecordBatchReader *
gadataset_dataset_to_record_batch_reader(GADatasetDataset *dataset, GError **error);

#define GADATASET_TYPE_FILE_SYSTEM_DATASET_WRITE_OPTIONS \
(gadataset_file_system_dataset_write_options_get_type())
Expand Down
22 changes: 22 additions & 0 deletions c_glib/arrow-dataset-glib/scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,28 @@ gadataset_scanner_to_table(GADatasetScanner *scanner, GError **error)
}
}

/**
 * gadataset_scanner_to_record_batch_reader:
 * @scanner: A #GADatasetScanner.
 * @error: (nullable): Return location for a #GError or %NULL.
 *
 * Converts @scanner to a record batch reader. If the conversion
 * fails, the failure is reported through @error.
 *
 * Returns: (transfer full) (nullable):
 *   A #GArrowRecordBatchReader on success, %NULL on error.
 *
 * Since: 17.0.0
 */
GArrowRecordBatchReader *
gadataset_scanner_to_record_batch_reader(GADatasetScanner *scanner, GError **error)
{
  auto raw_scanner = gadataset_scanner_get_raw(scanner);
  auto reader_result = raw_scanner->ToRecordBatchReader();

  if (garrow::check(error, reader_result, "[scanner][to-record-batch-reader]")) {
    // The GLib scanner is handed over as the reader's only "source".
    // NOTE(review): presumably this keeps @scanner referenced for the
    // lifetime of the reader; ownership of the list itself depends on
    // garrow_record_batch_reader_new_raw() - confirm there.
    auto reader_sources = g_list_prepend(nullptr, scanner);
    return garrow_record_batch_reader_new_raw(&(*reader_result), reader_sources);
  }

  return nullptr;
}

typedef struct GADatasetScannerBuilderPrivate_
{
std::shared_ptr<arrow::dataset::ScannerBuilder> scanner_builder;
Expand Down
4 changes: 4 additions & 0 deletions c_glib/arrow-dataset-glib/scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ GADATASET_AVAILABLE_IN_5_0
GArrowTable *
gadataset_scanner_to_table(GADatasetScanner *scanner, GError **error);

GADATASET_AVAILABLE_IN_17_0
GArrowRecordBatchReader *
gadataset_scanner_to_record_batch_reader(GADatasetScanner *scanner, GError **error);

#define GADATASET_TYPE_SCANNER_BUILDER (gadataset_scanner_builder_get_type())
GADATASET_AVAILABLE_IN_5_0
G_DECLARE_DERIVABLE_TYPE(
Expand Down
Loading

0 comments on commit a0ae77c

Please sign in to comment.