diff --git a/.env b/.env index c9cd6c8094ed8..014bad3fe2a7a 100644 --- a/.env +++ b/.env @@ -58,8 +58,8 @@ CUDA=11.0.3 DASK=latest DOTNET=7.0 GCC_VERSION="" -GO=1.17 -STATICCHECK=v0.2.2 +GO=1.19.13 +STATICCHECK=v0.4.5 HDFS=3.2.1 JDK=8 KARTOTHEK=latest diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index c31ad0b77c2df..3c695891b48d6 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -54,28 +54,23 @@ jobs: include: - arch-label: AMD64 arch: amd64 - go: 1.17 + go: 1.19 runs-on: ubuntu-latest - staticcheck: v0.2.2 - arch-label: AMD64 arch: amd64 - go: 1.18 + go: '1.20' runs-on: ubuntu-latest - staticcheck: v0.3.3 - arch-label: ARM64 arch: arm64v8 - go: 1.17 - staticcheck: v0.2.2 + go: 1.19 runs-on: ["self-hosted", "arm", "linux"] - arch-label: ARM64 arch: arm64v8 - go: 1.18 - staticcheck: v0.3.3 + go: '1.20' runs-on: ["self-hosted", "arm", "linux"] env: ARCH: ${{ matrix.arch }} GO: ${{ matrix.go }} - STATICCHECK: ${{ matrix.staticcheck }} steps: - name: Checkout Arrow uses: actions/checkout@v4 @@ -145,7 +140,7 @@ jobs: - name: Install Go uses: actions/setup-go@v4 with: - go-version: 1.18 + go-version: 1.19 cache: true cache-dependency-path: go/go.sum - name: Run build @@ -161,15 +156,9 @@ jobs: strategy: fail-fast: false matrix: - go: [1.17, 1.18] - include: - - go: 1.17 - staticcheck: v0.2.2 - - go: 1.18 - staticcheck: v0.3.3 + go: [1.19, '1.20'] env: GO: ${{ matrix.go }} - STATICCHECK: ${{ matrix.staticcheck }} steps: - name: Checkout Arrow uses: actions/checkout@v4 @@ -208,15 +197,9 @@ jobs: strategy: fail-fast: false matrix: - go: [1.17, 1.18] - include: - - go: 1.17 - staticcheck: v0.2.2 - - go: 1.18 - staticcheck: v0.3.3 + go: [1.19, '1.20'] env: GO: ${{ matrix.go }} - STATICCHECK: ${{ matrix.staticcheck }} steps: - name: Checkout Arrow uses: actions/checkout@v4 @@ -253,12 +236,7 @@ jobs: strategy: fail-fast: false matrix: - go: [1.17, 1.18] - include: - - go: 1.17 - staticcheck: v0.2.2 - - go: 1.18 - staticcheck: v0.3.3 + 
go: [1.19, '1.20'] steps: - name: Checkout Arrow uses: actions/checkout@v4 @@ -272,7 +250,10 @@ jobs: cache: true cache-dependency-path: go/go.sum - name: Install staticcheck - run: go install honnef.co/go/tools/cmd/staticcheck@${{ matrix.staticcheck }} + shell: bash + run: | + . .env + go install honnef.co/go/tools/cmd/staticcheck@${STATICCHECK} - name: Build shell: bash run: ci/scripts/go_build.sh $(pwd) @@ -288,12 +269,7 @@ jobs: strategy: fail-fast: false matrix: - go: [1.17, 1.18] - include: - - go: 1.17 - staticcheck: v0.2.2 - - go: 1.18 - staticcheck: v0.3.3 + go: [1.19, '1.20'] steps: - name: Checkout Arrow uses: actions/checkout@v4 @@ -306,8 +282,10 @@ jobs: go-version: ${{ matrix.go }} cache: true cache-dependency-path: go/go.sum - - name: Install staticcheck - run: go install honnef.co/go/tools/cmd/staticcheck@${{ matrix.staticcheck }} + - name: Install staticcheck + run: | + . .env + go install honnef.co/go/tools/cmd/staticcheck@${STATICCHECK} - name: Build shell: bash run: ci/scripts/go_build.sh $(pwd) @@ -349,12 +327,7 @@ jobs: strategy: fail-fast: false matrix: - go: [1.17, 1.18] - include: - - go: 1.17 - staticcheck: v0.2.2 - - go: 1.18 - staticcheck: v0.3.3 + go: [1.19, '1.20'] env: ARROW_GO_TESTCGO: "1" steps: @@ -373,7 +346,9 @@ jobs: shell: bash run: brew install apache-arrow pkg-config - name: Install staticcheck - run: go install honnef.co/go/tools/cmd/staticcheck@${{ matrix.staticcheck }} + run: | + . .env + go install honnef.co/go/tools/cmd/staticcheck@${STATICCHECK} - name: Add To pkg config path shell: bash run: | @@ -430,11 +405,14 @@ jobs: - name: Install go uses: actions/setup-go@v4 with: - go-version: '1.18' + go-version: '1.19' cache: true cache-dependency-path: go/go.sum - name: Install staticcheck - run: go install honnef.co/go/tools/cmd/staticcheck@v0.3.3 + shell: bash + run: | + . 
.env + go install honnef.co/go/tools/cmd/staticcheck@${STATICCHECK} - name: Build shell: bash run: ci/scripts/go_build.sh $(pwd) diff --git a/c_glib/test/test-array-datum.rb b/c_glib/test/test-array-datum.rb index 623e5589ce40b..1b2c9f91e2aa2 100644 --- a/c_glib/test/test-array-datum.rb +++ b/c_glib/test/test-array-datum.rb @@ -61,7 +61,7 @@ def test_false end def test_to_string - assert_equal("Array", @datum.to_s) + assert_equal("Array([\n" + " true,\n" + " false\n" + "])", @datum.to_s) end def test_value diff --git a/c_glib/test/test-chunked-array-datum.rb b/c_glib/test/test-chunked-array-datum.rb index 76317315327e8..b82f3eed8a7af 100644 --- a/c_glib/test/test-chunked-array-datum.rb +++ b/c_glib/test/test-chunked-array-datum.rb @@ -49,7 +49,7 @@ def test_false end def test_to_string - assert_equal("ChunkedArray", @datum.to_s) + assert_equal("ChunkedArray([\n" + " [\n" + " true,\n" + " false\n" + " ]\n" + "])", @datum.to_s) end def test_value diff --git a/c_glib/test/test-record-batch-datum.rb b/c_glib/test/test-record-batch-datum.rb index 33eb793ba869a..ec572e0f13023 100644 --- a/c_glib/test/test-record-batch-datum.rb +++ b/c_glib/test/test-record-batch-datum.rb @@ -49,7 +49,7 @@ def test_false end def test_to_string - assert_equal("RecordBatch", @datum.to_s) + assert_equal("RecordBatch(visible: [\n" + " true,\n" + " false\n" + " ]\n" + ")", @datum.to_s) end def test_value diff --git a/c_glib/test/test-scalar-datum.rb b/c_glib/test/test-scalar-datum.rb index 17e5d6b061cc7..32a5331518d8b 100644 --- a/c_glib/test/test-scalar-datum.rb +++ b/c_glib/test/test-scalar-datum.rb @@ -60,7 +60,7 @@ def test_false end def test_to_string - assert_equal("Scalar", @datum.to_s) + assert_equal("Scalar(true)", @datum.to_s) end def test_value diff --git a/c_glib/test/test-table-datum.rb b/c_glib/test/test-table-datum.rb index 7ff3997e88a37..c34ecf6314118 100644 --- a/c_glib/test/test-table-datum.rb +++ b/c_glib/test/test-table-datum.rb @@ -49,7 +49,16 @@ def test_false end def 
test_to_string - assert_equal("Table", @datum.to_s) + assert_equal("Table(visible: bool\n" + + "----\n" + + "visible:\n" + + " [\n" + + " [\n" + + " true,\n" + + " false\n" + + " ]\n" + + " ]\n" + + ")", @datum.to_s) end def test_value diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile index 43d7e7ab0b60d..a306790b5cb6d 100644 --- a/ci/docker/conda-integration.dockerfile +++ b/ci/docker/conda-integration.dockerfile @@ -24,7 +24,7 @@ ARG maven=3.5 ARG node=16 ARG yarn=1.22 ARG jdk=8 -ARG go=1.15 +ARG go=1.19.13 # Install Archery and integration dependencies COPY ci/conda_env_archery.txt /arrow/ci/ diff --git a/ci/docker/debian-11-go.dockerfile b/ci/docker/debian-11-go.dockerfile index 9f75bf23fddf2..de8186b9b8e1c 100644 --- a/ci/docker/debian-11-go.dockerfile +++ b/ci/docker/debian-11-go.dockerfile @@ -16,8 +16,8 @@ # under the License. ARG arch=amd64 -ARG go=1.17 -ARG staticcheck=v0.2.2 +ARG go=1.19 +ARG staticcheck=v0.4.5 FROM ${arch}/golang:${go}-bullseye # FROM collects all the args, get back the staticcheck version arg diff --git a/ci/scripts/go_bench_adapt.py b/ci/scripts/go_bench_adapt.py index e4eea5c17af37..a05e25de8bdd3 100644 --- a/ci/scripts/go_bench_adapt.py +++ b/ci/scripts/go_bench_adapt.py @@ -20,7 +20,7 @@ import uuid import logging from pathlib import Path -from typing import List, Optional, Dict +from typing import List from benchadapt import BenchmarkResult from benchadapt.adapters import BenchmarkAdapter @@ -33,9 +33,9 @@ # `github_commit_info` is meant to communicate GitHub-flavored commit # information to Conbench. See -# https://github.com/conbench/conbench/blob/7c4968e631ecdc064559c86a1174a1353713b700/benchadapt/python/benchadapt/result.py#L66 +# https://github.com/conbench/conbench/blob/cf7931f/benchadapt/python/benchadapt/result.py#L66 # for a specification. 
-github_commit_info: Optional[Dict] = None +github_commit_info = {"repository": "https://github.com/apache/arrow"} if os.environ.get("CONBENCH_REF") == "main": # Assume GitHub Actions CI. The environment variable lookups below are @@ -53,7 +53,7 @@ # This is probably a local dev environment, for testing. In this case, it # does usually not make sense to provide commit information (not a - # controlled CI environment). Explicitly keep `github_commit_info=None` to + # controlled CI environment). Explicitly leave out "commit" and "pr_number" to # reflect that (to not send commit information). # Reflect 'local dev' scenario in run_reason. Allow user to (optionally) @@ -114,10 +114,9 @@ def _transform_results(self) -> List[BenchmarkResult]: run_reason=run_reason, github=github_commit_info, ) - if github_commit_info is not None: - parsed.run_name = ( - f"{parsed.run_reason}: {github_commit_info['commit']}" - ) + parsed.run_name = ( + f"{parsed.run_reason}: {github_commit_info.get('commit')}" + ) parsed_results.append(parsed) return parsed_results diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d343de836e528..f2906b960eba6 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -533,6 +533,9 @@ set(ARROW_TESTING_PC_LIBS "") set(ARROW_TESTING_PC_REQUIRES "") # For parquet.pc. 
+set(PARQUET_PC_CFLAGS "") +set(PARQUET_PC_CFLAGS_PRIVATE " -DPARQUET_STATIC") +set(PARQUET_PC_REQUIRES "") set(PARQUET_PC_REQUIRES_PRIVATE "") include(ThirdpartyToolchain) diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h index 8f45f6199fbe1..3493c3146310d 100644 --- a/cpp/src/arrow/compute/api_aggregate.h +++ b/cpp/src/arrow/compute/api_aggregate.h @@ -138,7 +138,7 @@ class ARROW_EXPORT QuantileOptions : public FunctionOptions { static constexpr char const kTypeName[] = "QuantileOptions"; static QuantileOptions Defaults() { return QuantileOptions{}; } - /// quantile must be between 0 and 1 inclusive + /// probability level of quantile must be between 0 and 1 inclusive std::vector q; enum Interpolation interpolation; /// If true (the default), null values are ignored. Otherwise, if any value is null, @@ -162,7 +162,7 @@ class ARROW_EXPORT TDigestOptions : public FunctionOptions { static constexpr char const kTypeName[] = "TDigestOptions"; static TDigestOptions Defaults() { return TDigestOptions{}; } - /// quantile must be between 0 and 1 inclusive + /// probability level of quantile must be between 0 and 1 inclusive std::vector q; /// compression parameter, default 100 uint32_t delta; diff --git a/cpp/src/arrow/datum.cc b/cpp/src/arrow/datum.cc index d0b5cf62c61be..2ac230232e1b7 100644 --- a/cpp/src/arrow/datum.cc +++ b/cpp/src/arrow/datum.cc @@ -182,15 +182,15 @@ std::string Datum::ToString() const { case Datum::NONE: return "nullptr"; case Datum::SCALAR: - return "Scalar"; + return "Scalar(" + scalar()->ToString() + ")"; case Datum::ARRAY: - return "Array"; + return "Array(" + make_array()->ToString() + ")"; case Datum::CHUNKED_ARRAY: - return "ChunkedArray"; + return "ChunkedArray(" + chunked_array()->ToString() + ")"; case Datum::RECORD_BATCH: - return "RecordBatch"; + return "RecordBatch(" + record_batch()->ToString() + ")"; case Datum::TABLE: - return "Table"; + return "Table(" + table()->ToString() + ")"; default: 
DCHECK(false); return ""; diff --git a/cpp/src/arrow/datum.h b/cpp/src/arrow/datum.h index 57ae3731b5ccd..31b2d2274c900 100644 --- a/cpp/src/arrow/datum.h +++ b/cpp/src/arrow/datum.h @@ -301,7 +301,6 @@ struct ARROW_EXPORT Datum { bool operator==(const Datum& other) const { return Equals(other); } bool operator!=(const Datum& other) const { return !Equals(other); } - /// \brief Return a string representation of the kind of datum stored. std::string ToString() const; }; diff --git a/cpp/src/arrow/datum_test.cc b/cpp/src/arrow/datum_test.cc index 14daac6a794fc..909d2577e68fb 100644 --- a/cpp/src/arrow/datum_test.cc +++ b/cpp/src/arrow/datum_test.cc @@ -154,8 +154,8 @@ TEST(Datum, ToString) { Datum v1(arr); Datum v2(std::make_shared(1)); - ASSERT_EQ("Array", v1.ToString()); - ASSERT_EQ("Scalar", v2.ToString()); + ASSERT_EQ("Array([\n 1,\n 2,\n 3,\n 4\n])", v1.ToString()); + ASSERT_EQ("Scalar(1)", v2.ToString()); } TEST(Datum, TotalBufferSize) { diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index 8ffeab4dfbeab..3cdd24dbe9eb9 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -21,6 +21,17 @@ add_custom_target(parquet-benchmarks) add_custom_target(parquet-tests) add_dependencies(parquet-all parquet parquet-tests parquet-benchmarks) +# If libparquet.a is only built, "pkg-config --cflags --libs parquet" +# outputs build flags for static linking not shared +# linking. PARQUET_PC_* except PARQUET_PC_*_PRIVATE are for the static +# linking case. 
+if(NOT ARROW_BUILD_SHARED AND ARROW_BUILD_STATIC) + string(APPEND PARQUET_PC_CFLAGS "${PARQUET_PC_CFLAGS_PRIVATE}") + set(PARQUET_PC_CFLAGS_PRIVATE "") + string(APPEND PARQUET_PC_REQUIRES "${PARQUET_PC_REQUIRES_PRIVATE}") + set(PARQUET_PC_REQUIRES_PRIVATE "") +endif() + function(ADD_PARQUET_TEST REL_TEST_NAME) set(one_value_args) set(multi_value_args EXTRA_DEPENDENCIES LABELS) diff --git a/cpp/src/parquet/parquet.pc.in b/cpp/src/parquet/parquet.pc.in index 6def1c5d8b399..d7a0389c5f187 100644 --- a/cpp/src/parquet/parquet.pc.in +++ b/cpp/src/parquet/parquet.pc.in @@ -26,8 +26,8 @@ full_so_version=@ARROW_FULL_SO_VERSION@ Name: Apache Parquet Description: Apache Parquet is a columnar storage format. Version: @ARROW_VERSION@ -Requires: arrow +Requires: arrow@PARQUET_PC_REQUIRES@ Requires.private:@PARQUET_PC_REQUIRES_PRIVATE@ Libs: -L${libdir} -lparquet -Cflags: -I${includedir} -Cflags.private: -DPARQUET_STATIC +Cflags: -I${includedir}@PARQUET_PC_CFLAGS@ +Cflags.private:@PARQUET_PC_CFLAGS_PRIVATE@ diff --git a/csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj b/csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj index c794e1a4f5089..845f2667970e4 100644 --- a/csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj +++ b/csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj @@ -1,11 +1,11 @@ - netcoreapp3.1 + net6.0 - + diff --git a/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj b/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj index 57bb6b6876ca8..4f785971b2849 100644 --- a/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj +++ b/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj @@ -5,7 +5,7 @@ - + diff --git a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj index 005714ef28a18..3a3a7d406b128 100644 --- 
a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj +++ b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj @@ -5,9 +5,9 @@ - - - + + + diff --git a/csharp/test/Apache.Arrow.Benchmarks/Apache.Arrow.Benchmarks.csproj b/csharp/test/Apache.Arrow.Benchmarks/Apache.Arrow.Benchmarks.csproj index a81fc15bae861..35f17270e0b04 100644 --- a/csharp/test/Apache.Arrow.Benchmarks/Apache.Arrow.Benchmarks.csproj +++ b/csharp/test/Apache.Arrow.Benchmarks/Apache.Arrow.Benchmarks.csproj @@ -6,8 +6,8 @@ - - + + diff --git a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj index e06e0f9ef2845..fc21b06ced689 100644 --- a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj +++ b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj @@ -8,8 +8,8 @@ - - + + diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj index dff3f1e541c08..48ba93f58b973 100644 --- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj @@ -7,8 +7,8 @@ - - + + diff --git a/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj b/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj index ef7d730d2cd45..ce46466bd6ca5 100644 --- a/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj +++ b/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj @@ -5,7 +5,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj index 7f5a726ee5f03..6dd816ac73e86 100644 --- 
a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj @@ -7,8 +7,8 @@ - - + + diff --git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj index d6dc25d6b5e20..e7af9e2246276 100644 --- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj +++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj @@ -11,8 +11,8 @@ - - + + all runtime; build; native; contentfiles; analyzers diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index ce31b497c1fab..77b996766f78c 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -24,7 +24,7 @@ # - JDK >=7 # - gcc >= 4.8 # - Node.js >= 11.12 (best way is to use nvm) -# - Go >= 1.17 +# - Go >= 1.19 # - Docker # # If using a non-system Boost, set BOOST_ROOT and add Boost libraries to @@ -405,7 +405,7 @@ install_go() { return 0 fi - local version=1.17.13 + local version=1.19.13 show_info "Installing go version ${version}..." local arch="$(uname -m)" @@ -422,8 +422,9 @@ install_go() { fi local archive="go${version}.${os}-${arch}.tar.gz" - curl -sLO https://dl.google.com/go/$archive + curl -sLO https://go.dev/dl/$archive + ls -l local prefix=${ARROW_TMPDIR}/go mkdir -p $prefix tar -xzf $archive -C $prefix @@ -860,12 +861,12 @@ test_go() { show_header "Build and test Go libraries" maybe_setup_go || exit 1 - maybe_setup_conda compilers go=1.17 || exit 1 + maybe_setup_conda compilers go=1.19 || exit 1 pushd go go get -v ./... go test ./... - go install ./... + go install -buildvcs=false ./... 
go clean -modcache popd } diff --git a/dev/tasks/linux-packages/apache-arrow/debian/control.in b/dev/tasks/linux-packages/apache-arrow/debian/control.in index 8c1bab8d058da..f08fc05bfc3ad 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/control.in +++ b/dev/tasks/linux-packages/apache-arrow/debian/control.in @@ -317,7 +317,7 @@ Multi-Arch: same Depends: ${misc:Depends}, libglib2.0-dev, - libarrow-dev (= ${binary:Version}), + libarrow-acero-dev (= ${binary:Version}), libarrow-glib1400 (= ${binary:Version}), gir1.2-arrow-1.0 (= ${binary:Version}) Suggests: libarrow-glib-doc diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index 6a87e19cd3091..4691f9e5439da 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -562,7 +562,7 @@ This package contains the libraries for Apache Arrow GLib. %package glib-devel Summary: Libraries and header files for Apache Arrow GLib License: Apache-2.0 -Requires: %{name}-devel = %{version}-%{release} +Requires: %{name}-acero-devel = %{version}-%{release} Requires: %{name}%{major_version}-glib-libs = %{version}-%{release} Requires: glib2-devel Requires: gobject-introspection-devel diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-9-stream/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/centos-9-stream/Dockerfile index 513a63fee8128..b1e1630103c34 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/centos-9-stream/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-9-stream/Dockerfile @@ -18,15 +18,12 @@ ARG FROM=quay.io/centos/centos:stream9 FROM ${FROM} -ENV SCL=gcc-toolset-12 - ARG DEBUG RUN \ quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \ dnf install -y ${quiet} epel-release && \ dnf install --enablerepo=crb -y ${quiet} \ - ${SCL} \ bison \ boost-devel \ brotli-devel \ @@ -46,7 +43,6 @@ RUN \ libarchive \ libzstd-devel \ 
llvm-devel \ - llvm-static \ lz4-devel \ make \ ncurses-devel \ @@ -65,11 +61,3 @@ RUN \ vala \ zlib-devel && \ dnf clean ${quiet} all - -# Workaround: We can remove this once redhat-rpm-config uses "annobin" -# not "gcc-annobin". -RUN \ - sed \ - -i \ - -e 's/gcc-annobin/annobin/g' \ - /usr/lib/rpm/redhat/redhat-annobin-select-gcc-built-plugin diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja index 06b9390c0f974..faf77a1168d1b 100644 --- a/dev/tasks/macros.jinja +++ b/dev/tasks/macros.jinja @@ -25,9 +25,9 @@ on: - "*-github-*" {% endmacro %} -{%- macro github_checkout_arrow(fetch_depth=1, submodules="recursive") -%} +{%- macro github_checkout_arrow(fetch_depth=1, submodules="recursive", action_v="4") -%} - name: Checkout Arrow - uses: actions/checkout@v4 + uses: actions/checkout@v{{ action_v }} with: fetch-depth: {{ fetch_depth }} path: arrow diff --git a/dev/tasks/r/github.macos.autobrew.yml b/dev/tasks/r/github.macos.autobrew.yml index 28733dbfef148..b8e23690e2090 100644 --- a/dev/tasks/r/github.macos.autobrew.yml +++ b/dev/tasks/r/github.macos.autobrew.yml @@ -34,7 +34,7 @@ jobs: - "{{ macros.r_release.ver }}" - "{{ macros.r_oldrel.ver }}" steps: - {{ macros.github_checkout_arrow()|indent }} + {{ macros.github_checkout_arrow(action_v='3')|indent }} - name: Configure autobrew script run: | # minio and sccache are pre-installed on the self-hosted 10.13 runner diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index e3e3d34e156dc..dbe21ffb6b160 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -262,7 +262,8 @@ jobs: # Get the arrow checkout just for the docker config scripts # Don't need submodules for this (hence false arg to macro): they fail on # actions/checkout for some reason in this context - {{ macros.github_checkout_arrow(1, false)|indent }} + {{ macros.github_checkout_arrow(1, false, '3')|indent }} + - name: Install system requirements env: ARROW_R_DEV: "TRUE" # To install 
curl/openssl in r_docker_configure.sh diff --git a/docs/source/java/memory.rst b/docs/source/java/memory.rst index af6c0abc7c82a..036befa148692 100644 --- a/docs/source/java/memory.rst +++ b/docs/source/java/memory.rst @@ -133,7 +133,7 @@ Development Guidelines Applications should generally: * Use the BufferAllocator interface in APIs instead of RootAllocator. -* Create one RootAllocator at the start of the program. +* Create one RootAllocator at the start of the program and explicitly pass it when needed. * ``close()`` allocators after use (whether they are child allocators or the RootAllocator), either manually or preferably via a try-with-resources statement. @@ -288,6 +288,53 @@ Finally, enabling the ``TRACE`` logging level will automatically provide this st | at RootAllocator.close (RootAllocator.java:29) | at (#8:1) +Sometimes, explicitly passing allocators around is difficult. For example, it +can be hard to pass around extra state, like an allocator, through layers of +existing application or framework code. A global or singleton allocator instance +can be useful here, though it should not be your first choice. + +How this works: + +1. Set up a global allocator in a singleton class. +2. Provide methods to create child allocators from the global allocator. +3. Give child allocators proper names to make it easier to figure out where + allocations occurred in case of errors. +4. Ensure that resources are properly closed. +5. Check that the global allocator is empty at some suitable point, such as + right before program shutdown. +6. If it is not empty, review the above allocation bugs. + +.. code-block:: java + + //1 + private static final BufferAllocator allocator = new RootAllocator(); + private static final AtomicInteger childNumber = new AtomicInteger(0); + ... + //2 + public static BufferAllocator getChildAllocator() { + return allocator.newChildAllocator(nextChildName(), 0, Long.MAX_VALUE); + } + ... 
+ //3 + private static String nextChildName() { + return "Allocator-Child-" + childNumber.incrementAndGet(); + } + ... + //4: Business code + try (BufferAllocator allocator = GlobalAllocator.getChildAllocator()) { + ... + } + ... + //5 + public static void checkGlobalCleanUpResources() { + ... + if (!allocator.getChildAllocators().isEmpty()) { + throw new IllegalStateException(...); + } else if (allocator.getAllocatedMemory() != 0) { + throw new IllegalStateException(...); + } + } + .. _`ArrowBuf`: https://arrow.apache.org/docs/java/reference/org/apache/arrow/memory/ArrowBuf.html .. _`ArrowBuf.print()`: https://arrow.apache.org/docs/java/reference/org/apache/arrow/memory/ArrowBuf.html#print-java.lang.StringBuilder-int-org.apache.arrow.memory.BaseAllocator.Verbosity- .. _`BufferAllocator`: https://arrow.apache.org/docs/java/reference/org/apache/arrow/memory/BufferAllocator.html diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go index aada095d099b8..58d4a0f4b8895 100644 --- a/go/arrow/array/builder.go +++ b/go/arrow/array/builder.go @@ -86,6 +86,9 @@ type Builder interface { // IsNull returns if a previously appended value at a given index is null or not. IsNull(i int) bool + // SetNull sets the value at index i to null. 
+ SetNull(i int) + UnsafeAppendBoolToBitmap(bool) init(capacity int) @@ -126,6 +129,13 @@ func (b *builder) IsNull(i int) bool { return b.nullBitmap.Len() != 0 && bitutil.BitIsNotSet(b.nullBitmap.Bytes(), i) } +func (b *builder) SetNull(i int) { + if i < 0 || i >= b.length { + panic("arrow/array: index out of range") + } + bitutil.ClearBit(b.nullBitmap.Bytes(), i) +} + func (b *builder) init(capacity int) { toAlloc := bitutil.CeilByte(capacity) / 8 b.nullBitmap = memory.NewResizableBuffer(b.mem) diff --git a/go/arrow/array/builder_test.go b/go/arrow/array/builder_test.go index eeb7a2ac46b3f..3cacb54f725e7 100644 --- a/go/arrow/array/builder_test.go +++ b/go/arrow/array/builder_test.go @@ -97,3 +97,27 @@ func TestBuilder_IsNull(t *testing.T) { assert.Equal(t, i%2 != 0, b.IsNull(i)) } } + +func TestBuilder_SetNull(t *testing.T) { + b := &builder{mem: memory.NewGoAllocator()} + n := 32 + b.init(n) + + for i := 0; i < n; i++ { + // Set everything to true + b.UnsafeAppendBoolToBitmap(true) + } + for i := 0; i < n; i++ { + if i%2 == 0 { // Set all even numbers to null + b.SetNull(i) + } + } + + for i := 0; i < n; i++ { + if i%2 == 0 { + assert.True(t, b.IsNull(i)) + } else { + assert.False(t, b.IsNull(i)) + } + } +} diff --git a/go/arrow/array/map.go b/go/arrow/array/map.go index 84c1b55a0cf21..4fe860f26ef61 100644 --- a/go/arrow/array/map.go +++ b/go/arrow/array/map.go @@ -234,6 +234,10 @@ func (b *MapBuilder) AppendNulls(n int) { } } +func (b *MapBuilder) SetNull(i int) { + b.listBuilder.SetNull(i) +} + func (b *MapBuilder) AppendEmptyValue() { b.Append(true) } diff --git a/go/arrow/array/map_test.go b/go/arrow/array/map_test.go index cfb1cac87bedc..3fe78549ec803 100644 --- a/go/arrow/array/map_test.go +++ b/go/arrow/array/map_test.go @@ -217,3 +217,38 @@ func TestMapStringRoundTrip(t *testing.T) { assert.True(t, array.Equal(arr, arr1)) } + +func TestMapBuilder_SetNull(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer 
pool.AssertSize(t, 0) + + var ( + arr *array.Map + equalValid = []bool{true, true, true, true, true, true, true} + equalOffsets = []int32{0, 1, 2, 5, 6, 7, 8, 10} + equalKeys = []string{"a", "a", "a", "b", "c", "a", "a", "a", "a", "b"} + equalValues = []int32{1, 2, 3, 4, 5, 2, 2, 2, 5, 6} + ) + + bldr := array.NewMapBuilder(pool, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32, false) + defer bldr.Release() + + kb := bldr.KeyBuilder().(*array.StringBuilder) + ib := bldr.ItemBuilder().(*array.Int32Builder) + + bldr.AppendValues(equalOffsets, equalValid) + for _, k := range equalKeys { + kb.Append(k) + } + ib.AppendValues(equalValues, nil) + + bldr.SetNull(0) + bldr.SetNull(3) + + arr = bldr.NewMapArray() + defer arr.Release() + + assert.True(t, arr.IsNull(0)) + assert.True(t, arr.IsValid(1)) + assert.True(t, arr.IsNull(3)) +} diff --git a/go/arrow/compute/executor.go b/go/arrow/compute/executor.go index ac87d063915b7..6da7ed1293065 100644 --- a/go/arrow/compute/executor.go +++ b/go/arrow/compute/executor.go @@ -1007,6 +1007,9 @@ func (v *vectorExecutor) WrapResults(ctx context.Context, out <-chan Datum, hasC case <-ctx.Done(): return nil case output = <-out: + if output == nil { + return nil + } // if the inputs contained at least one chunked array // then we want to return chunked output if hasChunked { diff --git a/go/arrow/compute/exprs/builders_test.go b/go/arrow/compute/exprs/builders_test.go index 9aaa4a2c4f9e4..e42d7569a8f03 100644 --- a/go/arrow/compute/exprs/builders_test.go +++ b/go/arrow/compute/exprs/builders_test.go @@ -37,7 +37,7 @@ func TestNewScalarFunc(t *testing.T) { require.NoError(t, err) assert.Equal(t, "add(i32(1), i32(10), {overflow: [ERROR]}) => i32", fn.String()) - assert.Equal(t, "add:i32_i32", fn.Name()) + assert.Equal(t, "add:i32_i32", fn.CompoundName()) } func TestFieldRefDotPath(t *testing.T) { diff --git a/go/arrow/memory/go_allocator.go b/go/arrow/memory/go_allocator.go index 1dea4a8d23385..1017eb688d2ff 100644 --- 
a/go/arrow/memory/go_allocator.go +++ b/go/arrow/memory/go_allocator.go @@ -32,10 +32,9 @@ func (a *GoAllocator) Allocate(size int) []byte { } func (a *GoAllocator) Reallocate(size int, b []byte) []byte { - if size == len(b) { - return b + if cap(b) >= size { + return b[:size] } - newBuf := a.Allocate(size) copy(newBuf, b) return newBuf diff --git a/go/go.mod b/go/go.mod index 46c093ed1ece2..a5581eb3925ca 100644 --- a/go/go.mod +++ b/go/go.mod @@ -20,60 +20,60 @@ go 1.20 require ( github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c - github.com/andybalholm/brotli v1.0.4 - github.com/apache/thrift v0.16.0 + github.com/andybalholm/brotli v1.0.5 + github.com/apache/thrift v0.17.0 github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815 - github.com/goccy/go-json v0.10.0 + github.com/goccy/go-json v0.10.2 github.com/golang/snappy v0.0.4 - github.com/google/flatbuffers v23.1.21+incompatible + github.com/google/flatbuffers v23.5.26+incompatible github.com/klauspost/asmfmt v1.3.2 - github.com/klauspost/compress v1.15.15 - github.com/klauspost/cpuid/v2 v2.2.3 + github.com/klauspost/compress v1.16.7 + github.com/klauspost/cpuid/v2 v2.2.5 github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 - github.com/pierrec/lz4/v4 v4.1.17 - github.com/stretchr/testify v1.8.1 + github.com/pierrec/lz4/v4 v4.1.18 + github.com/stretchr/testify v1.8.4 github.com/zeebo/xxh3 v1.0.2 - golang.org/x/exp v0.0.0-20230206171751-46f607a40771 - golang.org/x/sync v0.1.0 - golang.org/x/sys v0.5.0 - golang.org/x/tools v0.6.0 + golang.org/x/exp v0.0.0-20230905200255-921286631fa9 + golang.org/x/sync v0.3.0 + golang.org/x/sys v0.12.0 + golang.org/x/tools v0.13.0 golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 gonum.org/v1/gonum v0.12.0 - google.golang.org/grpc v1.53.0 - google.golang.org/protobuf v1.28.1 - modernc.org/sqlite v1.20.4 + google.golang.org/grpc v1.54.0 + google.golang.org/protobuf v1.31.0 + 
modernc.org/sqlite v1.21.2 ) require ( github.com/google/uuid v1.3.0 - github.com/substrait-io/substrait-go v0.2.1-0.20230517203920-30fa08bd57d0 + github.com/substrait-io/substrait-go v0.4.2 ) require ( - github.com/alecthomas/participle/v2 v2.0.0 // indirect + github.com/alecthomas/participle/v2 v2.1.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dustin/go-humanize v1.0.1 // indirect - github.com/fatih/color v1.13.0 // indirect - github.com/goccy/go-yaml v1.9.8 // indirect - github.com/golang/protobuf v1.5.2 // indirect + github.com/fatih/color v1.15.0 // indirect + github.com/goccy/go-yaml v1.11.0 // indirect + github.com/golang/protobuf v1.5.3 // indirect github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 // indirect github.com/kr/text v0.2.0 // indirect github.com/mattn/go-colorable v0.1.13 // indirect - github.com/mattn/go-isatty v0.0.17 // indirect + github.com/mattn/go-isatty v0.0.19 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rogpeppe/go-internal v1.9.0 // indirect github.com/stretchr/objx v0.5.0 // indirect - golang.org/x/mod v0.8.0 // indirect - golang.org/x/net v0.7.0 // indirect - golang.org/x/text v0.7.0 // indirect - google.golang.org/genproto v0.0.0-20230209215440-0dfe4f8abfcc // indirect + golang.org/x/mod v0.12.0 // indirect + golang.org/x/net v0.15.0 // indirect + golang.org/x/text v0.13.0 // indirect + google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - lukechampine.com/uint128 v1.2.0 // indirect + lukechampine.com/uint128 v1.3.0 // indirect modernc.org/cc/v3 v3.40.0 // indirect modernc.org/ccgo/v3 v3.16.13 // indirect - modernc.org/libc v1.22.2 // indirect + modernc.org/libc v1.22.4 // indirect modernc.org/mathutil v1.5.0 // indirect modernc.org/memory v1.5.0 // indirect modernc.org/opt v0.1.3 // indirect diff --git a/go/go.sum b/go/go.sum 
index 0ccd809f50fae..609cf7173ef98 100644 --- a/go/go.sum +++ b/go/go.sum @@ -1,13 +1,13 @@ github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU= github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk= -github.com/alecthomas/assert/v2 v2.2.2 h1:Z/iVC0xZfWTaFNE6bA3z07T86hd45Xe2eLt6WVy2bbk= -github.com/alecthomas/participle/v2 v2.0.0 h1:Fgrq+MbuSsJwIkw3fEj9h75vDP0Er5JzepJ0/HNHv0g= -github.com/alecthomas/participle/v2 v2.0.0/go.mod h1:rAKZdJldHu8084ojcWevWAL8KmEU+AT+Olodb+WoN2Y= +github.com/alecthomas/assert/v2 v2.3.0 h1:mAsH2wmvjsuvyBvAmCtm7zFsBlb8mIHx5ySLVdDZXL0= +github.com/alecthomas/participle/v2 v2.1.0 h1:z7dElHRrOEEq45F2TG5cbQihMtNTv8vwldytDj7Wrz4= +github.com/alecthomas/participle/v2 v2.1.0/go.mod h1:Y1+hAs8DHPmc3YUFzqllV+eSQ9ljPTk0ZkPMtEdAx2c= github.com/alecthomas/repr v0.2.0 h1:HAzS41CIzNW5syS8Mf9UwXhNH1J9aix/BvDRf1Ml2Yk= -github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= -github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= -github.com/apache/thrift v0.16.0 h1:qEy6UW60iVOlUy+b9ZR0d5WzUWYGOo4HfopoyBaNmoY= -github.com/apache/thrift v0.16.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU= +github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= +github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= +github.com/apache/thrift v0.17.0 h1:cMd2aj52n+8VoAtvSvLn4kDC3aZ6IAkBuqWQ2IDu7wo= +github.com/apache/thrift v0.17.0/go.mod h1:OLxhMRJxomX+1I/KUw03qoV3mMz16BwaKI+d4fPBx7Q= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -16,28 +16,22 @@ github.com/docopt/docopt-go 
v0.0.0-20180111231733-ee0de3bc6815 h1:bWDMxwH3px2JBh github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= -github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= -github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= -github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= -github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= +github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs= +github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= github.com/go-playground/locales v0.13.0 h1:HyWk6mgj5qFqCT5fjGBuRArbVDfE4hi8+e8ceBS/t7Q= -github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= github.com/go-playground/universal-translator v0.17.0 h1:icxd5fm+REJzpZx7ZfpaD876Lmtgy7VtROAbHHXk8no= -github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= github.com/go-playground/validator/v10 v10.4.1 h1:pH2c5ADXtd66mxoE0Zm9SUhxE20r7aM3F26W0hOn+GE= -github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn9GlaMV7XkbRSipzJ0Ii4= -github.com/goccy/go-json v0.10.0 h1:mXKd9Qw4NuzShiRlOXKews24ufknHO7gx30lsDyokKA= -github.com/goccy/go-json v0.10.0/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= -github.com/goccy/go-yaml v1.9.8 h1:5gMyLUeU1/6zl+WFfR1hN7D2kf+1/eRGa7DFtToiBvQ= -github.com/goccy/go-yaml v1.9.8/go.mod h1:JubOolP3gh0HpiBc4BLRD4YmjEjHAmIIB2aaXKkTfoE= -github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= +github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= +github.com/goccy/go-json 
v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/goccy/go-yaml v1.11.0 h1:n7Z+zx8S9f9KgzG6KtQKf+kwqXZlLNR2F6018Dgau54= +github.com/goccy/go-yaml v1.11.0/go.mod h1:H+mJrWtjPTJAHvRbV09MCK9xYwODM+wRTVFFTWckfng= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= -github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/google/flatbuffers v23.1.21+incompatible h1:bUqzx/MXCDxuS0hRJL2EfjyZL3uQrPbMocUa8zGqsTA= -github.com/google/flatbuffers v23.1.21+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/flatbuffers v23.5.26+incompatible h1:M9dgRyhJemaM4Sw8+66GHBu8ioaQmyPLg1b8VwK5WJg= +github.com/google/flatbuffers v23.5.26+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbuBVKCudVG457BR2GZFIz3uw3hQ= @@ -48,31 +42,26 @@ github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNU github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= -github.com/klauspost/compress v1.15.15 
h1:EF27CXIuDsYJ6mmvtBRlEuB2UVOqHG1tAXgZ7yIO+lw= -github.com/klauspost/compress v1.15.15/go.mod h1:ZcK2JAFqKOpnBlxcLsJzYfrS9X1akm9fHZNnD9+Vo/4= -github.com/klauspost/cpuid/v2 v2.2.3 h1:sxCkb+qR91z4vsqw4vGGZlDgPz3G7gjaLyK3V8y70BU= -github.com/klauspost/cpuid/v2 v2.2.3/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= +github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= +github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= +github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/leodido/go-urn v1.2.0 h1:hpXL4XnriNwQ/ABnpepYM/1vCLWNDfUNts8dX3xTG6Y= -github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= -github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= -github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= -github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= -github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng= -github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-sqlite3 v1.14.15 h1:vfoHhTN1af61xCRSWzFIWzx2YskyMTwHLrExkBOjvxI= 
+github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= +github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-sqlite3 v1.14.16 h1:yOQRA0RpS5PFz/oikGwBEqvAWhWg5ufRz4ETLjwpU1Y= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= -github.com/pierrec/lz4/v4 v4.1.17 h1:kV4Ip+/hUBC+8T6+2EgburRtkE9ef4nbY3f4dFhGjMc= -github.com/pierrec/lz4/v4 v4.1.17/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pierrec/lz4/v4 v4.1.18 h1:xaKrnTkyoqfh1YItXl56+6KJNVYWlEEPuAQW9xsplYQ= +github.com/pierrec/lz4/v4 v4.1.18/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= @@ -84,93 +73,72 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 
-github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= -github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/substrait-io/substrait-go v0.2.1-0.20230517203920-30fa08bd57d0 h1:ULhfcCHY7uxA133qmInVpNpqfjyicryPXIaxCjbDVbw= -github.com/substrait-io/substrait-go v0.2.1-0.20230517203920-30fa08bd57d0/go.mod h1:qhpnLmrcvAnlZsUyPXZRqldiHapPTXC3t7xFgDi3aQg= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/substrait-io/substrait-go v0.4.2 h1:buDnjsb3qAqTaNbOR7VKmNgXf4lYQxWEcnSGUWBtmN8= +github.com/substrait-io/substrait-go v0.4.2/go.mod h1:qhpnLmrcvAnlZsUyPXZRqldiHapPTXC3t7xFgDi3aQg= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.1.0 h1:MDRAIl0xIo9Io2xV565hzXHw3zVseKrJKodhohM5CjU= -golang.org/x/exp v0.0.0-20230206171751-46f607a40771 h1:xP7rWLUr1e1n2xkK5YB4LI0hPEy3LJC6Wk+D4pGlOJg= -golang.org/x/exp v0.0.0-20230206171751-46f607a40771/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.8.0 h1:LUYupSeNrTNCGzR/hVBk2NHZO4hXcVaW1k4Qx7rjPx8= -golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 
-golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= -golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= -golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220406163625-3f8b81556e12/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/crypto v0.13.0 h1:mvySKfSWJ+UKUii46M40LOvyWfN0s2U+46/jDd0e6Ck= +golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g= +golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k= +golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= +golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/net v0.15.0 h1:ugBLEUaxABaB5AJqW9enI0ACdci2RUd4eP51NTBvuJ8= +golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= +golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= +golang.org/x/sync v0.3.0/go.mod 
h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= -golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM= -golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o= +golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/tools v0.13.0 h1:Iey4qkscZuv0VvIt8E0neZjtPVQFSc870HQ448QgEmQ= +golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk= golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= gonum.org/v1/gonum v0.12.0 h1:xKuo6hzt+gMav00meVPUlXwSdoEJP46BR+wdxQEFK2o= gonum.org/v1/gonum v0.12.0/go.mod h1:73TDxJfAAHeA8Mk9mf8NlIppyhQNo5GLTcYeqgo2lvY= -google.golang.org/genproto v0.0.0-20230209215440-0dfe4f8abfcc h1:ijGwO+0vL2hJt5gaygqP2j6PfflOBrRot0IczKbmtio= -google.golang.org/genproto v0.0.0-20230209215440-0dfe4f8abfcc/go.mod h1:RGgjbofJ8xD9Sq1VVhDM1Vok1vRONV+rg+CjzG4SZKM= -google.golang.org/grpc v1.53.0 h1:LAv2ds7cmFV/XTS3XG1NneeENYrXGmorPxsBbptIjNc= -google.golang.org/grpc v1.53.0/go.mod h1:OnIrk0ipVdj4N5d9IUoFUx72/VlD7+jUsHwZgwSMQpw= +google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 h1:KpwkzHKEF7B9Zxg18WzOa7djJ+Ha5DzthMyZYQfEn2A= +google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1/go.mod h1:nKE/iIaLqn2bQwXBg8f1g2Ylh6r5MN5CmZvuzZCgsCU= +google.golang.org/grpc v1.54.0 h1:EhTqbhiYeixwWQtAEZAxmV9MGqcjEU2mFx52xCzNyag= +google.golang.org/grpc v1.54.0/go.mod h1:PUSEXI6iWghWaB6lXM4knEgpJNu2qUcKfDtNci3EC2g= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= -google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= +google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -lukechampine.com/uint128 v1.2.0 h1:mBi/5l91vocEN8otkC5bDLhi2KdCticRiwbdB0O+rjI= -lukechampine.com/uint128 v1.2.0/go.mod h1:c4eWIwlEGaxC/+H1VguhU4PHXNWDCDMUlWdIWl2j1gk= +lukechampine.com/uint128 v1.3.0 h1:cDdUVfRwDUDovz610ABgFD17nXD4/uDgVHl2sC3+sbo= +lukechampine.com/uint128 v1.3.0/go.mod h1:c4eWIwlEGaxC/+H1VguhU4PHXNWDCDMUlWdIWl2j1gk= modernc.org/cc/v3 v3.40.0 h1:P3g79IUS/93SYhtoeaHW+kRCIrYaxJ27MFPv+7kaTOw= modernc.org/cc/v3 v3.40.0/go.mod h1:/bTg4dnWkSXowUO6ssQKnOV0yMVxDYNIsIrzqTFDGH0= modernc.org/ccgo/v3 v3.16.13 h1:Mkgdzl46i5F/CNR/Kj80Ri59hC8TKAhZrYSaqvkwzUw= modernc.org/ccgo/v3 v3.16.13/go.mod h1:2Quk+5YgpImhPjv2Qsob1DnZ/4som1lJTodubIcoUkY= modernc.org/ccorpus v1.11.6 h1:J16RXiiqiCgua6+ZvQot4yUuUy8zxgqbqEEUuGPlISk= modernc.org/httpfs v1.0.6 h1:AAgIpFZRXuYnkjftxTAZwMIiwEqAfk8aVB2/oA6nAeM= -modernc.org/libc v1.22.2 h1:4U7v51GyhlWqQmwCHj28Rdq2Yzwk55ovjFrdPjs8Hb0= -modernc.org/libc v1.22.2/go.mod h1:uvQavJ1pZ0hIoC/jfqNoMLURIMhKzINIWypNM17puug= +modernc.org/libc v1.22.4 h1:wymSbZb0AlrjdAVX3cjreCHTPCpPARbQXNz6BHPzdwQ= +modernc.org/libc v1.22.4/go.mod h1:jj+Z7dTNX8fBScMVNRAYZ/jF91K8fdT2hYMThc3YjBY= modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ= modernc.org/mathutil v1.5.0/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= modernc.org/memory v1.5.0 h1:N+/8c5rE6EqugZwHii4IFsaJ7MUhoWX07J5tC/iI5Ds= modernc.org/memory v1.5.0/go.mod h1:PkUhL0Mugw21sHPeskwZW4D6VscE/GQJOnIpCnW6pSU= modernc.org/opt v0.1.3 h1:3XOZf2yznlhC+ibLltsDGzABUGVx8J6pnFMS3E4dcq4= modernc.org/opt v0.1.3/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0= -modernc.org/sqlite v1.20.4 h1:J8+m2trkN+KKoE7jglyHYYYiaq5xmz2HoHJIiBlRzbE= -modernc.org/sqlite v1.20.4/go.mod h1:zKcGyrICaxNTMEHSr1HQ2GUraP0j+845GYw37+EyT6A= 
+modernc.org/sqlite v1.21.2 h1:ixuUG0QS413Vfzyx6FWx6PYTmHaOegTY+hjzhn7L+a0= +modernc.org/sqlite v1.21.2/go.mod h1:cxbLkB5WS32DnQqeH4h4o1B0eMr8W/y8/RGuxQ3JsC0= modernc.org/strutil v1.1.3 h1:fNMm+oJklMGYfU9Ylcywl0CO5O6nTfaowNsh2wpPjzY= modernc.org/strutil v1.1.3/go.mod h1:MEHNA7PdEnEwLvspRMtWTNnp2nnyvMfkimT1NKNAGbw= -modernc.org/tcl v1.15.0 h1:oY+JeD11qVVSgVvodMJsu7Edf8tr5E/7tuhF5cNYz34= +modernc.org/tcl v1.15.1 h1:mOQwiEK4p7HruMZcwKTZPw/aqtGM4aY00uzWhlKKYws= modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= modernc.org/z v1.7.0 h1:xkDw/KepgEjeizO2sNco+hqYkU12taxQFqPEmgm1GWE= diff --git a/go/internal/hashing/hash_string_go1.19.go b/go/internal/hashing/hash_string_go1.19.go index c496f43abdcc6..f38eb5c523dde 100644 --- a/go/internal/hashing/hash_string_go1.19.go +++ b/go/internal/hashing/hash_string_go1.19.go @@ -24,7 +24,14 @@ import ( ) func hashString(val string, alg uint64) uint64 { - buf := *(*[]byte)(unsafe.Pointer(&val)) - (*reflect.SliceHeader)(unsafe.Pointer(&buf)).Cap = len(val) + if val == "" { + return Hash([]byte{}, alg) + } + // highly efficient way to get byte slice without copy before + // the introduction of unsafe.StringData in go1.20 + // (https://stackoverflow.com/questions/59209493/how-to-use-unsafe-get-a-byte-slice-from-a-string-without-memory-copy) + const MaxInt32 = 1<<31 - 1 + buf := (*[MaxInt32]byte)(unsafe.Pointer((*reflect.StringHeader)( + unsafe.Pointer(&val)).Data))[: len(val)&MaxInt32 : len(val)&MaxInt32] return Hash(buf, alg) } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java index c418219170380..99d66f94261ee 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java +++ 
b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java @@ -19,15 +19,31 @@ import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; +import java.math.BigDecimal; +import java.time.Duration; + import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.memory.util.ByteFunctionHelpers; import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.DateDayVector; +import org.apache.arrow.vector.DateMilliVector; +import org.apache.arrow.vector.Decimal256Vector; +import org.apache.arrow.vector.DecimalVector; +import org.apache.arrow.vector.DurationVector; import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.IntervalDayVector; +import org.apache.arrow.vector.IntervalMonthDayNanoVector; import org.apache.arrow.vector.SmallIntVector; +import org.apache.arrow.vector.TimeMicroVector; +import org.apache.arrow.vector.TimeMilliVector; +import org.apache.arrow.vector.TimeNanoVector; +import org.apache.arrow.vector.TimeSecVector; +import org.apache.arrow.vector.TimeStampVector; import org.apache.arrow.vector.TinyIntVector; import org.apache.arrow.vector.UInt1Vector; import org.apache.arrow.vector.UInt2Vector; @@ -69,6 +85,32 @@ public static VectorValueComparator createDefaultComp return (VectorValueComparator) new UInt4Comparator(); } else if (vector instanceof UInt8Vector) { return (VectorValueComparator) new UInt8Comparator(); + } else if (vector instanceof BitVector) { + return (VectorValueComparator) new BitComparator(); + } else if (vector instanceof DateDayVector) { + return (VectorValueComparator) new DateDayComparator(); + } else if (vector instanceof DateMilliVector) { + return (VectorValueComparator) new 
DateMilliComparator(); + } else if (vector instanceof Decimal256Vector) { + return (VectorValueComparator) new Decimal256Comparator(); + } else if (vector instanceof DecimalVector) { + return (VectorValueComparator) new DecimalComparator(); + } else if (vector instanceof DurationVector) { + return (VectorValueComparator) new DurationComparator(); + } else if (vector instanceof IntervalDayVector) { + return (VectorValueComparator) new IntervalDayComparator(); + } else if (vector instanceof IntervalMonthDayNanoVector) { + throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName()); + } else if (vector instanceof TimeMicroVector) { + return (VectorValueComparator) new TimeMicroComparator(); + } else if (vector instanceof TimeMilliVector) { + return (VectorValueComparator) new TimeMilliComparator(); + } else if (vector instanceof TimeNanoVector) { + return (VectorValueComparator) new TimeNanoComparator(); + } else if (vector instanceof TimeSecVector) { + return (VectorValueComparator) new TimeSecComparator(); + } else if (vector instanceof TimeStampVector) { + return (VectorValueComparator) new TimeStampComparator(); } } else if (vector instanceof BaseVariableWidthVector) { return (VectorValueComparator) new VariableWidthComparator(); @@ -345,6 +387,293 @@ public VectorValueComparator createNew() { } } + /** + * Default comparator for bit type. + * The comparison is based on values, with null comes first. + */ + public static class BitComparator extends VectorValueComparator { + + public BitComparator() { + super(-1); + } + + @Override + public int compareNotNull(int index1, int index2) { + boolean value1 = vector1.get(index1) != 0; + boolean value2 = vector2.get(index2) != 0; + + return Boolean.compare(value1, value2); + } + + @Override + public VectorValueComparator createNew() { + return new BitComparator(); + } + } + + /** + * Default comparator for DateDay type. 
+ * The comparison is based on values, with null comes first. + */ + public static class DateDayComparator extends VectorValueComparator { + + public DateDayComparator() { + super(DateDayVector.TYPE_WIDTH); + } + + @Override + public int compareNotNull(int index1, int index2) { + int value1 = vector1.get(index1); + int value2 = vector2.get(index2); + return Integer.compare(value1, value2); + } + + @Override + public VectorValueComparator createNew() { + return new DateDayComparator(); + } + } + + /** + * Default comparator for DateMilli type. + * The comparison is based on values, with null comes first. + */ + public static class DateMilliComparator extends VectorValueComparator { + + public DateMilliComparator() { + super(DateMilliVector.TYPE_WIDTH); + } + + @Override + public int compareNotNull(int index1, int index2) { + long value1 = vector1.get(index1); + long value2 = vector2.get(index2); + + return Long.compare(value1, value2); + } + + @Override + public VectorValueComparator createNew() { + return new DateMilliComparator(); + } + } + + /** + * Default comparator for Decimal256 type. + * The comparison is based on values, with null comes first. + */ + public static class Decimal256Comparator extends VectorValueComparator { + + public Decimal256Comparator() { + super(Decimal256Vector.TYPE_WIDTH); + } + + @Override + public int compareNotNull(int index1, int index2) { + BigDecimal value1 = vector1.getObjectNotNull(index1); + BigDecimal value2 = vector2.getObjectNotNull(index2); + + return value1.compareTo(value2); + } + + @Override + public VectorValueComparator createNew() { + return new Decimal256Comparator(); + } + } + + /** + * Default comparator for Decimal type. + * The comparison is based on values, with null comes first. 
+ */ + public static class DecimalComparator extends VectorValueComparator { + + public DecimalComparator() { + super(DecimalVector.TYPE_WIDTH); + } + + @Override + public int compareNotNull(int index1, int index2) { + BigDecimal value1 = vector1.getObjectNotNull(index1); + BigDecimal value2 = vector2.getObjectNotNull(index2); + + return value1.compareTo(value2); + } + + @Override + public VectorValueComparator createNew() { + return new DecimalComparator(); + } + } + + /** + * Default comparator for Duration type. + * The comparison is based on values, with null comes first. + */ + public static class DurationComparator extends VectorValueComparator { + + public DurationComparator() { + super(DurationVector.TYPE_WIDTH); + } + + @Override + public int compareNotNull(int index1, int index2) { + Duration value1 = vector1.getObjectNotNull(index1); + Duration value2 = vector2.getObjectNotNull(index2); + + return value1.compareTo(value2); + } + + @Override + public VectorValueComparator createNew() { + return new DurationComparator(); + } + } + + /** + * Default comparator for IntervalDay type. + * The comparison is based on values, with null comes first. + */ + public static class IntervalDayComparator extends VectorValueComparator { + + public IntervalDayComparator() { + super(IntervalDayVector.TYPE_WIDTH); + } + + @Override + public int compareNotNull(int index1, int index2) { + Duration value1 = vector1.getObjectNotNull(index1); + Duration value2 = vector2.getObjectNotNull(index2); + + return value1.compareTo(value2); + } + + @Override + public VectorValueComparator createNew() { + return new IntervalDayComparator(); + } + } + + /** + * Default comparator for TimeMicro type. + * The comparison is based on values, with null comes first. 
+ */ + public static class TimeMicroComparator extends VectorValueComparator { + + public TimeMicroComparator() { + super(TimeMicroVector.TYPE_WIDTH); + } + + @Override + public int compareNotNull(int index1, int index2) { + long value1 = vector1.get(index1); + long value2 = vector2.get(index2); + + return Long.compare(value1, value2); + } + + @Override + public VectorValueComparator createNew() { + return new TimeMicroComparator(); + } + } + + /** + * Default comparator for TimeMilli type. + * The comparison is based on values, with null comes first. + */ + public static class TimeMilliComparator extends VectorValueComparator { + + public TimeMilliComparator() { + super(TimeMilliVector.TYPE_WIDTH); + } + + @Override + public int compareNotNull(int index1, int index2) { + int value1 = vector1.get(index1); + int value2 = vector2.get(index2); + + return Integer.compare(value1, value2); + } + + @Override + public VectorValueComparator createNew() { + return new TimeMilliComparator(); + } + } + + /** + * Default comparator for TimeNano type. + * The comparison is based on values, with null comes first. + */ + public static class TimeNanoComparator extends VectorValueComparator { + + public TimeNanoComparator() { + super(TimeNanoVector.TYPE_WIDTH); + } + + @Override + public int compareNotNull(int index1, int index2) { + long value1 = vector1.get(index1); + long value2 = vector2.get(index2); + + return Long.compare(value1, value2); + } + + @Override + public VectorValueComparator createNew() { + return new TimeNanoComparator(); + } + } + + /** + * Default comparator for TimeSec type. + * The comparison is based on values, with null comes first. 
+ */ + public static class TimeSecComparator extends VectorValueComparator { + + public TimeSecComparator() { + super(TimeSecVector.TYPE_WIDTH); + } + + @Override + public int compareNotNull(int index1, int index2) { + int value1 = vector1.get(index1); + int value2 = vector2.get(index2); + + return Integer.compare(value1, value2); + } + + @Override + public VectorValueComparator createNew() { + return new TimeSecComparator(); + } + } + + /** + * Default comparator for TimeStamp type. + * The comparison is based on values, with null comes first. + */ + public static class TimeStampComparator extends VectorValueComparator { + + public TimeStampComparator() { + super(TimeStampVector.TYPE_WIDTH); + } + + @Override + public int compareNotNull(int index1, int index2) { + long value1 = vector1.get(index1); + long value2 = vector2.get(index2); + + return Long.compare(value1, value2); + } + + @Override + public VectorValueComparator createNew() { + return new TimeStampComparator(); + } + } + /** * Default comparator for {@link org.apache.arrow.vector.BaseVariableWidthVector}. * The comparison is in lexicographic order, with null comes first.
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java index 43d604060d086..c3b68facfda97 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java @@ -21,6 +21,7 @@ import org.apache.arrow.memory.util.MemoryUtil; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.BaseFixedWidthVector; +import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.IntVector; @@ -35,6 +36,9 @@ public class FixedWidthOutOfPlaceVectorSorter im @Override public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator comparator) { + if (srcVector instanceof BitVector) { + throw new IllegalArgumentException("BitVector is not supported with FixedWidthOutOfPlaceVectorSorter."); + } comparator.attachVector(srcVector); int valueWidth = comparator.getValueWidth(); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java index 818bb60d116da..62051197740d8 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java @@ -25,8 +25,23 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.DateDayVector; +import org.apache.arrow.vector.DateMilliVector; +import org.apache.arrow.vector.Decimal256Vector; +import org.apache.arrow.vector.DecimalVector; 
+import org.apache.arrow.vector.DurationVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.IntervalDayVector; import org.apache.arrow.vector.SmallIntVector; +import org.apache.arrow.vector.TimeMicroVector; +import org.apache.arrow.vector.TimeMilliVector; +import org.apache.arrow.vector.TimeNanoVector; +import org.apache.arrow.vector.TimeSecVector; +import org.apache.arrow.vector.TimeStampMilliVector; +import org.apache.arrow.vector.TimeStampVector; import org.apache.arrow.vector.TinyIntVector; import org.apache.arrow.vector.UInt1Vector; import org.apache.arrow.vector.UInt2Vector; @@ -34,6 +49,7 @@ import org.apache.arrow.vector.UInt8Vector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; +import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; @@ -271,6 +287,76 @@ public void testCompareUInt8() { } } + @Test + public void testCompareFloat4() { + try (Float4Vector vec = new Float4Vector("", allocator)) { + vec.allocateNew(9); + ValueVectorDataPopulator.setVector( + vec, -1.1f, 0.0f, 1.0f, null, 1.0f, 2.0f, Float.NaN, Float.NaN, Float.POSITIVE_INFINITY, + Float.NEGATIVE_INFINITY); + + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); + comparator.attachVector(vec); + + assertTrue(comparator.compare(0, 1) < 0); + assertTrue(comparator.compare(0, 2) < 0); + assertTrue(comparator.compare(2, 1) > 0); + + // test equality + assertTrue(comparator.compare(5, 5) == 0); + assertTrue(comparator.compare(2, 4) == 0); + + // null first + assertTrue(comparator.compare(3, 4) < 0); + assertTrue(comparator.compare(5, 3) > 0); + assertTrue(comparator.compare(8, 3) > 0); + + // NaN behavior. 
+ assertTrue(comparator.compare(6, 7) == 0); + assertTrue(comparator.compare(7, 6) == 0); + assertTrue(comparator.compare(7, 7) == 0); + assertTrue(comparator.compare(6, 0) > 0); + assertTrue(comparator.compare(6, 8) > 0); + assertTrue(comparator.compare(6, 3) > 0); + } + } + + @Test + public void testCompareFloat8() { + try (Float8Vector vec = new Float8Vector("", allocator)) { + vec.allocateNew(9); + ValueVectorDataPopulator.setVector( + vec, -1.1, 0.0, 1.0, null, 1.0, 2.0, Double.NaN, Double.NaN, Double.POSITIVE_INFINITY, + Double.NEGATIVE_INFINITY); + + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); + comparator.attachVector(vec); + + assertTrue(comparator.compare(0, 1) < 0); + assertTrue(comparator.compare(0, 2) < 0); + assertTrue(comparator.compare(2, 1) > 0); + + // test equality + assertTrue(comparator.compare(5, 5) == 0); + assertTrue(comparator.compare(2, 4) == 0); + + // null first + assertTrue(comparator.compare(3, 4) < 0); + assertTrue(comparator.compare(5, 3) > 0); + assertTrue(comparator.compare(8, 3) > 0); + + // NaN behavior. 
+ assertTrue(comparator.compare(6, 7) == 0); + assertTrue(comparator.compare(7, 6) == 0); + assertTrue(comparator.compare(7, 7) == 0); + assertTrue(comparator.compare(6, 0) > 0); + assertTrue(comparator.compare(6, 8) > 0); + assertTrue(comparator.compare(6, 3) > 0); + } + } + @Test public void testCompareLong() { try (BigIntVector vec = new BigIntVector("", allocator)) { @@ -393,6 +479,367 @@ public void testCompareByte() { } } + @Test + public void testCompareBit() { + try (BitVector vec = new BitVector("", allocator)) { + vec.allocateNew(6); + ValueVectorDataPopulator.setVector( + vec, 1, 2, 0, 0, -1, null); + + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); + comparator.attachVector(vec); + + assertTrue(comparator.compare(0, 1) == 0); + assertTrue(comparator.compare(0, 2) > 0); + assertTrue(comparator.compare(0, 4) == 0); + assertTrue(comparator.compare(2, 1) < 0); + assertTrue(comparator.compare(2, 4) < 0); + + // null first + assertTrue(comparator.compare(5, 0) < 0); + assertTrue(comparator.compare(5, 2) < 0); + } + } + + @Test + public void testCompareDateDay() { + try (DateDayVector vec = new DateDayVector("", allocator)) { + vec.allocateNew(8); + ValueVectorDataPopulator.setVector( + vec, -1, 0, 1, null, 1, 5, Integer.MIN_VALUE + 1, Integer.MAX_VALUE); + + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); + comparator.attachVector(vec); + + assertTrue(comparator.compare(0, 1) < 0); + assertTrue(comparator.compare(0, 2) < 0); + assertTrue(comparator.compare(2, 1) > 0); + + // test equality + assertTrue(comparator.compare(5, 5) == 0); + assertTrue(comparator.compare(2, 4) == 0); + + // null first + assertTrue(comparator.compare(3, 4) < 0); + assertTrue(comparator.compare(5, 3) > 0); + + // potential overflow + assertTrue(comparator.compare(6, 7) < 0); + assertTrue(comparator.compare(7, 6) > 0); + assertTrue(comparator.compare(7, 7) == 0); + } + } + + @Test + public void 
testCompareDateMilli() { + try (DateMilliVector vec = new DateMilliVector("", allocator)) { + vec.allocateNew(8); + ValueVectorDataPopulator.setVector( + vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); + + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); + comparator.attachVector(vec); + + assertTrue(comparator.compare(0, 1) < 0); + assertTrue(comparator.compare(0, 2) < 0); + assertTrue(comparator.compare(2, 1) > 0); + + // test equality + assertTrue(comparator.compare(5, 5) == 0); + assertTrue(comparator.compare(2, 4) == 0); + + // null first + assertTrue(comparator.compare(3, 4) < 0); + assertTrue(comparator.compare(5, 3) > 0); + + // potential overflow + assertTrue(comparator.compare(6, 7) < 0); + assertTrue(comparator.compare(7, 6) > 0); + assertTrue(comparator.compare(7, 7) == 0); + } + } + + @Test + public void testCompareDecimal() { + try (DecimalVector vec = new DecimalVector("", allocator, 10, 1)) { + vec.allocateNew(8); + ValueVectorDataPopulator.setVector( + vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); + + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); + comparator.attachVector(vec); + + assertTrue(comparator.compare(0, 1) < 0); + assertTrue(comparator.compare(0, 2) < 0); + assertTrue(comparator.compare(2, 1) > 0); + + // test equality + assertTrue(comparator.compare(5, 5) == 0); + assertTrue(comparator.compare(2, 4) == 0); + + // null first + assertTrue(comparator.compare(3, 4) < 0); + assertTrue(comparator.compare(5, 3) > 0); + + // potential overflow + assertTrue(comparator.compare(6, 7) < 0); + assertTrue(comparator.compare(7, 6) > 0); + assertTrue(comparator.compare(7, 7) == 0); + } + } + + @Test + public void testCompareDecimal256() { + try (Decimal256Vector vec = new Decimal256Vector("", allocator, 10, 1)) { + vec.allocateNew(8); + ValueVectorDataPopulator.setVector( + vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE 
+ 1L, Long.MAX_VALUE); + + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); + comparator.attachVector(vec); + + assertTrue(comparator.compare(0, 1) < 0); + assertTrue(comparator.compare(0, 2) < 0); + assertTrue(comparator.compare(2, 1) > 0); + + // test equality + assertTrue(comparator.compare(5, 5) == 0); + assertTrue(comparator.compare(2, 4) == 0); + + // null first + assertTrue(comparator.compare(3, 4) < 0); + assertTrue(comparator.compare(5, 3) > 0); + + // potential overflow + assertTrue(comparator.compare(6, 7) < 0); + assertTrue(comparator.compare(7, 6) > 0); + assertTrue(comparator.compare(7, 7) == 0); + } + } + + @Test + public void testCompareDuration() { + try (DurationVector vec = + new DurationVector("", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { + vec.allocateNew(8); + ValueVectorDataPopulator.setVector( + vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); + + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); + comparator.attachVector(vec); + + assertTrue(comparator.compare(0, 1) < 0); + assertTrue(comparator.compare(0, 2) < 0); + assertTrue(comparator.compare(2, 1) > 0); + + // test equality + assertTrue(comparator.compare(5, 5) == 0); + assertTrue(comparator.compare(2, 4) == 0); + + // null first + assertTrue(comparator.compare(3, 4) < 0); + assertTrue(comparator.compare(5, 3) > 0); + + // potential overflow + assertTrue(comparator.compare(6, 7) < 0); + assertTrue(comparator.compare(7, 6) > 0); + assertTrue(comparator.compare(7, 7) == 0); + } + } + + @Test + public void testCompareIntervalDay() { + try (IntervalDayVector vec = + new IntervalDayVector("", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { + vec.allocateNew(8); + vec.set(0, -1, 0); + vec.set(1, 0, 0); + vec.set(2, 1, 0); + vec.setNull(3); + vec.set(4, -1, -1); + vec.set(5, 1, 1); + vec.set(6, 1, 1); + vec.set(7, -1, 
-1); + + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); + comparator.attachVector(vec); + + assertTrue(comparator.compare(0, 1) < 0); + assertTrue(comparator.compare(0, 2) < 0); + assertTrue(comparator.compare(2, 1) > 0); + assertTrue(comparator.compare(2, 5) < 0); + assertTrue(comparator.compare(0, 4) > 0); + + // test equality + assertTrue(comparator.compare(5, 6) == 0); + assertTrue(comparator.compare(4, 7) == 0); + + // null first + assertTrue(comparator.compare(3, 4) < 0); + assertTrue(comparator.compare(5, 3) > 0); + } + } + + @Test + public void testCompareTimeMicro() { + try (TimeMicroVector vec = + new TimeMicroVector("", allocator)) { + vec.allocateNew(8); + ValueVectorDataPopulator.setVector( + vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); + + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); + comparator.attachVector(vec); + + assertTrue(comparator.compare(0, 1) < 0); + assertTrue(comparator.compare(0, 2) < 0); + assertTrue(comparator.compare(2, 1) > 0); + + // test equality + assertTrue(comparator.compare(5, 5) == 0); + assertTrue(comparator.compare(2, 4) == 0); + + // null first + assertTrue(comparator.compare(3, 4) < 0); + assertTrue(comparator.compare(5, 3) > 0); + + // potential overflow + assertTrue(comparator.compare(6, 7) < 0); + assertTrue(comparator.compare(7, 6) > 0); + assertTrue(comparator.compare(7, 7) == 0); + } + } + + @Test + public void testCompareTimeMilli() { + try (TimeMilliVector vec = new TimeMilliVector("", allocator)) { + vec.allocateNew(8); + ValueVectorDataPopulator.setVector( + vec, -1, 0, 1, null, 1, 5, Integer.MIN_VALUE + 1, Integer.MAX_VALUE); + + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); + comparator.attachVector(vec); + + assertTrue(comparator.compare(0, 1) < 0); + assertTrue(comparator.compare(0, 2) < 0); + assertTrue(comparator.compare(2, 1) > 0); + + // test 
equality + assertTrue(comparator.compare(5, 5) == 0); + assertTrue(comparator.compare(2, 4) == 0); + + // null first + assertTrue(comparator.compare(3, 4) < 0); + assertTrue(comparator.compare(5, 3) > 0); + + // potential overflow + assertTrue(comparator.compare(6, 7) < 0); + assertTrue(comparator.compare(7, 6) > 0); + assertTrue(comparator.compare(7, 7) == 0); + } + } + + @Test + public void testCompareTimeNano() { + try (TimeNanoVector vec = + new TimeNanoVector("", allocator)) { + vec.allocateNew(8); + ValueVectorDataPopulator.setVector( + vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); + + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); + comparator.attachVector(vec); + + assertTrue(comparator.compare(0, 1) < 0); + assertTrue(comparator.compare(0, 2) < 0); + assertTrue(comparator.compare(2, 1) > 0); + + // test equality + assertTrue(comparator.compare(5, 5) == 0); + assertTrue(comparator.compare(2, 4) == 0); + + // null first + assertTrue(comparator.compare(3, 4) < 0); + assertTrue(comparator.compare(5, 3) > 0); + + // potential overflow + assertTrue(comparator.compare(6, 7) < 0); + assertTrue(comparator.compare(7, 6) > 0); + assertTrue(comparator.compare(7, 7) == 0); + } + } + + @Test + public void testCompareTimeSec() { + try (TimeSecVector vec = new TimeSecVector("", allocator)) { + vec.allocateNew(8); + ValueVectorDataPopulator.setVector( + vec, -1, 0, 1, null, 1, 5, Integer.MIN_VALUE + 1, Integer.MAX_VALUE); + + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); + comparator.attachVector(vec); + + assertTrue(comparator.compare(0, 1) < 0); + assertTrue(comparator.compare(0, 2) < 0); + assertTrue(comparator.compare(2, 1) > 0); + + // test equality + assertTrue(comparator.compare(5, 5) == 0); + assertTrue(comparator.compare(2, 4) == 0); + + // null first + assertTrue(comparator.compare(3, 4) < 0); + assertTrue(comparator.compare(5, 3) > 0); + + // 
potential overflow + assertTrue(comparator.compare(6, 7) < 0); + assertTrue(comparator.compare(7, 6) > 0); + assertTrue(comparator.compare(7, 7) == 0); + } + } + + @Test + public void testCompareTimeStamp() { + try (TimeStampMilliVector vec = + new TimeStampMilliVector("", allocator)) { + vec.allocateNew(8); + ValueVectorDataPopulator.setVector( + vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); + + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); + comparator.attachVector(vec); + + assertTrue(comparator.compare(0, 1) < 0); + assertTrue(comparator.compare(0, 2) < 0); + assertTrue(comparator.compare(2, 1) > 0); + + // test equality + assertTrue(comparator.compare(5, 5) == 0); + assertTrue(comparator.compare(2, 4) == 0); + + // null first + assertTrue(comparator.compare(3, 4) < 0); + assertTrue(comparator.compare(5, 3) > 0); + + // potential overflow + assertTrue(comparator.compare(6, 7) < 0); + assertTrue(comparator.compare(7, 6) > 0); + assertTrue(comparator.compare(7, 7) == 0); + } + } + @Test public void testCheckNullsOnCompareIsFalseForNonNullableVector() { try (IntVector vec = new IntVector("not nullable", diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/PerformanceTestServer.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/PerformanceTestServer.java index 319aee445dca6..0ded2f7065f9c 100644 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/PerformanceTestServer.java +++ b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/PerformanceTestServer.java @@ -18,7 +18,6 @@ package org.apache.arrow.flight.perf; import java.io.IOException; -import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; import java.util.concurrent.ExecutorService; @@ -115,7 +114,7 @@ public void getStream(CallContext context, Ticket ticket, try { Token token = Token.parseFrom(ticket.getBytes()); Perf perf = 
token.getDefinition(); - Schema schema = Schema.deserialize(ByteBuffer.wrap(perf.getSchema().toByteArray())); + Schema schema = Schema.deserializeMessage(perf.getSchema().asReadOnlyByteBuffer()); root = VectorSchemaRoot.create(schema, allocator); BigIntVector a = (BigIntVector) root.getVector("a"); BigIntVector b = (BigIntVector) root.getVector("b"); diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/TestPerf.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/TestPerf.java index a7af8b713097d..17c83c205feb0 100644 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/TestPerf.java +++ b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/TestPerf.java @@ -65,7 +65,8 @@ public static FlightDescriptor getPerfFlightDescriptor(long recordCount, int rec Field.nullable("d", MinorType.BIGINT.getType()) )); - ByteString serializedSchema = ByteString.copyFrom(pojoSchema.toByteArray()); + byte[] bytes = pojoSchema.serializeAsMessage(); + ByteString serializedSchema = ByteString.copyFrom(bytes); return FlightDescriptor.command(Perf.newBuilder() .setRecordsPerStream(recordCount) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java index 4ccee50d6805a..70a895ff40496 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java @@ -154,10 +154,20 @@ public BigDecimal getObject(int index) { if (isSet(index) == 0) { return null; } else { - return DecimalUtility.getBigDecimalFromArrowBuf(valueBuffer, index, scale, TYPE_WIDTH); + return getObjectNotNull(index); } } + /** + * Same as {@link #getObject(int)}, but does not check for null. 
+ * + * @param index position of element + * @return element at given index + */ + public BigDecimal getObjectNotNull(int index) { + return DecimalUtility.getBigDecimalFromArrowBuf(valueBuffer, index, scale, TYPE_WIDTH); + } + /** * Return precision for the decimal value. */ diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java index db04563df24d7..6a3ec60afc52e 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java @@ -153,10 +153,20 @@ public BigDecimal getObject(int index) { if (isSet(index) == 0) { return null; } else { - return DecimalUtility.getBigDecimalFromArrowBuf(valueBuffer, index, scale, TYPE_WIDTH); + return getObjectNotNull(index); } } + /** + * Same as {@link #getObject(int)} but does not check for null. + * + * @param index position of element + * @return element at given index + */ + public BigDecimal getObjectNotNull(int index) { + return DecimalUtility.getBigDecimalFromArrowBuf(valueBuffer, index, scale, TYPE_WIDTH); + } + /** * Return precision for the decimal value. */ diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java index 1e1db0d1c3c5f..b6abc16194b77 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java @@ -147,11 +147,21 @@ public Duration getObject(int index) { if (isSet(index) == 0) { return null; } else { - final long value = get(valueBuffer, index); - return toDuration(value, unit); + return getObjectNotNull(index); } } + /** + * Same as {@link #getObject(int)} but does not check for null. 
+ * + * @param index position of element + * @return element at given index + */ + public Duration getObjectNotNull(int index) { + final long value = get(valueBuffer, index); + return toDuration(value, unit); + } + /** * Converts the given value and unit to the appropriate {@link Duration}. */ diff --git a/java/vector/src/main/java/org/apache/arrow/vector/IntervalDayVector.java b/java/vector/src/main/java/org/apache/arrow/vector/IntervalDayVector.java index 35312ba7c96a1..7c0d19baa9a6f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/IntervalDayVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/IntervalDayVector.java @@ -168,13 +168,23 @@ public Duration getObject(int index) { if (isSet(index) == 0) { return null; } else { - final long startIndex = (long) index * TYPE_WIDTH; - final int days = valueBuffer.getInt(startIndex); - final int milliseconds = valueBuffer.getInt(startIndex + MILLISECOND_OFFSET); - return Duration.ofDays(days).plusMillis(milliseconds); + return getObjectNotNull(index); } } + /** + * Same as {@link #getObject(int)} but does not check for null. + * + * @param index position of element + * @return element at given index + */ + public Duration getObjectNotNull(int index) { + final long startIndex = (long) index * TYPE_WIDTH; + final int days = valueBuffer.getInt(startIndex); + final int milliseconds = valueBuffer.getInt(startIndex + MILLISECOND_OFFSET); + return Duration.ofDays(days).plusMillis(milliseconds); + } + /** * Get the Interval value at a given index as a {@link StringBuilder} object. 
* diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java index 2b3db1fb7de43..dcffea0ef5367 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java @@ -20,9 +20,11 @@ import static org.apache.arrow.vector.types.pojo.Field.convertField; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.channels.Channels; import java.util.AbstractMap; import java.util.ArrayList; import java.util.Collections; @@ -36,7 +38,10 @@ import org.apache.arrow.flatbuf.KeyValue; import org.apache.arrow.util.Collections2; import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.ipc.ReadChannel; +import org.apache.arrow.vector.ipc.WriteChannel; import org.apache.arrow.vector.ipc.message.FBSerializables; +import org.apache.arrow.vector.ipc.message.MessageSerializer; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; @@ -47,6 +52,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import com.fasterxml.jackson.databind.ObjectWriter; +import com.fasterxml.jackson.databind.util.ByteBufferBackedInputStream; import com.google.flatbuffers.FlatBufferBuilder; /** @@ -83,10 +89,30 @@ public static Schema fromJSON(String json) throws IOException { return reader.readValue(Preconditions.checkNotNull(json)); } + /** + * Deserialize a schema that has been serialized using {@link #toByteArray()}. + * @param buffer the bytes to deserialize. + * @return The deserialized schema. 
+ */ + @Deprecated public static Schema deserialize(ByteBuffer buffer) { return convertSchema(org.apache.arrow.flatbuf.Schema.getRootAsSchema(buffer)); } + /** + * Deserialize a schema that has been serialized as a message using {@link #serializeAsMessage()}. + * @param buffer the bytes to deserialize. + * @return The deserialized schema. + */ + public static Schema deserializeMessage(ByteBuffer buffer) { + ByteBufferBackedInputStream stream = new ByteBufferBackedInputStream(buffer); + try (ReadChannel channel = new ReadChannel(Channels.newChannel(stream))) { + return MessageSerializer.deserializeSchema(channel); + } catch (IOException ex) { + throw new RuntimeException(ex); + } + } + /** Converts a flatbuffer schema to its POJO representation. */ public static Schema convertSchema(org.apache.arrow.flatbuf.Schema schema) { List fields = new ArrayList<>(); @@ -217,9 +243,27 @@ public int getSchema(FlatBufferBuilder builder) { return org.apache.arrow.flatbuf.Schema.endSchema(builder); } + /** + * Returns the serialized flatbuffer bytes of the schema wrapped in a message table. + * Use {@link #deserializeMessage(ByteBuffer)} to rebuild the Schema. + */ + public byte[] serializeAsMessage() { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + try (WriteChannel channel = new WriteChannel(Channels.newChannel(out))) { + long size = MessageSerializer.serialize( + new WriteChannel(Channels.newChannel(out)), this); + return out.toByteArray(); + } catch (IOException ex) { + throw new RuntimeException(ex); + } + } + /** * Returns the serialized flatbuffer representation of this schema. + * @deprecated This method does not encapsulate the schema in a Message payload which is incompatible with other + * languages. Use {@link #serializeAsMessage()} instead. 
*/ + @Deprecated public byte[] toByteArray() { FlatBufferBuilder builder = new FlatBufferBuilder(); int schemaOffset = this.getSchema(builder); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java index 15d6a5cf993c4..f9f0357861c15 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java @@ -19,6 +19,7 @@ import static org.junit.Assert.assertEquals; +import java.math.BigDecimal; import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map; @@ -29,6 +30,7 @@ import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.DateDayVector; import org.apache.arrow.vector.DateMilliVector; +import org.apache.arrow.vector.Decimal256Vector; import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.DurationVector; import org.apache.arrow.vector.FixedSizeBinaryVector; @@ -147,6 +149,34 @@ public static void setVector(DecimalVector vector, Long... values) { vector.setValueCount(length); } + /** + * Populate values for Decimal256Vector. + */ + public static void setVector(Decimal256Vector vector, Long... values) { + final int length = values.length; + vector.allocateNew(length); + for (int i = 0; i < length; i++) { + if (values[i] != null) { + vector.set(i, values[i]); + } + } + vector.setValueCount(length); + } + + /** + * Populate values for Decimal256Vector. + */ + public static void setVector(Decimal256Vector vector, BigDecimal... values) { + final int length = values.length; + vector.allocateNew(length); + for (int i = 0; i < length; i++) { + if (values[i] != null) { + vector.set(i, values[i]); + } + } + vector.setValueCount(length); + } + /** * Populate values for DurationVector. 
* @param values values of elapsed time in either seconds, milliseconds, microseconds or nanoseconds. diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java index 0e5375865a8bd..7b62247c6e12d 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java @@ -24,6 +24,7 @@ import static org.junit.Assert.assertTrue; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.HashMap; import java.util.Map; @@ -216,6 +217,35 @@ public void testMetadata() throws IOException { contains(schema, "\"" + METADATA_KEY + "\" : \"testKey\"", "\"" + METADATA_VALUE + "\" : \"testValue\""); } + @Test + public void testMessageSerialization() { + Schema schema = new Schema(asList( + field("a", false, new Null()), + field("b", new Struct(), field("ba", new Null())), + field("c", new List(), field("ca", new Null())), + field("d", new Union(UnionMode.Sparse, new int[] {1, 2, 3}), field("da", new Null())), + field("e", new Int(8, true)), + field("f", new FloatingPoint(FloatingPointPrecision.SINGLE)), + field("g", new Utf8()), + field("h", new Binary()), + field("i", new Bool()), + field("j", new Decimal(5, 5, 128)), + field("k", new Date(DateUnit.DAY)), + field("l", new Date(DateUnit.MILLISECOND)), + field("m", new Time(TimeUnit.SECOND, 32)), + field("n", new Time(TimeUnit.MILLISECOND, 32)), + field("o", new Time(TimeUnit.MICROSECOND, 64)), + field("p", new Time(TimeUnit.NANOSECOND, 64)), + field("q", new Timestamp(TimeUnit.MILLISECOND, "UTC")), + field("r", new Timestamp(TimeUnit.MICROSECOND, null)), + field("s", new Interval(IntervalUnit.DAY_TIME)), + field("t", new FixedSizeBinary(100)), + field("u", new Duration(TimeUnit.SECOND)), + field("v", new Duration(TimeUnit.MICROSECOND)) + )); + roundTripMessage(schema); + } + private void roundTrip(Schema 
schema) throws IOException { String json = schema.toJson(); Schema actual = Schema.fromJSON(json); @@ -225,6 +255,15 @@ private void roundTrip(Schema schema) throws IOException { assertEquals(schema.hashCode(), actual.hashCode()); } + private void roundTripMessage(Schema schema) { + byte[] bytes = schema.serializeAsMessage(); + Schema actual = Schema.deserializeMessage(ByteBuffer.wrap(bytes)); + assertEquals(schema.toJson(), actual.toJson()); + assertEquals(schema, actual); + validateFieldsHashcode(schema.getFields(), actual.getFields()); + assertEquals(schema.hashCode(), actual.hashCode()); + } + private void validateFieldsHashcode(java.util.List schemaFields, java.util.List actualFields) { assertEquals(schemaFields.size(), actualFields.size()); if (schemaFields.size() == 0) { diff --git a/js/.eslintrc.cjs b/js/.eslintrc.cjs index b629b862190f4..8a36516eec1c0 100644 --- a/js/.eslintrc.cjs +++ b/js/.eslintrc.cjs @@ -23,7 +23,7 @@ module.exports = { }, parser: "@typescript-eslint/parser", parserOptions: { - project: "tsconfig.json", + project: ["tsconfig.json", "tsconfig/tsconfig.bin.cjs.json"], sourceType: "module", ecmaVersion: 2020, }, diff --git a/js/gulp/arrow-task.js b/js/gulp/arrow-task.js index 411a817ddc09e..2de20947dc2f5 100644 --- a/js/gulp/arrow-task.js +++ b/js/gulp/arrow-task.js @@ -15,19 +15,18 @@ // specific language governing permissions and limitations // under the License. 
-import { targetDir, observableFromStreams } from './util.js'; +import { mainExport, targetDir, observableFromStreams } from './util.js'; -import { deleteAsync as del } from 'del'; import gulp from 'gulp'; +import path from 'path'; import { mkdirp } from 'mkdirp'; +import * as fs from 'fs/promises'; import gulpRename from 'gulp-rename'; import gulpReplace from 'gulp-replace'; import { memoizeTask } from './memoize-task.js'; import { ReplaySubject, forkJoin as ObservableForkJoin } from 'rxjs'; import { share } from 'rxjs/operators'; -import util from 'util'; -import stream from 'stream'; -const pipeline = util.promisify(stream.pipeline); +import { pipeline } from 'stream/promises'; export const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target) { const out = targetDir(target); @@ -54,9 +53,20 @@ export const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target }))({}); export const arrowTSTask = ((cache) => memoizeTask(cache, async function copyTS(target, format) { + const umd = targetDir(`es5`, `umd`); const out = targetDir(target, format); - await mkdirp(out); - await pipeline(gulp.src(`src/**/*`), gulp.dest(out)); - await del(`${out}/**/*.js`); -}))({}); + const arrowUMD = path.join(umd, `${mainExport}.js`); + const arrow2csvUMD = path.join(umd, `bin`, `arrow2csv.js`); + + await mkdirp(path.join(out, 'bin')); + await Promise.all([ + pipeline(gulp.src(`src/**/*`), gulp.dest(out)), + pipeline( + gulp.src([arrowUMD, arrow2csvUMD]), + gulpReplace(`../${mainExport}.js`, `./${mainExport}.js`), + gulp.dest(path.join(out, 'bin')) + ), + fs.writeFile(path.join(out, 'bin', 'package.json'), '{"type": "commonjs"}') + ]); +}))({}); diff --git a/js/gulp/typescript-task.js b/js/gulp/typescript-task.js index 02192192327ad..31769e3b1b236 100644 --- a/js/gulp/typescript-task.js +++ b/js/gulp/typescript-task.js @@ -19,12 +19,13 @@ import { targetDir, tsconfigName, observableFromStreams, shouldRunInChildProcess import gulp from 'gulp'; import path 
from 'path'; -import ts from 'gulp-typescript'; import tsc from 'typescript'; +import ts from 'gulp-typescript'; +import * as fs from 'fs/promises'; import sourcemaps from 'gulp-sourcemaps'; import { memoizeTask } from './memoize-task.js'; -import { ReplaySubject, forkJoin as ObservableForkJoin } from 'rxjs'; -import { mergeWith, takeLast, share } from 'rxjs/operators'; +import { ReplaySubject, forkJoin as ObservableForkJoin, defer as ObservableDefer } from 'rxjs'; +import { mergeWith, takeLast, share, concat } from 'rxjs/operators'; export const typescriptTask = ((cache) => memoizeTask(cache, function typescript(target, format) { if (shouldRunInChildProcess(target, format)) { @@ -44,10 +45,15 @@ export default typescriptTask; export function compileBinFiles(target, format) { const out = targetDir(target, format); const tsconfigPath = path.join(`tsconfig`, `tsconfig.${tsconfigName('bin', 'cjs')}.json`); - return compileTypescript(path.join(out, 'bin'), tsconfigPath, { target }); + const tsconfigOverrides = format === 'esm' ? 
{ target, module: 'ES2015' } : { target }; + return compileTypescript(out, tsconfigPath, tsconfigOverrides, false) + .pipe(takeLast(1)) + .pipe(concat(ObservableDefer(() => { + return fs.chmod(path.join(out, 'bin', 'arrow2csv.js'), 0o755); + }))); } -function compileTypescript(out, tsconfigPath, tsconfigOverrides) { +function compileTypescript(out, tsconfigPath, tsconfigOverrides, writeSourcemaps = true) { const tsProject = ts.createProject(tsconfigPath, { typescript: tsc, ...tsconfigOverrides }); const { stream: { js, dts } } = observableFromStreams( tsProject.src(), sourcemaps.init(), @@ -56,7 +62,15 @@ function compileTypescript(out, tsconfigPath, tsconfigOverrides) { const writeSources = observableFromStreams(tsProject.src(), gulp.dest(path.join(out, 'src'))); const writeDTypes = observableFromStreams(dts, sourcemaps.write('./', { includeContent: false, sourceRoot: './src' }), gulp.dest(out)); const mapFile = tsProject.options.module === tsc.ModuleKind.ES2015 ? esmMapFile : cjsMapFile; - const writeJS = observableFromStreams(js, sourcemaps.write('./', { mapFile, includeContent: false, sourceRoot: './src' }), gulp.dest(out)); + const writeJSArgs = writeSourcemaps ? 
[ + js, + sourcemaps.write('./', { mapFile, includeContent: false, sourceRoot: './src' }), + gulp.dest(out) + ] : [ + js, + gulp.dest(out) + ]; + const writeJS = observableFromStreams(...writeJSArgs); return ObservableForkJoin([writeSources, writeDTypes, writeJS]); } diff --git a/js/gulpfile.js b/js/gulpfile.js index 6544b987b73f6..bf84a4a9e1b49 100644 --- a/js/gulpfile.js +++ b/js/gulpfile.js @@ -54,6 +54,10 @@ knownTargets.forEach((target) => { )); }); +gulp.task(`build:ts`, gulp.series( + `build:es5:umd`, `clean:ts`, `compile:ts`, `package:ts` +)); + // The main "apache-arrow" module builds the es2015/umd, es2015/cjs, // es2015/esm, and esnext/umd targets, then copies and renames the // compiled output into the apache-arrow folder diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts index dc44e10b9206f..4a6394c266b1b 100644 --- a/js/src/Arrow.ts +++ b/js/src/Arrow.ts @@ -99,6 +99,7 @@ import * as util_bit_ from './util/bit.js'; import * as util_math_ from './util/math.js'; import * as util_buffer_ from './util/buffer.js'; import * as util_vector_ from './util/vector.js'; +import * as util_pretty_ from './util/pretty.js'; import { compareSchemas, compareFields, compareTypes } from './visitor/typecomparator.js'; /** @ignore */ @@ -109,6 +110,7 @@ export const util = { ...util_math_, ...util_buffer_, ...util_vector_, + ...util_pretty_, compareSchemas, compareFields, compareTypes, diff --git a/js/src/bin/arrow2csv.ts b/js/src/bin/arrow2csv.ts old mode 100644 new mode 100755 index eae7f5805c41c..39db8c17497cd --- a/js/src/bin/arrow2csv.ts +++ b/js/src/bin/arrow2csv.ts @@ -21,8 +21,7 @@ import * as fs from 'fs'; import * as stream from 'stream'; -import { valueToString } from '../util/pretty.js'; -import { Schema, RecordBatch, RecordBatchReader, AsyncByteQueue } from '../Arrow.node.js'; +import { Schema, RecordBatch, RecordBatchReader, AsyncByteQueue, util } from '../Arrow.js'; import commandLineUsage from 'command-line-usage'; import commandLineArgs from 
'command-line-args'; @@ -58,9 +57,10 @@ type ToStringState = { if (state.closed) { break; } for await (reader of recordBatchReaders(source)) { hasReaders = true; - const transformToString = batchesToString(state, reader.schema); + const batches = stream.Readable.from(reader); + const toString = batchesToString(state, reader.schema); await pipeTo( - reader.pipe(transformToString), + batches.pipe(toString), process.stdout, { end: false } ).catch(() => state.closed = true); // Handle EPIPE errors } @@ -129,7 +129,7 @@ function batchesToString(state: ToStringState, schema: Schema) { let maxColWidths = [10]; const { hr, sep, metadata } = state; - const header = ['row_id', ...schema.fields.map((f) => `${f}`)].map(val => valueToString(val)); + const header = ['row_id', ...schema.fields.map((f) => `${f}`)].map(val => util.valueToString(val)); state.maxColWidths = header.map((x, i) => Math.max(maxColWidths[i] || 0, x.length)); @@ -181,7 +181,7 @@ function batchesToString(state: ToStringState, schema: Schema) { if (rowId % 350 === 0) { this.push(`${formatRow(header, maxColWidths, sep)}\n`); } - this.push(`${formatRow([rowId++, ...row.toArray()].map(v => valueToString(v)), maxColWidths, sep)}\n`); + this.push(`${formatRow([rowId++, ...row.toArray()].map(v => util.valueToString(v)), maxColWidths, sep)}\n`); } } cb(); @@ -202,7 +202,7 @@ function formatMetadataValue(value = '') { try { parsed = JSON.stringify(JSON.parse(value), null, 2); } catch { parsed = value; } - return valueToString(parsed).split('\n').join('\n '); + return util.valueToString(parsed).split('\n').join('\n '); } function formatMetadata(metadata: Map) { @@ -236,7 +236,7 @@ function measureColumnWidths(rowId: number, batch: RecordBatch, maxColWidths: nu (val.length * elementWidth) // width of stringified 2^N-1 ); } else { - maxColWidths[j + 1] = Math.max(maxColWidths[j + 1] || 0, valueToString(val).length); + maxColWidths[j + 1] = Math.max(maxColWidths[j + 1] || 0, util.valueToString(val).length); } ++j; } 
diff --git a/js/tsconfig/tsconfig.base.json b/js/tsconfig/tsconfig.base.json index fb4ecb38b5892..0d7fefd90949f 100644 --- a/js/tsconfig/tsconfig.base.json +++ b/js/tsconfig/tsconfig.base.json @@ -1,5 +1,5 @@ { - "exclude": ["../node_modules"], + "exclude": ["../node_modules", "../src/bin/*.ts"], "include": ["../src/**/*.ts"], "compileOnSave": false, "compilerOptions": { diff --git a/matlab/src/cpp/arrow/matlab/error/error.h b/matlab/src/cpp/arrow/matlab/error/error.h index 2b3009d51eb5a..4ff77da8d8360 100644 --- a/matlab/src/cpp/arrow/matlab/error/error.h +++ b/matlab/src/cpp/arrow/matlab/error/error.h @@ -174,10 +174,7 @@ namespace arrow::matlab::error { static const char* INVALID_TIME_UNIT = "arrow:type:InvalidTimeUnit"; static const char* FIELD_FAILED_TO_CREATE_TYPE_PROXY = "arrow:field:FailedToCreateTypeProxy"; static const char* ARRAY_FAILED_TO_CREATE_TYPE_PROXY = "arrow:array:FailedToCreateTypeProxy"; - static const char* ARROW_TABULAR_SCHEMA_INVALID_NUMERIC_FIELD_INDEX = "arrow:tabular:schema:InvalidNumericFieldIndex"; - static const char* ARROW_TABULAR_SCHEMA_UNKNOWN_FIELD_NAME = "arrow:tabular:schema:UnknownFieldName"; static const char* ARROW_TABULAR_SCHEMA_AMBIGUOUS_FIELD_NAME = "arrow:tabular:schema:AmbiguousFieldName"; - static const char* ARROW_TABULAR_SCHEMA_NUMERIC_FIELD_INDEX_WITH_EMPTY_SCHEMA = "arrow:tabular:schema:NumericFieldIndexWithEmptySchema"; static const char* UNKNOWN_PROXY_FOR_ARRAY_TYPE = "arrow:array:UnknownProxyForArrayType"; static const char* RECORD_BATCH_NUMERIC_INDEX_WITH_EMPTY_RECORD_BATCH = "arrow:tabular:recordbatch:NumericIndexWithEmptyRecordBatch"; static const char* RECORD_BATCH_INVALID_NUMERIC_COLUMN_INDEX = "arrow:tabular:recordbatch:InvalidNumericColumnIndex"; @@ -195,6 +192,7 @@ namespace arrow::matlab::error { static const char* CHUNKED_ARRAY_MAKE_FAILED = "arrow:chunkedarray:MakeFailed"; static const char* CHUNKED_ARRAY_NUMERIC_INDEX_WITH_EMPTY_CHUNKED_ARRAY = "arrow:chunkedarray:NumericIndexWithEmptyChunkedArray"; 
static const char* CHUNKED_ARRAY_INVALID_NUMERIC_CHUNK_INDEX = "arrow:chunkedarray:InvalidNumericChunkIndex"; - - + + static const char* INDEX_EMPTY_CONTAINER = "arrow:index:EmptyContainer"; + static const char* INDEX_OUT_OF_RANGE = "arrow:index:OutOfRange"; } diff --git a/matlab/src/cpp/arrow/matlab/index/validate.cc b/matlab/src/cpp/arrow/matlab/index/validate.cc new file mode 100644 index 0000000000000..b24653f1b814c --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/index/validate.cc @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/matlab/index/validate.h" + +#include + +namespace arrow::matlab::index { + + namespace { + std::string makeEmptyContainerErrorMessage() { + return "Numeric indexing using the field method is not supported for objects with zero fields."; + } + + std::string makeIndexOutOfRangeErrorMessage(const int32_t matlab_index, const int32_t num_fields) { + std::stringstream error_message_stream; + error_message_stream << "Invalid field index: "; + // matlab uses 1-based indexing + error_message_stream << matlab_index; + error_message_stream << ". 
Field index must be between 1 and the number of fields ("; + error_message_stream << num_fields; + error_message_stream << ")."; + return error_message_stream.str(); + } + } // anonymous namespace + + arrow::Status validateNonEmptyContainer(const int32_t num_fields) { + if (num_fields == 0) { + const auto msg = makeEmptyContainerErrorMessage(); + return arrow::Status::Invalid(std::move(msg)); + } + return arrow::Status::OK(); + } + + arrow::Status validateInRange(const int32_t matlab_index, const int32_t num_fields) { + if (matlab_index < 1 || matlab_index > num_fields) { + const auto msg = makeIndexOutOfRangeErrorMessage(matlab_index, num_fields); + return arrow::Status::Invalid(std::move(msg)); + } + return arrow::Status::OK(); + } +} \ No newline at end of file diff --git a/matlab/src/cpp/arrow/matlab/index/validate.h b/matlab/src/cpp/arrow/matlab/index/validate.h new file mode 100644 index 0000000000000..40e109c19e9ef --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/index/validate.h @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "arrow/status.h" + +namespace arrow::matlab::index { + + arrow::Status validateNonEmptyContainer(const int32_t num_fields); + arrow::Status validateInRange(const int32_t matlab_index, const int32_t num_fields); +} \ No newline at end of file diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc index 4035725f2b382..ebeb020a9e7c7 100644 --- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc +++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc @@ -33,6 +33,7 @@ #include "arrow/matlab/type/proxy/date64_type.h" #include "arrow/matlab/type/proxy/time32_type.h" #include "arrow/matlab/type/proxy/time64_type.h" +#include "arrow/matlab/type/proxy/struct_type.h" #include "arrow/matlab/type/proxy/field.h" #include "arrow/matlab/io/feather/proxy/writer.h" #include "arrow/matlab/io/feather/proxy/reader.h" @@ -81,6 +82,7 @@ libmexclass::proxy::MakeResult Factory::make_proxy(const ClassName& class_name, REGISTER_PROXY(arrow.type.proxy.Time64Type , arrow::matlab::type::proxy::Time64Type); REGISTER_PROXY(arrow.type.proxy.Date32Type , arrow::matlab::type::proxy::Date32Type); REGISTER_PROXY(arrow.type.proxy.Date64Type , arrow::matlab::type::proxy::Date64Type); + REGISTER_PROXY(arrow.type.proxy.StructType , arrow::matlab::type::proxy::StructType); REGISTER_PROXY(arrow.io.feather.proxy.Writer , arrow::matlab::io::feather::proxy::Writer); REGISTER_PROXY(arrow.io.feather.proxy.Reader , arrow::matlab::io::feather::proxy::Reader); diff --git a/matlab/src/cpp/arrow/matlab/tabular/proxy/schema.cc b/matlab/src/cpp/arrow/matlab/tabular/proxy/schema.cc index 62fe863ca8b5f..ec1ac1eecb2fd 100644 --- a/matlab/src/cpp/arrow/matlab/tabular/proxy/schema.cc +++ b/matlab/src/cpp/arrow/matlab/tabular/proxy/schema.cc @@ -18,6 +18,7 @@ #include "arrow/matlab/error/error.h" #include "arrow/matlab/tabular/proxy/schema.h" #include "arrow/matlab/type/proxy/field.h" +#include "arrow/matlab/index/validate.h" #include 
"libmexclass/proxy/ProxyManager.h" #include "libmexclass/error/Error.h" @@ -28,25 +29,6 @@ namespace arrow::matlab::tabular::proxy { - namespace { - - libmexclass::error::Error makeUnknownFieldNameError(const std::string& name) { - using namespace libmexclass::error; - std::stringstream error_message_stream; - error_message_stream << "Unknown field name: '"; - error_message_stream << name; - error_message_stream << "'."; - return Error{error::ARROW_TABULAR_SCHEMA_UNKNOWN_FIELD_NAME, error_message_stream.str()}; - } - - libmexclass::error::Error makeEmptySchemaError() { - using namespace libmexclass::error; - return Error{error::ARROW_TABULAR_SCHEMA_NUMERIC_FIELD_INDEX_WITH_EMPTY_SCHEMA, - "Numeric indexing using the field method is not supported for schemas with no fields."}; - } - - } - Schema::Schema(std::shared_ptr schema) : schema{std::move(schema)} { REGISTER_METHOD(Schema, getFieldByIndex); REGISTER_METHOD(Schema, getFieldByName); @@ -86,37 +68,27 @@ namespace arrow::matlab::tabular::proxy { mda::StructArray args = context.inputs[0]; const mda::TypedArray index_mda = args[0]["Index"]; const auto matlab_index = int32_t(index_mda[0]); - // Note: MATLAB uses 1-based indexing, so subtract 1. - // arrow::Schema::field does not do any bounds checking. 
- const int32_t index = matlab_index - 1; - const auto num_fields = schema->num_fields(); - if (num_fields == 0) { - const auto& error = makeEmptySchemaError(); - context.error = error; - return; - } + // Validate there is at least 1 field + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT( + index::validateNonEmptyContainer(schema->num_fields()), + context, + error::INDEX_EMPTY_CONTAINER); - if (matlab_index < 1 || matlab_index > num_fields) { - using namespace libmexclass::error; - const std::string& error_message_id = std::string{error::ARROW_TABULAR_SCHEMA_INVALID_NUMERIC_FIELD_INDEX}; - std::stringstream error_message_stream; - error_message_stream << "Invalid field index: "; - error_message_stream << matlab_index; - error_message_stream << ". Field index must be between 1 and the number of fields ("; - error_message_stream << num_fields; - error_message_stream << ")."; - const std::string& error_message = error_message_stream.str(); - context.error = Error{error_message_id, error_message}; - return; - } + // Validate the matlab index provided is within the range [1, num_fields] + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT( + index::validateInRange(matlab_index, schema->num_fields()), + context, + error::INDEX_OUT_OF_RANGE); - const auto& field = schema->field(index); - auto field_proxy = std::make_shared(field); - const auto field_proxy_id = ProxyManager::manageProxy(field_proxy); - const auto field_proxy_id_mda = factory.createScalar(field_proxy_id); + // Note: MATLAB uses 1-based indexing, so subtract 1. + // arrow::Schema::field does not do any bounds checking. 
+ const int32_t index = matlab_index - 1; - context.outputs[0] = field_proxy_id_mda; + auto field = schema->field(index); + auto field_proxy = std::make_shared(std::move(field)); + auto field_proxy_id = ProxyManager::manageProxy(field_proxy); + context.outputs[0] = factory.createScalar(field_proxy_id); } void Schema::getFieldByName(libmexclass::proxy::method::Context& context) { @@ -135,9 +107,7 @@ namespace arrow::matlab::tabular::proxy { const auto field = schema->GetFieldByName(name); auto field_proxy = std::make_shared(field); const auto field_proxy_id = ProxyManager::manageProxy(field_proxy); - const auto field_proxy_id_mda = factory.createScalar(field_proxy_id); - - context.outputs[0] = field_proxy_id_mda; + context.outputs[0] = factory.createScalar(field_proxy_id); } void Schema::getNumFields(libmexclass::proxy::method::Context& context) { diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/struct_type.cc b/matlab/src/cpp/arrow/matlab/type/proxy/struct_type.cc new file mode 100644 index 0000000000000..fbb8dc3f6edbe --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/struct_type.cc @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/matlab/type/proxy/struct_type.h" +#include "arrow/matlab/type/proxy/field.h" +#include "libmexclass/proxy/ProxyManager.h" + +namespace arrow::matlab::type::proxy { + + StructType::StructType(std::shared_ptr struct_type) : Type(std::move(struct_type)) {} + + libmexclass::proxy::MakeResult StructType::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + namespace mda = ::matlab::data; + using StructTypeProxy = arrow::matlab::type::proxy::StructType; + + mda::StructArray args = constructor_arguments[0]; + const mda::TypedArray field_proxy_ids_mda = args[0]["FieldProxyIDs"]; + + std::vector> fields; + fields.reserve(field_proxy_ids_mda.getNumberOfElements()); + for (const auto proxy_id : field_proxy_ids_mda) { + using namespace libmexclass::proxy; + auto proxy = std::static_pointer_cast(ProxyManager::getProxy(proxy_id)); + auto field = proxy->unwrap(); + fields.push_back(field); + } + + auto struct_type = std::static_pointer_cast(arrow::struct_(fields)); + return std::make_shared(std::move(struct_type)); + } +} \ No newline at end of file diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/struct_type.h b/matlab/src/cpp/arrow/matlab/type/proxy/struct_type.h new file mode 100644 index 0000000000000..8ec6217b34278 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/struct_type.h @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/matlab/type/proxy/type.h" + +namespace arrow::matlab::type::proxy { + + class StructType : public arrow::matlab::type::proxy::Type { + + public: + StructType(std::shared_ptr struct_type); + + ~StructType() {} + + static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); +}; + +} \ No newline at end of file diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/type.cc b/matlab/src/cpp/arrow/matlab/type/proxy/type.cc index 1eed4e6141347..1cbaaf328ee86 100644 --- a/matlab/src/cpp/arrow/matlab/type/proxy/type.cc +++ b/matlab/src/cpp/arrow/matlab/type/proxy/type.cc @@ -15,7 +15,11 @@ // specific language governing permissions and limitations // under the License. 
+ +#include "arrow/matlab/error/error.h" +#include "arrow/matlab/index/validate.h" #include "arrow/matlab/type/proxy/type.h" +#include "arrow/matlab/type/proxy/field.h" #include "libmexclass/proxy/ProxyManager.h" @@ -24,6 +28,7 @@ namespace arrow::matlab::type::proxy { Type::Type(std::shared_ptr type) : data_type{std::move(type)} { REGISTER_METHOD(Type, getTypeID); REGISTER_METHOD(Type, getNumFields); + REGISTER_METHOD(Type, getFieldByIndex); REGISTER_METHOD(Type, isEqual); } @@ -47,6 +52,36 @@ namespace arrow::matlab::type::proxy { context.outputs[0] = num_fields_mda; } + void Type::getFieldByIndex(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + mda::ArrayFactory factory; + + mda::StructArray args = context.inputs[0]; + const mda::TypedArray index_mda = args[0]["Index"]; + const auto matlab_index = int32_t(index_mda[0]); + + // Validate there is at least 1 field + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT( + index::validateNonEmptyContainer(data_type->num_fields()), + context, + error::INDEX_EMPTY_CONTAINER); + + // Validate the matlab index provided is within the range [1, num_fields] + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT( + index::validateInRange(matlab_index, data_type->num_fields()), + context, + error::INDEX_OUT_OF_RANGE); + + // Note: MATLAB uses 1-based indexing, so subtract 1. + // arrow::DataType::field does not do any bounds checking. 
+ const int32_t index = matlab_index - 1; + + auto field = data_type->field(index); + auto field_proxy = std::make_shared(std::move(field)); + auto field_proxy_id = libmexclass::proxy::ProxyManager::manageProxy(field_proxy); + context.outputs[0] = factory.createScalar(field_proxy_id); + } + void Type::isEqual(libmexclass::proxy::method::Context& context) { namespace mda = ::matlab::data; diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/type.h b/matlab/src/cpp/arrow/matlab/type/proxy/type.h index efd2b8255aa28..3a6b287a9254e 100644 --- a/matlab/src/cpp/arrow/matlab/type/proxy/type.h +++ b/matlab/src/cpp/arrow/matlab/type/proxy/type.h @@ -37,6 +37,8 @@ class Type : public libmexclass::proxy::Proxy { void getNumFields(libmexclass::proxy::method::Context& context); + void getFieldByIndex(libmexclass::proxy::method::Context& context); + void isEqual(libmexclass::proxy::method::Context& context); std::shared_ptr data_type; diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/wrap.cc b/matlab/src/cpp/arrow/matlab/type/proxy/wrap.cc index 91a1e353496c7..3dd86e91409fa 100644 --- a/matlab/src/cpp/arrow/matlab/type/proxy/wrap.cc +++ b/matlab/src/cpp/arrow/matlab/type/proxy/wrap.cc @@ -24,6 +24,7 @@ #include "arrow/matlab/type/proxy/date32_type.h" #include "arrow/matlab/type/proxy/date64_type.h" #include "arrow/matlab/type/proxy/string_type.h" +#include "arrow/matlab/type/proxy/struct_type.h" namespace arrow::matlab::type::proxy { @@ -64,6 +65,8 @@ namespace arrow::matlab::type::proxy { return std::make_shared(std::static_pointer_cast(type)); case ID::STRING: return std::make_shared(std::static_pointer_cast(type)); + case ID::STRUCT: + return std::make_shared(std::static_pointer_cast(type)); default: return arrow::Status::NotImplemented("Unsupported DataType: " + type->ToString()); } diff --git a/matlab/src/matlab/+arrow/+type/+traits/StructTraits.m b/matlab/src/matlab/+arrow/+type/+traits/StructTraits.m new file mode 100644 index 0000000000000..a8ed98f8ae468 --- /dev/null 
+++ b/matlab/src/matlab/+arrow/+type/+traits/StructTraits.m @@ -0,0 +1,36 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +classdef StructTraits < arrow.type.traits.TypeTraits + + properties (Constant) + % TODO: When arrow.array.StructArray is implemented, set these + % properties appropriately + ArrayConstructor = missing + ArrayClassName = missing + ArrayProxyClassName = missing + ArrayStaticConstructor = missing + + TypeConstructor = @arrow.type.StructType + TypeClassName = "arrow.type.StructType" + TypeProxyClassName = "arrow.type.proxy.StructType" + + % TODO: When arrow.array.StructArray is implemented, set these + % properties appropriately + MatlabConstructor = missing + MatlabClassName = missing + end + +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/+traits/traits.m b/matlab/src/matlab/+arrow/+type/+traits/traits.m index 78804fdccb3f0..f737108ce5f76 100644 --- a/matlab/src/matlab/+arrow/+type/+traits/traits.m +++ b/matlab/src/matlab/+arrow/+type/+traits/traits.m @@ -56,6 +56,8 @@ typeTraits = Date32Traits(); case ID.Date64 typeTraits = Date64Traits(); + case ID.Struct + typeTraits = StructTraits(); otherwise error("arrow:type:traits:UnsupportedArrowTypeID", "Unsupported Arrow type ID: " + type); 
end diff --git a/matlab/src/matlab/+arrow/+type/ID.m b/matlab/src/matlab/+arrow/+type/ID.m index 646edb85c6632..b2c4facbe4065 100644 --- a/matlab/src/matlab/+arrow/+type/ID.m +++ b/matlab/src/matlab/+arrow/+type/ID.m @@ -37,5 +37,11 @@ Timestamp (18) Time32 (19) Time64 (20) + % IntervalMonths (21) + % IntervalDayTime (22) + % Decimal128 (23) + % Decimal256 (24) + % List (25) + Struct (26) end end diff --git a/matlab/src/matlab/+arrow/+type/StructType.m b/matlab/src/matlab/+arrow/+type/StructType.m new file mode 100644 index 0000000000000..6c1318f6376f3 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/StructType.m @@ -0,0 +1,46 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +classdef StructType < arrow.type.Type + + methods + function obj = StructType(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.StructType")} + end + import arrow.internal.proxy.validate + obj@arrow.type.Type(proxy); + end + end + + methods(Access = protected) + function groups = getDisplayPropertyGroups(obj) + targets = ["ID", "Fields"]; + groups = matlab.mixin.util.PropertyGroup(targets); + end + end + + methods (Hidden) + % TODO: Consider using a mixin approach to add this behavior. 
For + % example, ChunkedArray's toMATLAB method could check if its + % Type inherits from a mixin called "Preallocateable" (or something + % more descriptive). If so, we can call preallocateMATLABArray + % in the toMATLAB method. + function preallocateMATLABArray(~) + error("arrow:type:UnsupportedFunction", ... + "preallocateMATLABArray is not supported for StructType"); + end + end +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/Type.m b/matlab/src/matlab/+arrow/+type/Type.m index 24f83e0267058..6dc4fbc438f34 100644 --- a/matlab/src/matlab/+arrow/+type/Type.m +++ b/matlab/src/matlab/+arrow/+type/Type.m @@ -19,6 +19,7 @@ properties (Dependent, GetAccess=public, SetAccess=private) ID + Fields NumFields end @@ -41,6 +42,29 @@ function typeID = get.ID(obj) typeID = arrow.type.ID(obj.Proxy.getTypeID()); end + + function F = field(obj, idx) + import arrow.internal.validate.* + + idx = index.numeric(idx, "int32", AllowNonScalar=false); + args = struct(Index=idx); + proxyID = obj.Proxy.getFieldByIndex(args); + proxy = libmexclass.proxy.Proxy(Name="arrow.type.proxy.Field", ID=proxyID); + F = arrow.type.Field(proxy); + end + + function fields = get.Fields(obj) + numFields = obj.NumFields; + if numFields == 0 + fields = arrow.type.Field.empty(0, 0); + else + fields = cell(1, numFields); + for ii = 1:numFields + fields{ii} = obj.field(ii); + end + fields = horzcat(fields{:}); + end + end end methods(Access = protected) diff --git a/matlab/src/matlab/+arrow/struct.m b/matlab/src/matlab/+arrow/struct.m new file mode 100644 index 0000000000000..2fdbd6a9864fd --- /dev/null +++ b/matlab/src/matlab/+arrow/struct.m @@ -0,0 +1,43 @@ +%STRUCT Constructs an arrow.type.StructType object + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. 
+% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +function type = struct(fields) + arguments(Repeating) + fields(1, :) arrow.type.Field {mustBeNonempty} + end + + % Must have at least one Field in a Struct + if isempty(fields) + error("arrow:struct:TooFewInputs", ... + "Must supply at least one arrow.type.Field"); + end + + fields = horzcat(fields{:}); + + % Extract the corresponding Proxy IDs from each of the + % supplied arrow.type.Field objects. + numFields = numel(fields); + fieldProxyIDs = zeros(1, numFields, "uint64"); + for ii = 1:numFields + fieldProxyIDs(ii) = fields(ii).Proxy.ID; + end + + % Construct an Arrow Field Proxy in C++ from the supplied Field Proxy IDs. + args = struct(FieldProxyIDs=fieldProxyIDs); + proxy = arrow.internal.proxy.create("arrow.type.proxy.StructType", args); + type = arrow.type.StructType(proxy); +end \ No newline at end of file diff --git a/matlab/test/arrow/tabular/tSchema.m b/matlab/test/arrow/tabular/tSchema.m index 3220236d4aabe..e4c706d9a3d6c 100644 --- a/matlab/test/arrow/tabular/tSchema.m +++ b/matlab/test/arrow/tabular/tSchema.m @@ -239,7 +239,7 @@ function GetFieldByNameWithWhitespace(testCase) testCase.verifyEqual(field.Type.ID, arrow.type.ID.UInt32); end - function ErrorIfInvalidNumericFieldIndex(testCase) + function ErrorIfIndexIsOutOfRange(testCase) % Verify that an error is thrown when trying to access a field % with an invalid numeric index (e.g. greater than NumFields). 
schema = arrow.schema([... @@ -250,7 +250,7 @@ function ErrorIfInvalidNumericFieldIndex(testCase) % Index is greater than NumFields. index = 100; - testCase.verifyError(@() schema.field(index), "arrow:tabular:schema:InvalidNumericFieldIndex"); + testCase.verifyError(@() schema.field(index), "arrow:index:OutOfRange"); end function ErrorIfFieldNameDoesNotExist(testCase) @@ -376,7 +376,7 @@ function EmptySchema(testCase) testCase.verifyEqual(schema.FieldNames, string.empty(1, 0)); testCase.verifyEqual(schema.Fields, arrow.type.Field.empty(0, 0)); testCase.verifyError(@() schema.field(0), "arrow:badsubscript:NonPositive"); - testCase.verifyError(@() schema.field(1), "arrow:tabular:schema:NumericFieldIndexWithEmptySchema"); + testCase.verifyError(@() schema.field(1), "arrow:index:EmptyContainer"); % 0x1 empty Field array. fields = arrow.type.Field.empty(0, 1); @@ -385,7 +385,7 @@ function EmptySchema(testCase) testCase.verifyEqual(schema.FieldNames, string.empty(1, 0)); testCase.verifyEqual(schema.Fields, arrow.type.Field.empty(0, 0)); testCase.verifyError(@() schema.field(0), "arrow:badsubscript:NonPositive"); - testCase.verifyError(@() schema.field(1), "arrow:tabular:schema:NumericFieldIndexWithEmptySchema"); + testCase.verifyError(@() schema.field(1), "arrow:index:EmptyContainer"); % 1x0 empty Field array. 
fields = arrow.type.Field.empty(1, 0); @@ -394,7 +394,7 @@ function EmptySchema(testCase) testCase.verifyEqual(schema.FieldNames, string.empty(1, 0)); testCase.verifyEqual(schema.Fields, arrow.type.Field.empty(0, 0)); testCase.verifyError(@() schema.field(0), "arrow:badsubscript:NonPositive"); - testCase.verifyError(@() schema.field(1), "arrow:tabular:schema:NumericFieldIndexWithEmptySchema"); + testCase.verifyError(@() schema.field(1), "arrow:index:EmptyContainer"); end function GetFieldByNameWithChar(testCase) diff --git a/matlab/test/arrow/type/hFixedWidthType.m b/matlab/test/arrow/type/hFixedWidthType.m index adb234bbd3f38..b23c21a6b4feb 100644 --- a/matlab/test/arrow/type/hFixedWidthType.m +++ b/matlab/test/arrow/type/hFixedWidthType.m @@ -49,6 +49,31 @@ function TestNumFields(testCase) testCase.verifyEqual(arrowType.NumFields, int32(0)); end + function TestFieldsProperty(testCase) + % Verify Fields is a 0x0 arrow.type.Field array. + type = testCase.ArrowType; + fields = type.Fields; + testCase.verifyEqual(fields, arrow.type.Field.empty(0, 0)); + end + + function FieldsNoSetter(testCase) + % Verify the Fields property is not settable. + type = testCase.ArrowType; + testCase.verifyError(@() setfield(type, "Fields", "1"), "MATLAB:class:SetProhibited"); + end + + function InvalidFieldIndex(testCase) + % Verify the field() method throws the expected error message + % when given an invalid index. + type = testCase.ArrowType; + + testCase.verifyError(@() type.field(0), "arrow:badsubscript:NonPositive"); + testCase.verifyError(@() type.field("A"), "arrow:badsubscript:NonNumeric"); + + % NOTE: For FixedWidthTypes, Fields is always empty. + testCase.verifyError(@() type.field(1), "arrow:index:EmptyContainer"); + end + function TestBitWidthNoSetter(testCase) % Verify that an error is thrown when trying to set the value % of the BitWidth property. 
diff --git a/matlab/test/arrow/type/tField.m b/matlab/test/arrow/type/tField.m index dba7190b49ce2..1a89c0077b5ae 100644 --- a/matlab/test/arrow/type/tField.m +++ b/matlab/test/arrow/type/tField.m @@ -42,6 +42,7 @@ function TestSupportedTypes(testCase) arrow.float64, ... arrow.string, ... arrow.timestamp, ... + arrow.struct(arrow.field("A", arrow.float32())) }; for ii = 1:numel(supportedTypes) supportedType = supportedTypes{ii}; diff --git a/matlab/test/arrow/type/tID.m b/matlab/test/arrow/type/tID.m index b69cd89842d73..e97d77e81c124 100644 --- a/matlab/test/arrow/type/tID.m +++ b/matlab/test/arrow/type/tID.m @@ -46,7 +46,8 @@ function CastToUInt64(testCase) ID.Date64, 17, ... ID.Timestamp, 18, ... ID.Time32, 19, ... - ID.Time64, 20 ... + ID.Time64, 20, ... + ID.Struct, 26 ... ); enumValues = typeIDs.keys(); diff --git a/matlab/test/arrow/type/tStringType.m b/matlab/test/arrow/type/tStringType.m index e2a16ab133dbd..3d518b3da3320 100644 --- a/matlab/test/arrow/type/tStringType.m +++ b/matlab/test/arrow/type/tStringType.m @@ -64,6 +64,31 @@ function IsEqualFalse(testCase) testCase.verifyFalse(isequal(typeArray1, typeArray2)); end + function TestFieldsProperty(testCase) + % Verify Fields is a 0x0 arrow.type.Field array. + type = arrow.string(); + fields = type.Fields; + testCase.verifyEqual(fields, arrow.type.Field.empty(0, 0)); + end + + function FieldsNoSetter(testCase) + % Verify the Fields property is not settable. + type = arrow.string(); + testCase.verifyError(@() setfield(type, "Fields", "1"), "MATLAB:class:SetProhibited"); + end + + function InvalidFieldIndex(testCase) + % Verify the field() method throws the expected error message + % when given an invalid index. + type = arrow.string(); + + testCase.verifyError(@() type.field(0), "arrow:badsubscript:NonPositive"); + testCase.verifyError(@() type.field("A"), "arrow:badsubscript:NonNumeric"); + + % NOTE: For StringType, Fields is always empty. 
+ testCase.verifyError(@() type.field(1), "arrow:index:EmptyContainer"); + end + end end diff --git a/matlab/test/arrow/type/tStructType.m b/matlab/test/arrow/type/tStructType.m new file mode 100644 index 0000000000000..f0585823f8dcf --- /dev/null +++ b/matlab/test/arrow/type/tStructType.m @@ -0,0 +1,190 @@ +% TSTRUCTTYPE Unit tests for arrow.type.StructType + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +classdef tStructType < matlab.unittest.TestCase + + properties (Constant) + Field1 = arrow.field("A", arrow.float64()) + Field2 = arrow.field("C", arrow.boolean()) + Field3 = arrow.field("B", arrow.timestamp(TimeUnit="Microsecond", TimeZone="America/New_York")); + end + + methods (Test) + function Basic(tc) + % Verify arrow.struct() returns an arrow.type.StructType + % object. + type = arrow.struct(tc.Field1); + className = string(class(type)); + tc.verifyEqual(className, "arrow.type.StructType"); + tc.verifyEqual(type.ID, arrow.type.ID.Struct); + end + + function TooFewInputsError(tc) + % Verify arrow.struct() errors if given zero input arguments. 
+ fcn = @() arrow.struct(); + tc.verifyError(fcn, "arrow:struct:TooFewInputs"); + end + + function InvalidInputTypeError(tc) + % Verify arrow.struct() errors if any one of the input + % arguments is not an arrow.type.Field object. + fcn = @() arrow.struct(1); + tc.verifyError(fcn, "MATLAB:validation:UnableToConvert"); + end + + function EmptyFieldError(tc) + % Verify arrow.struct() errors if given an empty + % arrow.type.Field array as one of its inputs. + fcn = @() arrow.struct(tc.Field1, arrow.type.Field.empty(0, 0)); + tc.verifyError(fcn, "MATLAB:validators:mustBeNonempty"); + end + + function NumFieldsGetter(tc) + % Verify the NumFields getter returns the expected value. + type = arrow.struct(tc.Field1); + tc.verifyEqual(type.NumFields, int32(1)); + + type = arrow.struct(tc.Field1, tc.Field2); + tc.verifyEqual(type.NumFields, int32(2)); + + type = arrow.struct(tc.Field1, tc.Field2, tc.Field3); + tc.verifyEqual(type.NumFields, int32(3)); + end + + function NumFieldsNoSetter(tc) + % Verify the NumFields property is not settable. + type = arrow.struct(tc.Field1); + fcn = @() setfield(type, "NumFields", 20); + tc.verifyError(fcn, "MATLAB:class:SetProhibited"); + end + + function FieldsGetter(tc) + % Verify the Fields getter returns the expected + % arrow.type.Field array. + type = arrow.struct(tc.Field1, tc.Field2, tc.Field3); + actual = type.Fields; + expected = [tc.Field1, tc.Field2, tc.Field3]; + tc.verifyEqual(actual, expected); + end + + function FieldsNoSetter(tc) + % Verify the Fields property is not settable. + type = arrow.struct(tc.Field1, tc.Field2, tc.Field3); + fcn = @() setfield(type, "Fields", tc.Field3); + tc.verifyError(fcn, "MATLAB:class:SetProhibited"); + end + + function IDGetter(tc) + % Verify the ID getter returns the expected enum value. + type = arrow.struct(tc.Field1); + actual = type.ID; + expected = arrow.type.ID.Struct; + tc.verifyEqual(actual, expected); + end + + function IDNoSetter(tc) + % Verify the ID property is not settable. 
+ type = arrow.struct(tc.Field1); + fcn = @() setfield(type, "ID", arrow.type.ID.Boolean); + tc.verifyError(fcn, "MATLAB:class:SetProhibited"); + end + + function FieldMethod(tc) + % Verify the field method returns the expected arrow.type.Field + % with respect to the index provided. + type = arrow.struct(tc.Field1, tc.Field2, tc.Field3); + + % Extract the 1st field + actual1 = type.field(1); + expected1 = tc.Field1; + tc.verifyEqual(actual1, expected1); + + % Extract the 2nd field + actual2 = type.field(2); + expected2 = tc.Field2; + tc.verifyEqual(actual2, expected2); + + % Extract the 3rd field + actual3 = type.field(3); + expected3 = tc.Field3; + tc.verifyEqual(actual3, expected3); + end + + function FieldIndexOutOfRangeError(tc) + % Verify field() throws an error if provided an index that + % exceeds NumFields. + type = arrow.struct(tc.Field1, tc.Field2, tc.Field3); + fcn = @() type.field(100); + tc.verifyError(fcn, "arrow:index:OutOfRange"); + end + + function FieldIndexNonScalarError(tc) + % Verify field() throws an error if provided a nonscalar array + % of indices. + type = arrow.struct(tc.Field1, tc.Field2, tc.Field3); + fcn = @() type.field([1 2]); + tc.verifyError(fcn, "arrow:badsubscript:NonScalar"); + end + + function FieldIndexNonNumberError(tc) + % Verify field() throws an error if not provided a number as + % the index. + + type = arrow.struct(tc.Field1, tc.Field2, tc.Field3); + fcn = @() type.field("A"); + tc.verifyError(fcn, "arrow:badsubscript:NonNumeric"); + end + + function IsEqualTrue(tc) + % Verify two StructTypes are considered equal if their Fields + % properties are equal. 
+ + type1 = arrow.struct(tc.Field1, tc.Field2, tc.Field3); + type2 = arrow.struct(tc.Field1, tc.Field2, tc.Field3); + + tc.verifyTrue(isequal(type1, type2)); + tc.verifyTrue(isequal(type1, type2, type2, type1)); + + % Non-scalar arrow.type.StructType arrays + type3 = [type1 type2]; + type4 = [type1 type2]; + tc.verifyTrue(isequal(type3, type4)); + end + + function IsEqualFalse(tc) + % Verify isequal returns false when expected. + type1 = arrow.struct(tc.Field1, tc.Field2, tc.Field3); + type2 = arrow.struct(tc.Field1, tc.Field2); + type3 = arrow.struct(tc.Field1, tc.Field3, tc.Field2); + + % Fields properties have different lengths + tc.verifyFalse(isequal(type1, type2)); + + % The corresponding elements in the Fields arrays are not equal + tc.verifyFalse(isequal(type1, type3)); + + % Non-scalar arrow.type.StructType arrays + type4 = [type1 type2]; + type5 = [type1; type2]; + type6 = [type1 type2]; + type7 = [type1 type3]; + tc.verifyFalse(isequal(type4, type5)); + tc.verifyFalse(isequal(type6, type7)); + + end + end +end \ No newline at end of file diff --git a/matlab/test/arrow/type/tTypeDisplay.m b/matlab/test/arrow/type/tTypeDisplay.m index 4d3c023da71ab..f84c5ab56e270 100644 --- a/matlab/test/arrow/type/tTypeDisplay.m +++ b/matlab/test/arrow/type/tTypeDisplay.m @@ -189,7 +189,7 @@ function TestDateType(testCase, DateType) testCase.verifyEqual(actualDisplay, expectedDisplay); end - function Display(testCase) + function TimestampTypeDisplay(testCase) % Verify the display of TimestampType objects. % % Example: @@ -211,6 +211,32 @@ function Display(testCase) actualDisplay = evalc('disp(type)'); testCase.verifyEqual(actualDisplay, expectedDisplay); end + + function StructTypeDisplay(testCase) + % Verify the display of StructType objects. 
+ % + % Example: + % + % StructType with properties: + % + % ID: Struct + % Fields: [1x2 arrow.type.Field] + + fieldA = arrow.field("A", arrow.int32()); + fieldB = arrow.field("B", arrow.timestamp(TimeZone="America/Anchorage")); + type = arrow.struct(fieldA, fieldB); %#ok + classnameLink = makeLinkString(FullClassName="arrow.type.StructType", ClassName="StructType", BoldFont=true); + header = " " + classnameLink + " with properties:" + newline; + body = strjust(pad(["ID:"; "Fields:"])); + dimensionString = makeDimensionString([1 2]); + fieldString = compose("[%s %s]", dimensionString, "arrow.type.Field"); + body = body + " " + ["Struct"; fieldString]; + body = " " + body; + footer = string(newline); + expectedDisplay = char(strjoin([header body' footer], newline)); + actualDisplay = evalc('disp(type)'); + testCase.verifyDisplay(actualDisplay, expectedDisplay); + end end methods diff --git a/matlab/test/arrow/type/traits/tStructTraits.m b/matlab/test/arrow/type/traits/tStructTraits.m new file mode 100644 index 0000000000000..6a97b1e1852d6 --- /dev/null +++ b/matlab/test/arrow/type/traits/tStructTraits.m @@ -0,0 +1,31 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +classdef tStructTraits < hTypeTraits + + properties + TraitsConstructor = @arrow.type.traits.StructTraits + ArrayConstructor = missing + ArrayClassName = missing + ArrayProxyClassName = missing + ArrayStaticConstructor = missing + TypeConstructor = @arrow.type.StructType + TypeClassName = "arrow.type.StructType" + TypeProxyClassName = "arrow.type.proxy.StructType" + MatlabConstructor = missing + MatlabClassName = missing + end + +end \ No newline at end of file diff --git a/matlab/test/arrow/type/traits/ttraits.m b/matlab/test/arrow/type/traits/ttraits.m index cdc5990ed03ba..2880645f2957c 100644 --- a/matlab/test/arrow/type/traits/ttraits.m +++ b/matlab/test/arrow/type/traits/ttraits.m @@ -199,6 +199,18 @@ function TestDate64(testCase) testCase.verifyEqual(actualTraits, expectedTraits); end + function TestStruct(testCase) + import arrow.type.traits.* + import arrow.type.* + + type = ID.Struct; + expectedTraits = StructTraits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + function TestMatlabUInt8(testCase) import arrow.type.traits.* diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index a5c0b079b34a6..40c6b5a51d4fe 100644 --- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -65,10 +65,13 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/a "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/time_type.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/time32_type.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/time64_type.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/struct_type.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/field.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/wrap.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy/writer.cc" - 
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy/reader.cc") + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy/reader.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/index/validate.cc") + set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy") diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 0c1744febbe1e..609307528d2ec 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -2145,7 +2145,8 @@ class QuantileOptions(_QuantileOptions): Parameters ---------- q : double or sequence of double, default 0.5 - Quantiles to compute. All values must be in [0, 1]. + Probability levels of the quantiles to compute. All values must be in + [0, 1]. interpolation : str, default "linear" How to break ties between competing data points for a given quantile. Accepted values are: @@ -2182,7 +2183,8 @@ class TDigestOptions(_TDigestOptions): Parameters ---------- q : double or sequence of double, default 0.5 - Quantiles to approximate. All values must be in [0, 1]. + Probability levels of the quantiles to approximate. All values must be + in [0, 1]. delta : int, default 100 Compression parameter for the T-digest algorithm. buffer_size : int, default 500 diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx index 8f5688de29072..d29fa125e2061 100644 --- a/python/pyarrow/_dataset.pyx +++ b/python/pyarrow/_dataset.pyx @@ -319,7 +319,7 @@ cdef class Dataset(_Weakrefable): partition information or internal metadata found in the data source, e.g. Parquet statistics. Otherwise filters the loaded RecordBatches before yielding them. - batch_size : int, default 128Ki + batch_size : int, default 131_072 The maximum row count for scanned record batches. If scanned record batches are overflowing memory then this method can be called to reduce their size. 
@@ -441,7 +441,7 @@ cdef class Dataset(_Weakrefable): partition information or internal metadata found in the data source, e.g. Parquet statistics. Otherwise filters the loaded RecordBatches before yielding them. - batch_size : int, default 128Ki + batch_size : int, default 131_072 The maximum row count for scanned record batches. If scanned record batches are overflowing memory then this method can be called to reduce their size. @@ -519,7 +519,7 @@ cdef class Dataset(_Weakrefable): partition information or internal metadata found in the data source, e.g. Parquet statistics. Otherwise filters the loaded RecordBatches before yielding them. - batch_size : int, default 128Ki + batch_size : int, default 131_072 The maximum row count for scanned record batches. If scanned record batches are overflowing memory then this method can be called to reduce their size. @@ -597,7 +597,7 @@ cdef class Dataset(_Weakrefable): partition information or internal metadata found in the data source, e.g. Parquet statistics. Otherwise filters the loaded RecordBatches before yielding them. - batch_size : int, default 128Ki + batch_size : int, default 131_072 The maximum row count for scanned record batches. If scanned record batches are overflowing memory then this method can be called to reduce their size. @@ -675,7 +675,7 @@ cdef class Dataset(_Weakrefable): partition information or internal metadata found in the data source, e.g. Parquet statistics. Otherwise filters the loaded RecordBatches before yielding them. - batch_size : int, default 128Ki + batch_size : int, default 131_072 The maximum row count for scanned record batches. If scanned record batches are overflowing memory then this method can be called to reduce their size. @@ -730,7 +730,7 @@ cdef class Dataset(_Weakrefable): partition information or internal metadata found in the data source, e.g. Parquet statistics. Otherwise filters the loaded RecordBatches before yielding them. 
- batch_size : int, default 128Ki + batch_size : int, default 131_072 The maximum row count for scanned record batches. If scanned record batches are overflowing memory then this method can be called to reduce their size. @@ -1411,7 +1411,7 @@ cdef class Fragment(_Weakrefable): partition information or internal metadata found in the data source, e.g. Parquet statistics. Otherwise filters the loaded RecordBatches before yielding them. - batch_size : int, default 128Ki + batch_size : int, default 131_072 The maximum row count for scanned record batches. If scanned record batches are overflowing memory then this method can be called to reduce their size. @@ -1491,7 +1491,7 @@ cdef class Fragment(_Weakrefable): partition information or internal metadata found in the data source, e.g. Parquet statistics. Otherwise filters the loaded RecordBatches before yielding them. - batch_size : int, default 128Ki + batch_size : int, default 131_072 The maximum row count for scanned record batches. If scanned record batches are overflowing memory then this method can be called to reduce their size. @@ -1574,7 +1574,7 @@ cdef class Fragment(_Weakrefable): partition information or internal metadata found in the data source, e.g. Parquet statistics. Otherwise filters the loaded RecordBatches before yielding them. - batch_size : int, default 128Ki + batch_size : int, default 131_072 The maximum row count for scanned record batches. If scanned record batches are overflowing memory then this method can be called to reduce their size. @@ -1653,7 +1653,7 @@ cdef class Fragment(_Weakrefable): partition information or internal metadata found in the data source, e.g. Parquet statistics. Otherwise filters the loaded RecordBatches before yielding them. - batch_size : int, default 128Ki + batch_size : int, default 131_072 The maximum row count for scanned record batches. If scanned record batches are overflowing memory then this method can be called to reduce their size. 
@@ -1731,7 +1731,7 @@ cdef class Fragment(_Weakrefable): partition information or internal metadata found in the data source, e.g. Parquet statistics. Otherwise filters the loaded RecordBatches before yielding them. - batch_size : int, default 128Ki + batch_size : int, default 131_072 The maximum row count for scanned record batches. If scanned record batches are overflowing memory then this method can be called to reduce their size. @@ -1786,7 +1786,7 @@ cdef class Fragment(_Weakrefable): partition information or internal metadata found in the data source, e.g. Parquet statistics. Otherwise filters the loaded RecordBatches before yielding them. - batch_size : int, default 128Ki + batch_size : int, default 131_072 The maximum row count for scanned record batches. If scanned record batches are overflowing memory then this method can be called to reduce their size. @@ -3436,7 +3436,7 @@ cdef class Scanner(_Weakrefable): partition information or internal metadata found in the data source, e.g. Parquet statistics. Otherwise filters the loaded RecordBatches before yielding them. - batch_size : int, default 128Ki + batch_size : int, default 131_072 The maximum row count for scanned record batches. If scanned record batches are overflowing memory then this method can be called to reduce their size. @@ -3515,7 +3515,7 @@ cdef class Scanner(_Weakrefable): partition information or internal metadata found in the data source, e.g. Parquet statistics. Otherwise filters the loaded RecordBatches before yielding them. - batch_size : int, default 128Ki + batch_size : int, default 131_072 The maximum row count for scanned record batches. If scanned record batches are overflowing memory then this method can be called to reduce their size. @@ -3601,7 +3601,7 @@ cdef class Scanner(_Weakrefable): partition information or internal metadata found in the data source, e.g. Parquet statistics. Otherwise filters the loaded RecordBatches before yielding them. 
- batch_size : int, default 128Ki + batch_size : int, default 131_072 The maximum row count for scanned record batches. If scanned record batches are overflowing memory then this method can be called to reduce their size. diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index e26b1ad3291b5..e36d8b2f04315 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -2363,6 +2363,79 @@ cdef class MapArray(ListArray): Returns ------- map_array : MapArray + + Examples + -------- + First, let's understand the structure of our dataset when viewed in a rectangular data model. + The total of 5 respondents answered the question "How much did you like the movie x?". + The value -1 in the integer array means that the value is missing. The boolean array + represents the null bitmask corresponding to the missing values in the integer array. + + >>> import pyarrow as pa + >>> movies_rectangular = np.ma.masked_array([ + ... [10, -1, -1], + ... [8, 4, 5], + ... [-1, 10, 3], + ... [-1, -1, -1], + ... [-1, -1, -1] + ... ], + ... [ + ... [False, True, True], + ... [False, False, False], + ... [True, False, False], + ... [True, True, True], + ... [True, True, True], + ... ]) + + To represent the same data with the MapArray and from_arrays, the data is + formed like this: + + >>> offsets = [ + ... 0, # -- row 1 start + ... 1, # -- row 2 start + ... 4, # -- row 3 start + ... 6, # -- row 4 start + ... 6, # -- row 5 start + ... 6, # -- row 5 end + ... ] + >>> movies = [ + ... "Dark Knight", # ---------------------------------- row 1 + ... "Dark Knight", "Meet the Parents", "Superman", # -- row 2 + ... "Meet the Parents", "Superman", # ----------------- row 3 + ... ] + >>> likings = [ + ... 10, # -------- row 1 + ... 8, 4, 5, # --- row 2 + ... 10, 3 # ------ row 3 + ... ] + >>> pa.MapArray.from_arrays(offsets, movies, likings).to_pandas() + 0 [(Dark Knight, 10)] + 1 [(Dark Knight, 8), (Meet the Parents, 4), (Sup... 
+ 2 [(Meet the Parents, 10), (Superman, 3)] + 3 [] + 4 [] + dtype: object + + If the data in the empty rows needs to be marked as missing, it's possible + to do so by modifying the offsets argument, so that we specify `None` as + the starting positions of the rows we want marked as missing. The end row + offset still has to refer to the existing value from keys (and values): + + >>> offsets = [ + ... 0, # ----- row 1 start + ... 1, # ----- row 2 start + ... 4, # ----- row 3 start + ... None, # -- row 4 start + ... None, # -- row 5 start + ... 6, # ----- row 5 end + ... ] + >>> pa.MapArray.from_arrays(offsets, movies, likings).to_pandas() + 0 [(Dark Knight, 10)] + 1 [(Dark Knight, 8), (Meet the Parents, 4), (Sup... + 2 [(Meet the Parents, 10), (Superman, 3)] + 3 None + 4 None + dtype: object """ cdef: Array _offsets, _keys, _items diff --git a/python/pyarrow/compat.pxi b/python/pyarrow/compat.pxi index 98aa1f2433ef0..8cf106d5609b5 100644 --- a/python/pyarrow/compat.pxi +++ b/python/pyarrow/compat.pxi @@ -33,16 +33,10 @@ def encode_file_path(path): ordered_dict = dict -try: - import pickle5 as builtin_pickle -except ImportError: - import pickle as builtin_pickle - - try: import cloudpickle as pickle except ImportError: - pickle = builtin_pickle + import pickle def tobytes(o): diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py index 567bea8ac05e8..36655c7d12863 100644 --- a/python/pyarrow/fs.py +++ b/python/pyarrow/fs.py @@ -356,7 +356,12 @@ def get_file_info_selector(self, selector): selector.base_dir, maxdepth=maxdepth, withdirs=True, detail=True ) for path, info in selected_files.items(): - infos.append(self._create_file_info(path, info)) + _path = path.strip("/") + base_dir = selector.base_dir.strip("/") + # Need to exclude base directory from selected files if present + # (fsspec filesystems, see GH-37555) + if _path != base_dir: + infos.append(self._create_file_info(path, info)) return infos diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 
e3018ab4704f0..460e932b86273 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -21,6 +21,7 @@ from libc.stdlib cimport malloc, free import codecs +import pickle import re import sys import threading @@ -1368,7 +1369,7 @@ cdef class Buffer(_Weakrefable): def __reduce_ex__(self, protocol): if protocol >= 5: - bufobj = builtin_pickle.PickleBuffer(self) + bufobj = pickle.PickleBuffer(self) elif self.buffer.get().is_mutable(): # Need to pass a bytearray to recreate a mutable buffer when # unpickling. diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index 12f1cc431293c..4e5c868efd4c8 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -26,13 +26,14 @@ from itertools import zip_longest import json import operator +import pickle import re import warnings import numpy as np import pyarrow as pa -from pyarrow.lib import _pandas_api, builtin_pickle, frombytes # noqa +from pyarrow.lib import _pandas_api, frombytes # noqa _logical_type_map = {} @@ -720,7 +721,7 @@ def _reconstruct_block(item, columns=None, extension_columns=None): klass=_int.DatetimeTZBlock, dtype=dtype) elif 'object' in item: - block = _int.make_block(builtin_pickle.loads(block_arr), + block = _int.make_block(pickle.loads(block_arr), placement=placement) elif 'py_array' in item: # create ExtensionBlock diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index ffaebd2418a58..9f8b347d56294 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -19,6 +19,7 @@ from cpython.pycapsule cimport PyCapsule_CheckExact, PyCapsule_GetPointer import atexit from collections.abc import Mapping +import pickle import re import sys import warnings @@ -1699,12 +1700,12 @@ cdef class PyExtensionType(ExtensionType): .format(type(self).__name__)) def __arrow_ext_serialize__(self): - return builtin_pickle.dumps(self) + return pickle.dumps(self) @classmethod def __arrow_ext_deserialize__(cls, storage_type, serialized): try: - 
ty = builtin_pickle.loads(serialized) + ty = pickle.loads(serialized) except Exception: # For some reason, it's impossible to deserialize the # ExtensionType instance. Perhaps the serialized data is diff --git a/python/requirements-wheel-test.txt b/python/requirements-wheel-test.txt index c23a30f70e838..9de0acb754079 100644 --- a/python/requirements-wheel-test.txt +++ b/python/requirements-wheel-test.txt @@ -1,7 +1,6 @@ cffi cython hypothesis -pickle5; platform_system != "Windows" and python_version < "3.8" pytest pytest-lazy-fixture pytz diff --git a/r/NEWS.md b/r/NEWS.md index d80efbf8de18e..2e2db1ad5d3fa 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -19,6 +19,10 @@ # arrow 13.0.0.9000 +# arrow 13.0.0.1 + +* Remove reference to legacy timezones to prevent CRAN check failures (#37671) + # arrow 13.0.0 ## Breaking changes diff --git a/r/configure b/r/configure index 0586cbe071007..593f60bbddad9 100755 --- a/r/configure +++ b/r/configure @@ -278,6 +278,16 @@ do_autobrew () { # Once libarrow is obtained, this function sets `PKG_LIBS`, `PKG_DIRS`, and `PKG_CFLAGS` # either from pkg-config or by inferring things about the directory in $1 set_pkg_vars () { + if [ "$PKG_CONFIG_AVAILABLE" = "true" ]; then + set_lib_dir_with_pc + else + set_lib_dir_without_pc $1 + fi + + # Check cmake options for enabled features. This uses LIB_DIR that + # is set by the above set_lib_dir_* call. 
+ add_feature_flags + if [ "$PKG_CONFIG_AVAILABLE" = "true" ]; then set_pkg_vars_with_pc else @@ -288,23 +298,27 @@ set_pkg_vars () { if [ "$ARROW_R_CXXFLAGS" ]; then PKG_CFLAGS="$PKG_CFLAGS $ARROW_R_CXXFLAGS" fi - - # Finally, check cmake options for enabled features - add_feature_flags } # If we have pkg-config, it will tell us what libarrow needs -set_pkg_vars_with_pc () { +set_lib_dir_with_pc () { LIB_DIR="`${PKG_CONFIG} --variable=libdir --silence-errors ${PKG_CONFIG_NAME}`" - PKG_CFLAGS="`${PKG_CONFIG} --cflags --silence-errors ${PKG_CONFIG_NAME}` $PKG_CFLAGS" - PKG_LIBS=`${PKG_CONFIG} --libs-only-l --libs-only-other --silence-errors ${PKG_CONFIG_NAME}` - PKG_DIRS=`${PKG_CONFIG} --libs-only-L --silence-errors ${PKG_CONFIG_NAME}` +} +set_pkg_vars_with_pc () { + pkg_config_names="${PKG_CONFIG_NAME} ${PKG_CONFIG_NAMES_FEATURES}" + PKG_CFLAGS="`${PKG_CONFIG} --cflags --silence-errors ${pkg_config_names}` $PKG_CFLAGS" + PKG_CFLAGS="$PKG_CFLAGS $PKG_CFLAGS_FEATURES" + PKG_LIBS=`${PKG_CONFIG} --libs-only-l --libs-only-other --silence-errors ${pkg_config_names}` + PKG_LIBS="$PKG_LIBS $PKG_LIBS_FEATURES" + PKG_DIRS=`${PKG_CONFIG} --libs-only-L --silence-errors ${pkg_config_names}` } # If we don't have pkg-config, we can make some inferences -set_pkg_vars_without_pc () { +set_lib_dir_without_pc () { LIB_DIR="$1/lib" - PKG_CFLAGS="-I$1/include $PKG_CFLAGS" +} +set_pkg_vars_without_pc () { + PKG_CFLAGS="-I$1/include $PKG_CFLAGS $PKG_CFLAGS_FEATURES" if grep -q "_GLIBCXX_USE_CXX11_ABI=0" "${LIB_DIR}/pkgconfig/arrow.pc"; then PKG_CFLAGS="${PKG_CFLAGS} -D_GLIBCXX_USE_CXX11_ABI=0" fi @@ -312,10 +326,12 @@ set_pkg_vars_without_pc () { if [ "${OPENSSL_ROOT_DIR}" != "" ]; then PKG_DIRS="${PKG_DIRS} -L${OPENSSL_ROOT_DIR}/lib" fi - PKG_LIBS="-larrow" + PKG_LIBS="$PKG_LIBS_FEATURES_WITHOUT_PC" + PKG_LIBS="$PKG_LIBS -larrow" if [ -n "$(find "$LIB_DIR" -name 'libarrow_bundled_dependencies.*')" ]; then PKG_LIBS="$PKG_LIBS -larrow_bundled_dependencies" fi + PKG_LIBS="$PKG_LIBS 
$PKG_LIBS_FEATURES" # If on Raspberry Pi, need to manually link against latomic # See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81358 for similar example @@ -327,6 +343,11 @@ set_pkg_vars_without_pc () { } add_feature_flags () { + PKG_CFLAGS_FEATURES="" + PKG_CONFIG_NAMES_FEATURES="" + PKG_LIBS_FEATURES="" + PKG_LIBS_FEATURES_WITHOUT_PC="" + # Now we need to check what features it was built with and enable # the corresponding feature flags in the R bindings (-DARROW_R_WITH_stuff). # We do this by inspecting ArrowOptions.cmake, which the libarrow build @@ -336,39 +357,43 @@ add_feature_flags () { echo "*** $ARROW_OPTS_CMAKE not found; some features will not be enabled" else if arrow_built_with ARROW_PARQUET; then - PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_PARQUET" - PKG_LIBS="-lparquet $PKG_LIBS" + PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_PARQUET" + PKG_CONFIG_NAMES_FEATURES="$PKG_CONFIG_NAMES_FEATURES parquet" + PKG_LIBS_FEATURES_WITHOUT_PC="-lparquet $PKG_LIBS_FEATURES_WITHOUT_PC" # NOTE: parquet is assumed to have the same -L flag as arrow # so there is no need to add its location to PKG_DIRS fi if arrow_built_with ARROW_DATASET; then - PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_DATASET" - PKG_LIBS="-larrow_dataset $PKG_LIBS" - # NOTE: arrow-dataset is assumed to have the same -L flag as arrow + PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_DATASET" + PKG_CONFIG_NAMES_FEATURES="$PKG_CONFIG_NAMES_FEATURES arrow-dataset" + PKG_LIBS_FEATURES_WITHOUT_PC="-larrow_dataset $PKG_LIBS_FEATURES_WITHOUT_PC" + # NOTE: arrow_dataset is assumed to have the same -L flag as arrow # so there is no need to add its location to PKG_DIRS fi if arrow_built_with ARROW_ACERO; then - PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_ACERO" - PKG_LIBS="-larrow_acero $PKG_LIBS" - # NOTE: arrow-acero is assumed to have the same -L flag as arrow + PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_ACERO" + PKG_CONFIG_NAMES_FEATURES="$PKG_CONFIG_NAMES_FEATURES arrow-acero" 
+ PKG_LIBS_FEATURES_WITHOUT_PC="-larrow_acero $PKG_LIBS_FEATURES_WITHOUT_PC" + # NOTE: arrow_acero is assumed to have the same -L flag as arrow # so there is no need to add its location to PKG_DIRS fi if arrow_built_with ARROW_SUBSTRAIT; then - PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_SUBSTRAIT" - PKG_LIBS="-larrow_substrait $PKG_LIBS" - # NOTE: arrow-substrait is assumed to have the same -L flag as arrow + PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_SUBSTRAIT" + PKG_CONFIG_NAMES_FEATURES="$PKG_CONFIG_NAMES_FEATURES arrow-substrait" + PKG_LIBS_FEATURES_WITHOUT_PC="-larrow_substrait $PKG_LIBS_FEATURES_WITHOUT_PC" + # NOTE: arrow_substrait is assumed to have the same -L flag as arrow # so there is no need to add its location to PKG_DIRS fi if arrow_built_with ARROW_JSON; then - PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_JSON" + PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_JSON" fi if arrow_built_with ARROW_S3; then - PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_S3" - PKG_LIBS="$PKG_LIBS $S3_LIBS" + PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_S3" + PKG_LIBS_FEATURES="$PKG_LIBS_FEATURES $S3_LIBS" fi if arrow_built_with ARROW_GCS; then - PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_GCS" - PKG_LIBS="$PKG_LIBS $GCS_LIBS" + PKG_CFLAGS_FEATURES="$PKG_CFLAGS_FEATURES -DARROW_R_WITH_GCS" + PKG_LIBS_FEATURES="$PKG_LIBS_FEATURES $GCS_LIBS" fi fi } diff --git a/r/src/memorypool.cpp b/r/src/memorypool.cpp index 027aa8ef2aa8d..696e913eadc70 100644 --- a/r/src/memorypool.cpp +++ b/r/src/memorypool.cpp @@ -16,8 +16,8 @@ // under the License. #include -#include #include "./arrow_types.h" +#include "./safe-call-into-r.h" class GcMemoryPool : public arrow::MemoryPool { public: @@ -59,17 +59,17 @@ class GcMemoryPool : public arrow::MemoryPool { if (call().ok()) { return arrow::Status::OK(); } else { - auto lock = mutex_.Lock(); - // ARROW-10080: Allocation may fail spuriously since the garbage collector is lazy. 
// Force it to run then try again in case any reusable allocations have been freed. - static cpp11::function gc = cpp11::package("base")["gc"]; - gc(); + arrow::Status r_call = SafeCallIntoRVoid([] { + cpp11::function gc = cpp11::package("base")["gc"]; + gc(); + }); + ARROW_RETURN_NOT_OK(r_call); + return call(); } - return call(); } - arrow::util::Mutex mutex_; arrow::MemoryPool* pool_; }; diff --git a/r/tests/testthat/helper-data.R b/r/tests/testthat/helper-data.R index 1088be6850143..0631cfccae3fc 100644 --- a/r/tests/testthat/helper-data.R +++ b/r/tests/testthat/helper-data.R @@ -59,9 +59,9 @@ haven_data <- tibble::tibble( example_with_times <- tibble::tibble( date = Sys.Date() + 1:10, posixct = lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10, - posixct_tz = lubridate::ymd_hms("2018-10-07 19:04:05", tz = "US/Eastern") + 1:10, + posixct_tz = lubridate::ymd_hms("2018-10-07 19:04:05", tz = "America/New_York") + 1:10, posixlt = as.POSIXlt(lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10), - posixlt_tz = as.POSIXlt(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "US/Eastern") + 1:10) + posixlt_tz = as.POSIXlt(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "America/New_York") + 1:10) ) verses <- list( diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index 960faa8bb751b..b29c1f4e09dde 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -283,8 +283,8 @@ test_that("array supports POSIXct (ARROW-3340)", { times[5] <- NA expect_array_roundtrip(times, timestamp("us", "UTC")) - times2 <- lubridate::ymd_hms("2018-10-07 19:04:05", tz = "US/Eastern") + 1:10 - expect_array_roundtrip(times2, timestamp("us", "US/Eastern")) + times2 <- lubridate::ymd_hms("2018-10-07 19:04:05", tz = "America/New_York") + 1:10 + expect_array_roundtrip(times2, timestamp("us", "America/New_York")) }) test_that("array uses local timezone for POSIXct without timezone", { diff --git a/r/tests/testthat/test-arrow.R b/r/tests/testthat/test-arrow.R 
index 071a5ad3d982c..c6ae27ac52296 100644 --- a/r/tests/testthat/test-arrow.R +++ b/r/tests/testthat/test-arrow.R @@ -64,6 +64,6 @@ test_that("MemoryPool calls gc() to free memory when allocation fails (ARROW-100 on.exit(suppressMessages(untrace(gc))) # We expect this should fail because we don't have this much memory, # but it should gc() and retry (and fail again) - expect_error(BufferOutputStream$create(2**60)) + expect_error(BufferOutputStream$create(2**60), "Out of memory") expect_true(env$gc_was_called) }) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index bcd2584851b70..e707a194a3626 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -3606,7 +3606,7 @@ test_that("with_tz() and force_tz() works", { "2012-01-01 01:02:03" ), tz = "UTC") - timestamps_non_utc <- force_tz(timestamps, "US/Central") + timestamps_non_utc <- force_tz(timestamps, "America/Chicago") nonexistent <- as_datetime(c( "2015-03-29 02:30:00", @@ -3622,10 +3622,10 @@ test_that("with_tz() and force_tz() works", { .input %>% mutate( timestamps_with_tz_1 = with_tz(timestamps, "UTC"), - timestamps_with_tz_2 = with_tz(timestamps, "US/Central"), + timestamps_with_tz_2 = with_tz(timestamps, "America/Chicago"), timestamps_with_tz_3 = with_tz(timestamps, "Asia/Kolkata"), timestamps_force_tz_1 = force_tz(timestamps, "UTC"), - timestamps_force_tz_2 = force_tz(timestamps, "US/Central"), + timestamps_force_tz_2 = force_tz(timestamps, "America/Chicago"), timestamps_force_tz_3 = force_tz(timestamps, "Asia/Kolkata") ) %>% collect(), @@ -3636,7 +3636,7 @@ test_that("with_tz() and force_tz() works", { .input %>% mutate( timestamps_with_tz_1 = with_tz(timestamps, "UTC"), - timestamps_with_tz_2 = with_tz(timestamps, "US/Central"), + timestamps_with_tz_2 = with_tz(timestamps, "America/Chicago"), timestamps_with_tz_3 = with_tz(timestamps, "Asia/Kolkata") ) %>% collect(), @@ -3733,17 +3733,17 @@ 
test_that("with_tz() and force_tz() can add timezone to timestamp without timezo expect_equal( arrow_table(timestamps = timestamps) %>% - mutate(timestamps = with_tz(timestamps, "US/Central")) %>% + mutate(timestamps = with_tz(timestamps, "America/Chicago")) %>% compute(), - arrow_table(timestamps = timestamps$cast(timestamp("s", "US/Central"))) + arrow_table(timestamps = timestamps$cast(timestamp("s", "America/Chicago"))) ) expect_equal( arrow_table(timestamps = timestamps) %>% - mutate(timestamps = force_tz(timestamps, "US/Central")) %>% + mutate(timestamps = force_tz(timestamps, "America/Chicago")) %>% compute(), arrow_table( - timestamps = call_function("assume_timezone", timestamps, options = list(timezone = "US/Central")) + timestamps = call_function("assume_timezone", timestamps, options = list(timezone = "America/Chicago")) ) ) }) diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index dca277c80948c..3d908c05cab07 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -473,17 +473,25 @@ build_libarrow <- function(src_dir, dst_dir) { env_vars <- env_vars_as_string(env_var_list) cat("**** arrow", ifelse(quietly, "", paste("with", env_vars)), "\n") - status <- suppressWarnings(system( - paste(env_vars, "inst/build_arrow_static.sh"), - ignore.stdout = quietly, ignore.stderr = quietly + + build_log_path <- tempfile(fileext = ".log") + status <- suppressWarnings(system2( + "bash", + "inst/build_arrow_static.sh", + env = env_vars, + stdout = ifelse(quietly, build_log_path, ""), + stderr = ifelse(quietly, build_log_path, "") )) + if (status != 0) { # It failed :( - cat( - "**** Error building Arrow C++.", - ifelse(env_is("ARROW_R_DEV", "true"), "", "Re-run with ARROW_R_DEV=true for debug information."), - "\n" - ) + cat("**** Error building Arrow C++.", "\n") + if (quietly) { + cat("**** Printing contents of build log because the build failed", + "while ARROW_R_DEV was set to FALSE\n") + cat(readLines(build_log_path), sep = "\n") + cat("**** Complete build log 
may still be present at", build_log_path, "\n") + } } invisible(status) } diff --git a/ruby/red-arrow/ext/arrow/raw-records.cpp b/ruby/red-arrow/ext/arrow/raw-records.cpp index 0043ca3aaf2cc..bbe421971d11a 100644 --- a/ruby/red-arrow/ext/arrow/raw-records.cpp +++ b/ruby/red-arrow/ext/arrow/raw-records.cpp @@ -305,9 +305,10 @@ namespace red_arrow { } VALUE - record_batch_each_raw_record(VALUE rb_record_batch){ + record_batch_each_raw_record(VALUE rb_record_batch) { auto garrow_record_batch = GARROW_RECORD_BATCH(RVAL2GOBJ(rb_record_batch)); auto record_batch = garrow_record_batch_get_raw(garrow_record_batch).get(); + RETURN_SIZED_ENUMERATOR(rb_record_batch, 0, nullptr, record_batch->num_rows()); try { RawRecordsProducer producer; @@ -323,6 +324,7 @@ namespace red_arrow { table_each_raw_record(VALUE rb_table) { auto garrow_table = GARROW_TABLE(RVAL2GOBJ(rb_table)); auto table = garrow_table_get_raw(garrow_table).get(); + RETURN_SIZED_ENUMERATOR(rb_table, 0, nullptr, table->num_rows()); try { RawRecordsProducer producer; diff --git a/ruby/red-arrow/test/each-raw-record/test-basic-arrays.rb b/ruby/red-arrow/test/each-raw-record/test-basic-arrays.rb index dbbbd79ee0acb..d32031dec4ec0 100644 --- a/ruby/red-arrow/test/each-raw-record/test-basic-arrays.rb +++ b/ruby/red-arrow/test/each-raw-record/test-basic-arrays.rb @@ -22,12 +22,8 @@ def test_null [nil], [nil], ] - iterated_records = [] target = build({column: :null}, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_boolean @@ -36,12 +32,8 @@ def test_boolean [nil], [false], ] - iterated_records = [] target = build({column: :boolean}, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int8 @@ -50,12 +42,8 @@ def test_int8 [nil], [(2 ** 7) - 1], ] - 
iterated_records = [] target = build({column: :int8}, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint8 @@ -64,12 +52,8 @@ def test_uint8 [nil], [(2 ** 8) - 1], ] - iterated_records = [] target = build({column: :uint8}, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int16 @@ -78,12 +62,8 @@ def test_int16 [nil], [(2 ** 15) - 1], ] - iterated_records = [] target = build({column: :int16}, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint16 @@ -92,12 +72,8 @@ def test_uint16 [nil], [(2 ** 16) - 1], ] - iterated_records = [] target = build({column: :uint16}, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int32 @@ -106,12 +82,8 @@ def test_int32 [nil], [(2 ** 31) - 1], ] - iterated_records = [] target = build({column: :int32}, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint32 @@ -120,12 +92,8 @@ def test_uint32 [nil], [(2 ** 32) - 1], ] - iterated_records = [] target = build({column: :uint32}, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int64 @@ -134,12 +102,8 @@ def test_int64 [nil], [(2 ** 63) - 1], ] - iterated_records = [] target = build({column: :int64}, records) - target.each_raw_record do |record| - iterated_records 
<< record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint64 @@ -148,12 +112,8 @@ def test_uint64 [nil], [(2 ** 64) - 1], ] - iterated_records = [] target = build({column: :uint64}, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_half_float @@ -162,12 +122,8 @@ def test_half_float [nil], [1.5], ] - iterated_records = [] target = build({column: :half_float}, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_float @@ -176,12 +132,8 @@ def test_float [nil], [1.0], ] - iterated_records = [] target = build({column: :float}, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_double @@ -190,12 +142,8 @@ def test_double [nil], [1.0], ] - iterated_records = [] target = build({column: :double}, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_binary @@ -204,12 +152,8 @@ def test_binary [nil], ["\xff".b], ] - iterated_records = [] target = build({column: :binary}, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_string @@ -218,12 +162,8 @@ def test_string [nil], ["\u3042"], # U+3042 HIRAGANA LETTER A ] - iterated_records = [] target = build({column: :string}, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, 
target.each_raw_record.to_a) end def test_date32 @@ -232,12 +172,8 @@ def test_date32 [nil], [Date.new(2017, 8, 23)], ] - iterated_records = [] target = build({column: :date32}, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_date64 @@ -246,12 +182,8 @@ def test_date64 [nil], [DateTime.new(2017, 8, 23, 14, 57, 2)], ] - iterated_records = [] target = build({column: :date64}, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_timestamp_second @@ -260,7 +192,6 @@ def test_timestamp_second [nil], [Time.parse("2017-08-23T14:57:02Z")], ] - iterated_records = [] target = build({ column: { type: :timestamp, @@ -268,10 +199,7 @@ def test_timestamp_second } }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_timestamp_milli @@ -280,7 +208,6 @@ def test_timestamp_milli [nil], [Time.parse("2017-08-23T14:57:02.987Z")], ] - iterated_records = [] target = build({ column: { type: :timestamp, @@ -288,10 +215,7 @@ def test_timestamp_milli } }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_timestamp_micro @@ -300,7 +224,6 @@ def test_timestamp_micro [nil], [Time.parse("2017-08-23T14:57:02.987654Z")], ] - iterated_records = [] target = build({ column: { type: :timestamp, @@ -308,10 +231,7 @@ def test_timestamp_micro } }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_timestamp_nano @@ -320,7 
+240,6 @@ def test_timestamp_nano [nil], [Time.parse("2017-08-23T14:57:02.987654321Z")], ] - iterated_records = [] target = build({ column: { type: :timestamp, @@ -328,10 +247,7 @@ def test_timestamp_nano } }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_time32_second @@ -341,7 +257,6 @@ def test_time32_second [nil], [Arrow::Time.new(unit, 60 * 60 * 2 + 9)], # 02:00:09 ] - iterated_records = [] target = build({ column: { type: :time32, @@ -349,10 +264,7 @@ def test_time32_second } }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_time32_milli @@ -362,7 +274,6 @@ def test_time32_milli [nil], [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987)], # 02:00:09.987 ] - iterated_records = [] target = build({ column: { type: :time32, @@ -370,10 +281,7 @@ def test_time32_milli } }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_time64_micro @@ -385,7 +293,6 @@ def test_time64_micro # 02:00:09.987654 [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654)], ] - iterated_records = [] target = build({ column: { type: :time64, @@ -393,10 +300,7 @@ def test_time64_micro } }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_time64_nano @@ -408,7 +312,6 @@ def test_time64_nano # 02:00:09.987654321 [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321)], ] - iterated_records = [] target = build({ column: { type: :time64, @@ -416,10 +319,7 @@ def test_time64_nano } }, records) - 
target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_decimal128 @@ -428,7 +328,6 @@ def test_decimal128 [nil], [BigDecimal("29.29")], ] - iterated_records = [] target = build({ column: { type: :decimal128, @@ -437,10 +336,7 @@ def test_decimal128 } }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_decimal256 @@ -449,7 +345,6 @@ def test_decimal256 [nil], [BigDecimal("29.29")], ] - iterated_records = [] target = build({ column: { type: :decimal256, @@ -458,10 +353,7 @@ def test_decimal256 } }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_month_interval @@ -470,12 +362,8 @@ def test_month_interval [nil], [12], ] - iterated_records = [] target = build({column: :month_interval}, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_day_time_interval @@ -484,12 +372,8 @@ def test_day_time_interval [nil], [{day: 2, millisecond: 300}], ] - iterated_records = [] target = build({column: :day_time_interval}, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_month_day_nano_interval @@ -498,12 +382,8 @@ def test_month_day_nano_interval [nil], [{month: 2, day: 3, nanosecond: 400}], ] - iterated_records = [] target = build({column: :month_day_nano_interval}, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, 
target.each_raw_record.to_a) end end diff --git a/ruby/red-arrow/test/each-raw-record/test-dense-union-array.rb b/ruby/red-arrow/test/each-raw-record/test-dense-union-array.rb index 7c784cccde3a1..b75ce4b4e00d5 100644 --- a/ruby/red-arrow/test/each-raw-record/test-dense-union-array.rb +++ b/ruby/red-arrow/test/each-raw-record/test-dense-union-array.rb @@ -92,13 +92,9 @@ def test_null records = [ [{"0" => nil}], ] - iterated_records = [] target = build(:null, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_boolean @@ -106,13 +102,9 @@ def test_boolean [{"0" => true}], [{"1" => nil}], ] - iterated_records = [] target = build(:boolean, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_int8 @@ -120,13 +112,9 @@ def test_int8 [{"0" => -(2 ** 7)}], [{"1" => nil}], ] - iterated_records = [] target = build(:int8, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_uint8 @@ -134,13 +122,9 @@ def test_uint8 [{"0" => (2 ** 8) - 1}], [{"1" => nil}], ] - iterated_records = [] target = build(:uint8, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_int16 @@ -148,13 +132,9 @@ def test_int16 [{"0" => -(2 ** 15)}], [{"1" => nil}], ] - iterated_records = [] target = build(:int16, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_uint16 @@ -162,13 +142,9 @@ def test_uint16 [{"0" => (2 ** 16) - 1}], [{"1" => 
nil}], ] - iterated_records = [] target = build(:uint16, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_int32 @@ -176,13 +152,9 @@ def test_int32 [{"0" => -(2 ** 31)}], [{"1" => nil}], ] - iterated_records = [] target = build(:int32, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_uint32 @@ -190,13 +162,9 @@ def test_uint32 [{"0" => (2 ** 32) - 1}], [{"1" => nil}], ] - iterated_records = [] target = build(:uint32, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_int64 @@ -204,13 +172,9 @@ def test_int64 [{"0" => -(2 ** 63)}], [{"1" => nil}], ] - iterated_records = [] target = build(:int64, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_uint64 @@ -218,13 +182,9 @@ def test_uint64 [{"0" => (2 ** 64) - 1}], [{"1" => nil}], ] - iterated_records = [] target = build(:uint64, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_float @@ -232,13 +192,9 @@ def test_float [{"0" => -1.0}], [{"1" => nil}], ] - iterated_records = [] target = build(:float, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_double @@ -246,13 +202,9 @@ def test_double [{"0" => -1.0}], [{"1" => nil}], ] - iterated_records = [] target = build(:double, records) - target.each_raw_record do 
|record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_binary @@ -260,13 +212,9 @@ def test_binary [{"0" => "\xff".b}], [{"1" => nil}], ] - iterated_records = [] target = build(:binary, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_string @@ -274,13 +222,9 @@ def test_string [{"0" => "Ruby"}], [{"1" => nil}], ] - iterated_records = [] target = build(:string, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_date32 @@ -288,13 +232,9 @@ def test_date32 [{"0" => Date.new(1960, 1, 1)}], [{"1" => nil}], ] - iterated_records = [] target = build(:date32, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - target.raw_records) + target.each_raw_record.to_a) end def test_date64 @@ -302,13 +242,9 @@ def test_date64 [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}], [{"1" => nil}], ] - iterated_records = [] target = build(:date64, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_timestamp_second @@ -316,17 +252,13 @@ def test_timestamp_second [{"0" => Time.parse("1960-01-01T02:09:30Z")}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :timestamp, unit: :second, }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_timestamp_milli @@ -334,17 +266,13 @@ def test_timestamp_milli [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}], [{"1" => nil}], ] - 
iterated_records = [] target = build({ type: :timestamp, unit: :milli, }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_timestamp_micro @@ -352,17 +280,13 @@ def test_timestamp_micro [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :timestamp, unit: :micro, }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_timestamp_nano @@ -370,17 +294,13 @@ def test_timestamp_nano [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :timestamp, unit: :nano, }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_time32_second @@ -390,17 +310,13 @@ def test_time32_second [{"0" => Arrow::Time.new(unit, 60 * 10)}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :time32, unit: :second, }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_time32_milli @@ -410,17 +326,13 @@ def test_time32_milli [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :time32, unit: :milli, }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_time64_micro @@ -430,17 +342,13 @@ def test_time64_micro [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}], [{"1" => nil}], ] - iterated_records = [] 
target = build({ type: :time64, unit: :micro, }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_time64_nano @@ -450,17 +358,13 @@ def test_time64_nano [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :time64, unit: :nano, }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_decimal128 @@ -468,18 +372,14 @@ def test_decimal128 [{"0" => BigDecimal("92.92")}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :decimal128, precision: 8, scale: 2, }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_decimal256 @@ -487,18 +387,14 @@ def test_decimal256 [{"0" => BigDecimal("92.92")}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :decimal256, precision: 38, scale: 2, }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_month_interval @@ -506,13 +402,9 @@ def test_month_interval [{"0" => 1}], [{"1" => nil}], ] - iterated_records = [] target = build(:month_interval, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_day_time_interval @@ -520,13 +412,9 @@ def test_day_time_interval [{"0" => {day: 1, millisecond: 100}}], [{"1" => nil}], ] - iterated_records = [] target = build(:day_time_interval, records) - target.each_raw_record do |record| - iterated_records << record - end 
assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_month_day_nano_interval @@ -534,13 +422,9 @@ def test_month_day_nano_interval [{"0" => {month: 1, day: 1, nanosecond: 100}}], [{"1" => nil}], ] - iterated_records = [] target = build(:month_day_nano_interval, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_list @@ -548,7 +432,6 @@ def test_list [{"0" => [true, nil, false]}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :list, field: { @@ -557,11 +440,8 @@ def test_list }, }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_struct @@ -570,7 +450,6 @@ def test_struct [{"1" => nil}], [{"0" => {"sub_field" => nil}}], ] - iterated_records = [] target = build({ type: :struct, fields: [ @@ -581,11 +460,8 @@ def test_struct ], }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_map @@ -593,18 +469,14 @@ def test_map [{"0" => {"key1" => true, "key2" => nil}}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :map, key: :string, item: :boolean, }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_sparse_union @@ -614,7 +486,6 @@ def test_sparse_union [{"0" => {"field2" => 29}}], [{"0" => {"field2" => nil}}], ] - iterated_records = [] target = build({ type: :sparse_union, fields: [ @@ -630,11 +501,8 @@ def test_sparse_union type_codes: [0, 1], }, records) - target.each_raw_record do |record| - iterated_records << record - end 
assert_equal(remove_field_names(remove_field_names(records)), - iterated_records) + target.each_raw_record.to_a) end def test_dense_union @@ -644,7 +512,6 @@ def test_dense_union [{"0" => {"field2" => 29}}], [{"0" => {"field2" => nil}}], ] - iterated_records = [] target = build({ type: :dense_union, fields: [ @@ -660,11 +527,8 @@ def test_dense_union type_codes: [0, 1], }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(remove_field_names(records)), - iterated_records) + target.each_raw_record.to_a) end def test_dictionary @@ -673,7 +537,6 @@ def test_dictionary [{"1" => nil}], [{"0" => "GLib"}], ] - iterated_records = [] target = build({ type: :dictionary, index_data_type: :int8, @@ -681,11 +544,8 @@ def test_dictionary ordered: false, }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_field_names(records), - iterated_records) + target.each_raw_record.to_a) end end diff --git a/ruby/red-arrow/test/each-raw-record/test-dictionary-array.rb b/ruby/red-arrow/test/each-raw-record/test-dictionary-array.rb index edc6c33cc091d..f85294b30aab8 100644 --- a/ruby/red-arrow/test/each-raw-record/test-dictionary-array.rb +++ b/ruby/red-arrow/test/each-raw-record/test-dictionary-array.rb @@ -29,12 +29,8 @@ def test_null [nil], [nil], ] - iterated_records = [] target = build(Arrow::NullArray.new(records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_boolean @@ -43,12 +39,8 @@ def test_boolean [nil], [false], ] - iterated_records = [] target = build(Arrow::BooleanArray.new(records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int8 @@ -57,12 +49,8 @@ def test_int8 
[nil], [(2 ** 7) - 1], ] - iterated_records = [] target = build(Arrow::Int8Array.new(records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint8 @@ -71,12 +59,8 @@ def test_uint8 [nil], [(2 ** 8) - 1], ] - iterated_records = [] target = build(Arrow::UInt8Array.new(records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int16 @@ -85,12 +69,8 @@ def test_int16 [nil], [(2 ** 15) - 1], ] - iterated_records = [] target = build(Arrow::Int16Array.new(records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint16 @@ -99,12 +79,8 @@ def test_uint16 [nil], [(2 ** 16) - 1], ] - iterated_records = [] target = build(Arrow::UInt16Array.new(records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int32 @@ -113,12 +89,8 @@ def test_int32 [nil], [(2 ** 31) - 1], ] - iterated_records = [] target = build(Arrow::Int32Array.new(records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint32 @@ -127,12 +99,8 @@ def test_uint32 [nil], [(2 ** 32) - 1], ] - iterated_records = [] target = build(Arrow::UInt32Array.new(records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int64 @@ -141,12 +109,8 @@ def 
test_int64 [nil], [(2 ** 63) - 1], ] - iterated_records = [] target = build(Arrow::Int64Array.new(records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint64 @@ -155,12 +119,8 @@ def test_uint64 [nil], [(2 ** 64) - 1], ] - iterated_records = [] target = build(Arrow::UInt64Array.new(records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_float @@ -169,12 +129,8 @@ def test_float [nil], [1.0], ] - iterated_records = [] target = build(Arrow::FloatArray.new(records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_double @@ -183,12 +139,8 @@ def test_double [nil], [1.0], ] - iterated_records = [] target = build(Arrow::DoubleArray.new(records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_binary @@ -197,12 +149,8 @@ def test_binary [nil], ["\xff".b], ] - iterated_records = [] target = build(Arrow::BinaryArray.new(records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_string @@ -211,12 +159,8 @@ def test_string [nil], ["\u3042"], # U+3042 HIRAGANA LETTER A ] - iterated_records = [] - target = build(Arrow::StringArray.new(records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + target = build(Arrow::StringArray.new(records.collect(&:first))) + 
assert_equal(records, target.each_raw_record.to_a) end def test_date32 @@ -225,12 +169,8 @@ def test_date32 [nil], [Date.new(2017, 8, 23)], ] - iterated_records = [] target = build(Arrow::Date32Array.new(records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_date64 @@ -239,12 +179,8 @@ def test_date64 [nil], [DateTime.new(2017, 8, 23, 14, 57, 2)], ] - iterated_records = [] target = build(Arrow::Date64Array.new(records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_timestamp_second @@ -253,12 +189,8 @@ def test_timestamp_second [nil], [Time.parse("2017-08-23T14:57:02Z")], ] - iterated_records = [] target = build(Arrow::TimestampArray.new(:second, records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_timestamp_milli @@ -267,12 +199,8 @@ def test_timestamp_milli [nil], [Time.parse("2017-08-23T14:57:02.987Z")], ] - iterated_records = [] target = build(Arrow::TimestampArray.new(:milli, records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_timestamp_micro @@ -281,12 +209,8 @@ def test_timestamp_micro [nil], [Time.parse("2017-08-23T14:57:02.987654Z")], ] - iterated_records = [] target = build(Arrow::TimestampArray.new(:micro, records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_timestamp_nano @@ -295,12 +219,8 @@ def 
test_timestamp_nano [nil], [Time.parse("2017-08-23T14:57:02.987654321Z")], ] - iterated_records = [] target = build(Arrow::TimestampArray.new(:nano, records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_time32_second @@ -310,12 +230,8 @@ def test_time32_second [nil], [Arrow::Time.new(unit, 60 * 60 * 2 + 9)], # 02:00:09 ] - iterated_records = [] target = build(Arrow::Time32Array.new(unit, records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_time32_milli @@ -325,12 +241,8 @@ def test_time32_milli [nil], [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987)], # 02:00:09.987 ] - iterated_records = [] target = build(Arrow::Time32Array.new(unit, records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_time64_micro @@ -342,12 +254,8 @@ def test_time64_micro # 02:00:09.987654 [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654)], ] - iterated_records = [] target = build(Arrow::Time64Array.new(unit, records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_time64_nano @@ -359,12 +267,8 @@ def test_time64_nano # 02:00:09.987654321 [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321)], ] - iterated_records = [] target = build(Arrow::Time64Array.new(unit, records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end 
def test_decimal128 @@ -373,13 +277,9 @@ def test_decimal128 [nil], [BigDecimal("29.29")], ] - iterated_records = [] data_type = Arrow::Decimal128DataType.new(8, 2) target = build(Arrow::Decimal128Array.new(data_type, records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_decimal256 @@ -388,13 +288,9 @@ def test_decimal256 [nil], [BigDecimal("29.29")], ] - iterated_records = [] data_type = Arrow::Decimal256DataType.new(38, 2) target = build(Arrow::Decimal256Array.new(data_type, records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, target.raw_records) + assert_equal(records, target.each_raw_record.to_a) end def test_month_interval @@ -403,12 +299,8 @@ def test_month_interval [nil], [12], ] - iterated_records = [] target = build(Arrow::MonthIntervalArray.new(records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_day_time_interval @@ -417,12 +309,8 @@ def test_day_time_interval [nil], [{day: 2, millisecond: 300}], ] - iterated_records = [] target = build(Arrow::DayTimeIntervalArray.new(records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_month_day_nano_interval @@ -431,12 +319,8 @@ def test_month_day_nano_interval [nil], [{month: 2, day: 3, nanosecond: 400}], ] - iterated_records = [] target = build(Arrow::MonthDayNanoIntervalArray.new(records.collect(&:first))) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end end diff --git 
a/ruby/red-arrow/test/each-raw-record/test-list-array.rb b/ruby/red-arrow/test/each-raw-record/test-list-array.rb index 64cc5839bd757..f6f92abf6679e 100644 --- a/ruby/red-arrow/test/each-raw-record/test-list-array.rb +++ b/ruby/red-arrow/test/each-raw-record/test-list-array.rb @@ -38,12 +38,8 @@ def test_null [[nil, nil, nil]], [nil], ] - iterated_records = [] target = build(:null, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_boolean @@ -51,12 +47,8 @@ def test_boolean [[true, nil, false]], [nil], ] - iterated_records = [] target = build(:boolean, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int8 @@ -64,12 +56,8 @@ def test_int8 [[-(2 ** 7), nil, (2 ** 7) - 1]], [nil], ] - iterated_records = [] target = build(:int8, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint8 @@ -77,12 +65,8 @@ def test_uint8 [[0, nil, (2 ** 8) - 1]], [nil], ] - iterated_records = [] target = build(:uint8, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int16 @@ -90,12 +74,8 @@ def test_int16 [[-(2 ** 15), nil, (2 ** 15) - 1]], [nil], ] - iterated_records = [] target = build(:int16, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint16 @@ -103,12 +83,8 @@ def test_uint16 [[0, nil, (2 ** 16) - 1]], [nil], ] - iterated_records = [] target = build(:uint16, records) - target.each_raw_record do 
|record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int32 @@ -116,12 +92,8 @@ def test_int32 [[-(2 ** 31), nil, (2 ** 31) - 1]], [nil], ] - iterated_records = [] target = build(:int32, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint32 @@ -129,12 +101,8 @@ def test_uint32 [[0, nil, (2 ** 32) - 1]], [nil], ] - iterated_records = [] target = build(:uint32, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int64 @@ -142,12 +110,8 @@ def test_int64 [[-(2 ** 63), nil, (2 ** 63) - 1]], [nil], ] - iterated_records = [] target = build(:int64, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint64 @@ -155,12 +119,8 @@ def test_uint64 [[0, nil, (2 ** 64) - 1]], [nil], ] - iterated_records = [] target = build(:uint64, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_float @@ -168,12 +128,8 @@ def test_float [[-1.0, nil, 1.0]], [nil], ] - iterated_records = [] target = build(:float, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_double @@ -181,12 +137,8 @@ def test_double [[-1.0, nil, 1.0]], [nil], ] - iterated_records = [] target = build(:double, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + 
assert_equal(records, target.each_raw_record.to_a) end def test_binary @@ -194,12 +146,8 @@ def test_binary [["\x00".b, nil, "\xff".b]], [nil], ] - iterated_records = [] target = build(:binary, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_string @@ -213,12 +161,8 @@ def test_string ], [nil], ] - iterated_records = [] target = build(:string, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_date32 @@ -232,12 +176,8 @@ def test_date32 ], [nil], ] - iterated_records = [] target = build(:date32, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_date64 @@ -251,12 +191,8 @@ def test_date64 ], [nil], ] - iterated_records = [] target = build(:date64, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_timestamp_second @@ -270,17 +206,12 @@ def test_timestamp_second ], [nil], ] - iterated_records = [] target = build({ type: :timestamp, unit: :second, }, records) - iterated_records = [] - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_timestamp_milli @@ -294,17 +225,12 @@ def test_timestamp_milli ], [nil], ] - iterated_records = [] target = build({ type: :timestamp, unit: :milli, }, records) - iterated_records = [] - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def 
test_timestamp_micro @@ -318,17 +244,12 @@ def test_timestamp_micro ], [nil], ] - iterated_records = [] target = build({ type: :timestamp, unit: :micro, }, records) - iterated_records = [] - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_timestamp_nano @@ -342,16 +263,12 @@ def test_timestamp_nano ], [nil], ] - iterated_records = [] target = build({ type: :timestamp, unit: :nano, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_time32_second @@ -368,16 +285,12 @@ def test_time32_second ], [nil], ] - iterated_records = [] target = build({ type: :time32, unit: :second, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_time32_milli @@ -394,16 +307,12 @@ def test_time32_milli ], [nil], ] - iterated_records = [] target = build({ type: :time32, unit: :milli, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_time64_micro @@ -420,16 +329,12 @@ def test_time64_micro ], [nil], ] - iterated_records = [] target = build({ type: :time64, unit: :micro, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_time64_nano @@ -446,16 +351,12 @@ def test_time64_nano ], [nil], ] - iterated_records = [] target = build({ type: :time64, unit: :nano, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, 
target.each_raw_record.to_a) end def test_decimal128 @@ -469,17 +370,13 @@ def test_decimal128 ], [nil], ] - iterated_records = [] target = build({ type: :decimal128, precision: 8, scale: 2, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_decimal256 @@ -493,17 +390,13 @@ def test_decimal256 ], [nil], ] - iterated_records = [] target = build({ type: :decimal256, precision: 38, scale: 2, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_month_interval @@ -511,12 +404,8 @@ def test_month_interval [[1, nil, 12]], [nil], ] - iterated_records = [] target = build(:month_interval, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_day_time_interval @@ -530,12 +419,8 @@ def test_day_time_interval ], [nil], ] - iterated_records = [] target = build(:day_time_interval, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_month_day_nano_interval @@ -549,12 +434,8 @@ def test_month_day_nano_interval ], [nil], ] - iterated_records = [] target = build(:month_day_nano_interval, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_list @@ -574,7 +455,6 @@ def test_list ], [nil], ] - iterated_records = [] target = build({ type: :list, field: { @@ -583,10 +463,7 @@ def test_list }, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, 
iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_struct @@ -600,7 +477,6 @@ def test_struct ], [nil], ] - iterated_records = [] target = build({ type: :struct, fields: [ @@ -611,10 +487,7 @@ def test_struct ], }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_map @@ -627,17 +500,13 @@ def test_map ], [nil], ] - iterated_records = [] target = build({ type: :map, key: :string, item: :boolean, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def remove_union_field_names(records) @@ -670,7 +539,6 @@ def test_sparse_union ], [nil], ] - iterated_records = [] target = build({ type: :sparse_union, fields: [ @@ -686,11 +554,8 @@ def test_sparse_union type_codes: [0, 1], }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_union_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_dense_union @@ -705,7 +570,6 @@ def test_dense_union ], [nil], ] - iterated_records = [] target = build({ type: :dense_union, fields: [ @@ -721,11 +585,8 @@ def test_dense_union type_codes: [0, 1], }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_union_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_dictionary @@ -739,7 +600,6 @@ def test_dictionary ], [nil], ] - iterated_records = [] target = build({ type: :dictionary, index_data_type: :int8, @@ -747,10 +607,7 @@ def test_dictionary ordered: false, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end end diff --git 
a/ruby/red-arrow/test/each-raw-record/test-map-array.rb b/ruby/red-arrow/test/each-raw-record/test-map-array.rb index b802275a7f00f..f256543a853e3 100644 --- a/ruby/red-arrow/test/each-raw-record/test-map-array.rb +++ b/ruby/red-arrow/test/each-raw-record/test-map-array.rb @@ -31,12 +31,8 @@ def test_null [{"key1" => nil}], [nil], ] - iterated_records = [] target = build(:null, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_boolean @@ -44,12 +40,8 @@ def test_boolean [{"key1" => true, "key2" => nil}], [nil], ] - iterated_records = [] target = build(:boolean, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int8 @@ -57,12 +49,8 @@ def test_int8 [{"key1" => -(2 ** 7), "key2" => nil}], [nil], ] - iterated_records = [] target = build(:int8, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint8 @@ -70,12 +58,8 @@ def test_uint8 [{"key1" => (2 ** 8) - 1, "key2" => nil}], [nil], ] - iterated_records = [] target = build(:uint8, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int16 @@ -83,12 +67,8 @@ def test_int16 [{"key1" => -(2 ** 15), "key2" => nil}], [nil], ] - iterated_records = [] target = build(:int16, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint16 @@ -96,12 +76,8 @@ def test_uint16 [{"key1" => (2 ** 16) - 1, "key2" => nil}], [nil], ] - iterated_records = [] target = 
build(:uint16, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int32 @@ -109,12 +85,8 @@ def test_int32 [{"key1" => -(2 ** 31), "key2" => nil}], [nil], ] - iterated_records = [] target = build(:int32, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint32 @@ -122,12 +94,8 @@ def test_uint32 [{"key1" => (2 ** 32) - 1, "key2" => nil}], [nil], ] - iterated_records = [] target = build(:uint32, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int64 @@ -135,12 +103,8 @@ def test_int64 [{"key1" => -(2 ** 63), "key2" => nil}], [nil], ] - iterated_records = [] target = build(:int64, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint64 @@ -148,12 +112,8 @@ def test_uint64 [{"key1" => (2 ** 64) - 1, "key2" => nil}], [nil], ] - iterated_records = [] target = build(:uint64, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_float @@ -161,12 +121,8 @@ def test_float [{"key1" => -1.0, "key2" => nil}], [nil], ] - iterated_records = [] target = build(:float, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_double @@ -174,12 +130,8 @@ def test_double [{"key1" => -1.0, "key2" => nil}], [nil], ] - iterated_records = [] target = build(:double, records) - 
target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_binary @@ -187,12 +139,8 @@ def test_binary [{"key1" => "\xff".b, "key2" => nil}], [nil], ] - iterated_records = [] target = build(:binary, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_string @@ -200,12 +148,8 @@ def test_string [{"key1" => "Ruby", "key2" => nil}], [nil], ] - iterated_records = [] target = build(:string, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_date32 @@ -213,12 +157,8 @@ def test_date32 [{"key1" => Date.new(1960, 1, 1), "key2" => nil}], [nil], ] - iterated_records = [] target = build(:date32, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_date64 @@ -226,12 +166,8 @@ def test_date64 [{"key1" => DateTime.new(1960, 1, 1, 2, 9, 30), "key2" => nil}], [nil], ] - iterated_records = [] target = build(:date64, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_timestamp_second @@ -239,16 +175,12 @@ def test_timestamp_second [{"key1" => Time.parse("1960-01-01T02:09:30Z"), "key2" => nil}], [nil], ] - iterated_records = [] target = build({ type: :timestamp, unit: :second, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_timestamp_milli @@ -256,16 +188,12 @@ def test_timestamp_milli 
[{"key1" => Time.parse("1960-01-01T02:09:30.123Z"), "key2" => nil}], [nil], ] - iterated_records = [] target = build({ type: :timestamp, unit: :milli, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_timestamp_micro @@ -273,16 +201,12 @@ def test_timestamp_micro [{"key1" => Time.parse("1960-01-01T02:09:30.123456Z"), "key2" => nil}], [nil], ] - iterated_records = [] target = build({ type: :timestamp, unit: :micro, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_timestamp_nano @@ -290,16 +214,12 @@ def test_timestamp_nano [{"key1" => Time.parse("1960-01-01T02:09:30.123456789Z"), "key2" => nil}], [nil], ] - iterated_records = [] target = build({ type: :timestamp, unit: :nano, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_time32_second @@ -309,16 +229,12 @@ def test_time32_second [{"key1" => Arrow::Time.new(unit, 60 * 10), "key2" => nil}], [nil], ] - iterated_records = [] target = build({ type: :time32, unit: :second, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_time32_milli @@ -328,16 +244,12 @@ def test_time32_milli [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123), "key2" => nil}], [nil], ] - iterated_records = [] target = build({ type: :time32, unit: :milli, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_time64_micro @@ -347,16 +259,12 @@ def 
test_time64_micro [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456), "key2" => nil}], [nil], ] - iterated_records = [] target = build({ type: :time64, unit: :micro, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_time64_nano @@ -366,16 +274,12 @@ def test_time64_nano [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789), "key2" => nil}], [nil], ] - iterated_records = [] target = build({ type: :time64, unit: :nano, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_decimal128 @@ -383,17 +287,13 @@ def test_decimal128 [{"key1" => BigDecimal("92.92"), "key2" => nil}], [nil], ] - iterated_records = [] target = build({ type: :decimal128, precision: 8, scale: 2, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_decimal256 @@ -401,17 +301,13 @@ def test_decimal256 [{"key1" => BigDecimal("92.92"), "key2" => nil}], [nil], ] - iterated_records = [] target = build({ type: :decimal256, precision: 38, scale: 2, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_month_interval @@ -419,12 +315,8 @@ def test_month_interval [{"key1" => 1, "key2" => nil}], [nil], ] - iterated_records = [] target = build(:month_interval, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_day_time_interval @@ -437,12 +329,8 @@ def test_day_time_interval ], [nil], ] - 
iterated_records = [] target = build(:day_time_interval, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_month_day_nano_interval @@ -455,12 +343,8 @@ def test_month_day_nano_interval ], [nil], ] - iterated_records = [] target = build(:month_day_nano_interval, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_list @@ -468,7 +352,6 @@ def test_list [{"key1" => [true, nil, false], "key2" => nil}], [nil], ] - iterated_records = [] target = build({ type: :list, field: { @@ -477,10 +360,7 @@ def test_list }, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_struct @@ -488,7 +368,6 @@ def test_struct [{"key1" => {"field" => true}, "key2" => nil, "key3" => {"field" => nil}}], [nil], ] - iterated_records = [] target = build({ type: :struct, fields: [ @@ -499,10 +378,7 @@ def test_struct ], }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_map @@ -510,17 +386,13 @@ def test_map [{"key1" => {"sub_key1" => true, "sub_key2" => nil}, "key2" => nil}], [nil], ] - iterated_records = [] target = build({ type: :map, key: :string, item: :boolean, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def remove_union_field_names(records) @@ -552,7 +424,6 @@ def test_sparse_union ], [nil], ] - iterated_records = [] target = build({ type: :sparse_union, fields: [ @@ -568,11 +439,8 @@ def test_sparse_union 
type_codes: [0, 1], }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_union_field_names(records), - target.raw_records) + target.each_raw_record.to_a) end def test_dense_union @@ -587,7 +455,6 @@ def test_dense_union ], [nil], ] - iterated_records = [] target = build({ type: :dense_union, fields: [ @@ -603,11 +470,8 @@ def test_dense_union type_codes: [0, 1], }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_union_field_names(records), - target.raw_records) + target.each_raw_record.to_a) end def test_dictionary @@ -615,7 +479,6 @@ def test_dictionary [{"key1" => "Ruby", "key2" => nil, "key3" => "GLib"}], [nil], ] - iterated_records = [] target = build({ type: :dictionary, index_data_type: :int8, @@ -623,10 +486,7 @@ def test_dictionary ordered: false, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end end diff --git a/ruby/red-arrow/test/each-raw-record/test-multiple-columns.rb b/ruby/red-arrow/test/each-raw-record/test-multiple-columns.rb index c0547d324d26d..b222ed89afe25 100644 --- a/ruby/red-arrow/test/each-raw-record/test-multiple-columns.rb +++ b/ruby/red-arrow/test/each-raw-record/test-multiple-columns.rb @@ -22,17 +22,13 @@ def test_3_elements [nil, 0, "GLib"], [false, 2 ** 8 - 1, nil], ] - iterated_records = [] target = build([ {name: :column0, type: :boolean}, {name: :column1, type: :uint8}, {name: :column2, type: :string}, ], records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_4_elements @@ -41,7 +37,6 @@ def test_4_elements [nil, 0, "GLib", nil], [false, 2 ** 8 - 1, nil, (2 ** 63) - 1], ] - iterated_records = [] target = build([ {name: :column0, type: :boolean}, {name: :column1, 
type: :uint8}, @@ -49,10 +44,7 @@ def test_4_elements {name: :column3, type: :int64}, ], records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end end diff --git a/ruby/red-arrow/test/each-raw-record/test-sparse-union-array.rb b/ruby/red-arrow/test/each-raw-record/test-sparse-union-array.rb index 4b1b941fb2079..36b0884bbc91c 100644 --- a/ruby/red-arrow/test/each-raw-record/test-sparse-union-array.rb +++ b/ruby/red-arrow/test/each-raw-record/test-sparse-union-array.rb @@ -82,12 +82,8 @@ def test_null records = [ [{"0" => nil}], ] - iterated_records = [] target = build(:null, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_boolean @@ -95,12 +91,8 @@ def test_boolean [{"0" => true}], [{"1" => nil}], ] - iterated_records = [] target = build(:boolean, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_int8 @@ -108,12 +100,8 @@ def test_int8 [{"0" => -(2 ** 7)}], [{"1" => nil}], ] - iterated_records = [] target = build(:int8, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_uint8 @@ -121,12 +109,8 @@ def test_uint8 [{"0" => (2 ** 8) - 1}], [{"1" => nil}], ] - iterated_records = [] target = build(:uint8, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def 
test_int16 @@ -134,12 +118,8 @@ def test_int16 [{"0" => -(2 ** 15)}], [{"1" => nil}], ] - iterated_records = [] target = build(:int16, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_uint16 @@ -147,12 +127,8 @@ def test_uint16 [{"0" => (2 ** 16) - 1}], [{"1" => nil}], ] - iterated_records = [] target = build(:uint16, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_int32 @@ -160,12 +136,8 @@ def test_int32 [{"0" => -(2 ** 31)}], [{"1" => nil}], ] - iterated_records = [] target = build(:int32, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_uint32 @@ -173,12 +145,8 @@ def test_uint32 [{"0" => (2 ** 32) - 1}], [{"1" => nil}], ] - iterated_records = [] target = build(:uint32, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_int64 @@ -186,12 +154,8 @@ def test_int64 [{"0" => -(2 ** 63)}], [{"1" => nil}], ] - iterated_records = [] target = build(:int64, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_uint64 @@ -199,12 +163,8 @@ def test_uint64 [{"0" => (2 ** 64) - 1}], [{"1" => nil}], ] - iterated_records = [] target = build(:uint64, records) - target.each_raw_record do |record| - 
iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_float @@ -212,12 +172,8 @@ def test_float [{"0" => -1.0}], [{"1" => nil}], ] - iterated_records = [] target = build(:float, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_double @@ -225,12 +181,8 @@ def test_double [{"0" => -1.0}], [{"1" => nil}], ] - iterated_records = [] target = build(:double, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_binary @@ -238,12 +190,8 @@ def test_binary [{"0" => "\xff".b}], [{"1" => nil}], ] - iterated_records = [] target = build(:binary, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_string @@ -251,12 +199,8 @@ def test_string [{"0" => "Ruby"}], [{"1" => nil}], ] - iterated_records = [] target = build(:string, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_date32 @@ -264,12 +208,8 @@ def test_date32 [{"0" => Date.new(1960, 1, 1)}], [{"1" => nil}], ] - iterated_records = [] target = build(:date32, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_date64 @@ -277,12 
+217,8 @@ def test_date64 [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}], [{"1" => nil}], ] - iterated_records = [] target = build(:date64, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_timestamp_second @@ -290,16 +226,12 @@ def test_timestamp_second [{"0" => Time.parse("1960-01-01T02:09:30Z")}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :timestamp, unit: :second, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_timestamp_milli @@ -307,16 +239,12 @@ def test_timestamp_milli [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :timestamp, unit: :milli, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end @@ -325,16 +253,12 @@ def test_timestamp_micro [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :timestamp, unit: :micro, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_timestamp_nano @@ -342,16 +266,12 @@ def test_timestamp_nano [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :timestamp, unit: :nano, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), 
iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_time32_second @@ -361,16 +281,12 @@ def test_time32_second [{"0" => Arrow::Time.new(unit, 60 * 10)}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :time32, unit: :second, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_time32_milli @@ -380,16 +296,12 @@ def test_time32_milli [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :time32, unit: :milli, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_time64_micro @@ -399,16 +311,12 @@ def test_time64_micro [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :time64, unit: :micro, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_time64_nano @@ -418,16 +326,12 @@ def test_time64_nano [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :time64, unit: :nano, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_decimal128 @@ -435,17 +339,13 @@ def test_decimal128 [{"0" => BigDecimal("92.92")}], [{"1" => nil}], ] - iterated_records = [] target = build({ 
type: :decimal128, precision: 8, scale: 2, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_decimal256 @@ -453,17 +353,13 @@ def test_decimal256 [{"0" => BigDecimal("92.92")}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :decimal256, precision: 38, scale: 2, }, - records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_month_interval @@ -471,12 +367,8 @@ def test_month_interval [{"0" => 1}], [{"1" => nil}], ] - iterated_records = [] target = build(:month_interval, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_day_time_interval @@ -484,12 +376,8 @@ def test_day_time_interval [{"0" => {day: 1, millisecond: 100}}], [{"1" => nil}], ] - iterated_records = [] target = build(:day_time_interval, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_month_day_nano_interval @@ -497,12 +385,8 @@ def test_month_day_nano_interval [{"0" => {month: 1, day: 1, nanosecond: 100}}], [{"1" => nil}], ] - iterated_records = [] target = build(:month_day_nano_interval, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_list @@ -510,7 +394,6 @@ def test_list [{"0" => [true, nil, 
false]}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :list, field: { @@ -519,10 +402,7 @@ def test_list }, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_struct @@ -531,7 +411,6 @@ def test_struct [{"1" => nil}], [{"0" => {"sub_field" => nil}}], ] - iterated_records = [] target = build({ type: :struct, fields: [ @@ -542,10 +421,7 @@ def test_struct ], }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_map @@ -553,17 +429,13 @@ def test_map [{"0" => {"key1" => true, "key2" => nil}}], [{"1" => nil}], ] - iterated_records = [] target = build({ type: :map, key: :string, item: :boolean, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(records), iterated_records) + assert_equal(remove_field_names(records), target.each_raw_record.to_a) end def test_sparse_union @@ -573,7 +445,6 @@ def test_sparse_union [{"0" => {"field2" => 29}}], [{"0" => {"field2" => nil}}], ] - iterated_records = [] target = build({ type: :sparse_union, fields: [ @@ -589,10 +460,8 @@ def test_sparse_union type_codes: [0, 1], }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(remove_field_names(records)), iterated_records) + assert_equal(remove_field_names(remove_field_names(records)), + target.each_raw_record.to_a) end def test_dense_union @@ -602,7 +471,6 @@ def test_dense_union [{"0" => {"field2" => 29}}], [{"0" => {"field2" => nil}}], ] - iterated_records = [] target = build({ type: :dense_union, fields: [ @@ -618,10 +486,8 @@ def test_dense_union type_codes: [0, 1], }, records) - 
target.each_raw_record do |record| - iterated_records << record - end - assert_equal(remove_field_names(remove_field_names(records)), iterated_records) + assert_equal(remove_field_names(remove_field_names(records)), + target.each_raw_record.to_a) end def test_dictionary diff --git a/ruby/red-arrow/test/each-raw-record/test-struct-array.rb b/ruby/red-arrow/test/each-raw-record/test-struct-array.rb index de6e78aaeef4c..91a3d09af4e84 100644 --- a/ruby/red-arrow/test/each-raw-record/test-struct-array.rb +++ b/ruby/red-arrow/test/each-raw-record/test-struct-array.rb @@ -40,12 +40,8 @@ def test_null [{"field" => nil}], [nil], ] - iterated_records = [] target = build(:null, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_boolean @@ -54,12 +50,8 @@ def test_boolean [nil], [{"field" => nil}], ] - iterated_records = [] target = build(:boolean, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int8 @@ -68,12 +60,8 @@ def test_int8 [nil], [{"field" => nil}], ] - iterated_records = [] target = build(:int8, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint8 @@ -82,12 +70,8 @@ def test_uint8 [nil], [{"field" => nil}], ] - iterated_records = [] target = build(:uint8, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int16 @@ -96,12 +80,8 @@ def test_int16 [nil], [{"field" => nil}], ] - iterated_records = [] target = build(:int16, records) - target.each_raw_record do |record| - iterated_records << record - end - 
assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint16 @@ -110,12 +90,8 @@ def test_uint16 [nil], [{"field" => nil}], ] - iterated_records = [] target = build(:uint16, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int32 @@ -124,12 +100,8 @@ def test_int32 [nil], [{"field" => nil}], ] - iterated_records = [] target = build(:int32, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint32 @@ -138,12 +110,8 @@ def test_uint32 [nil], [{"field" => nil}], ] - iterated_records = [] target = build(:uint32, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_int64 @@ -152,12 +120,8 @@ def test_int64 [nil], [{"field" => nil}], ] - iterated_records = [] target = build(:int64, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_uint64 @@ -166,12 +130,8 @@ def test_uint64 [nil], [{"field" => nil}], ] - iterated_records = [] target = build(:uint64, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_float @@ -180,12 +140,8 @@ def test_float [nil], [{"field" => nil}], ] - iterated_records = [] target = build(:float, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_double @@ -194,12 +150,8 @@ def test_double 
[nil], [{"field" => nil}], ] - iterated_records = [] target = build(:double, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_binary @@ -208,12 +160,8 @@ def test_binary [nil], [{"field" => nil}], ] - iterated_records = [] target = build(:binary, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_string @@ -222,12 +170,8 @@ def test_string [nil], [{"field" => nil}], ] - iterated_records = [] - target = build(:string, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + target = build(:string, records) + assert_equal(records, target.each_raw_record.to_a) end def test_date32 @@ -236,12 +180,8 @@ def test_date32 [nil], [{"field" => nil}], ] - iterated_records = [] target = build(:date32, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_date64 @@ -250,12 +190,8 @@ def test_date64 [nil], [{"field" => nil}], ] - iterated_records = [] target = build(:date64, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_timestamp_second @@ -264,16 +200,12 @@ def test_timestamp_second [nil], [{"field" => nil}], ] - iterated_records = [] target = build({ type: :timestamp, unit: :second, }, - records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + records) + assert_equal(records, target.each_raw_record.to_a) end def test_timestamp_milli @@ -282,16 +214,12 @@ def test_timestamp_milli [nil], [{"field" => 
nil}], ] - iterated_records = [] target = build({ type: :timestamp, unit: :milli, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_timestamp_micro @@ -300,16 +228,12 @@ def test_timestamp_micro [nil], [{"field" => nil}], ] - iterated_records = [] target = build({ type: :timestamp, unit: :micro, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_timestamp_nano @@ -318,16 +242,12 @@ def test_timestamp_nano [nil], [{"field" => nil}], ] - iterated_records = [] target = build({ type: :timestamp, unit: :nano, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_time32_second @@ -338,16 +258,12 @@ def test_time32_second [nil], [{"field" => nil}], ] - iterated_records = [] target = build({ type: :time32, unit: :second, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_time32_milli @@ -358,16 +274,12 @@ def test_time32_milli [nil], [{"field" => nil}], ] - iterated_records = [] target = build({ type: :time32, unit: :milli, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_time64_micro @@ -378,16 +290,12 @@ def test_time64_micro [nil], [{"field" => nil}], ] - iterated_records = [] target = build({ type: :time64, unit: :micro, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, 
target.each_raw_record.to_a) end def test_time64_nano @@ -398,16 +306,12 @@ def test_time64_nano [nil], [{"field" => nil}], ] - iterated_records = [] target = build({ type: :time64, unit: :nano, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_decimal128 @@ -416,17 +320,13 @@ def test_decimal128 [nil], [{"field" => nil}], ] - iterated_records = [] target = build({ type: :decimal128, precision: 8, scale: 2, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_decimal256 @@ -435,17 +335,13 @@ def test_decimal256 [nil], [{"field" => nil}], ] - iterated_records = [] target = build({ type: :decimal256, precision: 38, scale: 2, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_month_interval @@ -454,12 +350,8 @@ def test_month_interval [nil], [{"field" => nil}], ] - iterated_records = [] target = build(:month_interval, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_day_time_interval @@ -468,12 +360,8 @@ def test_day_time_interval [nil], [{"field" => nil}], ] - iterated_records = [] target = build(:day_time_interval, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_month_day_nano_interval @@ -482,12 +370,8 @@ def test_month_day_nano_interval [nil], [{"field" => nil}], ] - iterated_records = [] target = build(:month_day_nano_interval, records) - target.each_raw_record do |record| 
- iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_list @@ -496,7 +380,6 @@ def test_list [nil], [{"field" => nil}], ] - iterated_records = [] target = build({ type: :list, field: { @@ -505,10 +388,7 @@ def test_list }, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_struct @@ -518,7 +398,6 @@ def test_struct [{"field" => nil}], [{"field" => {"sub_field" => nil}}], ] - iterated_records = [] target = build({ type: :struct, fields: [ @@ -529,10 +408,7 @@ def test_struct ], }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def test_map @@ -541,17 +417,13 @@ def test_map [nil], [{"field" => nil}], ] - iterated_records = [] target = build({ type: :map, key: :string, item: :boolean, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end def remove_union_field_names(records) @@ -576,7 +448,6 @@ def test_sparse_union [{"field" => {"field2" => 29}}], [{"field" => {"field2" => nil}}], ] - iterated_records = [] target = build({ type: :sparse_union, fields: [ @@ -592,11 +463,8 @@ def test_sparse_union type_codes: [0, 1], }, records) - target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_union_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_dense_union @@ -607,7 +475,6 @@ def test_dense_union [{"field" => {"field2" => 29}}], [{"field" => {"field2" => nil}}], ] - iterated_records = [] target = build({ type: :dense_union, fields: [ @@ -623,11 +490,8 @@ def test_dense_union type_codes: [0, 1], }, records) - 
target.each_raw_record do |record| - iterated_records << record - end assert_equal(remove_union_field_names(records), - iterated_records) + target.each_raw_record.to_a) end def test_dictionary @@ -637,7 +501,6 @@ def test_dictionary [{"field" => nil}], [{"field" => "GLib"}], ] - iterated_records = [] target = build({ type: :dictionary, index_data_type: :int8, @@ -645,14 +508,11 @@ def test_dictionary ordered: false, }, records) - target.each_raw_record do |record| - iterated_records << record - end - assert_equal(records, iterated_records) + assert_equal(records, target.each_raw_record.to_a) end end -class RawRecordsRecordBatchStructArrayTest < Test::Unit::TestCase +class EachRawRecordRecordBatchStructArrayTest < Test::Unit::TestCase include EachRawRecordStructArrayTests def build(type, records) @@ -660,7 +520,7 @@ def build(type, records) end end -class RawRecordsTableStructArrayTest < Test::Unit::TestCase +class EachRawRecordTableStructArrayTest < Test::Unit::TestCase include EachRawRecordStructArrayTests def build(type, records) diff --git a/ruby/red-arrow/test/each-raw-record/test-table.rb b/ruby/red-arrow/test/each-raw-record/test-table.rb index b5bd80127c8b0..96dbe576382b2 100644 --- a/ruby/red-arrow/test/each-raw-record/test-table.rb +++ b/ruby/red-arrow/test/each-raw-record/test-table.rb @@ -41,11 +41,7 @@ class EachRawRecordTableTest < Test::Unit::TestCase record_batches = raw_record_batches.collect do |record_batch| Arrow::RecordBatch.new(schema, record_batch) end - iterated_records = [] table = Arrow::Table.new(schema, record_batches) - table.each_raw_record do |record| - iterated_records << record - end - assert_equal(raw_records, iterated_records) + assert_equal(raw_records, table.each_raw_record.to_a) end end diff --git a/swift/ArrowFlight/Tests/ArrowFlightTests/FlightTest.swift b/swift/ArrowFlight/Tests/ArrowFlightTests/FlightTest.swift index d0db593b10304..3fd52af08b82f 100644 --- a/swift/ArrowFlight/Tests/ArrowFlightTests/FlightTest.swift +++ 
b/swift/ArrowFlight/Tests/ArrowFlightTests/FlightTest.swift @@ -225,6 +225,25 @@ public class FlightClientTester { XCTAssertEqual(num_call, 1) } + func doGetTestFlightData() async throws { + let ticket = FlightTicket("flight_ticket test".data(using: .utf8)!) + var num_call = 0 + try await client?.doGet(ticket, flightDataClosure: { flightData in + let reader = ArrowReader(); + let result = reader.fromStream(flightData.dataBody) + switch result { + case .success(let rb): + XCTAssertEqual(rb.schema?.fields.count, 3) + XCTAssertEqual(rb.batches[0].length, 4) + num_call += 1 + case .failure(let error): + throw error + } + }) + + XCTAssertEqual(num_call, 1) + } + func doPutTest() async throws { let rb = try makeRecordBatch() var num_call = 0 @@ -290,6 +309,7 @@ final class FlightTest: XCTestCase { try await clientImpl.doActionTest() try await clientImpl.getSchemaTest() try await clientImpl.doGetTest() + try await clientImpl.doGetTestFlightData() try await clientImpl.doPutTest() try await clientImpl.doExchangeTest()