Skip to content

Commit

Permalink
Merge branch 'main' into bump-lz4-to-latest
Browse files Browse the repository at this point in the history
  • Loading branch information
mapleFU committed Aug 6, 2024
2 parents 544537d + 51d50d7 commit 57f5c60
Show file tree
Hide file tree
Showing 200 changed files with 6,929 additions and 1,890 deletions.
12 changes: 6 additions & 6 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
/matlab/ @kevingurney @kou @sgilmore10
/python/pyarrow/_flight.pyx @lidavidm
/python/pyarrow/**/*gandiva* @wjones127
/r/ @thisisnic
/r/ @jonkeane @thisisnic
/ruby/ @kou
/swift/ @kou

Expand All @@ -53,19 +53,19 @@
# *.txt

# PR CI and repository files
/.github/ @assignUser @kou @raulcd
/.github/ @assignUser @jonkeane @kou @raulcd
.asf.yaml @assignUser @kou @raulcd
.pre-commit-config.yaml @raulcd
.travis.yml @assignUser @kou @raulcd
appveyor.yml @assignUser @kou @raulcd
# .git*

# release scripts, archery etc.
/ci/ @assignUser @kou @raulcd
/dev/ @assignUser @kou @raulcd
/ci/ @assignUser @jonkeane @kou @raulcd
/dev/ @assignUser @jonkeane @kou @raulcd
.dockerignore @raulcd
.env @assignUser @kou @raulcd
docker-compose.yml @assignUser @kou @raulcd
.env @assignUser @jonkeane @kou @raulcd
docker-compose.yml @assignUser @jonkeane @kou @raulcd

# R specific packaging tooling
/r/configure* @assignUser
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ jobs:
- name: Run minimal example
run: |
cd cpp/examples/minimal_build
docker-compose run --rm minimal
docker compose run --rm minimal
macos:
name: ${{ matrix.architecture }} macOS ${{ matrix.macos-version }} C++
Expand Down
28 changes: 28 additions & 0 deletions ci/docker/conda-python-cpython-debug.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Build arguments that select the base image, which is resolved as
# ${repo}:${arch}-conda-python-${python}. `repo` and `arch` have no default
# and must be supplied by the caller.
ARG repo
ARG arch
ARG python=3.8
FROM ${repo}:${arch}-conda-python-${python}

# An ARG declared before FROM is only usable in the FROM line itself, so it
# has to be declared again here to be visible to the RUN steps below.
ARG python=3.8
# Install a CPython build of the requested version from the conda-forge
# "python_debug" label, then clean the mamba caches.
RUN mamba install -y "conda-forge/label/python_debug::python=${python}[build=*_cpython]" && \
mamba clean --all
# Quick check that we do have a debug mode CPython:
# sys.gettotalrefcount() is only available in debug builds of CPython, so
# this step fails the image build if a regular interpreter was installed.
RUN python -c "import sys; sys.gettotalrefcount()"
104 changes: 104 additions & 0 deletions ci/docker/ubuntu-24.04-cpp-minimal.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Base image; can be overridden with the `base` build argument.
ARG base=amd64/ubuntu:24.04
FROM ${base}

# Run every RUN step with bash and `pipefail`, so that a failure in any
# command of a pipeline (e.g. `wget ... | apt-key add -`) fails the step.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Select the non-interactive debconf frontend so that package installation
# does not prompt.
RUN echo "debconf debconf/frontend select Noninteractive" | \
debconf-set-selections

# Base packages: compiler toolchain, build tools, and development libraries.
# The apt caches are removed in the same step.
RUN apt-get update -y -q && \
apt-get install -y -q \
build-essential \
ccache \
cmake \
curl \
git \
libssl-dev \
libcurl4-openssl-dev \
python3-pip \
tzdata \
tzdata-legacy \
wget && \
apt-get clean && \
rm -rf /var/lib/apt/lists*

# Installs LLVM toolchain, for Gandiva and testing other compilers
#
# The `llvm` build argument selects the LLVM major version; it has no default
# here and must be supplied by the caller. When the requested version is
# greater than `latest_system_llvm`, the apt.llvm.org repository for the
# current Ubuntu code name is added before clang-${llvm} and llvm-${llvm}-dev
# are installed.
#
# NOTE(review): this step runs after the base package installation above. If
# it is meant to run first (to improve iteration while debugging the package
# list with docker build), the two RUN steps need to be swapped - confirm.
ARG llvm
RUN latest_system_llvm=14 && \
if [ ${llvm} -gt ${latest_system_llvm} ]; then \
apt-get update -y -q && \
apt-get install -y -q --no-install-recommends \
apt-transport-https \
ca-certificates \
gnupg \
lsb-release \
wget && \
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
code_name=$(lsb_release --codename --short) && \
if [ ${llvm} -gt 10 ]; then \
echo "deb https://apt.llvm.org/${code_name}/ llvm-toolchain-${code_name}-${llvm} main" > \
/etc/apt/sources.list.d/llvm.list; \
fi; \
fi && \
apt-get update -y -q && \
apt-get install -y -q --no-install-recommends \
clang-${llvm} \
llvm-${llvm}-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists*

# Copy each helper script into the image and run it with the arguments shown.
# The scripts themselves are not visible here; presumably they install MinIO,
# the GCS testbench and sccache respectively - confirm against ci/scripts.
COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_minio.sh latest /usr/local

COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

# Environment defaults baked into the image: ARROW_* / PARQUET_* feature
# switches, the CMake generator, the Python executable name, and PATH with
# /usr/lib/ccache/ prepended. Presumably these are read by the Arrow build
# scripts run inside the container - confirm.
ENV ARROW_ACERO=ON \
ARROW_AZURE=OFF \
ARROW_BUILD_TESTS=ON \
ARROW_DATASET=ON \
ARROW_FLIGHT=ON \
ARROW_GANDIVA=ON \
ARROW_GCS=ON \
ARROW_HDFS=ON \
ARROW_HOME=/usr/local \
ARROW_INSTALL_NAME_RPATH=OFF \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
ARROW_USE_CCACHE=ON \
ARROW_WITH_BROTLI=ON \
ARROW_WITH_BZ2=ON \
ARROW_WITH_LZ4=ON \
ARROW_WITH_OPENTELEMETRY=OFF \
ARROW_WITH_SNAPPY=ON \
ARROW_WITH_ZLIB=ON \
ARROW_WITH_ZSTD=ON \
CMAKE_GENERATOR="Unix Makefiles" \
PARQUET_BUILD_EXAMPLES=ON \
PARQUET_BUILD_EXECUTABLES=ON \
PATH=/usr/lib/ccache/:$PATH \
PYTHON=python3
5 changes: 5 additions & 0 deletions cpp/cmake_modules/BuildUtils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,11 @@ function(ADD_TEST_CASE REL_TEST_NAME)
"${EXECUTABLE_OUTPUT_PATH};$ENV{CONDA_PREFIX}/lib")
endif()

# Ensure that the bundled GoogleTest headers are used when we build with
# bundled GoogleTest. ARROW_GTEST_GTEST_HEADERS is set to a target only
# when we use bundled GoogleTest; otherwise it is empty.
target_link_libraries(${TEST_NAME} PRIVATE ${ARROW_GTEST_GTEST_HEADERS})

if(ARG_STATIC_LINK_LIBS)
# Customize link libraries
target_link_libraries(${TEST_NAME} PRIVATE ${ARG_STATIC_LINK_LIBS})
Expand Down
12 changes: 9 additions & 3 deletions cpp/cmake_modules/Findlz4Alt.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,15 @@ endif()
find_package(lz4 ${find_package_args})
if(lz4_FOUND)
set(lz4Alt_FOUND TRUE)
# Conan uses lz4::lz4 not LZ4::lz4
if(NOT TARGET LZ4::lz4 AND TARGET lz4::lz4)
add_library(LZ4::lz4 ALIAS lz4::lz4)
if(NOT TARGET LZ4::lz4)
# Conan uses lz4::lz4 not LZ4::lz4
if(TARGET lz4::lz4)
add_library(LZ4::lz4 ALIAS lz4::lz4)
elseif(ARROW_LZ4_USE_SHARED)
add_library(LZ4::lz4 ALIAS LZ4::lz4_shared)
else()
add_library(LZ4::lz4 ALIAS LZ4::lz4_static)
endif()
endif()
return()
endif()
Expand Down
15 changes: 14 additions & 1 deletion cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2306,6 +2306,10 @@ function(build_gtest)
install(DIRECTORY "${googletest_SOURCE_DIR}/googlemock/include/"
"${googletest_SOURCE_DIR}/googletest/include/"
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
add_library(arrow::GTest::gtest_headers INTERFACE IMPORTED)
target_include_directories(arrow::GTest::gtest_headers
INTERFACE "${googletest_SOURCE_DIR}/googlemock/include/"
"${googletest_SOURCE_DIR}/googletest/include/")
install(TARGETS gmock gmock_main gtest gtest_main
EXPORT arrow_testing_targets
RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
Expand Down Expand Up @@ -2350,12 +2354,14 @@ if(ARROW_TESTING)

string(APPEND ARROW_TESTING_PC_LIBS " $<TARGET_FILE:GTest::gtest>")
endif()
set(ARROW_GTEST_GTEST_HEADERS)
set(ARROW_GTEST_GMOCK GTest::gmock)
set(ARROW_GTEST_GTEST GTest::gtest)
set(ARROW_GTEST_GTEST_MAIN GTest::gtest_main)
else()
string(APPEND ARROW_TESTING_PC_CFLAGS " -I\${includedir}/arrow-gtest")
string(APPEND ARROW_TESTING_PC_LIBS " -larrow_gtest")
set(ARROW_GTEST_GTEST_HEADERS arrow::GTest::gtest_headers)
set(ARROW_GTEST_GMOCK arrow::GTest::gmock)
set(ARROW_GTEST_GTEST arrow::GTest::gtest)
set(ARROW_GTEST_GTEST_MAIN arrow::GTest::gtest_main)
Expand Down Expand Up @@ -2882,6 +2888,10 @@ macro(build_absl)
set(ABSL_INCLUDE_DIR "${ABSL_PREFIX}/include")
set(ABSL_CMAKE_ARGS "${EP_COMMON_CMAKE_ARGS}" -DABSL_RUN_TESTS=OFF
"-DCMAKE_INSTALL_PREFIX=${ABSL_PREFIX}")
if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0)
set(ABSL_CXX_FLAGS "${EP_CXX_FLAGS} -include stdint.h")
list(APPEND ABSL_CMAKE_ARGS "-DCMAKE_CXX_FLAGS=${ABSL_CXX_FLAGS}")
endif()
set(ABSL_BUILD_BYPRODUCTS)
set(ABSL_LIBRARIES)

Expand Down Expand Up @@ -4506,9 +4516,12 @@ function(build_orc)
OFF
CACHE BOOL "" FORCE)
get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES)
if(NOT LZ4_INCLUDE_DIR)
find_path(LZ4_INCLUDE_DIR NAMES lz4.h)
endif()
get_filename_component(LZ4_ROOT "${LZ4_INCLUDE_DIR}" DIRECTORY)
set(LZ4_HOME
${LZ4_ROOT}
"${LZ4_ROOT}"
CACHE STRING "" FORCE)
set(LZ4_LIBRARY
LZ4::lz4
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@ arrow_add_object_library(ARROW_ARRAY
array/concatenate.cc
array/data.cc
array/diff.cc
array/statistics.cc
array/util.cc
array/validate.cc)

Expand Down Expand Up @@ -1168,6 +1169,7 @@ add_arrow_test(array_test
array/array_struct_test.cc
array/array_union_test.cc
array/array_view_test.cc
array/statistics_test.cc
PRECOMPILED_HEADERS
"$<$<COMPILE_LANGUAGE:CXX>:arrow/testing/pch.h>")

Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/array/builder_binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -500,9 +500,9 @@ class ARROW_EXPORT StringHeapBuilder {
ARROW_RETURN_NOT_OK(Reserve(length));
}

auto v =
util::ToBinaryView(value, static_cast<int32_t>(length),
static_cast<int32_t>(blocks_.size() - 1), current_offset_);
auto v = util::ToNonInlineBinaryView(value, static_cast<int32_t>(length),
static_cast<int32_t>(blocks_.size() - 1),
current_offset_);

memcpy(current_out_buffer_, value, static_cast<size_t>(length));
current_out_buffer_ += length;
Expand Down
21 changes: 21 additions & 0 deletions cpp/src/arrow/array/statistics.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// This otherwise empty .cc file exists so that the non-inlined symbols
// of arrow::ArrayStatistics are emitted into libarrow.

#include "arrow/array/statistics.h"
76 changes: 76 additions & 0 deletions cpp/src/arrow/array/statistics.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cstdint>
#include <optional>
#include <string>
#include <string_view>
#include <variant>

#include "arrow/util/float16.h"
#include "arrow/util/visibility.h"

namespace arrow {

/// \brief Statistics for an Array
///
/// The Apache Arrow format itself doesn't carry statistics, but a data
/// source such as Apache Parquet may. This class offers a unified API
/// for reading the statistics associated with such a data source.
struct ARROW_EXPORT ArrayStatistics {
  /// \brief The type used to hold a minimum or maximum value
  using ValueType =
      std::variant<bool, int8_t, uint8_t, int16_t, uint16_t, int32_t, uint32_t, int64_t,
                   uint64_t, util::Float16, float, double, std::string, std::string_view>;

  ArrayStatistics() = default;
  ~ArrayStatistics() = default;

  /// \brief The number of null values; unset when not known
  std::optional<int64_t> null_count = std::nullopt;

  /// \brief The number of distinct values; unset when not known
  std::optional<int64_t> distinct_count = std::nullopt;

  /// \brief The minimum value; unset when not known
  std::optional<ValueType> min = std::nullopt;

  /// \brief Whether the minimum value is exact; unset when not known
  std::optional<bool> is_min_exact = std::nullopt;

  /// \brief The maximum value; unset when not known
  std::optional<ValueType> max = std::nullopt;

  /// \brief Whether the maximum value is exact; unset when not known
  std::optional<bool> is_max_exact = std::nullopt;

  /// \brief Check whether two statistics are equal
  ///
  /// Two statistics are equal when every field compares equal, which
  /// includes a field being unset on both sides.
  bool Equals(const ArrayStatistics& other) const {
    const bool counts_match =
        null_count == other.null_count && distinct_count == other.distinct_count;
    if (!counts_match) {
      return false;
    }
    const bool min_matches = min == other.min && is_min_exact == other.is_min_exact;
    if (!min_matches) {
      return false;
    }
    return max == other.max && is_max_exact == other.is_max_exact;
  }

  /// \brief Check whether two statistics are equal
  bool operator==(const ArrayStatistics& other) const { return Equals(other); }

  /// \brief Check whether two statistics are not equal
  bool operator!=(const ArrayStatistics& other) const { return !(*this == other); }
};

} // namespace arrow
Loading

0 comments on commit 57f5c60

Please sign in to comment.