Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/CurtHagenlocher/arrow into …
Browse files Browse the repository at this point in the history
…Duration
  • Loading branch information
CurtHagenlocher committed Oct 5, 2023
2 parents 441a710 + 1cad7a7 commit cc85740
Show file tree
Hide file tree
Showing 283 changed files with 12,958 additions and 5,981 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ jobs:
restore-keys: ${{ matrix.image }}-
- name: Setup Python
run: |
sudo apt install -y --no-install-recommends python3 python3-pip
sudo apt update
sudo apt install -y --no-install-recommends python3 python3-dev python3-pip
- name: Setup Archery
run: python3 -m pip install -e dev/archery[docker]
- name: Execute Docker Build
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ jobs:
submodules: recursive
- name: Setup Python
run: |
sudo apt install -y --no-install-recommends python3 python3-pip
sudo apt update
sudo apt install -y --no-install-recommends python3 python3-dev python3-pip
- name: Setup Archery
run: python3 -m pip install -e dev/archery[docker]
- name: Execute Docker Build
Expand Down
6 changes: 0 additions & 6 deletions .github/workflows/java_nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,6 @@ jobs:
fi
echo $PREFIX
archery crossbow download-artifacts -f java-jars -t binaries $PREFIX
- name: Cache Repo
uses: actions/cache@v3
with:
path: repo
key: java-nightly-${{ github.run_id }}
restore-keys: java-nightly
- name: Sync from Remote
uses: ./arrow/.github/actions/sync-nightlies
with:
Expand Down
13 changes: 13 additions & 0 deletions ci/appveyor-cpp-build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,19 @@ set ARROW_HOME=%CONDA_PREFIX%\Library
@rem ARROW-3075; pkgconfig is broken for Parquet for now
set PARQUET_HOME=%CONDA_PREFIX%\Library

@rem Download IANA Timezone Database to a non-standard location to
@rem test the configurability of the timezone database path
curl https://data.iana.org/time-zones/releases/tzdata2021e.tar.gz --output tzdata.tar.gz || exit /B
mkdir %USERPROFILE%\Downloads\test\tzdata
tar --extract --file tzdata.tar.gz --directory %USERPROFILE%\Downloads\test\tzdata
curl https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml ^
--output %USERPROFILE%\Downloads\test\tzdata\windowsZones.xml || exit /B
@rem Remove the database from the default location
rmdir /s /q %USERPROFILE%\Downloads\tzdata
@rem Set the env var for the non-standard location of the database
@rem (only needed for testing purposes)
set PYARROW_TZDATA_PATH=%USERPROFILE%\Downloads\test\tzdata

python setup.py develop -q || exit /B

set PYTHONDEVMODE=1
Expand Down
2 changes: 1 addition & 1 deletion ci/conda_env_sphinx.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ breathe
doxygen
ipython
numpydoc
pydata-sphinx-theme==0.8
pydata-sphinx-theme
sphinx-autobuild
sphinx-design
sphinx-copybutton
Expand Down
4 changes: 3 additions & 1 deletion ci/docker/conda-integration.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,11 @@ ARG go=1.19.13
# Install Archery and integration dependencies
COPY ci/conda_env_archery.txt /arrow/ci/

# Pin Python until pythonnet is made compatible with 3.12
# (https://github.com/pythonnet/pythonnet/pull/2249)
RUN mamba install -q -y \
--file arrow/ci/conda_env_archery.txt \
"python>=3.7" \
"python < 3.12" \
numpy \
compilers \
maven=${maven} \
Expand Down
4 changes: 2 additions & 2 deletions ci/scripts/install_pandas.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ pandas=$1
numpy=${2:-"latest"}

if [ "${numpy}" = "nightly" ]; then
pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre numpy
pip install --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple --pre numpy
elif [ "${numpy}" = "latest" ]; then
pip install numpy
else
Expand All @@ -38,7 +38,7 @@ fi
if [ "${pandas}" = "upstream_devel" ]; then
pip install git+https://github.com/pandas-dev/pandas.git
elif [ "${pandas}" = "nightly" ]; then
pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre pandas
pip install --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple --pre pandas
elif [ "${pandas}" = "latest" ]; then
pip install pandas
else
Expand Down
5 changes: 5 additions & 0 deletions ci/scripts/integration_arrow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ arrow_dir=${1}
gold_dir=$arrow_dir/testing/data/arrow-ipc-stream/integration

pip install -e $arrow_dir/dev/archery[integration]
# For C# C Data Interface testing
pip install pythonnet

# Get more detailed context on crashes
export PYTHONFAULTHANDLER=1

# Rust can be enabled by exporting ARCHERY_INTEGRATION_WITH_RUST=1
time archery integration \
Expand Down
12 changes: 7 additions & 5 deletions ci/scripts/js_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,14 @@ yarn lint:ci
yarn build

if [ "${BUILD_DOCS_JS}" == "ON" ]; then
if [ "$(git config --get remote.origin.url)" == "https://github.com/apache/arrow.git" ]; then
yarn doc
elif [ "$(git config --get remote.upstream.url)" == "https://github.com/apache/arrow.git" ]; then
yarn doc --gitRemote upstream
elif [ "$(git config --get remote.apache.url)" == "[email protected]:apache/arrow.git" ]; then
# If apache or upstream are defined use those as remote.
# Otherwise use origin which could be a fork on PRs.
if [ "$(git config --get remote.apache.url)" == "[email protected]:apache/arrow.git" ]; then
yarn doc --gitRemote apache
elif [[ "$(git config --get remote.upstream.url)" =~ "https://github.com/apache/arrow" ]]; then
yarn doc --gitRemote upstream
elif [[ "$(basename -s .git $(git config --get remote.origin.url))" == "arrow" ]]; then
yarn doc
else
echo "Failed to build docs because the remote is not set correctly. Please set the origin or upstream remote to https://github.com/apache/arrow.git or the apache remote to [email protected]:apache/arrow.git."
exit 0
Expand Down
6 changes: 6 additions & 0 deletions cpp/Brewfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ brew "aws-sdk-cpp"
brew "bash"
brew "boost"
brew "brotli"
brew "bzip2"
brew "c-ares"
brew "curl"
brew "ccache"
brew "cmake"
brew "flatbuffers"
Expand All @@ -29,14 +31,18 @@ brew "googletest"
brew "grpc"
brew "llvm@14"
brew "lz4"
brew "mimalloc"
brew "ninja"
brew "node"
brew "openssl@3"
brew "pkg-config"
brew "protobuf"
brew "python"
brew "rapidjson"
brew "re2"
brew "snappy"
brew "thrift"
brew "utf8proc"
brew "wget"
brew "xsimd"
brew "zstd"
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ set(ARROW_DOC_DIR "share/doc/${PROJECT_NAME}")
set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support")

set(ARROW_LLVM_VERSIONS
"17.0"
"16.0"
"15.0"
"14.0"
Expand Down
24 changes: 14 additions & 10 deletions cpp/cmake_modules/FindLLVMAlt.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -86,16 +86,20 @@ if(LLVM_FOUND)
target_link_libraries(LLVM::LLVM_LIBS INTERFACE LLVM)
else()
# Find the libraries that correspond to the LLVM components
llvm_map_components_to_libnames(LLVM_LIBS
core
mcjit
native
ipo
bitreader
target
linker
analysis
debuginfodwarf)
set(LLVM_TARGET_COMPONENTS
analysis
bitreader
core
debuginfodwarf
ipo
linker
mcjit
native
target)
if(LLVM_VERSION_MAJOR GREATER_EQUAL 14)
list(APPEND LLVM_TARGET_COMPONENTS passes)
endif()
llvm_map_components_to_libnames(LLVM_LIBS ${LLVM_TARGET_COMPONENTS})
target_link_libraries(LLVM::LLVM_LIBS INTERFACE ${LLVM_LIBS})

if(TARGET LLVMSupport AND NOT ARROW_ZSTD_USE_SHARED)
Expand Down
15 changes: 11 additions & 4 deletions cpp/cmake_modules/SetupCxxFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -456,11 +456,18 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STRE
# Don't complain about optimization passes that were not possible
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-pass-failed")

# Avoid clang / libc++ error about C++17 aligned allocation on macOS.
# See https://chromium.googlesource.com/chromium/src/+/eee44569858fc650b635779c4e34be5cb0c73186%5E%21/#F0
# for details.
if(APPLE)
set(CXX_ONLY_FLAGS "${CXX_ONLY_FLAGS} -fno-aligned-new")
# Avoid clang / libc++ error about C++17 aligned allocation on macOS.
# See https://chromium.googlesource.com/chromium/src/+/eee44569858fc650b635779c4e34be5cb0c73186%5E%21/#F0
# for details.
string(APPEND CXX_ONLY_FLAGS " -fno-aligned-new")

if(CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20)
# Avoid C++17 std::get 'not available' issue on macOS 10.13
# This will be required until at least R 4.4 is released and
# CRAN (hopefully) stops checking on 10.13
string(APPEND CXX_ONLY_FLAGS " -D_LIBCPP_DISABLE_AVAILABILITY")
endif()
endif()
endif()

Expand Down
21 changes: 21 additions & 0 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1308,13 +1308,34 @@ macro(build_snappy)
set(SNAPPY_CMAKE_ARGS
${EP_COMMON_CMAKE_ARGS} -DSNAPPY_BUILD_TESTS=OFF -DSNAPPY_BUILD_BENCHMARKS=OFF
"-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}")
# Snappy unconditionally enables -Werror when building with clang; this can lead
# to build failures by way of new compiler warnings. This adds a flag to disable
# -Werror to the very end of the invocation to override the snappy internal setting.
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
  # Append -Wno-error to the C++ flags of every build configuration, so that
  # it lands after (and overrides) the -Werror that snappy adds internally.
  # The variable being set must be keyed on the loop variable CONFIG: keying
  # it on UPPERCASE_BUILD_TYPE would define the same variable four times, and
  # the last iteration (RELWITHDEBINFO flags) would win for the active build.
  foreach(CONFIG DEBUG MINSIZEREL RELEASE RELWITHDEBINFO)
    list(APPEND
         SNAPPY_CMAKE_ARGS
         "-DCMAKE_CXX_FLAGS_${CONFIG}=${EP_CXX_FLAGS_${CONFIG}} -Wno-error"
    )
  endforeach()
endif()

if(APPLE AND CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20)
# On macOS 10.13 we need to explicitly add <functional> to avoid a missing include error
# This can be removed once CRAN no longer checks on macOS 10.13
find_program(PATCH patch REQUIRED)
set(SNAPPY_PATCH_COMMAND ${PATCH} -p1 -i ${CMAKE_CURRENT_LIST_DIR}/snappy.diff)
else()
set(SNAPPY_PATCH_COMMAND)
endif()

externalproject_add(snappy_ep
${EP_COMMON_OPTIONS}
BUILD_IN_SOURCE 1
INSTALL_DIR ${SNAPPY_PREFIX}
URL ${SNAPPY_SOURCE_URL}
URL_HASH "SHA256=${ARROW_SNAPPY_BUILD_SHA256_CHECKSUM}"
PATCH_COMMAND ${SNAPPY_PATCH_COMMAND}
CMAKE_ARGS ${SNAPPY_CMAKE_ARGS}
BUILD_BYPRODUCTS "${SNAPPY_STATIC_LIB}")

Expand Down
12 changes: 12 additions & 0 deletions cpp/cmake_modules/snappy.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
diff --git a/snappy.cc b/snappy.cc
index d414718..5b0d0d6 100644
--- a/snappy.cc
+++ b/snappy.cc
@@ -83,6 +83,7 @@
#include <string>
#include <utility>
#include <vector>
+#include <functional>

namespace snappy {

5 changes: 5 additions & 0 deletions cpp/src/arrow/array/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,12 @@ static std::vector<std::shared_ptr<DataType>> TestArrayUtilitiesAgainstTheseType
large_utf8(),
list(utf8()),
list(int64()), // NOTE: Regression case for ARROW-9071/MakeArrayOfNull
list(large_utf8()),
list(list(int64())),
list(list(large_utf8())),
large_list(utf8()),
large_list(large_utf8()),
large_list(list(large_utf8())),
fixed_size_list(utf8(), 3),
fixed_size_list(int64(), 4),
dictionary(int32(), utf8()),
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/arrow/array/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -366,9 +366,11 @@ class NullArrayFactory {
}

template <typename T>
enable_if_var_size_list<T, Status> Visit(const T&) {
enable_if_var_size_list<T, Status> Visit(const T& type) {
// values array may be empty, but there must be at least one offset of 0
return MaxOf(sizeof(typename T::offset_type) * (length_ + 1));
RETURN_NOT_OK(MaxOf(sizeof(typename T::offset_type) * (length_ + 1)));
RETURN_NOT_OK(MaxOf(GetBufferLength(type.value_type(), length_)));
return Status::OK();
}

template <typename T>
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/arrow/array/validate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -713,8 +713,10 @@ struct ValidateArrayImpl {
}

// An empty list array can have 0 offsets
const auto required_offsets = (data.length > 0) ? data.length + data.offset + 1 : 0;
const auto offsets_byte_size = data.buffers[1]->size();
const auto required_offsets = ((data.length > 0) || (offsets_byte_size > 0))
? data.length + data.offset + 1
: 0;
if (offsets_byte_size / static_cast<int32_t>(sizeof(offset_type)) <
required_offsets) {
return Status::Invalid("Offsets buffer size (bytes): ", offsets_byte_size,
Expand Down
49 changes: 40 additions & 9 deletions cpp/src/arrow/compute/kernels/codegen_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ TypeHolder CommonTemporal(const TypeHolder* begin, size_t count) {
bool saw_date32 = false;
bool saw_date64 = false;
bool saw_duration = false;
bool saw_time_since_midnight = false;
const TypeHolder* end = begin + count;
for (auto it = begin; it != end; it++) {
auto id = it->type->id();
Expand All @@ -271,6 +272,18 @@ TypeHolder CommonTemporal(const TypeHolder* begin, size_t count) {
finest_unit = std::max(finest_unit, ty.unit());
continue;
}
case Type::TIME32: {
const auto& type = checked_cast<const Time32Type&>(*it->type);
finest_unit = std::max(finest_unit, type.unit());
saw_time_since_midnight = true;
continue;
}
case Type::TIME64: {
const auto& type = checked_cast<const Time64Type&>(*it->type);
finest_unit = std::max(finest_unit, type.unit());
saw_time_since_midnight = true;
continue;
}
case Type::DURATION: {
const auto& ty = checked_cast<const DurationType&>(*it->type);
finest_unit = std::max(finest_unit, ty.unit());
Expand All @@ -282,15 +295,33 @@ TypeHolder CommonTemporal(const TypeHolder* begin, size_t count) {
}
}

if (timezone) {
// At least one timestamp seen
return timestamp(finest_unit, *timezone);
} else if (saw_date64) {
return date64();
} else if (saw_date32) {
return date32();
} else if (saw_duration) {
return duration(finest_unit);
bool saw_timestamp_or_date = timezone || saw_date64 || saw_date32 || saw_duration;

if (saw_time_since_midnight && saw_timestamp_or_date) {
// Cannot find common type
return TypeHolder(nullptr);
}
if (saw_timestamp_or_date) {
if (timezone) {
// At least one timestamp seen
return timestamp(finest_unit, *timezone);
} else if (saw_date64) {
return date64();
} else if (saw_date32) {
return date32();
} else if (saw_duration) {
return duration(finest_unit);
}
}
if (saw_time_since_midnight) {
switch (finest_unit) {
case TimeUnit::SECOND:
case TimeUnit::MILLI:
return time32(finest_unit);
case TimeUnit::MICRO:
case TimeUnit::NANO:
return time64(finest_unit);
}
}
return TypeHolder(nullptr);
}
Expand Down
12 changes: 12 additions & 0 deletions cpp/src/arrow/compute/kernels/codegen_internal_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,18 @@ TEST(TestDispatchBest, CommonTemporal) {
args = {timestamp(TimeUnit::SECOND, "America/Phoenix"),
timestamp(TimeUnit::SECOND, "UTC")};
ASSERT_EQ(CommonTemporal(args.data(), args.size()), nullptr);

args = {time32(TimeUnit::SECOND), time32(TimeUnit::MILLI)};
AssertTypeEqual(*time32(TimeUnit::MILLI), *CommonTemporal(args.data(), args.size()));

args = {time32(TimeUnit::SECOND), time64(TimeUnit::NANO)};
AssertTypeEqual(*time64(TimeUnit::NANO), *CommonTemporal(args.data(), args.size()));

args = {date32(), time32(TimeUnit::SECOND)};
ASSERT_EQ(CommonTemporal(args.data(), args.size()), nullptr);

args = {timestamp(TimeUnit::SECOND), time32(TimeUnit::SECOND)};
ASSERT_EQ(CommonTemporal(args.data(), args.size()), nullptr);
}

TEST(TestDispatchBest, CommonTemporalResolution) {
Expand Down
Loading

0 comments on commit cc85740

Please sign in to comment.