Skip to content

Commit

Permalink
Merge branch 'Union' of https://github.com/CurtHagenlocher/arrow into Union
Browse files (Browse the repository at this point in the history)
  • Loading branch information
CurtHagenlocher committed Sep 23, 2023
2 parents 143a469 + ddac4d3 commit 3ef09de
Show file tree
Hide file tree
Showing 141 changed files with 5,145 additions and 749 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/matlab.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ jobs:
run: sudo apt-get install ninja-build
- name: Install MATLAB
uses: matlab-actions/setup-matlab@v1
with:
release: R2023a
- name: Install ccache
run: sudo apt-get install ccache
- name: Setup ccache
Expand Down Expand Up @@ -99,6 +101,8 @@ jobs:
run: brew install ninja
- name: Install MATLAB
uses: matlab-actions/setup-matlab@v1
with:
release: R2023a
- name: Install ccache
run: brew install ccache
- name: Setup ccache
Expand Down Expand Up @@ -135,6 +139,8 @@ jobs:
fetch-depth: 0
- name: Install MATLAB
uses: matlab-actions/setup-matlab@v1
with:
release: R2023a
- name: Download Timezone Database
shell: bash
run: ci/scripts/download_tz_database.sh
Expand Down
17 changes: 8 additions & 9 deletions c_glib/arrow-glib/compute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3346,7 +3346,7 @@ garrow_set_lookup_options_get_property(GObject *object,
g_value_set_object(value, priv->value_set);
break;
case PROP_SET_LOOKUP_OPTIONS_SKIP_NULLS:
g_value_set_boolean(value, options->skip_nulls);
g_value_set_boolean(value, options->skip_nulls.has_value() && options->skip_nulls.value());
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
Expand Down Expand Up @@ -3398,13 +3398,11 @@ garrow_set_lookup_options_class_init(GArrowSetLookupOptionsClass *klass)
*
* Since: 6.0.0
*/
spec = g_param_spec_boolean("skip-nulls",
"Skip NULLs",
"Whether NULLs are skipped or not",
options.skip_nulls,
static_cast<GParamFlags>(G_PARAM_READWRITE));
g_object_class_install_property(gobject_class,
PROP_SET_LOOKUP_OPTIONS_SKIP_NULLS,
auto skip_nulls = (options.skip_nulls.has_value() && options.skip_nulls.value());
spec =
g_param_spec_boolean("skip-nulls", "Skip NULLs", "Whether NULLs are skipped or not",
skip_nulls, static_cast<GParamFlags>(G_PARAM_READWRITE));
g_object_class_install_property(gobject_class, PROP_SET_LOOKUP_OPTIONS_SKIP_NULLS,
spec);
}

Expand Down Expand Up @@ -6458,9 +6456,10 @@ garrow_set_lookup_options_new_raw(
arrow_copied_options.get());
auto value_set =
garrow_datum_new_raw(&(arrow_copied_set_lookup_options->value_set));
auto skip_nulls = (arrow_options->skip_nulls.has_value() && arrow_options->skip_nulls.value());
auto options = g_object_new(GARROW_TYPE_SET_LOOKUP_OPTIONS,
"value-set", value_set,
"skip-nulls", arrow_options->skip_nulls,
"skip-nulls", skip_nulls,
NULL);
return GARROW_SET_LOOKUP_OPTIONS(options);
}
Expand Down
2 changes: 1 addition & 1 deletion ci/conda_env_archery.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jira
pygit2
pygithub
ruamel.yaml
setuptools_scm
setuptools_scm<8.0.0
toolz

# benchmark
Expand Down
2 changes: 1 addition & 1 deletion ci/conda_env_crossbow.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,5 @@ jinja2
jira
pygit2
ruamel.yaml
setuptools_scm
setuptools_scm<8.0.0
toolz
2 changes: 1 addition & 1 deletion ci/conda_env_python.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@ pytest-faulthandler
pytest-lazy-fixture
s3fs>=2021.8.0
setuptools
setuptools_scm
setuptools_scm<8.0.0
24 changes: 24 additions & 0 deletions ci/docker/conda-python-cython2.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Build arguments used to select the base image in the FROM line below.
# `repo` and `arch` have no default here and are expected to be supplied by
# the build invocation; `python` falls back to 3.8 when not given.
ARG repo
ARG arch
ARG python=3.8
# Base image: the conda-python image for the chosen architecture and Python version.
FROM ${repo}:${arch}-conda-python-${python}

# Install a Cython release older than 3.0 (quiet, non-interactive), then run
# `mamba clean --all` in the same RUN step — presumably so package caches do
# not remain in the resulting layer (confirm against mamba docs).
RUN mamba install -q -y "cython<3" && \
mamba clean --all
6 changes: 4 additions & 2 deletions ci/scripts/integration_arrow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,12 @@ set -ex
arrow_dir=${1}
gold_dir=$arrow_dir/testing/data/arrow-ipc-stream/integration

pip install -e $arrow_dir/dev/archery
pip install -e $arrow_dir/dev/archery[integration]

# Rust can be enabled by exporting ARCHERY_INTEGRATION_WITH_RUST=1
archery integration \
time archery integration \
--run-c-data \
--run-ipc \
--run-flight \
--with-cpp=1 \
--with-csharp=1 \
Expand Down
2 changes: 0 additions & 2 deletions ci/scripts/matlab_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ cmake \
-S ${source_dir} \
-B ${build_dir} \
-G Ninja \
-D MATLAB_BUILD_TESTS=ON \
-D CMAKE_INSTALL_PREFIX=${install_dir} \
-D MATLAB_ADD_INSTALL_DIR_TO_SEARCH_PATH=OFF
cmake --build ${build_dir} --config Release --target install
ctest --test-dir ${build_dir}
2 changes: 1 addition & 1 deletion cpp/cmake_modules/BuildUtils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ function(arrow_create_merged_static_lib output_target)
if(APPLE)
set(BUNDLE_COMMAND "libtool" "-no_warning_for_no_symbols" "-static" "-o"
${output_lib_path} ${all_library_paths})
elseif(CMAKE_CXX_COMPILER_ID MATCHES "^(Clang|GNU|Intel)$")
elseif(CMAKE_CXX_COMPILER_ID MATCHES "^(Clang|GNU|Intel|IntelLLVM)$")
set(ar_script_path ${CMAKE_BINARY_DIR}/${ARG_NAME}.ar)

file(WRITE ${ar_script_path}.in "CREATE ${output_lib_path}\n")
Expand Down
9 changes: 6 additions & 3 deletions cpp/cmake_modules/SetupCxxFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,8 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-sign-conversion")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wunused-result")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wdate-time")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel" OR CMAKE_CXX_COMPILER_ID STREQUAL
"IntelLLVM")
if(WIN32)
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /Wall")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /Wno-deprecated")
Expand Down Expand Up @@ -360,7 +361,8 @@ elseif("${BUILD_WARNING_LEVEL}" STREQUAL "EVERYTHING")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wextra")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-parameter")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wunused-result")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel" OR CMAKE_CXX_COMPILER_ID STREQUAL
"IntelLLVM")
if(WIN32)
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /Wall")
else()
Expand All @@ -383,7 +385,8 @@ else()
OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang"
OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel" OR CMAKE_CXX_COMPILER_ID STREQUAL
"IntelLLVM")
if(WIN32)
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /Wall")
else()
Expand Down
6 changes: 5 additions & 1 deletion cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,11 @@ endif()
#

if(ARROW_BUILD_INTEGRATION OR ARROW_BUILD_TESTS)
list(APPEND ARROW_SRCS integration/json_integration.cc integration/json_internal.cc)
list(APPEND
ARROW_SRCS
integration/c_data_integration_internal.cc
integration/json_integration.cc
integration/json_internal.cc)
endif()

if(ARROW_CSV)
Expand Down
12 changes: 6 additions & 6 deletions cpp/src/arrow/array/array_dict.cc
Original file line number Diff line number Diff line change
Expand Up @@ -282,9 +282,9 @@ class DictionaryUnifierImpl : public DictionaryUnifier {
*out_type = arrow::dictionary(index_type, value_type_);

// Build unified dictionary array
std::shared_ptr<ArrayData> data;
RETURN_NOT_OK(DictTraits::GetDictionaryArrayData(pool_, value_type_, memo_table_,
0 /* start_offset */, &data));
ARROW_ASSIGN_OR_RAISE(
auto data, DictTraits::GetDictionaryArrayData(pool_, value_type_, memo_table_,
0 /* start_offset */));
*out_dict = MakeArray(data);
return Status::OK();
}
Expand All @@ -299,9 +299,9 @@ class DictionaryUnifierImpl : public DictionaryUnifier {
}

// Build unified dictionary array
std::shared_ptr<ArrayData> data;
RETURN_NOT_OK(DictTraits::GetDictionaryArrayData(pool_, value_type_, memo_table_,
0 /* start_offset */, &data));
ARROW_ASSIGN_OR_RAISE(
auto data, DictTraits::GetDictionaryArrayData(pool_, value_type_, memo_table_,
0 /* start_offset */));
*out_dict = MakeArray(data);
return Status::OK();
}
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/arrow/array/builder_dict.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,9 @@ class DictionaryMemoTable::DictionaryMemoTableImpl {
enable_if_memoize<T, Status> Visit(const T&) {
using ConcreteMemoTable = typename DictionaryTraits<T>::MemoTableType;
auto memo_table = checked_cast<ConcreteMemoTable*>(memo_table_);
return DictionaryTraits<T>::GetDictionaryArrayData(pool_, value_type_, *memo_table,
start_offset_, out_);
ARROW_ASSIGN_OR_RAISE(*out_, DictionaryTraits<T>::GetDictionaryArrayData(
pool_, value_type_, *memo_table, start_offset_));
return Status::OK();
}
};

Expand Down
47 changes: 19 additions & 28 deletions cpp/src/arrow/array/dict_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include "arrow/array.h"
#include "arrow/buffer.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
Expand Down Expand Up @@ -63,11 +64,9 @@ struct DictionaryTraits<BooleanType> {
using T = BooleanType;
using MemoTableType = typename HashTraits<T>::MemoTableType;

static Status GetDictionaryArrayData(MemoryPool* pool,
const std::shared_ptr<DataType>& type,
const MemoTableType& memo_table,
int64_t start_offset,
std::shared_ptr<ArrayData>* out) {
static Result<std::shared_ptr<ArrayData>> GetDictionaryArrayData(
MemoryPool* pool, const std::shared_ptr<DataType>& type,
const MemoTableType& memo_table, int64_t start_offset) {
if (start_offset < 0) {
return Status::Invalid("invalid start_offset ", start_offset);
}
Expand All @@ -82,7 +81,9 @@ struct DictionaryTraits<BooleanType> {
: builder.Append(bool_values[i]));
}

return builder.FinishInternal(out);
std::shared_ptr<ArrayData> out;
RETURN_NOT_OK(builder.FinishInternal(&out));
return out;
}
}; // namespace internal

Expand All @@ -91,11 +92,9 @@ struct DictionaryTraits<T, enable_if_has_c_type<T>> {
using c_type = typename T::c_type;
using MemoTableType = typename HashTraits<T>::MemoTableType;

static Status GetDictionaryArrayData(MemoryPool* pool,
const std::shared_ptr<DataType>& type,
const MemoTableType& memo_table,
int64_t start_offset,
std::shared_ptr<ArrayData>* out) {
static Result<std::shared_ptr<ArrayData>> GetDictionaryArrayData(
MemoryPool* pool, const std::shared_ptr<DataType>& type,
const MemoTableType& memo_table, int64_t start_offset) {
auto dict_length = static_cast<int64_t>(memo_table.size()) - start_offset;
// This makes a copy, but we assume a dictionary array is usually small
// compared to the size of the dictionary-using array.
Expand All @@ -112,20 +111,17 @@ struct DictionaryTraits<T, enable_if_has_c_type<T>> {
RETURN_NOT_OK(
ComputeNullBitmap(pool, memo_table, start_offset, &null_count, &null_bitmap));

*out = ArrayData::Make(type, dict_length, {null_bitmap, dict_buffer}, null_count);
return Status::OK();
return ArrayData::Make(type, dict_length, {null_bitmap, dict_buffer}, null_count);
}
};

template <typename T>
struct DictionaryTraits<T, enable_if_base_binary<T>> {
using MemoTableType = typename HashTraits<T>::MemoTableType;

static Status GetDictionaryArrayData(MemoryPool* pool,
const std::shared_ptr<DataType>& type,
const MemoTableType& memo_table,
int64_t start_offset,
std::shared_ptr<ArrayData>* out) {
static Result<std::shared_ptr<ArrayData>> GetDictionaryArrayData(
MemoryPool* pool, const std::shared_ptr<DataType>& type,
const MemoTableType& memo_table, int64_t start_offset) {
using offset_type = typename T::offset_type;

// Create the offsets buffer
Expand All @@ -148,23 +144,19 @@ struct DictionaryTraits<T, enable_if_base_binary<T>> {
RETURN_NOT_OK(
ComputeNullBitmap(pool, memo_table, start_offset, &null_count, &null_bitmap));

*out = ArrayData::Make(type, dict_length,
return ArrayData::Make(type, dict_length,
{null_bitmap, std::move(dict_offsets), std::move(dict_data)},
null_count);

return Status::OK();
}
};

template <typename T>
struct DictionaryTraits<T, enable_if_fixed_size_binary<T>> {
using MemoTableType = typename HashTraits<T>::MemoTableType;

static Status GetDictionaryArrayData(MemoryPool* pool,
const std::shared_ptr<DataType>& type,
const MemoTableType& memo_table,
int64_t start_offset,
std::shared_ptr<ArrayData>* out) {
static Result<std::shared_ptr<ArrayData>> GetDictionaryArrayData(
MemoryPool* pool, const std::shared_ptr<DataType>& type,
const MemoTableType& memo_table, int64_t start_offset) {
const T& concrete_type = internal::checked_cast<const T&>(*type);

// Create the data buffer
Expand All @@ -182,9 +174,8 @@ struct DictionaryTraits<T, enable_if_fixed_size_binary<T>> {
RETURN_NOT_OK(
ComputeNullBitmap(pool, memo_table, start_offset, &null_count, &null_bitmap));

*out = ArrayData::Make(type, dict_length, {null_bitmap, std::move(dict_data)},
return ArrayData::Make(type, dict_length, {null_bitmap, std::move(dict_data)},
null_count);
return Status::OK();
}
};

Expand Down
Loading

0 comments on commit 3ef09de

Please sign in to comment.