Skip to content

Commit

Permalink
Merge branch 'main' into feat/flight-sql-client
Browse files Browse the repository at this point in the history
  • Loading branch information
HackPoint committed Nov 20, 2024
2 parents be65271 + 33e8cbb commit 59cc8e7
Show file tree
Hide file tree
Showing 48 changed files with 267 additions and 188 deletions.
1 change: 0 additions & 1 deletion .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@

# Docs
# /docs/
# .readthedocs.yml
# *.md
# *.rmd
# *.rst
Expand Down
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -181,4 +181,5 @@ repos:
files: >-
(
?^ci/scripts/c_glib_build\.sh$|
?^ci/scripts/c_glib_test\.sh$|
)
19 changes: 0 additions & 19 deletions .readthedocs.yml

This file was deleted.

26 changes: 14 additions & 12 deletions c_glib/parquet-glib/arrow-file-reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,12 +134,13 @@ gparquet_arrow_file_reader_new_arrow(GArrowSeekableInputStream *source, GError *
{
auto arrow_random_access_file = garrow_seekable_input_stream_get_raw(source);
auto arrow_memory_pool = arrow::default_memory_pool();
std::unique_ptr<parquet::arrow::FileReader> parquet_arrow_file_reader;
auto status = parquet::arrow::OpenFile(arrow_random_access_file,
arrow_memory_pool,
&parquet_arrow_file_reader);
if (garrow_error_check(error, status, "[parquet][arrow][file-reader][new-arrow]")) {
return gparquet_arrow_file_reader_new_raw(parquet_arrow_file_reader.release());
auto parquet_arrow_file_reader_result =
parquet::arrow::OpenFile(arrow_random_access_file, arrow_memory_pool);
if (garrow::check(error,
parquet_arrow_file_reader_result,
"[parquet][arrow][file-reader][new-arrow]")) {
return gparquet_arrow_file_reader_new_raw(
parquet_arrow_file_reader_result->release());
} else {
return NULL;
}
Expand Down Expand Up @@ -168,12 +169,13 @@ gparquet_arrow_file_reader_new_path(const gchar *path, GError **error)
std::shared_ptr<arrow::io::RandomAccessFile> arrow_random_access_file =
arrow_memory_mapped_file.ValueOrDie();
auto arrow_memory_pool = arrow::default_memory_pool();
std::unique_ptr<parquet::arrow::FileReader> parquet_arrow_file_reader;
auto status = parquet::arrow::OpenFile(arrow_random_access_file,
arrow_memory_pool,
&parquet_arrow_file_reader);
if (garrow::check(error, status, "[parquet][arrow][file-reader][new-path]")) {
return gparquet_arrow_file_reader_new_raw(parquet_arrow_file_reader.release());
auto parquet_arrow_file_reader_result =
parquet::arrow::OpenFile(arrow_random_access_file, arrow_memory_pool);
if (garrow::check(error,
parquet_arrow_file_reader_result,
"[parquet][arrow][file-reader][new-path]")) {
return gparquet_arrow_file_reader_new_raw(
parquet_arrow_file_reader_result->release());
} else {
return NULL;
}
Expand Down
6 changes: 3 additions & 3 deletions ci/scripts/c_glib_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ set -ex
source_dir=${1}/c_glib
build_dir=${2}/c_glib

: ${ARROW_GLIB_VAPI:=true}
: "${ARROW_GLIB_VAPI:=true}"

export DYLD_LIBRARY_PATH=${ARROW_HOME}/lib:${DYLD_LIBRARY_PATH}
export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
Expand All @@ -34,7 +34,7 @@ if [ -z "${ARROW_DEBUG_MEMORY_POOL}" ]; then
export ARROW_DEBUG_MEMORY_POOL=trap
fi

pushd ${source_dir}
pushd "${source_dir}"

ruby test/run-test.rb

Expand All @@ -51,7 +51,7 @@ fi

popd

pushd ${build_dir}
pushd "${build_dir}"
example/build
example/extension-type
if [ "${ARROW_GLIB_VAPI}" = "true" ]; then
Expand Down
1 change: 0 additions & 1 deletion cpp/build-support/lint_exclusions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
*RcppExports.cpp*
*_generated*
*arrowExports.cpp*
*parquet_constants.*
*parquet_types.*
*pyarrow_api.h
*pyarrow_lib.h
Expand Down
2 changes: 1 addition & 1 deletion cpp/examples/arrow/parquet_read_write.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ arrow::Status ReadFullFile(std::string path_to_file) {

// Open Parquet file reader
std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
ARROW_RETURN_NOT_OK(parquet::arrow::OpenFile(input, pool, &arrow_reader));
ARROW_ASSIGN_OR_RAISE(arrow_reader, parquet::arrow::OpenFile(input, pool));

// Read entire file as a single Arrow table
std::shared_ptr<arrow::Table> table;
Expand Down
16 changes: 8 additions & 8 deletions cpp/examples/parquet/parquet_arrow/reader_writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ void read_whole_file() {
arrow::default_memory_pool()));

std::unique_ptr<parquet::arrow::FileReader> reader;
PARQUET_THROW_NOT_OK(
parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
PARQUET_ASSIGN_OR_THROW(reader,
parquet::arrow::OpenFile(infile, arrow::default_memory_pool()));
std::shared_ptr<arrow::Table> table;
PARQUET_THROW_NOT_OK(reader->ReadTable(&table));
std::cout << "Loaded " << table->num_rows() << " rows in " << table->num_columns()
Expand All @@ -85,8 +85,8 @@ void read_single_rowgroup() {
arrow::default_memory_pool()));

std::unique_ptr<parquet::arrow::FileReader> reader;
PARQUET_THROW_NOT_OK(
parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
PARQUET_ASSIGN_OR_THROW(reader,
parquet::arrow::OpenFile(infile, arrow::default_memory_pool()));
std::shared_ptr<arrow::Table> table;
PARQUET_THROW_NOT_OK(reader->RowGroup(0)->ReadTable(&table));
std::cout << "Loaded " << table->num_rows() << " rows in " << table->num_columns()
Expand All @@ -102,8 +102,8 @@ void read_single_column() {
arrow::default_memory_pool()));

std::unique_ptr<parquet::arrow::FileReader> reader;
PARQUET_THROW_NOT_OK(
parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
PARQUET_ASSIGN_OR_THROW(reader,
parquet::arrow::OpenFile(infile, arrow::default_memory_pool()));
std::shared_ptr<arrow::ChunkedArray> array;
PARQUET_THROW_NOT_OK(reader->ReadColumn(0, &array));
PARQUET_THROW_NOT_OK(arrow::PrettyPrint(*array, 4, &std::cout));
Expand All @@ -122,8 +122,8 @@ void read_single_column_chunk() {
arrow::default_memory_pool()));

std::unique_ptr<parquet::arrow::FileReader> reader;
PARQUET_THROW_NOT_OK(
parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
PARQUET_ASSIGN_OR_THROW(reader,
parquet::arrow::OpenFile(infile, arrow::default_memory_pool()));
std::shared_ptr<arrow::ChunkedArray> array;
PARQUET_THROW_NOT_OK(reader->RowGroup(0)->Column(0)->Read(&array));
PARQUET_THROW_NOT_OK(arrow::PrettyPrint(*array, 4, &std::cout));
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/array/statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

namespace arrow {

/// \class ArrayStatistics
/// \brief Statistics for an Array
///
/// Apache Arrow format doesn't have statistics but data source such
Expand Down
8 changes: 4 additions & 4 deletions cpp/src/arrow/c/bridge.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1310,13 +1310,13 @@ struct SchemaImporter {
}

bool keys_sorted = (c_struct_->flags & ARROW_FLAG_MAP_KEYS_SORTED);
bool values_nullable = value_type->field(1)->nullable();

// Some implementations of Arrow (such as Rust) use a non-standard field name
// for key ("keys") and value ("values") fields. For simplicity, we override
// them on import.
auto values_field =
::arrow::field("value", value_type->field(1)->type(), values_nullable);
type_ = map(value_type->field(0)->type(), values_field, keys_sorted);
type_ =
std::make_shared<MapType>(value_type->field(0)->WithName("key"),
value_type->field(1)->WithName("value"), keys_sorted);
return Status::OK();
}

Expand Down
11 changes: 11 additions & 0 deletions cpp/src/arrow/c/bridge_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3769,6 +3769,10 @@ TEST_F(TestSchemaRoundtrip, RegisteredExtension) {
TEST_F(TestSchemaRoundtrip, Map) {
TestWithTypeFactory([&]() { return map(utf8(), int32()); });
TestWithTypeFactory([&]() { return map(utf8(), field("value", int32(), false)); });
TestWithTypeFactory([&]() {
return map(utf8(), field("value", int32(), false,
KeyValueMetadata::Make({"meta key"}, {"meta value"})));
});
// Field names are brought in line with the spec on import.
TestWithTypeFactory(
[&]() {
Expand Down Expand Up @@ -5315,6 +5319,13 @@ TEST_F(TestArrayDeviceStreamRoundtrip, ChunkedArrayRoundtripEmpty) {

class TestAsyncDeviceArrayStreamRoundTrip : public BaseArrayStreamTest {
public:
// Runs the base fixture's setup first, then skips the test when Arrow was
// built without ARROW_ENABLE_THREADING defined.
void SetUp() override {
  BaseArrayStreamTest::SetUp();
#if !defined(ARROW_ENABLE_THREADING)
  GTEST_SKIP() << "Test requires ARROW_ENABLE_THREADING=ON";
#endif
}

static Result<std::shared_ptr<ArrayData>> ToDeviceData(
const std::shared_ptr<MemoryManager>& mm, const ArrayData& data) {
arrow::BufferVector buffers;
Expand Down
3 changes: 3 additions & 0 deletions cpp/src/arrow/compute/row/grouper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -877,6 +877,9 @@ struct GrouperFastImpl : public Grouper {
} else {
ARROW_ASSIGN_OR_RAISE(fixedlen_bufs[i],
AllocatePaddedBuffer((num_groups + 1) * sizeof(uint32_t)));
// Set offset[0] to 0 so the later allocation of varlen_bufs doesn't see an
// uninitialized value when num_groups == 0.
reinterpret_cast<uint32_t*>(fixedlen_bufs[i]->mutable_data())[0] = 0;
}
cols_[i] =
KeyColumnArray(col_metadata_[i], num_groups, non_null_bufs[i]->mutable_data(),
Expand Down
22 changes: 22 additions & 0 deletions cpp/src/arrow/compute/row/grouper_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,5 +64,27 @@ TEST(Grouper, ResortedColumnsWithLargeNullRows) {
}
}

// Reproduction of GH-43124: exercise the variable-length buffer sizing when a
// grouper produces zero groups.
TEST(Grouper, EmptyGroups) {
  // Create a grouper keyed on (int32, utf8) and request its uniques right away,
  // without consuming any batch beforehand.
  ASSERT_OK_AND_ASSIGN(auto grouper, Grouper::Make({int32(), utf8()}));
  ASSERT_OK_AND_ASSIGN(auto uniques, grouper->GetUniques());

  // int32 key column: exactly two buffers; the first is absent and the second
  // exists but is empty.
  const auto& int32_column = uniques[0];
  ASSERT_TRUE(int32_column.is_array());
  const auto& int32_buffers = int32_column.array()->buffers;
  ASSERT_EQ(int32_buffers.size(), 2);
  ASSERT_EQ(int32_buffers[0], nullptr);
  ASSERT_NE(int32_buffers[1], nullptr);
  ASSERT_EQ(int32_buffers[1]->size(), 0);

  // utf8 key column: exactly three buffers; the first is absent, the second
  // holds a single uint32 (4 bytes) equal to 0, and the third exists but is
  // empty.
  const auto& utf8_column = uniques[1];
  ASSERT_TRUE(utf8_column.is_array());
  const auto& utf8_buffers = utf8_column.array()->buffers;
  ASSERT_EQ(utf8_buffers.size(), 3);
  ASSERT_EQ(utf8_buffers[0], nullptr);
  ASSERT_NE(utf8_buffers[1], nullptr);
  ASSERT_EQ(utf8_buffers[1]->size(), 4);
  ASSERT_EQ(utf8_buffers[1]->data_as<const uint32_t>()[0], 0);
  ASSERT_NE(utf8_buffers[2], nullptr);
  ASSERT_EQ(utf8_buffers[2]->size(), 0);
}

} // namespace compute
} // namespace arrow
8 changes: 4 additions & 4 deletions cpp/src/arrow/filesystem/azurefs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -475,10 +475,10 @@ TEST(AzureFileSystem, InitializeWithDefaultCredential) {
TEST(AzureFileSystem, InitializeWithDefaultCredentialImplicitly) {
AzureOptions options;
options.account_name = "dummy-account-name";
AzureOptions explictly_default_options;
explictly_default_options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(explictly_default_options.ConfigureDefaultCredential());
ASSERT_TRUE(options.Equals(explictly_default_options));
AzureOptions explicitly_default_options;
explicitly_default_options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(explicitly_default_options.ConfigureDefaultCredential());
ASSERT_TRUE(options.Equals(explicitly_default_options));
}

TEST(AzureFileSystem, InitializeWithAnonymousCredential) {
Expand Down
8 changes: 0 additions & 8 deletions cpp/src/arrow/util/float16.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,7 @@ class ARROW_EXPORT Float16 {
}
/// \brief Return the value's bytes in little-endian byte order
constexpr std::array<uint8_t, 2> ToLittleEndian() const {
#if ARROW_LITTLE_ENDIAN
return {uint8_t(bits_ & 0xff), uint8_t(bits_ >> 8)};
#else
return {uint8_t(bits_ >> 8), uint8_t(bits_ & 0xff)};
#endif
}

/// \brief Copy the value's bytes in big-endian byte order
Expand All @@ -125,11 +121,7 @@ class ARROW_EXPORT Float16 {
}
/// \brief Return the value's bytes in big-endian byte order
constexpr std::array<uint8_t, 2> ToBigEndian() const {
#if ARROW_LITTLE_ENDIAN
return {uint8_t(bits_ >> 8), uint8_t(bits_ & 0xff)};
#else
return {uint8_t(bits_ & 0xff), uint8_t(bits_ >> 8)};
#endif
}

constexpr Float16 operator-() const { return FromBits(bits_ ^ 0x8000); }
Expand Down
47 changes: 28 additions & 19 deletions cpp/src/arrow/util/float16_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -323,44 +323,53 @@ TEST(Float16Test, Compare) {
TEST(Float16Test, ToBytes) {
constexpr auto f16 = Float16::FromBits(0xd01c);
std::array<uint8_t, 2> bytes;
auto load = [&bytes]() { return SafeLoadAs<uint16_t>(bytes.data()); };

constexpr uint8_t expected_high = 0xd0;
constexpr uint8_t expected_low = 0x1c;

// Test native-endian
f16.ToBytes(bytes.data());
ASSERT_EQ(load(), 0xd01c);
#if ARROW_LITTLE_ENDIAN
ASSERT_EQ(bytes[0], expected_low);
ASSERT_EQ(bytes[1], expected_high);
#else
ASSERT_EQ(bytes[0], expected_high);
ASSERT_EQ(bytes[1], expected_low);
#endif
bytes = f16.ToBytes();
ASSERT_EQ(load(), 0xd01c);

#if ARROW_LITTLE_ENDIAN
constexpr uint16_t expected_le = 0xd01c;
constexpr uint16_t expected_be = 0x1cd0;
ASSERT_EQ(bytes[0], expected_low);
ASSERT_EQ(bytes[1], expected_high);
#else
constexpr uint16_t expected_le = 0x1cd0;
constexpr uint16_t expected_be = 0xd01c;
ASSERT_EQ(bytes[0], expected_high);
ASSERT_EQ(bytes[1], expected_low);
#endif

// Test little-endian
f16.ToLittleEndian(bytes.data());
ASSERT_EQ(load(), expected_le);
ASSERT_EQ(bytes[0], expected_low);
ASSERT_EQ(bytes[1], expected_high);
bytes = f16.ToLittleEndian();
ASSERT_EQ(load(), expected_le);
ASSERT_EQ(bytes[0], expected_low);
ASSERT_EQ(bytes[1], expected_high);
// Test big-endian
f16.ToBigEndian(bytes.data());
ASSERT_EQ(load(), expected_be);
ASSERT_EQ(bytes[0], expected_high);
ASSERT_EQ(bytes[1], expected_low);
bytes = f16.ToBigEndian();
ASSERT_EQ(load(), expected_be);
ASSERT_EQ(bytes[0], expected_high);
ASSERT_EQ(bytes[1], expected_low);
}

TEST(Float16Test, FromBytes) {
constexpr uint16_t u16 = 0xd01c;
const auto* data = reinterpret_cast<const uint8_t*>(&u16);
ASSERT_EQ(Float16::FromBytes(data), Float16::FromBits(0xd01c));
const std::array<uint8_t, 2> bytes = {0x1c, 0xd0};
#if ARROW_LITTLE_ENDIAN
ASSERT_EQ(Float16::FromLittleEndian(data), Float16::FromBits(0xd01c));
ASSERT_EQ(Float16::FromBigEndian(data), Float16::FromBits(0x1cd0));
ASSERT_EQ(Float16::FromBytes(bytes.data()), Float16::FromBits(0xd01c));
#else
ASSERT_EQ(Float16::FromLittleEndian(data), Float16(0x1cd0));
ASSERT_EQ(Float16::FromBigEndian(data), Float16(0xd01c));
ASSERT_EQ(Float16::FromBytes(bytes.data()), Float16::FromBits(0x1cd0));
#endif
ASSERT_EQ(Float16::FromLittleEndian(bytes.data()), Float16::FromBits(0xd01c));
ASSERT_EQ(Float16::FromBigEndian(bytes.data()), Float16::FromBits(0x1cd0));
}

} // namespace
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/util/io_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@
#endif

#ifdef _WIN32
# include <Windows.h>
# include <windows.h>
#else
# include <dlfcn.h>
#endif
Expand Down
17 changes: 0 additions & 17 deletions cpp/src/generated/parquet_constants.cpp

This file was deleted.

Loading

0 comments on commit 59cc8e7

Please sign in to comment.