From c6917c077a6a0a17dd813dc6c28dac564190f995 Mon Sep 17 00:00:00 2001 From: mwish Date: Fri, 24 May 2024 23:04:10 +0800 Subject: [PATCH] add basic tests for arrow metadata --- cpp/src/parquet/CMakeLists.txt | 1 + cpp/src/parquet/arrow/arrow_metadata_test.cc | 75 ++++++++++++++++++++ cpp/src/parquet/type_fwd.h | 2 - 3 files changed, 76 insertions(+), 2 deletions(-) create mode 100644 cpp/src/parquet/arrow/arrow_metadata_test.cc diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index 93f2e72d8d661..be19acc68f5c2 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -397,6 +397,7 @@ add_parquet_test(writer-test add_parquet_test(arrow-test SOURCES + arrow/arrow_metadata_test.cc arrow/arrow_reader_writer_test.cc arrow/arrow_schema_test.cc arrow/arrow_statistics_test.cc) diff --git a/cpp/src/parquet/arrow/arrow_metadata_test.cc b/cpp/src/parquet/arrow/arrow_metadata_test.cc new file mode 100644 index 0000000000000..f9014db8e30d8 --- /dev/null +++ b/cpp/src/parquet/arrow/arrow_metadata_test.cc @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+#include "gtest/gtest.h"
+
+#include "arrow/table.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/key_value_metadata.h"
+
+#include "parquet/api/writer.h"
+#include "parquet/arrow/schema.h"
+#include "parquet/arrow/writer.h"
+#include "parquet/file_writer.h"
+#include "parquet/test_util.h"
+
+namespace parquet::arrow {
+
+// Checks that key-value metadata added before Close() shows up in writer->metadata().
+TEST(Metadata, AppendMetadata) {
+  // A sample schema and table; their type and structure do not matter in this test.
+  auto schema = ::arrow::schema({::arrow::field("f", ::arrow::utf8())});
+  auto table = ::arrow::Table::Make(
+      schema, {::arrow::ArrayFromJSON(::arrow::utf8(), R"(["a", "b", "c"])")});
+
+  auto sink = CreateOutputStream();
+  ArrowWriterProperties::Builder builder;
+  builder.store_schema();
+  ASSERT_OK_AND_ASSIGN(auto writer,
+                       parquet::arrow::FileWriter::Open(
+                           *schema, ::arrow::default_memory_pool(), sink,
+                           parquet::default_writer_properties(), builder.build()));
+
+  // test_key_2 gets a placeholder value here; it is appended again below.
+  auto kv_meta = std::make_shared<::arrow::KeyValueMetadata>();
+  kv_meta->Append("test_key_1", "test_value_1");
+  kv_meta->Append("test_key_2", "test_value_2_");
+  ASSERT_OK(writer->AddKeyValueMetadata(kv_meta));
+
+  // Second batch: carries the test_key_2 value that the final check expects.
+  auto kv_meta_added = std::make_shared<::arrow::KeyValueMetadata>();
+  kv_meta_added->Append("test_key_2", "test_value_2");
+  kv_meta_added->Append("test_key_3", "test_value_3");
+  ASSERT_OK(writer->AddKeyValueMetadata(kv_meta_added));
+  ASSERT_OK(writer->Close());
+
+  // Adding metadata after the writer is closed must fail with IOError.
+  ASSERT_RAISES(IOError, writer->AddKeyValueMetadata(kv_meta_added));
+
+  const auto& key_value_metadata = writer->metadata()->key_value_metadata();
+  ASSERT_TRUE(nullptr != key_value_metadata);
+
+  // Verify keys that were added before file writer was closed are present.
+  for (int i = 1; i <= 3; ++i) {
+    auto index = std::to_string(i);
+    ASSERT_OK_AND_ASSIGN(auto value, key_value_metadata->Get("test_key_" + index));
+    EXPECT_EQ("test_value_" + index, value);
+  }
+  EXPECT_TRUE(key_value_metadata->Contains("ARROW:schema"));
+}
+
+}  // namespace parquet::arrow
diff --git a/cpp/src/parquet/type_fwd.h b/cpp/src/parquet/type_fwd.h
index da0d0f7bdee96..fb5b75cdbe316 100644
--- a/cpp/src/parquet/type_fwd.h
+++ b/cpp/src/parquet/type_fwd.h
@@ -78,9 +78,7 @@ class ReaderProperties;
 class ArrowReaderProperties;
 
 class WriterProperties;
-class WriterPropertiesBuilder;
 class ArrowWriterProperties;
-class ArrowWriterPropertiesBuilder;
 
 namespace arrow {
 