From aa8f65276d39eb7e194a9fb2773ec0e7638974ad Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Tue, 27 May 2025 10:11:43 +0800 Subject: [PATCH 01/22] feat: static table metadata access support --- src/iceberg/CMakeLists.txt | 1 + src/iceberg/table.cc | 173 ++++++++++++++++++++++++++++++++++++ src/iceberg/table.h | 81 +++++++++++++++++ test/CMakeLists.txt | 10 ++- test/metadata_serde_test.cc | 37 ++------ test/table_test.cc | 107 ++++++++++++++++++++++ test/table_test_helper.cc | 64 +++++++++++++ test/table_test_helper.h | 36 ++++++++ 8 files changed, 476 insertions(+), 33 deletions(-) create mode 100644 src/iceberg/table.cc create mode 100644 test/table_test.cc create mode 100644 test/table_test_helper.cc create mode 100644 test/table_test_helper.h diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 09b419e7..7e9e3d7a 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -38,6 +38,7 @@ set(ICEBERG_SOURCES sort_field.cc sort_order.cc statistics_file.cc + table.cc table_metadata.cc transform.cc transform_function.cc diff --git a/src/iceberg/table.cc b/src/iceberg/table.cc new file mode 100644 index 00000000..8e4d53ea --- /dev/null +++ b/src/iceberg/table.cc @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/table.h" + +#include + +#include "iceberg/exception.h" +#include "iceberg/partition_spec.h" +#include "iceberg/result.h" +#include "iceberg/schema.h" +#include "iceberg/snapshot.h" +#include "iceberg/sort_order.h" +#include "iceberg/table_metadata.h" + +namespace iceberg { + +BaseTable::BaseTable(std::string name, std::shared_ptr metadata) + : name_(std::move(name)), metadata_(std::move(metadata)) { + if (!metadata_) { + throw std::invalid_argument("Table metadata cannot be null"); + } +} + +void BaseTable::InitSchema() const { + for (const auto& schema : metadata_->schemas) { + if (schema->schema_id() != std::nullopt) { + schemas_map_.emplace(schema->schema_id().value(), schema); + if (schema->schema_id().value() == metadata_->current_schema_id) { + schema_ = schema; + } + } + } + // compatible with V1 table schema + if (!schema_ && metadata_->schemas.size() == 1UL) { + schema_ = metadata_->schemas.front(); + } +} + +void BaseTable::InitPartitionSpec() const { + for (const auto& spec : metadata_->partition_specs) { + partition_spec_map_[spec->spec_id()] = spec; + if (spec->spec_id() == metadata_->default_spec_id) { + partition_spec_ = spec; + } + } +} + +void BaseTable::InitSortOrder() const { + for (const auto& order : metadata_->sort_orders) { + sort_orders_map_[order->order_id()] = order; + if (order->order_id() == metadata_->default_sort_order_id) { + sort_order_ = order; + } + } +} + +void BaseTable::InitSnapshot() const { + auto snapshots = metadata_->snapshots; + for (const auto& snapshot : snapshots) { + if (snapshot->snapshot_id == metadata_->current_snapshot_id) { + current_snapshot_ = snapshot; + } + snapshots_map_[snapshot->snapshot_id] = snapshot; + } +} + +const std::string& BaseTable::uuid() const { return metadata_->table_uuid; } + +const std::shared_ptr& BaseTable::schema() const { + std::call_once(init_schema_once_, [this]() { InitSchema(); }); + if (!schema_) { + throw IcebergError("Current schema is not defined for this table"); + } + return schema_; +} + +const std::unordered_map>& BaseTable::schemas() const { + std::call_once(init_schema_once_, [this]() { InitSchema(); }); + return schemas_map_; +} + +const std::shared_ptr& BaseTable::spec() const { + std::call_once(init_partition_spec_once_, [this]() { InitPartitionSpec(); }); + return partition_spec_; +} + +const std::unordered_map>& BaseTable::specs() + const { + std::call_once(init_partition_spec_once_, [this]() { InitPartitionSpec(); }); + return partition_spec_map_; +} + +const std::shared_ptr& BaseTable::sort_order() const { + std::call_once(init_sort_order_once_, [this]() { InitSortOrder(); }); + return sort_order_; +} + +const std::unordered_map>& BaseTable::sort_orders() + const { + std::call_once(init_sort_order_once_, [this]() { InitSortOrder(); }); + return sort_orders_map_; +} + +const std::unordered_map& BaseTable::properties() const { + return metadata_->properties; +} + +const std::string& BaseTable::location() const { return metadata_->location; } + +const std::shared_ptr& BaseTable::current_snapshot() const { + std::call_once(init_snapshot_once_, [this]() { InitSnapshot(); }); + if (!current_snapshot_) { + throw IcebergError("Current snapshot is not defined for this table"); + } + return current_snapshot_; +} + +Result> BaseTable::snapshot(int64_t snapshot_id) const { + std::call_once(init_snapshot_once_, [this]() { InitSnapshot(); }); + auto iter = snapshots_map_.find(snapshot_id); + if (iter == snapshots_map_.end()) { + return NotFound("Snapshot with ID {} not found", snapshot_id); + } + return iter->second; +} + +const std::vector>& BaseTable::snapshots() const { + return metadata_->snapshots; +} + +const std::vector>& BaseTable::history() const { + // TODO: Implement history retrieval + throw IcebergError("history is not supported for BaseTable now"); +} + +Status StaticTable::Refresh() { + throw IcebergError("Refresh is not supported for StaticTable"); +} + +std::unique_ptr StaticTable::NewScan() const { + throw IcebergError("NewScan is not supported for StaticTable"); +} + +std::shared_ptr StaticTable::NewAppend() { + throw IcebergError("NewAppend is not supported for StaticTable"); +} + +std::unique_ptr StaticTable::NewTransaction() { + throw IcebergError("NewTransaction is not supported for StaticTable"); +} + +std::unique_ptr StaticTable::location_provider() const { + throw IcebergError("location_provider is not supported for StaticTable"); +} + +} // namespace iceberg diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 11a9fc98..7788d0aa 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -108,4 +108,85 @@ class ICEBERG_EXPORT Table { virtual std::unique_ptr location_provider() const = 0; }; +class ICEBERG_EXPORT BaseTable : public Table { + public: + virtual ~BaseTable() override = default; + BaseTable(std::string name, std::shared_ptr metadata); + + const std::string& name() const override { return name_; } + + const std::string& uuid() const override; + + const std::shared_ptr& schema() const override; + + const std::unordered_map>& schemas() const override; + + const std::shared_ptr& spec() const override; + + const std::unordered_map>& specs() + const override; + + const std::shared_ptr& sort_order() const override; + + const std::unordered_map>& sort_orders() + const override; + + const std::unordered_map& properties() const override; + + const std::string& location() const override; + + const std::shared_ptr& current_snapshot() const override; + + Result> snapshot(int64_t snapshot_id) const override; + + const std::vector>& snapshots() const override; + + const std::vector>& history() const override; + + private: + void InitSchema() const; + void InitPartitionSpec() const; + void InitSortOrder() const; + void InitSnapshot() const; + + const std::string name_; + + mutable std::shared_ptr schema_; + mutable std::unordered_map> schemas_map_; + + mutable std::shared_ptr partition_spec_; + mutable std::unordered_map> partition_spec_map_; + + mutable std::shared_ptr sort_order_; + mutable std::unordered_map> sort_orders_map_; + + mutable std::shared_ptr current_snapshot_; + mutable std::unordered_map> snapshots_map_; + + std::shared_ptr metadata_; + + // once_flags + mutable std::once_flag init_schema_once_; + mutable std::once_flag init_partition_spec_once_; + mutable std::once_flag init_sort_order_once_; + mutable std::once_flag init_snapshot_once_; +}; + +class ICEBERG_EXPORT StaticTable : public BaseTable { + public: + virtual ~StaticTable() override = default; + StaticTable(std::string name, std::shared_ptr metadata) + : BaseTable(std::move(name), std::move(metadata)) {} + + Status Refresh() override; + + std::unique_ptr NewScan() const override; + + std::shared_ptr NewAppend() override; + + std::unique_ptr NewTransaction() override; + + std::unique_ptr location_provider() const override; +}; + } // namespace iceberg diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 863bb09e..e0db2e4d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -49,6 +49,14 @@ target_sources(catalog_test PRIVATE in_memory_catalog_test.cc) target_link_libraries(catalog_test PRIVATE iceberg_static GTest::gtest_main GTest::gmock) add_test(NAME catalog_test COMMAND catalog_test) +add_executable(table_test) +target_include_directories(table_test PRIVATE "${CMAKE_BINARY_DIR}") +target_sources(table_test PRIVATE table_test_helper.cc json_internal_test.cc table_test.cc + schema_json_test.cc) +target_link_libraries(table_test PRIVATE iceberg_static GTest::gtest_main + GTest::gmock) +add_test(NAME table_test COMMAND table_test) + add_executable(expression_test) target_sources(expression_test PRIVATE expression_test.cc) target_link_libraries(expression_test PRIVATE iceberg_static GTest::gtest_main @@ -57,7 +65,7 @@ add_test(NAME expression_test COMMAND expression_test) add_executable(json_serde_test) target_include_directories(json_serde_test PRIVATE "${CMAKE_BINARY_DIR}") -target_sources(json_serde_test PRIVATE json_internal_test.cc metadata_serde_test.cc +target_sources(json_serde_test PRIVATE table_test_helper.cc json_internal_test.cc metadata_serde_test.cc schema_json_test.cc) target_link_libraries(json_serde_test PRIVATE iceberg_static GTest::gtest_main GTest::gmock) diff --git a/test/metadata_serde_test.cc b/test/metadata_serde_test.cc index 4a78e8ce..f80a32e4 100644 --- a/test/metadata_serde_test.cc +++ b/test/metadata_serde_test.cc @@ -20,13 +20,11 @@ #include #include #include -#include #include #include #include -#include "iceberg/json_internal.h" #include "iceberg/partition_field.h" #include "iceberg/partition_spec.h" #include "iceberg/schema.h" @@ -35,9 +33,9 @@ #include "iceberg/sort_field.h" #include "iceberg/sort_order.h" #include "iceberg/table_metadata.h" -#include "iceberg/test/test_config.h" #include "iceberg/transform.h" #include "iceberg/type.h" +#include "table_test_helper.h" namespace iceberg { @@ -46,40 +44,14 @@ namespace { class MetadataSerdeTest : public ::testing::Test { protected: void SetUp() override {} - - static std::string GetResourcePath(const std::string& file_name) { - return std::string(ICEBERG_TEST_RESOURCES) + "/" + file_name; - } - - static void ReadJsonFile(const std::string& file_name, std::string* content) { - std::filesystem::path path{GetResourcePath(file_name)}; - ASSERT_TRUE(std::filesystem::exists(path)) - << "File does not exist: " << path.string(); - - std::ifstream file(path); - std::stringstream buffer; - buffer << file.rdbuf(); - *content = buffer.str(); - } - - static void ReadTableMetadata(const std::string& file_name, - std::unique_ptr* metadata) { - std::string json_content; - ReadJsonFile(file_name, &json_content); - - nlohmann::json json = nlohmann::json::parse(json_content); - auto result = TableMetadataFromJson(json); - ASSERT_TRUE(result.has_value()) << "Failed to parse table metadata from " << file_name - << ": " << result.error().message; - *metadata = std::move(result.value()); - } }; } // namespace TEST_F(MetadataSerdeTest, DeserializeV1Valid) { std::unique_ptr metadata; - ASSERT_NO_FATAL_FAILURE(ReadTableMetadata("TableMetadataV1Valid.json", &metadata)); + ASSERT_NO_FATAL_FAILURE( + TableTestHelper::ReadTableMetadata("TableMetadataV1Valid.json", &metadata)); EXPECT_EQ(metadata->format_version, 1); EXPECT_EQ(metadata->table_uuid, "d20125c8-7284-442c-9aea-15fee620737c"); @@ -116,7 +88,8 @@ TEST_F(MetadataSerdeTest, DeserializeV1Valid) { TEST_F(MetadataSerdeTest, DeserializeV2Valid) { std::unique_ptr metadata; - ASSERT_NO_FATAL_FAILURE(ReadTableMetadata("TableMetadataV2Valid.json", &metadata)); + ASSERT_NO_FATAL_FAILURE( + TableTestHelper::ReadTableMetadata("TableMetadataV2Valid.json", &metadata)); EXPECT_EQ(metadata->format_version, 2); EXPECT_EQ(metadata->table_uuid, "9c12d441-03fe-4693-9a96-a0705ddf69c1"); diff --git a/test/table_test.cc b/test/table_test.cc new file mode 100644 index 00000000..45897598 --- /dev/null +++ b/test/table_test.cc @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include "iceberg/partition_spec.h" +#include "iceberg/schema.h" +#include "iceberg/snapshot.h" +#include "iceberg/table_metadata.h" +#include "iceberg/table.h" +#include "table_test_helper.h" + +namespace iceberg { + +namespace { + +class TableTest : public ::testing::Test { + protected: + void SetUp() override {} +}; + +} // namespace + +TEST_F(TableTest, TableSchemaV1Test) { + std::unique_ptr metadata; + ASSERT_NO_FATAL_FAILURE(TableTestHelper::ReadTableMetadata("TableMetadataV1Valid.json", &metadata)); + + StaticTable table("test_table_v1", std::move(metadata)); + ASSERT_EQ(table.name(), "test_table_v1"); + auto schema = table.schema(); + ASSERT_TRUE(schema != nullptr); + ASSERT_EQ(schema->fields().size(), 3); + auto schemas = table.schemas(); + ASSERT_TRUE(schemas.empty()); + + auto spec = table.spec(); + ASSERT_TRUE(spec != nullptr); + auto specs = table.specs(); + ASSERT_EQ(1UL, specs.size()); + + auto sort_order = table.sort_order(); + ASSERT_TRUE(sort_order != nullptr); + auto sort_orders = table.sort_orders(); + ASSERT_EQ(1UL, sort_orders.size()); + + auto location = table.location(); + ASSERT_EQ(location, "s3://bucket/test/location"); + + auto snapshots = table.snapshots(); + ASSERT_TRUE(snapshots.empty()); +} + +TEST_F(TableTest, TableSchemaV2Test) { + std::unique_ptr metadata; + ASSERT_NO_FATAL_FAILURE(TableTestHelper::ReadTableMetadata("TableMetadataV2Valid.json", &metadata)); + + StaticTable table("test_table_v2", std::move(metadata)); + ASSERT_EQ(table.name(), "test_table_v2"); + auto schema = table.schema(); + ASSERT_TRUE(schema != nullptr); + ASSERT_EQ(schema->fields().size(), 3); + auto schemas = table.schemas(); + ASSERT_FALSE(schemas.empty()); + + auto spec = table.spec(); + ASSERT_TRUE(spec != nullptr); + auto specs = table.specs(); + ASSERT_EQ(1UL, specs.size()); + + auto sort_order = table.sort_order(); + ASSERT_TRUE(sort_order != nullptr); + auto sort_orders = table.sort_orders(); + ASSERT_EQ(1UL, sort_orders.size()); + + auto location = table.location(); + ASSERT_EQ(location, "s3://bucket/test/location"); + + auto snapshots = table.snapshots(); + ASSERT_EQ(2UL, snapshots.size()); + auto snapshot = table.current_snapshot(); + ASSERT_TRUE(snapshot != nullptr); +} + +} // namespace iceberg diff --git a/test/table_test_helper.cc b/test/table_test_helper.cc new file mode 100644 index 00000000..4b427916 --- /dev/null +++ b/test/table_test_helper.cc @@ -0,0 +1,64 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +#include "iceberg/json_internal.h" +#include "iceberg/test/test_config.h" +#include "table_test_helper.h" + +namespace iceberg { + + std::string TableTestHelper::GetResourcePath(const std::string& file_name) { + return std::string(ICEBERG_TEST_RESOURCES) + "/" + file_name; + } + + void TableTestHelper::ReadJsonFile(const std::string& file_name, std::string* content) { + std::filesystem::path path{GetResourcePath(file_name)}; + ASSERT_TRUE(std::filesystem::exists(path)) + << "File does not exist: " << path.string(); + + std::ifstream file(path); + std::stringstream buffer; + buffer << file.rdbuf(); + *content = buffer.str(); + } + + void TableTestHelper::ReadTableMetadata(const std::string& file_name, + std::unique_ptr* metadata) { + std::string json_content; + ReadJsonFile(file_name, &json_content); + + nlohmann::json json = nlohmann::json::parse(json_content); + auto result = TableMetadataFromJson(json); + ASSERT_TRUE(result.has_value()) << "Failed to parse table metadata from " << file_name + << ": " << result.error().message; + *metadata = std::move(result.value()); + } + +} // namespace \ No newline at end of file diff --git a/test/table_test_helper.h b/test/table_test_helper.h new file mode 100644 index 00000000..7c706e62 --- /dev/null +++ b/test/table_test_helper.h @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include "iceberg/type_fwd.h" + +namespace iceberg { + +class TableTestHelper { + public: + static std::string GetResourcePath(const std::string& file_name); + + static void ReadJsonFile(const std::string& file_name, std::string* content); + + static void ReadTableMetadata(const std::string& file_name, + std::unique_ptr* metadata); +}; + +} // namespace iceberg \ No newline at end of file From 7a509910abb66cf70c930c17976f0051c319ea90 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Tue, 27 May 2025 11:09:40 +0800 Subject: [PATCH 02/22] feat: static table metadata access support --- src/iceberg/table.h | 1 + test/CMakeLists.txt | 11 ++++----- test/table_test.cc | 9 ++++--- test/table_test_helper.cc | 50 +++++++++++++++++++-------------------- test/table_test_helper.h | 2 +- 5 files changed, 38 insertions(+), 35 deletions(-) diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 7788d0aa..6a5ab696 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -20,6 +20,7 @@ #pragma once #include +#include #include #include #include diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e0db2e4d..a27f4204 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -51,10 +51,9 @@ add_test(NAME catalog_test COMMAND catalog_test) add_executable(table_test) target_include_directories(table_test PRIVATE "${CMAKE_BINARY_DIR}") -target_sources(table_test PRIVATE table_test_helper.cc json_internal_test.cc table_test.cc - schema_json_test.cc) -target_link_libraries(table_test PRIVATE iceberg_static GTest::gtest_main - GTest::gmock) +target_sources(table_test PRIVATE table_test_helper.cc json_internal_test.cc + table_test.cc schema_json_test.cc) +target_link_libraries(table_test PRIVATE iceberg_static GTest::gtest_main GTest::gmock) add_test(NAME table_test COMMAND table_test) add_executable(expression_test) @@ -65,8 +64,8 @@ add_test(NAME expression_test COMMAND expression_test) add_executable(json_serde_test) target_include_directories(json_serde_test PRIVATE "${CMAKE_BINARY_DIR}") -target_sources(json_serde_test PRIVATE table_test_helper.cc json_internal_test.cc metadata_serde_test.cc - schema_json_test.cc) +target_sources(json_serde_test PRIVATE table_test_helper.cc json_internal_test.cc + metadata_serde_test.cc schema_json_test.cc) target_link_libraries(json_serde_test PRIVATE iceberg_static GTest::gtest_main GTest::gmock) add_test(NAME json_serde_test COMMAND json_serde_test) diff --git a/test/table_test.cc b/test/table_test.cc index 45897598..eeca67d2 100644 --- a/test/table_test.cc +++ b/test/table_test.cc @@ -17,6 +17,8 @@ * under the License. */ +#include "iceberg/table.h" + #include #include #include @@ -30,7 +32,6 @@ #include "iceberg/schema.h" #include "iceberg/snapshot.h" #include "iceberg/table_metadata.h" -#include "iceberg/table.h" #include "table_test_helper.h" namespace iceberg { @@ -46,7 +47,8 @@ class TableTest : public ::testing::Test { TEST_F(TableTest, TableSchemaV1Test) { std::unique_ptr metadata; - ASSERT_NO_FATAL_FAILURE(TableTestHelper::ReadTableMetadata("TableMetadataV1Valid.json", &metadata)); + ASSERT_NO_FATAL_FAILURE( + TableTestHelper::ReadTableMetadata("TableMetadataV1Valid.json", &metadata)); StaticTable table("test_table_v1", std::move(metadata)); ASSERT_EQ(table.name(), "test_table_v1"); @@ -75,7 +77,8 @@ TEST_F(TableTest, TableSchemaV1Test) { TEST_F(TableTest, TableSchemaV2Test) { std::unique_ptr metadata; - ASSERT_NO_FATAL_FAILURE(TableTestHelper::ReadTableMetadata("TableMetadataV2Valid.json", &metadata)); + ASSERT_NO_FATAL_FAILURE( + TableTestHelper::ReadTableMetadata("TableMetadataV2Valid.json", &metadata)); StaticTable table("test_table_v2", std::move(metadata)); ASSERT_EQ(table.name(), "test_table_v2"); diff --git a/test/table_test_helper.cc b/test/table_test_helper.cc index 4b427916..08a363a4 100644 --- a/test/table_test_helper.cc +++ b/test/table_test_helper.cc @@ -1,5 +1,5 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one + * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file @@ -19,6 +19,8 @@ #pragma once +#include "table_test_helper.h" + #include #include #include @@ -30,35 +32,33 @@ #include "iceberg/json_internal.h" #include "iceberg/test/test_config.h" -#include "table_test_helper.h" namespace iceberg { - std::string TableTestHelper::GetResourcePath(const std::string& file_name) { - return std::string(ICEBERG_TEST_RESOURCES) + "/" + file_name; - } +std::string TableTestHelper::GetResourcePath(const std::string& file_name) { + return std::string(ICEBERG_TEST_RESOURCES) + "/" + file_name; +} - void TableTestHelper::ReadJsonFile(const std::string& file_name, std::string* content) { - std::filesystem::path path{GetResourcePath(file_name)}; - ASSERT_TRUE(std::filesystem::exists(path)) - << "File does not exist: " << path.string(); +void TableTestHelper::ReadJsonFile(const std::string& file_name, std::string* content) { + std::filesystem::path path{GetResourcePath(file_name)}; + ASSERT_TRUE(std::filesystem::exists(path)) << "File does not exist: " << path.string(); - std::ifstream file(path); - std::stringstream buffer; - buffer << file.rdbuf(); - *content = buffer.str(); - } + std::ifstream file(path); + std::stringstream buffer; + buffer << file.rdbuf(); + *content = buffer.str(); +} - void TableTestHelper::ReadTableMetadata(const std::string& file_name, - std::unique_ptr* metadata) { - std::string json_content; - ReadJsonFile(file_name, &json_content); +void TableTestHelper::ReadTableMetadata(const std::string& file_name, + std::unique_ptr* metadata) { + std::string json_content; + ReadJsonFile(file_name, &json_content); - nlohmann::json json = nlohmann::json::parse(json_content); - auto result = TableMetadataFromJson(json); - ASSERT_TRUE(result.has_value()) << "Failed to parse table metadata from " << file_name - << ": " << result.error().message; - *metadata = std::move(result.value()); - } + nlohmann::json json = nlohmann::json::parse(json_content); + auto result = TableMetadataFromJson(json); + ASSERT_TRUE(result.has_value()) << "Failed to parse table metadata from " << file_name + << ": " << result.error().message; + *metadata = std::move(result.value()); +} -} // namespace \ No newline at end of file +} // namespace iceberg diff --git a/test/table_test_helper.h b/test/table_test_helper.h index 7c706e62..823862e1 100644 --- a/test/table_test_helper.h +++ b/test/table_test_helper.h @@ -33,4 +33,4 @@ class TableTestHelper { std::unique_ptr* metadata); }; -} // namespace iceberg \ No newline at end of file +} // namespace iceberg From 0df021746cb9a9d701313ac7e02a9fbce2a6a11b Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Tue, 27 May 2025 12:01:26 +0800 Subject: [PATCH 03/22] feat: static table metadata access support --- test/table_test_helper.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/table_test_helper.h b/test/table_test_helper.h index 823862e1..7ccbe789 100644 --- a/test/table_test_helper.h +++ b/test/table_test_helper.h @@ -19,6 +19,8 @@ #pragma once +#include + #include "iceberg/type_fwd.h" namespace iceberg { From f2a718d591b85f6310f9a2407e9f6c03f37794d8 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Tue, 27 May 2025 12:24:41 +0800 Subject: [PATCH 04/22] feat: static table metadata access support --- test/table_test_helper.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/test/table_test_helper.cc b/test/table_test_helper.cc index 08a363a4..f38f057e 100644 --- a/test/table_test_helper.cc +++ b/test/table_test_helper.cc @@ -23,6 +23,7 @@ #include #include +#include #include #include #include From 1f94b9bc04137d695a846c7b16cdb53fcd775ec2 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Tue, 27 May 2025 14:05:15 +0800 Subject: [PATCH 05/22] feat: static table metadata access support --- test/table_test_helper.cc | 4 ---- test/table_test_helper.h | 1 + 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/test/table_test_helper.cc b/test/table_test_helper.cc index f38f057e..e93c2d14 100644 --- a/test/table_test_helper.cc +++ b/test/table_test_helper.cc @@ -17,16 +17,12 @@ * under the License. */ -#pragma once - #include "table_test_helper.h" #include #include -#include #include #include -#include #include #include diff --git a/test/table_test_helper.h b/test/table_test_helper.h index 7ccbe789..93b35c1b 100644 --- a/test/table_test_helper.h +++ b/test/table_test_helper.h @@ -19,6 +19,7 @@ #pragma once +#include #include #include "iceberg/type_fwd.h" From 8c854f99d8b7d8f5a28b4b8143ce7e3320a4409b Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Tue, 27 May 2025 14:42:28 +0800 Subject: [PATCH 06/22] feat: static table metadata access support --- src/iceberg/table.cc | 2 +- src/iceberg/table.h | 4 ++-- test/table_test_helper.h | 3 +++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/iceberg/table.cc b/src/iceberg/table.cc index 8e4d53ea..dba65f31 100644 --- a/src/iceberg/table.cc +++ b/src/iceberg/table.cc @@ -146,7 +146,7 @@ const std::vector>& BaseTable::snapshots() const { } const std::vector>& BaseTable::history() const { - // TODO: Implement history retrieval + // TODO(lishuxu): Implement history retrieval throw IcebergError("history is not supported for BaseTable now"); } diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 6a5ab696..7296945a 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -111,7 +111,7 @@ class ICEBERG_EXPORT Table { class ICEBERG_EXPORT BaseTable : public Table { public: - virtual ~BaseTable() override = default; + ~BaseTable() override = default; BaseTable(std::string name, std::shared_ptr metadata); const std::string& name() const override { return name_; } @@ -175,7 +175,7 @@ class ICEBERG_EXPORT BaseTable : public Table { class ICEBERG_EXPORT StaticTable : public BaseTable { public: - virtual ~StaticTable() override = default; + ~StaticTable() override = default; StaticTable(std::string name, std::shared_ptr metadata) : BaseTable(std::move(name), std::move(metadata)) {} diff --git a/test/table_test_helper.h b/test/table_test_helper.h index 93b35c1b..479ac100 100644 --- a/test/table_test_helper.h +++ b/test/table_test_helper.h @@ -28,10 +28,13 @@ namespace iceberg { class TableTestHelper { public: + /// \brief Get the full path to a resource file in the test resources directory static std::string GetResourcePath(const std::string& file_name); + /// \brief Read a JSON file from the test resources directory static void ReadJsonFile(const std::string& file_name, std::string* content); + /// \brief Read table metadata from a JSON file in the test resources directory static void ReadTableMetadata(const std::string& file_name, std::unique_ptr* metadata); }; From 675bdd883c2496d36cd9423dbb1cc69101de5933 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Wed, 28 May 2025 21:41:06 +0800 Subject: [PATCH 07/22] static table metadata access support --- src/iceberg/table.cc | 40 +++++++------ src/iceberg/table.h | 58 ++++++++++++------- src/iceberg/table_metadata.cc | 13 +++++ src/iceberg/table_metadata.h | 2 + src/iceberg/table_scan.h | 27 +++++++++ test/CMakeLists.txt | 6 +- test/metadata_serde_test.cc | 6 +- test/table_test.cc | 35 +++++------ test/{table_test_helper.cc => test_common.cc} | 14 ++--- test/{table_test_helper.h => test_common.h} | 21 +++---- 10 files changed, 138 insertions(+), 84 deletions(-) create mode 100644 src/iceberg/table_scan.h rename test/{table_test_helper.cc => test_common.cc} (81%) rename test/{table_test_helper.h => test_common.h} (60%) diff --git a/src/iceberg/table.cc b/src/iceberg/table.cc index dba65f31..6fd463a7 100644 --- a/src/iceberg/table.cc +++ b/src/iceberg/table.cc @@ -40,7 +40,7 @@ BaseTable::BaseTable(std::string name, std::shared_ptr metadata) void BaseTable::InitSchema() const { for (const auto& schema : metadata_->schemas) { - if (schema->schema_id() != std::nullopt) { + if (schema->schema_id()) { schemas_map_.emplace(schema->schema_id().value(), schema); if (schema->schema_id().value() == metadata_->current_schema_id) { schema_ = schema; @@ -83,10 +83,10 @@ void BaseTable::InitSnapshot() const { const std::string& BaseTable::uuid() const { return metadata_->table_uuid; } -const std::shared_ptr& BaseTable::schema() const { +Result> BaseTable::schema() const { std::call_once(init_schema_once_, [this]() { InitSchema(); }); if (!schema_) { - throw IcebergError("Current schema is not defined for this table"); + return NotFound("Current schema is not defined for this table"); } return schema_; } @@ -96,8 +96,11 @@ const std::unordered_map>& BaseTable::schemas() return schemas_map_; } -const std::shared_ptr& BaseTable::spec() const { +Result> BaseTable::spec() const { std::call_once(init_partition_spec_once_, [this]() { InitPartitionSpec(); }); + if (!partition_spec_) { + return NotFound("Default partition spec is not defined for this table"); + } return partition_spec_; } @@ -107,8 +110,11 @@ const std::unordered_map>& BaseTable::sp return partition_spec_map_; } -const std::shared_ptr& BaseTable::sort_order() const { +Result> BaseTable::sort_order() const { std::call_once(init_sort_order_once_, [this]() { InitSortOrder(); }); + if (!sort_order_) { + return NotFound("Default sort order is not defined for this table"); + } return sort_order_; } @@ -124,10 +130,10 @@ const std::unordered_map& BaseTable::properties() cons const std::string& BaseTable::location() const { return metadata_->location; } -const std::shared_ptr& BaseTable::current_snapshot() const { +Result> BaseTable::current_snapshot() const { std::call_once(init_snapshot_once_, [this]() { InitSnapshot(); }); if (!current_snapshot_) { - throw IcebergError("Current snapshot is not defined for this table"); + return NotFound("Current snapshot is not defined for this table"); } return current_snapshot_; } @@ -147,27 +153,27 @@ const std::vector>& BaseTable::snapshots() const { const std::vector>& BaseTable::history() const { // TODO(lishuxu): Implement history retrieval - throw IcebergError("history is not supported for BaseTable now"); + return history_; } Status StaticTable::Refresh() { - throw IcebergError("Refresh is not supported for StaticTable"); + return NotSupported("Refresh is not supported for StaticTable"); } -std::unique_ptr StaticTable::NewScan() const { - throw IcebergError("NewScan is not supported for StaticTable"); +Result> StaticTable::NewScan() const { + return NotSupported("NewScan is not supported for StaticTable"); } -std::shared_ptr StaticTable::NewAppend() { - throw IcebergError("NewAppend is not supported for StaticTable"); +Result> StaticTable::NewAppend() { + return NotSupported("NewAppend is not supported for StaticTable"); } -std::unique_ptr StaticTable::NewTransaction() { - throw IcebergError("NewTransaction is not supported for StaticTable"); +Result> StaticTable::NewTransaction() { + return NotSupported("NewTransaction is not supported for StaticTable"); } -std::unique_ptr StaticTable::location_provider() const { - throw IcebergError("location_provider is not supported for StaticTable"); +Result> StaticTable::location_provider() const { + return NotSupported("location_provider is not supported for StaticTable"); } } // namespace iceberg diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 7296945a..f0e2a969 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -26,7 +26,10 @@ #include #include "iceberg/iceberg_export.h" +#include "iceberg/location_provider.h" #include "iceberg/result.h" +#include "iceberg/table_scan.h" +#include "iceberg/transaction.h" #include "iceberg/type_fwd.h" namespace iceberg { @@ -45,21 +48,21 @@ class ICEBERG_EXPORT Table { /// \brief Refresh the current table metadata virtual Status Refresh() = 0; - /// \brief Return the schema for this table - virtual const std::shared_ptr& schema() const = 0; + /// \brief Return the schema for this table, return NotFoundError if not found + virtual Result> schema() const = 0; /// \brief Return a map of schema for this table virtual const std::unordered_map>& schemas() const = 0; - /// \brief Return the partition spec for this table - virtual const std::shared_ptr& spec() const = 0; + /// \brief Return the partition spec for this table, return NotFoundError if not found + virtual Result> spec() const = 0; /// \brief Return a map of partition specs for this table virtual const std::unordered_map>& specs() const = 0; - /// \brief Return the sort order for this table - virtual const std::shared_ptr& sort_order() const = 0; + /// \brief Return the sort order for this table, return NotFoundError if not found + virtual Result> sort_order() const = 0; /// \brief Return a map of sort order IDs to sort orders for this table virtual const std::unordered_map>& sort_orders() @@ -71,8 +74,8 @@ class ICEBERG_EXPORT Table { /// \brief Return the table's base location virtual const std::string& location() const = 0; - /// \brief Return the table's current snapshot - virtual const std::shared_ptr& current_snapshot() const = 0; + /// \brief Return the table's current snapshot, or NotFoundError if not found + virtual Result> current_snapshot() const = 0; /// \brief Get the snapshot of this table with the given id, or null if there is no /// matching snapshot @@ -92,13 +95,13 @@ class ICEBERG_EXPORT Table { /// \brief Create a new table scan for this table /// /// Once a table scan is created, it can be refined to project columns and filter data. - virtual std::unique_ptr NewScan() const = 0; + virtual Result> NewScan() const = 0; /// \brief Create a new append API to add files to this table and commit - virtual std::shared_ptr NewAppend() = 0; + virtual Result> NewAppend() = 0; /// \brief Create a new transaction API to commit multiple table operations at once - virtual std::unique_ptr NewTransaction() = 0; + virtual Result> NewTransaction() = 0; /// TODO(wgtmac): design of FileIO is not finalized yet. We intend to use an /// IO-less design in the core library. @@ -106,28 +109,37 @@ class ICEBERG_EXPORT Table { // virtual std::shared_ptr io() const = 0; /// \brief Returns a LocationProvider to provide locations for new data files - virtual std::unique_ptr location_provider() const = 0; + virtual Result> location_provider() const = 0; }; +/// \brief An abstract base implementation of the Iceberg Table interface. +/// +/// BaseTable provides common functionality for Iceberg table implementations, +/// including lazy initialization of schema, partition specs, sort orders, +/// and snapshot metadata. +/// +/// This class is not intended to be used directly by users, but serves as a foundation +/// for concrete implementations such as StaticTable or CatalogTable. class ICEBERG_EXPORT BaseTable : public Table { public: ~BaseTable() override = default; + BaseTable(std::string name, std::shared_ptr metadata); const std::string& name() const override { return name_; } const std::string& uuid() const override; - const std::shared_ptr& schema() const override; + Result> schema() const override; const std::unordered_map>& schemas() const override; - const std::shared_ptr& spec() const override; + Result> spec() const override; const std::unordered_map>& specs() const override; - const std::shared_ptr& sort_order() const override; + Result> sort_order() const override; const std::unordered_map>& sort_orders() const override; @@ -136,7 +148,7 @@ class ICEBERG_EXPORT BaseTable : public Table { const std::string& location() const override; - const std::shared_ptr& current_snapshot() const override; + Result> current_snapshot() const override; Result> snapshot(int64_t snapshot_id) const override; @@ -164,6 +176,8 @@ class ICEBERG_EXPORT BaseTable : public Table { mutable std::shared_ptr current_snapshot_; mutable std::unordered_map> snapshots_map_; + mutable std::vector> history_; + std::shared_ptr metadata_; // once_flags @@ -173,21 +187,25 @@ class ICEBERG_EXPORT BaseTable : public Table { mutable std::once_flag init_snapshot_once_; }; +/// \brief A read-only implementation of an Iceberg table. +/// +/// StaticTable represents a snapshot of a table that does not support mutation. class ICEBERG_EXPORT StaticTable : public BaseTable { public: ~StaticTable() override = default; + StaticTable(std::string name, std::shared_ptr metadata) : BaseTable(std::move(name), std::move(metadata)) {} Status Refresh() override; - std::unique_ptr NewScan() const override; + Result> NewScan() const override; - std::shared_ptr NewAppend() override; + Result> NewAppend() override; - std::unique_ptr NewTransaction() override; + Result> NewTransaction() override; - std::unique_ptr location_provider() const override; + Result> location_provider() const override; }; } // namespace iceberg diff --git a/src/iceberg/table_metadata.cc b/src/iceberg/table_metadata.cc index 4e112fd2..9713364a 100644 --- a/src/iceberg/table_metadata.cc +++ b/src/iceberg/table_metadata.cc @@ -76,6 +76,19 @@ Result> TableMetadata::SortOrder() const { return *iter; } +Result> TableMetadata::Snapshot() const { + if (current_snapshot_id == Snapshot::kInvalidSnapshotId) { + return NotFound("Current snapshot is not defined for this table"); + } + auto iter = std::ranges::find_if(snapshots, [this](const auto& snapshot) { + return snapshot->snapshot_id == current_snapshot_id; + }); + if (iter == snapshots.end()) { + return NotFound("Current snapshot with ID {} is not found", current_snapshot_id); + } + return *iter; +} + namespace { template diff --git a/src/iceberg/table_metadata.h b/src/iceberg/table_metadata.h index 12483140..c40e5ff8 100644 --- a/src/iceberg/table_metadata.h +++ b/src/iceberg/table_metadata.h @@ -133,6 +133,8 @@ struct ICEBERG_EXPORT TableMetadata { Result> PartitionSpec() const; /// \brief Get the current sort order, return NotFoundError if not found Result> SortOrder() const; + /// \brief Get the current snapshot, return NotFoundError if not found + Result> Snapshot() const; friend bool operator==(const TableMetadata& lhs, const TableMetadata& rhs); diff --git a/src/iceberg/table_scan.h b/src/iceberg/table_scan.h new file mode 100644 index 00000000..7cc28e91 --- /dev/null +++ b/src/iceberg/table_scan.h @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +namespace iceberg { + +/// \brief Represents a table scan operation +class ICEBERG_EXPORT TableScan{}; + +} // namespace iceberg diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a27f4204..125714f6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -51,8 +51,8 @@ add_test(NAME catalog_test COMMAND catalog_test) add_executable(table_test) target_include_directories(table_test PRIVATE "${CMAKE_BINARY_DIR}") -target_sources(table_test PRIVATE table_test_helper.cc json_internal_test.cc - table_test.cc schema_json_test.cc) +target_sources(table_test PRIVATE test_common.cc json_internal_test.cc table_test.cc + schema_json_test.cc) target_link_libraries(table_test PRIVATE iceberg_static GTest::gtest_main GTest::gmock) add_test(NAME table_test COMMAND table_test) @@ -64,7 +64,7 @@ add_test(NAME expression_test COMMAND expression_test) add_executable(json_serde_test) target_include_directories(json_serde_test PRIVATE "${CMAKE_BINARY_DIR}") -target_sources(json_serde_test PRIVATE table_test_helper.cc json_internal_test.cc +target_sources(json_serde_test PRIVATE test_common.cc json_internal_test.cc metadata_serde_test.cc schema_json_test.cc) target_link_libraries(json_serde_test PRIVATE iceberg_static GTest::gtest_main GTest::gmock) diff --git a/test/metadata_serde_test.cc b/test/metadata_serde_test.cc index f80a32e4..b6f1d050 100644 --- a/test/metadata_serde_test.cc +++ b/test/metadata_serde_test.cc @@ -35,7 +35,7 @@ #include "iceberg/table_metadata.h" #include "iceberg/transform.h" #include "iceberg/type.h" -#include "table_test_helper.h" +#include "test_common.h" namespace iceberg { @@ -51,7 +51,7 @@ class MetadataSerdeTest : public ::testing::Test { TEST_F(MetadataSerdeTest, DeserializeV1Valid) { std::unique_ptr metadata; ASSERT_NO_FATAL_FAILURE( - TableTestHelper::ReadTableMetadata("TableMetadataV1Valid.json", &metadata)); + test::ReadTableMetadata("TableMetadataV1Valid.json", &metadata)); EXPECT_EQ(metadata->format_version, 1); EXPECT_EQ(metadata->table_uuid, "d20125c8-7284-442c-9aea-15fee620737c"); @@ -89,7 +89,7 @@ TEST_F(MetadataSerdeTest, DeserializeV1Valid) { TEST_F(MetadataSerdeTest, DeserializeV2Valid) { std::unique_ptr metadata; ASSERT_NO_FATAL_FAILURE( - TableTestHelper::ReadTableMetadata("TableMetadataV2Valid.json", &metadata)); + test::ReadTableMetadata("TableMetadataV2Valid.json", &metadata)); EXPECT_EQ(metadata->format_version, 2); EXPECT_EQ(metadata->table_uuid, "9c12d441-03fe-4693-9a96-a0705ddf69c1"); diff --git a/test/table_test.cc b/test/table_test.cc index eeca67d2..161402d0 100644 --- a/test/table_test.cc +++ b/test/table_test.cc @@ -32,39 +32,30 @@ #include "iceberg/schema.h" #include "iceberg/snapshot.h" #include "iceberg/table_metadata.h" -#include "table_test_helper.h" +#include "test_common.h" namespace iceberg { -namespace { - -class TableTest : public ::testing::Test { - protected: - void SetUp() override {} -}; - -} // namespace - -TEST_F(TableTest, TableSchemaV1Test) { +TEST(TableTest, TableSchemaV1Test) { std::unique_ptr metadata; ASSERT_NO_FATAL_FAILURE( - TableTestHelper::ReadTableMetadata("TableMetadataV1Valid.json", &metadata)); + test::ReadTableMetadata("TableMetadataV1Valid.json", &metadata)); StaticTable table("test_table_v1", std::move(metadata)); ASSERT_EQ(table.name(), "test_table_v1"); auto schema = table.schema(); - ASSERT_TRUE(schema != nullptr); - ASSERT_EQ(schema->fields().size(), 3); + ASSERT_TRUE(schema.has_value()); + ASSERT_EQ(schema.value()->fields().size(), 3); auto schemas = table.schemas(); ASSERT_TRUE(schemas.empty()); auto spec = table.spec(); - ASSERT_TRUE(spec != nullptr); + ASSERT_TRUE(spec.has_value()); auto specs = table.specs(); ASSERT_EQ(1UL, specs.size()); auto sort_order = table.sort_order(); - ASSERT_TRUE(sort_order != nullptr); + ASSERT_TRUE(sort_order.has_value()); auto sort_orders = table.sort_orders(); ASSERT_EQ(1UL, sort_orders.size()); @@ -75,26 +66,26 @@ TEST_F(TableTest, TableSchemaV1Test) { ASSERT_TRUE(snapshots.empty()); } -TEST_F(TableTest, TableSchemaV2Test) { +TEST(TableTest, TableSchemaV2Test) { std::unique_ptr metadata; ASSERT_NO_FATAL_FAILURE( - TableTestHelper::ReadTableMetadata("TableMetadataV2Valid.json", &metadata)); + test::ReadTableMetadata("TableMetadataV2Valid.json", &metadata)); StaticTable table("test_table_v2", std::move(metadata)); ASSERT_EQ(table.name(), "test_table_v2"); auto schema = table.schema(); - ASSERT_TRUE(schema != nullptr); - ASSERT_EQ(schema->fields().size(), 3); + ASSERT_TRUE(schema.has_value()); + ASSERT_EQ(schema.value()->fields().size(), 3); auto schemas = table.schemas(); ASSERT_FALSE(schemas.empty()); auto spec = table.spec(); - ASSERT_TRUE(spec != nullptr); + ASSERT_TRUE(spec.has_value()); auto specs = table.specs(); ASSERT_EQ(1UL, specs.size()); auto sort_order = table.sort_order(); - ASSERT_TRUE(sort_order != nullptr); + ASSERT_TRUE(sort_order.has_value()); auto sort_orders = table.sort_orders(); ASSERT_EQ(1UL, sort_orders.size()); diff --git a/test/table_test_helper.cc b/test/test_common.cc similarity index 81% rename from test/table_test_helper.cc rename to test/test_common.cc index e93c2d14..e03dbf3a 100644 --- a/test/table_test_helper.cc +++ b/test/test_common.cc @@ -17,7 +17,7 @@ * under the License. */ -#include "table_test_helper.h" +#include "test_common.h" #include #include @@ -30,13 +30,13 @@ #include "iceberg/json_internal.h" #include "iceberg/test/test_config.h" -namespace iceberg { +namespace iceberg::test { -std::string TableTestHelper::GetResourcePath(const std::string& file_name) { +std::string GetResourcePath(const std::string& file_name) { return std::string(ICEBERG_TEST_RESOURCES) + "/" + file_name; } -void TableTestHelper::ReadJsonFile(const std::string& file_name, std::string* content) { +void ReadJsonFile(const std::string& file_name, std::string* content) { std::filesystem::path path{GetResourcePath(file_name)}; ASSERT_TRUE(std::filesystem::exists(path)) << "File does not exist: " << path.string(); @@ -46,8 +46,8 @@ void TableTestHelper::ReadJsonFile(const std::string& file_name, std::string* co *content = buffer.str(); } -void TableTestHelper::ReadTableMetadata(const std::string& file_name, - std::unique_ptr* metadata) { +void ReadTableMetadata(const std::string& file_name, + std::unique_ptr* metadata) { std::string json_content; ReadJsonFile(file_name, &json_content); @@ -58,4 +58,4 @@ void TableTestHelper::ReadTableMetadata(const std::string& file_name, *metadata = std::move(result.value()); } -} // namespace iceberg +} // namespace iceberg::test diff --git a/test/table_test_helper.h b/test/test_common.h similarity index 60% rename from test/table_test_helper.h rename to test/test_common.h index 479ac100..832538ba 100644 --- a/test/table_test_helper.h +++ b/test/test_common.h @@ -24,19 +24,16 @@ #include "iceberg/type_fwd.h" -namespace iceberg { +namespace iceberg::test { -class TableTestHelper { - public: - /// \brief Get the full path to a resource file in the test resources directory - static std::string GetResourcePath(const std::string& file_name); +/// \brief Get the full path to a resource file in the test resources directory +std::string GetResourcePath(const std::string& file_name); - /// \brief Read a JSON file from the test resources directory - static void ReadJsonFile(const std::string& file_name, std::string* content); +/// \brief Read a JSON file from the test resources directory +void ReadJsonFile(const std::string& file_name, std::string* content); - /// \brief Read table metadata from a JSON file in the test resources directory - static void ReadTableMetadata(const std::string& file_name, - std::unique_ptr* metadata); -}; +/// \brief Read table metadata from a JSON file in the test resources directory +void ReadTableMetadata(const std::string& file_name, + std::unique_ptr* metadata); -} // namespace iceberg +} // namespace iceberg::test From 9b1055724cbb5e70c55e61bc4c4438cef20a839c Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Thu, 29 May 2025 10:06:07 +0800 Subject: [PATCH 08/22] feat: static table metadata access support --- src/iceberg/table_scan.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/iceberg/table_scan.h b/src/iceberg/table_scan.h index 7cc28e91..a12862cd 100644 --- a/src/iceberg/table_scan.h +++ b/src/iceberg/table_scan.h @@ -22,6 +22,9 @@ namespace iceberg { /// \brief Represents a table scan operation -class ICEBERG_EXPORT TableScan{}; +class ICEBERG_EXPORT TableScan { + public: + virtual ~TableScan() = default; +}; } // namespace iceberg From f93319862138bc79136f444f0db2349ea77495d4 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Thu, 29 May 2025 20:36:10 +0800 Subject: [PATCH 09/22] feat: static table metadata access support --- src/iceberg/CMakeLists.txt | 2 +- src/iceberg/table.h | 110 +------------------- src/iceberg/{table.cc => table_impl.cc} | 10 +- src/iceberg/table_impl.h | 129 ++++++++++++++++++++++++ src/iceberg/table_scan.h | 30 ------ test/metadata_serde_test.cc | 12 ++- test/table_test.cc | 28 +++-- test/test_common.cc | 4 +- test/test_common.h | 4 +- 9 files changed, 168 insertions(+), 161 deletions(-) rename src/iceberg/{table.cc => table_impl.cc} (95%) create mode 100644 src/iceberg/table_impl.h delete mode 100644 src/iceberg/table_scan.h diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 7e9e3d7a..afa15a59 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -38,7 +38,7 @@ set(ICEBERG_SOURCES sort_field.cc sort_order.cc statistics_file.cc - table.cc + table_impl.cc table_metadata.cc transform.cc transform_function.cc diff --git a/src/iceberg/table.h b/src/iceberg/table.h index f0e2a969..8ec1eac7 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -19,17 +19,12 @@ #pragma once -#include -#include #include #include #include #include "iceberg/iceberg_export.h" -#include "iceberg/location_provider.h" #include "iceberg/result.h" -#include "iceberg/table_scan.h" -#include "iceberg/transaction.h" #include "iceberg/type_fwd.h" namespace iceberg { @@ -92,10 +87,11 @@ class ICEBERG_EXPORT Table { /// \return a vector of history entries virtual const std::vector>& history() const = 0; - /// \brief Create a new table scan for this table - /// - /// Once a table scan is created, it can be refined to project columns and filter data. - virtual Result> NewScan() const = 0; + // TODO(lishuxu): TableScan is not implemented yet, disable it for now. + // /// \brief Create a new table scan for this table + // /// + // /// Once a table scan is created, it can be refined to project columns and filter + // data. virtual Result> NewScan() const = 0; /// \brief Create a new append API to add files to this table and commit virtual Result> NewAppend() = 0; @@ -112,100 +108,4 @@ class ICEBERG_EXPORT Table { virtual Result> location_provider() const = 0; }; -/// \brief An abstract base implementation of the Iceberg Table interface. -/// -/// BaseTable provides common functionality for Iceberg table implementations, -/// including lazy initialization of schema, partition specs, sort orders, -/// and snapshot metadata. -/// -/// This class is not intended to be used directly by users, but serves as a foundation -/// for concrete implementations such as StaticTable or CatalogTable. -class ICEBERG_EXPORT BaseTable : public Table { - public: - ~BaseTable() override = default; - - BaseTable(std::string name, std::shared_ptr metadata); - - const std::string& name() const override { return name_; } - - const std::string& uuid() const override; - - Result> schema() const override; - - const std::unordered_map>& schemas() const override; - - Result> spec() const override; - - const std::unordered_map>& specs() - const override; - - Result> sort_order() const override; - - const std::unordered_map>& sort_orders() - const override; - - const std::unordered_map& properties() const override; - - const std::string& location() const override; - - Result> current_snapshot() const override; - - Result> snapshot(int64_t snapshot_id) const override; - - const std::vector>& snapshots() const override; - - const std::vector>& history() const override; - - private: - void InitSchema() const; - void InitPartitionSpec() const; - void InitSortOrder() const; - void InitSnapshot() const; - - const std::string name_; - - mutable std::shared_ptr schema_; - mutable std::unordered_map> schemas_map_; - - mutable std::shared_ptr partition_spec_; - mutable std::unordered_map> partition_spec_map_; - - mutable std::shared_ptr sort_order_; - mutable std::unordered_map> sort_orders_map_; - - mutable std::shared_ptr current_snapshot_; - mutable std::unordered_map> snapshots_map_; - - mutable std::vector> history_; - - std::shared_ptr metadata_; - - // once_flags - mutable std::once_flag init_schema_once_; - mutable std::once_flag init_partition_spec_once_; - mutable std::once_flag init_sort_order_once_; - mutable std::once_flag init_snapshot_once_; -}; - -/// \brief A read-only implementation of an Iceberg table. -/// -/// StaticTable represents a snapshot of a table that does not support mutation. -class ICEBERG_EXPORT StaticTable : public BaseTable { - public: - ~StaticTable() override = default; - - StaticTable(std::string name, std::shared_ptr metadata) - : BaseTable(std::move(name), std::move(metadata)) {} - - Status Refresh() override; - - Result> NewScan() const override; - - Result> NewAppend() override; - - Result> NewTransaction() override; - - Result> location_provider() const override; -}; - } // namespace iceberg diff --git a/src/iceberg/table.cc b/src/iceberg/table_impl.cc similarity index 95% rename from src/iceberg/table.cc rename to src/iceberg/table_impl.cc index 6fd463a7..a9309419 100644 --- a/src/iceberg/table.cc +++ b/src/iceberg/table_impl.cc @@ -17,13 +17,9 @@ * under the License. */ -#include "iceberg/table.h" +#include "iceberg/table_impl.h" -#include - -#include "iceberg/exception.h" #include "iceberg/partition_spec.h" -#include "iceberg/result.h" #include "iceberg/schema.h" #include "iceberg/snapshot.h" #include "iceberg/sort_order.h" @@ -160,10 +156,6 @@ Status StaticTable::Refresh() { return NotSupported("Refresh is not supported for StaticTable"); } -Result> StaticTable::NewScan() const { - return NotSupported("NewScan is not supported for StaticTable"); -} - Result> StaticTable::NewAppend() { return NotSupported("NewAppend is not supported for StaticTable"); } diff --git a/src/iceberg/table_impl.h b/src/iceberg/table_impl.h new file mode 100644 index 00000000..24a07c62 --- /dev/null +++ b/src/iceberg/table_impl.h @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/location_provider.h" +#include "iceberg/result.h" +#include "iceberg/table.h" +#include "iceberg/transaction.h" +#include "iceberg/type_fwd.h" + +namespace iceberg { + +/// \brief An abstract base implementation of the Iceberg Table interface. +/// +/// BaseTable provides common functionality for Iceberg table implementations, +/// including lazy initialization of schema, partition specs, sort orders, +/// and snapshot metadata. +/// +/// This class is not intended to be used directly by users, but serves as a foundation +/// for concrete implementations such as StaticTable or CatalogTable. +class ICEBERG_EXPORT BaseTable : public Table { + public: + ~BaseTable() override = default; + + BaseTable(std::string name, std::shared_ptr metadata); + + const std::string& name() const override { return name_; } + + const std::string& uuid() const override; + + Result> schema() const override; + + const std::unordered_map>& schemas() const override; + + Result> spec() const override; + + const std::unordered_map>& specs() + const override; + + Result> sort_order() const override; + + const std::unordered_map>& sort_orders() + const override; + + const std::unordered_map& properties() const override; + + const std::string& location() const override; + + Result> current_snapshot() const override; + + Result> snapshot(int64_t snapshot_id) const override; + + const std::vector>& snapshots() const override; + + const std::vector>& history() const override; + + private: + void InitSchema() const; + void InitPartitionSpec() const; + void InitSortOrder() const; + void InitSnapshot() const; + + const std::string name_; + + mutable std::shared_ptr schema_; + mutable std::unordered_map> schemas_map_; + + mutable std::shared_ptr partition_spec_; + mutable std::unordered_map> partition_spec_map_; + + mutable std::shared_ptr sort_order_; + mutable std::unordered_map> sort_orders_map_; + + mutable std::shared_ptr current_snapshot_; + mutable std::unordered_map> snapshots_map_; + + mutable std::vector> history_; + + std::shared_ptr metadata_; + + // once_flags + mutable std::once_flag init_schema_once_; + mutable std::once_flag init_partition_spec_once_; + mutable std::once_flag init_sort_order_once_; + mutable std::once_flag init_snapshot_once_; +}; + +/// \brief A read-only implementation of an Iceberg table. +/// +/// StaticTable represents a snapshot of a table that does not support mutation. +class ICEBERG_EXPORT StaticTable : public BaseTable { + public: + ~StaticTable() override = default; + + StaticTable(std::string name, std::shared_ptr metadata) + : BaseTable(std::move(name), std::move(metadata)) {} + + Status Refresh() override; + + Result> NewAppend() override; + + Result> NewTransaction() override; + + Result> location_provider() const override; +}; + +} // namespace iceberg diff --git a/src/iceberg/table_scan.h b/src/iceberg/table_scan.h deleted file mode 100644 index a12862cd..00000000 --- a/src/iceberg/table_scan.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#pragma once - -namespace iceberg { - -/// \brief Represents a table scan operation -class ICEBERG_EXPORT TableScan { - public: - virtual ~TableScan() = default; -}; - -} // namespace iceberg diff --git a/test/metadata_serde_test.cc b/test/metadata_serde_test.cc index b6f1d050..4c78f652 100644 --- a/test/metadata_serde_test.cc +++ b/test/metadata_serde_test.cc @@ -50,8 +50,7 @@ class MetadataSerdeTest : public ::testing::Test { TEST_F(MetadataSerdeTest, DeserializeV1Valid) { std::unique_ptr metadata; - ASSERT_NO_FATAL_FAILURE( - test::ReadTableMetadata("TableMetadataV1Valid.json", &metadata)); + ASSERT_NO_FATAL_FAILURE(ReadTableMetadata("TableMetadataV1Valid.json", &metadata)); EXPECT_EQ(metadata->format_version, 1); EXPECT_EQ(metadata->table_uuid, "d20125c8-7284-442c-9aea-15fee620737c"); @@ -84,12 +83,13 @@ TEST_F(MetadataSerdeTest, DeserializeV1Valid) { auto partition_spec = metadata->PartitionSpec(); ASSERT_TRUE(partition_spec.has_value()); EXPECT_EQ(*(partition_spec.value().get()), *expected_spec); + auto snapshot = metadata->Snapshot(); + ASSERT_FALSE(snapshot.has_value()); } TEST_F(MetadataSerdeTest, DeserializeV2Valid) { std::unique_ptr metadata; - ASSERT_NO_FATAL_FAILURE( - test::ReadTableMetadata("TableMetadataV2Valid.json", &metadata)); + ASSERT_NO_FATAL_FAILURE(ReadTableMetadata("TableMetadataV2Valid.json", &metadata)); EXPECT_EQ(metadata->format_version, 2); EXPECT_EQ(metadata->table_uuid, "9c12d441-03fe-4693-9a96-a0705ddf69c1"); @@ -136,7 +136,11 @@ TEST_F(MetadataSerdeTest, DeserializeV2Valid) { ASSERT_TRUE(sort_order.has_value()); EXPECT_EQ(*(sort_order.value().get()), *expected_sort_order); + // Compare snapshot EXPECT_EQ(metadata->current_snapshot_id, 3055729675574597004); + auto snapshot = metadata->Snapshot(); + ASSERT_TRUE(snapshot.has_value()); + EXPECT_EQ(snapshot.value()->snapshot_id, 3055729675574597004); // Compare snapshots std::vector expected_snapshots{ diff --git a/test/table_test.cc b/test/table_test.cc index 161402d0..35d52325 100644 --- a/test/table_test.cc +++ b/test/table_test.cc @@ -17,8 +17,6 @@ * under the License. */ -#include "iceberg/table.h" - #include #include #include @@ -31,71 +29,85 @@ #include "iceberg/partition_spec.h" #include "iceberg/schema.h" #include "iceberg/snapshot.h" +#include "iceberg/table_impl.h" #include "iceberg/table_metadata.h" #include "test_common.h" namespace iceberg { -TEST(TableTest, TableSchemaV1Test) { +TEST(StaticTable, TableV1) { std::unique_ptr metadata; - ASSERT_NO_FATAL_FAILURE( - test::ReadTableMetadata("TableMetadataV1Valid.json", &metadata)); + ASSERT_NO_FATAL_FAILURE(ReadTableMetadata("TableMetadataV1Valid.json", &metadata)); StaticTable table("test_table_v1", std::move(metadata)); ASSERT_EQ(table.name(), "test_table_v1"); + + // Check table schema auto schema = table.schema(); ASSERT_TRUE(schema.has_value()); ASSERT_EQ(schema.value()->fields().size(), 3); auto schemas = table.schemas(); ASSERT_TRUE(schemas.empty()); + // Check table spec auto spec = table.spec(); ASSERT_TRUE(spec.has_value()); auto specs = table.specs(); ASSERT_EQ(1UL, specs.size()); + // Check table sort_order auto sort_order = table.sort_order(); ASSERT_TRUE(sort_order.has_value()); auto sort_orders = table.sort_orders(); ASSERT_EQ(1UL, sort_orders.size()); + // Check table location auto location = table.location(); ASSERT_EQ(location, "s3://bucket/test/location"); + // Check table snapshots auto snapshots = table.snapshots(); ASSERT_TRUE(snapshots.empty()); } -TEST(TableTest, TableSchemaV2Test) { +TEST(StaticTable, TableV2) { std::unique_ptr metadata; - ASSERT_NO_FATAL_FAILURE( - test::ReadTableMetadata("TableMetadataV2Valid.json", &metadata)); + ASSERT_NO_FATAL_FAILURE(ReadTableMetadata("TableMetadataV2Valid.json", &metadata)); StaticTable table("test_table_v2", std::move(metadata)); ASSERT_EQ(table.name(), "test_table_v2"); + + // Check table schema auto schema = table.schema(); ASSERT_TRUE(schema.has_value()); ASSERT_EQ(schema.value()->fields().size(), 3); auto schemas = table.schemas(); ASSERT_FALSE(schemas.empty()); + // Check partition spec auto spec = table.spec(); ASSERT_TRUE(spec.has_value()); auto specs = table.specs(); ASSERT_EQ(1UL, specs.size()); + // Check sort order auto sort_order = table.sort_order(); ASSERT_TRUE(sort_order.has_value()); auto sort_orders = table.sort_orders(); ASSERT_EQ(1UL, sort_orders.size()); + // Check table location auto location = table.location(); ASSERT_EQ(location, "s3://bucket/test/location"); + // Check snapshot auto snapshots = table.snapshots(); ASSERT_EQ(2UL, snapshots.size()); auto snapshot = table.current_snapshot(); ASSERT_TRUE(snapshot != nullptr); + auto invalid_snapshot_id = 9999; + snapshot = table.snapshot(invalid_snapshot_id); + ASSERT_FALSE(snapshot.has_value()); } } // namespace iceberg diff --git a/test/test_common.cc b/test/test_common.cc index e03dbf3a..25fa8f2c 100644 --- a/test/test_common.cc +++ b/test/test_common.cc @@ -30,7 +30,7 @@ #include "iceberg/json_internal.h" #include "iceberg/test/test_config.h" -namespace iceberg::test { +namespace iceberg { std::string GetResourcePath(const std::string& file_name) { return std::string(ICEBERG_TEST_RESOURCES) + "/" + file_name; @@ -58,4 +58,4 @@ void ReadTableMetadata(const std::string& file_name, *metadata = std::move(result.value()); } -} // namespace iceberg::test +} // namespace iceberg diff --git a/test/test_common.h b/test/test_common.h index 832538ba..a9dba8ca 100644 --- a/test/test_common.h +++ b/test/test_common.h @@ -24,7 +24,7 @@ #include "iceberg/type_fwd.h" -namespace iceberg::test { +namespace iceberg { /// \brief Get the full path to a resource file in the test resources directory std::string GetResourcePath(const std::string& file_name); @@ -36,4 +36,4 @@ void ReadJsonFile(const std::string& file_name, std::string* content); void ReadTableMetadata(const std::string& file_name, std::unique_ptr* metadata); -} // namespace iceberg::test +} // namespace iceberg From c9fe0acd6cc25ddd351b79c135382fb92b88a1d2 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Tue, 3 Jun 2025 09:57:30 +0800 Subject: [PATCH 10/22] feat: static table metadata access support --- src/iceberg/table_impl.cc | 18 +++--------------- src/iceberg/table_impl.h | 1 - src/iceberg/table_metadata.cc | 21 ++++++++++++++++----- src/iceberg/table_metadata.h | 4 ++++ 4 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/iceberg/table_impl.cc b/src/iceberg/table_impl.cc index a9309419..c68f0f60 100644 --- a/src/iceberg/table_impl.cc +++ b/src/iceberg/table_impl.cc @@ -19,6 +19,7 @@ #include "iceberg/table_impl.h" +#include "iceberg/exception.h" #include "iceberg/partition_spec.h" #include "iceberg/schema.h" #include "iceberg/snapshot.h" @@ -30,7 +31,7 @@ namespace iceberg { BaseTable::BaseTable(std::string name, std::shared_ptr metadata) : name_(std::move(name)), metadata_(std::move(metadata)) { if (!metadata_) { - throw std::invalid_argument("Table metadata cannot be null"); + throw IcebergError("Table metadata cannot be null"); } } @@ -38,15 +39,8 @@ void BaseTable::InitSchema() const { for (const auto& schema : metadata_->schemas) { if (schema->schema_id()) { schemas_map_.emplace(schema->schema_id().value(), schema); - if (schema->schema_id().value() == metadata_->current_schema_id) { - schema_ = schema; - } } } - // compatible with V1 table schema - if (!schema_ && metadata_->schemas.size() == 1UL) { - schema_ = metadata_->schemas.front(); - } } void BaseTable::InitPartitionSpec() const { @@ -79,13 +73,7 @@ void BaseTable::InitSnapshot() const { const std::string& BaseTable::uuid() const { return metadata_->table_uuid; } -Result> BaseTable::schema() const { - std::call_once(init_schema_once_, [this]() { InitSchema(); }); - if (!schema_) { - return NotFound("Current schema is not defined for this table"); - } - return schema_; -} +Result> BaseTable::schema() const { return metadata_->Schema(); } const std::unordered_map>& BaseTable::schemas() const { std::call_once(init_schema_once_, [this]() { InitSchema(); }); diff --git a/src/iceberg/table_impl.h b/src/iceberg/table_impl.h index 24a07c62..a9509f69 100644 --- a/src/iceberg/table_impl.h +++ b/src/iceberg/table_impl.h @@ -84,7 +84,6 @@ class ICEBERG_EXPORT BaseTable : public Table { const std::string name_; - mutable std::shared_ptr schema_; mutable std::unordered_map> schemas_map_; mutable std::shared_ptr partition_spec_; diff --git a/src/iceberg/table_metadata.cc b/src/iceberg/table_metadata.cc index 9713364a..17e24ad9 100644 --- a/src/iceberg/table_metadata.cc +++ b/src/iceberg/table_metadata.cc @@ -47,13 +47,24 @@ std::string ToString(const MetadataLogEntry& entry) { } Result> TableMetadata::Schema() const { - auto iter = std::ranges::find_if(schemas, [this](const auto& schema) { - return schema->schema_id() == current_schema_id; + std::call_once(init_schema_once, [this]() { + auto iter = std::ranges::find_if(schemas, [this](const auto& schema) { + return schema->schema_id() == current_schema_id; + }); + if (iter != schemas.end()) { + schema = *iter; + } + + // compatible with V1 table schema + if (!schema && schemas.size() == 1UL) { + schema = schemas.front(); + } }); - if (iter == schemas.end()) { - return NotFound("Current schema is not found"); + + if (!schema) { + return NotFound("Current schema is not defined for this table"); } - return *iter; + return schema; } Result> TableMetadata::PartitionSpec() const { diff --git a/src/iceberg/table_metadata.h b/src/iceberg/table_metadata.h index c40e5ff8..aa5958b0 100644 --- a/src/iceberg/table_metadata.h +++ b/src/iceberg/table_metadata.h @@ -92,6 +92,8 @@ struct ICEBERG_EXPORT TableMetadata { TimePointMs last_updated_ms; /// The highest assigned column ID for the table int32_t last_column_id; + /// The current schema for the table, or null if not set + mutable std::shared_ptr schema; /// A list of schemas std::vector> schemas; /// ID of the table's current schema @@ -127,6 +129,8 @@ struct ICEBERG_EXPORT TableMetadata { /// A `long` higher than all assigned row IDs int64_t next_row_id; + mutable std::once_flag init_schema_once; + /// \brief Get the current schema, return NotFoundError if not found Result> Schema() const; /// \brief Get the current partition spec, return NotFoundError if not found From a09cb8b2ae96dc3e879e25b0c9376fc6a0286934 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Tue, 3 Jun 2025 10:19:40 +0800 Subject: [PATCH 11/22] feat: static table metadata access support --- src/iceberg/table_metadata.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/iceberg/table_metadata.h b/src/iceberg/table_metadata.h index aa5958b0..c2eeab07 100644 --- a/src/iceberg/table_metadata.h +++ b/src/iceberg/table_metadata.h @@ -23,6 +23,7 @@ /// Table metadata for Iceberg tables. #include +#include #include #include #include @@ -129,6 +130,7 @@ struct ICEBERG_EXPORT TableMetadata { /// A `long` higher than all assigned row IDs int64_t next_row_id; + /// \brief Used for lazy initialization of schema mutable std::once_flag init_schema_once; /// \brief Get the current schema, return NotFoundError if not found From 401a0194e65bdea1f46ea0df1b564bd05744b7cc Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Tue, 3 Jun 2025 10:50:08 +0800 Subject: [PATCH 12/22] feat: static table metadata access support --- src/iceberg/table.h | 1 + src/iceberg/table_impl.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 8ec1eac7..3d26c100 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -19,6 +19,7 @@ #pragma once +#include #include #include #include diff --git a/src/iceberg/table_impl.h b/src/iceberg/table_impl.h index a9509f69..8f7e8773 100644 --- a/src/iceberg/table_impl.h +++ b/src/iceberg/table_impl.h @@ -19,6 +19,7 @@ #pragma once +#include #include #include #include From 345355ab4046a3cc0b8d5935d7dc0ae8e3157b08 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Tue, 3 Jun 2025 11:39:27 +0800 Subject: [PATCH 13/22] feat: static table metadata access support --- test/metadata_io_test.cc | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/test/metadata_io_test.cc b/test/metadata_io_test.cc index 7d987e25..432101b0 100644 --- a/test/metadata_io_test.cc +++ b/test/metadata_io_test.cc @@ -50,27 +50,25 @@ class MetadataIOTest : public TempFileTestBase { /*optional=*/false); auto schema = std::make_shared(std::move(schema_fields), /*schema_id=*/1); - TableMetadata metadata{ - .format_version = 1, - .table_uuid = "1234567890", - .location = "s3://bucket/path", - .last_sequence_number = 0, - .schemas = {schema}, - .current_schema_id = 1, - .default_spec_id = 0, - .last_partition_id = 0, - .properties = {{"key", "value"}}, - .current_snapshot_id = 3051729675574597004, - .snapshots = {std::make_shared(Snapshot{ - .snapshot_id = 3051729675574597004, - .sequence_number = 0, - .timestamp_ms = TimePointMsFromUnixMs(1515100955770).value(), - .manifest_list = "s3://a/b/1.avro", - .summary = {{"operation", "append"}}, - })}, - .default_sort_order_id = 0, - .next_row_id = 0}; - return metadata; + return TableMetadata{.format_version = 1, + .table_uuid = "1234567890", + .location = "s3://bucket/path", + .last_sequence_number = 0, + .schemas = {schema}, + .current_schema_id = 1, + .default_spec_id = 0, + .last_partition_id = 0, + .properties = {{"key", "value"}}, + .current_snapshot_id = 3051729675574597004, + .snapshots = {std::make_shared(Snapshot{ + .snapshot_id = 3051729675574597004, + .sequence_number = 0, + .timestamp_ms = TimePointMsFromUnixMs(1515100955770).value(), + .manifest_list = "s3://a/b/1.avro", + .summary = {{"operation", "append"}}, + })}, + .default_sort_order_id = 0, + .next_row_id = 0}; } std::shared_ptr io_; From 92577260feac6e00efed2997414a68b3ef57d0e9 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Wed, 18 Jun 2025 22:53:27 +0800 Subject: [PATCH 14/22] feat: metadata access support for table --- src/iceberg/CMakeLists.txt | 2 +- src/iceberg/snapshot.h | 16 ++++ src/iceberg/table.cc | 130 ++++++++++++++++++++++++++++ src/iceberg/table.h | 101 +++++++++++++--------- src/iceberg/table_impl.cc | 159 ----------------------------------- src/iceberg/table_impl.h | 129 ---------------------------- src/iceberg/table_metadata.h | 17 +--- src/iceberg/type_fwd.h | 4 +- test/table_test.cc | 40 ++++----- 9 files changed, 233 insertions(+), 365 deletions(-) create mode 100644 src/iceberg/table.cc delete mode 100644 src/iceberg/table_impl.cc delete mode 100644 src/iceberg/table_impl.h diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index c62f6d67..38fd28d1 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -38,7 +38,7 @@ set(ICEBERG_SOURCES sort_field.cc sort_order.cc statistics_file.cc - table_impl.cc + table.cc table_metadata.cc transform.cc transform_function.cc diff --git a/src/iceberg/snapshot.h b/src/iceberg/snapshot.h index 2df6a44d..96f8a36c 100644 --- a/src/iceberg/snapshot.h +++ b/src/iceberg/snapshot.h @@ -273,4 +273,20 @@ struct ICEBERG_EXPORT Snapshot { bool Equals(const Snapshot& other) const; }; +/// \brief Represents a snapshot log entry +struct ICEBERG_EXPORT SnapshotLogEntry { + /// The timestamp in milliseconds of the change + TimePointMs timestamp_ms; + /// ID of the snapshot + int64_t snapshot_id; + + friend bool operator==(const SnapshotLogEntry& lhs, const SnapshotLogEntry& rhs) { + return lhs.timestamp_ms == rhs.timestamp_ms && lhs.snapshot_id == rhs.snapshot_id; + } + + friend bool operator!=(const SnapshotLogEntry& lhs, const SnapshotLogEntry& rhs) { + return !(lhs == rhs); + } +}; + } // namespace iceberg diff --git a/src/iceberg/table.cc b/src/iceberg/table.cc new file mode 100644 index 00000000..e2f3bd5c --- /dev/null +++ b/src/iceberg/table.cc @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/table.h" + +#include "iceberg/partition_spec.h" +#include "iceberg/schema.h" +#include "iceberg/sort_order.h" +#include "iceberg/table_metadata.h" + +namespace iceberg { + +const std::string& Table::uuid() const { return metadata_->table_uuid; } + +const std::shared_ptr& Table::schema() const { + if (!schema_) { + const static std::shared_ptr kEmptySchema = + std::make_shared(std::vector{}); + auto schema = metadata_->Schema(); + if (schema.has_value()) { + schema_ = schema.value(); + } else { + schema_ = kEmptySchema; + } + } + return schema_; +} + +const std::unordered_map>& Table::schemas() const { + std::call_once(init_schemas_once_, [this]() { + for (const auto& schema : metadata_->schemas) { + if (schema->schema_id()) { + schemas_map_.emplace(schema->schema_id().value(), schema); + } + } + }); + return schemas_map_; +} + +const std::shared_ptr& Table::spec() const { + std::call_once(init_partition_spec_once_, [this]() { + auto partition_spec = metadata_->PartitionSpec(); + if (partition_spec.has_value()) { + partition_spec_ = partition_spec.value(); + } + }); + return partition_spec_; +} + +const std::unordered_map>& Table::specs() const { + std::call_once(init_partition_specs_once_, [this]() { + for (const auto& spec : metadata_->partition_specs) { + partition_spec_map_[spec->spec_id()] = spec; + } + }); + return partition_spec_map_; +} + +const std::shared_ptr& Table::sort_order() const { + std::call_once(init_sort_order_once_, [this]() { + auto sort_order = metadata_->SortOrder(); + if (sort_order.has_value()) { + sort_order_ = sort_order.value(); + } + }); + return sort_order_; +} + +const std::unordered_map>& Table::sort_orders() + const { + std::call_once(init_sort_orders_once_, [this]() { + for (const auto& order : metadata_->sort_orders) { + sort_orders_map_[order->order_id()] = order; + } + }); + return sort_orders_map_; +} + +const std::unordered_map& Table::properties() const { + return metadata_->properties; +} + +const std::string& Table::location() const { return metadata_->location; } + +std::shared_ptr Table::CurrentSnapshot() const { + std::call_once(init_snapshot_once_, [this]() { + auto snapshot = metadata_->Snapshot(); + if (snapshot.has_value()) { + current_snapshot_ = snapshot.value(); + } + }); + return current_snapshot_; +} + +std::shared_ptr Table::SnapshotById(int64_t snapshot_id) const { + auto iter = std::ranges::find_if(metadata_->snapshots, + [this, &snapshot_id](const auto& snapshot) { + return snapshot->snapshot_id == snapshot_id; + }); + if (iter == metadata_->snapshots.end()) { + return nullptr; + } + return *iter; +} + +const std::vector>& Table::snapshots() const { + return metadata_->snapshots; +} + +const std::vector& Table::history() const { + return metadata_->snapshot_log; +} + +} // namespace iceberg diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 3d26c100..91da5576 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -19,13 +19,16 @@ #pragma once -#include #include #include #include #include "iceberg/iceberg_export.h" +#include "iceberg/location_provider.h" #include "iceberg/result.h" +#include "iceberg/snapshot.h" +#include "iceberg/table_identifier.h" +#include "iceberg/transaction.h" #include "iceberg/type_fwd.h" namespace iceberg { @@ -35,78 +38,96 @@ class ICEBERG_EXPORT Table { public: virtual ~Table() = default; - /// \brief Return the full name for this table - virtual const std::string& name() const = 0; + /// \brief Construct a table. + /// \param[in] identifier The identifier of the table. + /// \param[in] metadata The metadata for the table. + /// \param[in] metadata_location The location of the table metadata file. + /// \param[in] io The FileIO to read and write table data and metadata files. + /// \param[in] catalog The catalog that this table belongs to. + Table(TableIdentifier identifier, std::shared_ptr metadata, + std::string metadata_location, std::shared_ptr io, + std::shared_ptr catalog) + : identifier_(std::move(identifier)), + metadata_(std::move(metadata)), + metadata_location_(std::move(metadata_location)), + io_(std::move(io)), + catalog_(std::move(catalog)) {}; + + /// \brief Return the identifier of this table + const TableIdentifier& name() const { return identifier_; } /// \brief Returns the UUID of the table - virtual const std::string& uuid() const = 0; + const std::string& uuid() const; - /// \brief Refresh the current table metadata - virtual Status Refresh() = 0; - - /// \brief Return the schema for this table, return NotFoundError if not found - virtual Result> schema() const = 0; + /// \brief Return the schema for this table, return empty schema if not found + const std::shared_ptr& schema() const; /// \brief Return a map of schema for this table - virtual const std::unordered_map>& schemas() const = 0; + const std::unordered_map>& schemas() const; - /// \brief Return the partition spec for this table, return NotFoundError if not found - virtual Result> spec() const = 0; + /// \brief Return the partition spec for this table, return null if default spec is not + /// found + const std::shared_ptr& spec() const; /// \brief Return a map of partition specs for this table - virtual const std::unordered_map>& specs() - const = 0; + const std::unordered_map>& specs() const; - /// \brief Return the sort order for this table, return NotFoundError if not found - virtual Result> sort_order() const = 0; + /// \brief Return the sort order for this table, return null if default sort order is + /// not found + const std::shared_ptr& sort_order() const; /// \brief Return a map of sort order IDs to sort orders for this table - virtual const std::unordered_map>& sort_orders() - const = 0; + const std::unordered_map>& sort_orders() const; /// \brief Return a map of string properties for this table - virtual const std::unordered_map& properties() const = 0; + const std::unordered_map& properties() const; /// \brief Return the table's base location - virtual const std::string& location() const = 0; + const std::string& location() const; - /// \brief Return the table's current snapshot, or NotFoundError if not found - virtual Result> current_snapshot() const = 0; + /// \brief Return the table's current snapshot, return null if not found + std::shared_ptr CurrentSnapshot() const; /// \brief Get the snapshot of this table with the given id, or null if there is no /// matching snapshot /// /// \param snapshot_id the ID of the snapshot to get /// \return the Snapshot with the given id - virtual Result> snapshot(int64_t snapshot_id) const = 0; + std::shared_ptr SnapshotById(int64_t snapshot_id) const; /// \brief Get the snapshots of this table - virtual const std::vector>& snapshots() const = 0; + const std::vector>& snapshots() const; /// \brief Get the snapshot history of this table /// /// \return a vector of history entries - virtual const std::vector>& history() const = 0; + const std::vector& history() const; + + private: + const TableIdentifier identifier_; + const std::shared_ptr metadata_; + const std::string metadata_location_; + std::shared_ptr io_; + std::shared_ptr catalog_; - // TODO(lishuxu): TableScan is not implemented yet, disable it for now. - // /// \brief Create a new table scan for this table - // /// - // /// Once a table scan is created, it can be refined to project columns and filter - // data. virtual Result> NewScan() const = 0; + mutable std::shared_ptr schema_; + mutable std::unordered_map> schemas_map_; - /// \brief Create a new append API to add files to this table and commit - virtual Result> NewAppend() = 0; + mutable std::shared_ptr partition_spec_; + mutable std::unordered_map> partition_spec_map_; - /// \brief Create a new transaction API to commit multiple table operations at once - virtual Result> NewTransaction() = 0; + mutable std::shared_ptr sort_order_; + mutable std::unordered_map> sort_orders_map_; - /// TODO(wgtmac): design of FileIO is not finalized yet. We intend to use an - /// IO-less design in the core library. - // /// \brief Returns a FileIO to read and write table data and metadata files - // virtual std::shared_ptr io() const = 0; + mutable std::shared_ptr current_snapshot_; - /// \brief Returns a LocationProvider to provide locations for new data files - virtual Result> location_provider() const = 0; + // once_flags + mutable std::once_flag init_schemas_once_; + mutable std::once_flag init_partition_spec_once_; + mutable std::once_flag init_partition_specs_once_; + mutable std::once_flag init_sort_order_once_; + mutable std::once_flag init_sort_orders_once_; + mutable std::once_flag init_snapshot_once_; }; } // namespace iceberg diff --git a/src/iceberg/table_impl.cc b/src/iceberg/table_impl.cc deleted file mode 100644 index c68f0f60..00000000 --- a/src/iceberg/table_impl.cc +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "iceberg/table_impl.h" - -#include "iceberg/exception.h" -#include "iceberg/partition_spec.h" -#include "iceberg/schema.h" -#include "iceberg/snapshot.h" -#include "iceberg/sort_order.h" -#include "iceberg/table_metadata.h" - -namespace iceberg { - -BaseTable::BaseTable(std::string name, std::shared_ptr metadata) - : name_(std::move(name)), metadata_(std::move(metadata)) { - if (!metadata_) { - throw IcebergError("Table metadata cannot be null"); - } -} - -void BaseTable::InitSchema() const { - for (const auto& schema : metadata_->schemas) { - if (schema->schema_id()) { - schemas_map_.emplace(schema->schema_id().value(), schema); - } - } -} - -void BaseTable::InitPartitionSpec() const { - for (const auto& spec : metadata_->partition_specs) { - partition_spec_map_[spec->spec_id()] = spec; - if (spec->spec_id() == metadata_->default_spec_id) { - partition_spec_ = spec; - } - } -} - -void BaseTable::InitSortOrder() const { - for (const auto& order : metadata_->sort_orders) { - sort_orders_map_[order->order_id()] = order; - if (order->order_id() == metadata_->default_sort_order_id) { - sort_order_ = order; - } - } -} - -void BaseTable::InitSnapshot() const { - auto snapshots = metadata_->snapshots; - for (const auto& snapshot : snapshots) { - if (snapshot->snapshot_id == metadata_->current_snapshot_id) { - current_snapshot_ = snapshot; - } - snapshots_map_[snapshot->snapshot_id] = snapshot; - } -} - -const std::string& BaseTable::uuid() const { return metadata_->table_uuid; } - -Result> BaseTable::schema() const { return metadata_->Schema(); } - -const std::unordered_map>& BaseTable::schemas() const { - std::call_once(init_schema_once_, [this]() { InitSchema(); }); - return schemas_map_; -} - -Result> BaseTable::spec() const { - std::call_once(init_partition_spec_once_, [this]() { InitPartitionSpec(); }); - if (!partition_spec_) { - return NotFound("Default partition spec is not defined for this table"); - } - return partition_spec_; -} - -const std::unordered_map>& BaseTable::specs() - const { - std::call_once(init_partition_spec_once_, [this]() { InitPartitionSpec(); }); - return partition_spec_map_; -} - -Result> BaseTable::sort_order() const { - std::call_once(init_sort_order_once_, [this]() { InitSortOrder(); }); - if (!sort_order_) { - return NotFound("Default sort order is not defined for this table"); - } - return sort_order_; -} - -const std::unordered_map>& BaseTable::sort_orders() - const { - std::call_once(init_sort_order_once_, [this]() { InitSortOrder(); }); - return sort_orders_map_; -} - -const std::unordered_map& BaseTable::properties() const { - return metadata_->properties; -} - -const std::string& BaseTable::location() const { return metadata_->location; } - -Result> BaseTable::current_snapshot() const { - std::call_once(init_snapshot_once_, [this]() { InitSnapshot(); }); - if (!current_snapshot_) { - return NotFound("Current snapshot is not defined for this table"); - } - return current_snapshot_; -} - -Result> BaseTable::snapshot(int64_t snapshot_id) const { - std::call_once(init_snapshot_once_, [this]() { InitSnapshot(); }); - auto iter = snapshots_map_.find(snapshot_id); - if (iter == snapshots_map_.end()) { - return NotFound("Snapshot with ID {} not found", snapshot_id); - } - return iter->second; -} - -const std::vector>& BaseTable::snapshots() const { - return metadata_->snapshots; -} - -const std::vector>& BaseTable::history() const { - // TODO(lishuxu): Implement history retrieval - return history_; -} - -Status StaticTable::Refresh() { - return NotSupported("Refresh is not supported for StaticTable"); -} - -Result> StaticTable::NewAppend() { - return NotSupported("NewAppend is not supported for StaticTable"); -} - -Result> StaticTable::NewTransaction() { - return NotSupported("NewTransaction is not supported for StaticTable"); -} - -Result> StaticTable::location_provider() const { - return NotSupported("location_provider is not supported for StaticTable"); -} - -} // namespace iceberg diff --git a/src/iceberg/table_impl.h b/src/iceberg/table_impl.h deleted file mode 100644 index 8f7e8773..00000000 --- a/src/iceberg/table_impl.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#pragma once - -#include -#include -#include -#include - -#include "iceberg/iceberg_export.h" -#include "iceberg/location_provider.h" -#include "iceberg/result.h" -#include "iceberg/table.h" -#include "iceberg/transaction.h" -#include "iceberg/type_fwd.h" - -namespace iceberg { - -/// \brief An abstract base implementation of the Iceberg Table interface. -/// -/// BaseTable provides common functionality for Iceberg table implementations, -/// including lazy initialization of schema, partition specs, sort orders, -/// and snapshot metadata. -/// -/// This class is not intended to be used directly by users, but serves as a foundation -/// for concrete implementations such as StaticTable or CatalogTable. -class ICEBERG_EXPORT BaseTable : public Table { - public: - ~BaseTable() override = default; - - BaseTable(std::string name, std::shared_ptr metadata); - - const std::string& name() const override { return name_; } - - const std::string& uuid() const override; - - Result> schema() const override; - - const std::unordered_map>& schemas() const override; - - Result> spec() const override; - - const std::unordered_map>& specs() - const override; - - Result> sort_order() const override; - - const std::unordered_map>& sort_orders() - const override; - - const std::unordered_map& properties() const override; - - const std::string& location() const override; - - Result> current_snapshot() const override; - - Result> snapshot(int64_t snapshot_id) const override; - - const std::vector>& snapshots() const override; - - const std::vector>& history() const override; - - private: - void InitSchema() const; - void InitPartitionSpec() const; - void InitSortOrder() const; - void InitSnapshot() const; - - const std::string name_; - - mutable std::unordered_map> schemas_map_; - - mutable std::shared_ptr partition_spec_; - mutable std::unordered_map> partition_spec_map_; - - mutable std::shared_ptr sort_order_; - mutable std::unordered_map> sort_orders_map_; - - mutable std::shared_ptr current_snapshot_; - mutable std::unordered_map> snapshots_map_; - - mutable std::vector> history_; - - std::shared_ptr metadata_; - - // once_flags - mutable std::once_flag init_schema_once_; - mutable std::once_flag init_partition_spec_once_; - mutable std::once_flag init_sort_order_once_; - mutable std::once_flag init_snapshot_once_; -}; - -/// \brief A read-only implementation of an Iceberg table. -/// -/// StaticTable represents a snapshot of a table that does not support mutation. -class ICEBERG_EXPORT StaticTable : public BaseTable { - public: - ~StaticTable() override = default; - - StaticTable(std::string name, std::shared_ptr metadata) - : BaseTable(std::move(name), std::move(metadata)) {} - - Status Refresh() override; - - Result> NewAppend() override; - - Result> NewTransaction() override; - - Result> location_provider() const override; -}; - -} // namespace iceberg diff --git a/src/iceberg/table_metadata.h b/src/iceberg/table_metadata.h index ddaff809..3b143b45 100644 --- a/src/iceberg/table_metadata.h +++ b/src/iceberg/table_metadata.h @@ -30,27 +30,12 @@ #include #include "iceberg/iceberg_export.h" +#include "iceberg/snapshot.h" #include "iceberg/type_fwd.h" #include "iceberg/util/timepoint.h" namespace iceberg { -/// \brief Represents a snapshot log entry -struct ICEBERG_EXPORT SnapshotLogEntry { - /// The timestamp in milliseconds of the change - TimePointMs timestamp_ms; - /// ID of the snapshot - int64_t snapshot_id; - - friend bool operator==(const SnapshotLogEntry& lhs, const SnapshotLogEntry& rhs) { - return lhs.timestamp_ms == rhs.timestamp_ms && lhs.snapshot_id == rhs.snapshot_id; - } - - friend bool operator!=(const SnapshotLogEntry& lhs, const SnapshotLogEntry& rhs) { - return !(lhs == rhs); - } -}; - /// \brief Represents a metadata log entry struct ICEBERG_EXPORT MetadataLogEntry { /// The timestamp in milliseconds of the change diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h index a5996c42..adbf25bf 100644 --- a/src/iceberg/type_fwd.h +++ b/src/iceberg/type_fwd.h @@ -99,6 +99,9 @@ class TransformFunction; struct PartitionStatisticsFile; struct Snapshot; struct SnapshotRef; +struct SnapshotLogEntry; +struct MetadataLogEntry; + struct StatisticsFile; struct TableMetadata; @@ -113,7 +116,6 @@ enum class TransformType; /// TODO: Forward declarations below are not added yet. /// ---------------------------------------------------------------------------- -class HistoryEntry; class StructLike; class MetadataUpdate; diff --git a/test/table_test.cc b/test/table_test.cc index 35d52325..61c13334 100644 --- a/test/table_test.cc +++ b/test/table_test.cc @@ -17,6 +17,8 @@ * under the License. */ +#include "iceberg/table.h" + #include #include #include @@ -29,35 +31,34 @@ #include "iceberg/partition_spec.h" #include "iceberg/schema.h" #include "iceberg/snapshot.h" -#include "iceberg/table_impl.h" #include "iceberg/table_metadata.h" #include "test_common.h" namespace iceberg { -TEST(StaticTable, TableV1) { +TEST(Table, TableV1) { std::unique_ptr metadata; ASSERT_NO_FATAL_FAILURE(ReadTableMetadata("TableMetadataV1Valid.json", &metadata)); - - StaticTable table("test_table_v1", std::move(metadata)); - ASSERT_EQ(table.name(), "test_table_v1"); + TableIdentifier tableIdent{.ns = {}, .name = "test_table_v1"}; + Table table(tableIdent, std::move(metadata), "s3://bucket/test/location/meta/", nullptr, + nullptr); + ASSERT_EQ(table.name().name, "test_table_v1"); // Check table schema auto schema = table.schema(); - ASSERT_TRUE(schema.has_value()); - ASSERT_EQ(schema.value()->fields().size(), 3); + ASSERT_EQ(schema->fields().size(), 3); auto schemas = table.schemas(); ASSERT_TRUE(schemas.empty()); // Check table spec auto spec = table.spec(); - ASSERT_TRUE(spec.has_value()); + ASSERT_TRUE(spec != nullptr); auto specs = table.specs(); ASSERT_EQ(1UL, specs.size()); // Check table sort_order auto sort_order = table.sort_order(); - ASSERT_TRUE(sort_order.has_value()); + ASSERT_TRUE(sort_order != nullptr); auto sort_orders = table.sort_orders(); ASSERT_EQ(1UL, sort_orders.size()); @@ -70,29 +71,30 @@ TEST(StaticTable, TableV1) { ASSERT_TRUE(snapshots.empty()); } -TEST(StaticTable, TableV2) { +TEST(Table, TableV2) { std::unique_ptr metadata; ASSERT_NO_FATAL_FAILURE(ReadTableMetadata("TableMetadataV2Valid.json", &metadata)); + TableIdentifier tableIdent{.ns = {}, .name = "test_table_v2"}; - StaticTable table("test_table_v2", std::move(metadata)); - ASSERT_EQ(table.name(), "test_table_v2"); + Table table(tableIdent, std::move(metadata), "s3://bucket/test/location/meta/", nullptr, + nullptr); + ASSERT_EQ(table.name().name, "test_table_v2"); // Check table schema auto schema = table.schema(); - ASSERT_TRUE(schema.has_value()); - ASSERT_EQ(schema.value()->fields().size(), 3); + ASSERT_EQ(schema->fields().size(), 3); auto schemas = table.schemas(); ASSERT_FALSE(schemas.empty()); // Check partition spec auto spec = table.spec(); - ASSERT_TRUE(spec.has_value()); + ASSERT_TRUE(spec != nullptr); auto specs = table.specs(); ASSERT_EQ(1UL, specs.size()); // Check sort order auto sort_order = table.sort_order(); - ASSERT_TRUE(sort_order.has_value()); + ASSERT_TRUE(sort_order != nullptr); auto sort_orders = table.sort_orders(); ASSERT_EQ(1UL, sort_orders.size()); @@ -103,11 +105,11 @@ TEST(StaticTable, TableV2) { // Check snapshot auto snapshots = table.snapshots(); ASSERT_EQ(2UL, snapshots.size()); - auto snapshot = table.current_snapshot(); + auto snapshot = table.CurrentSnapshot(); ASSERT_TRUE(snapshot != nullptr); auto invalid_snapshot_id = 9999; - snapshot = table.snapshot(invalid_snapshot_id); - ASSERT_FALSE(snapshot.has_value()); + snapshot = table.SnapshotById(invalid_snapshot_id); + ASSERT_TRUE(snapshot == nullptr); } } // namespace iceberg From d95bd4b195f55d3b90895e474967d540165dfe45 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Wed, 18 Jun 2025 23:02:57 +0800 Subject: [PATCH 15/22] feat: metadata access support for table --- src/iceberg/table.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 91da5576..deb4fafb 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -19,16 +19,14 @@ #pragma once +#include #include #include #include #include "iceberg/iceberg_export.h" -#include "iceberg/location_provider.h" -#include "iceberg/result.h" #include "iceberg/snapshot.h" #include "iceberg/table_identifier.h" -#include "iceberg/transaction.h" #include "iceberg/type_fwd.h" namespace iceberg { From 515bd86518dfa83e9c314240681785ff04f948aa Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Thu, 19 Jun 2025 09:41:10 +0800 Subject: [PATCH 16/22] feat: metadata access support for table --- src/iceberg/table.cc | 2 +- src/iceberg/table.h | 2 +- test/table_test.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/iceberg/table.cc b/src/iceberg/table.cc index e2f3bd5c..69519a73 100644 --- a/src/iceberg/table.cc +++ b/src/iceberg/table.cc @@ -98,7 +98,7 @@ const std::unordered_map& Table::properties() const { const std::string& Table::location() const { return metadata_->location; } -std::shared_ptr Table::CurrentSnapshot() const { +std::shared_ptr Table::current_snapshot() const { std::call_once(init_snapshot_once_, [this]() { auto snapshot = metadata_->Snapshot(); if (snapshot.has_value()) { diff --git a/src/iceberg/table.h b/src/iceberg/table.h index deb4fafb..99c3b837 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -84,7 +84,7 @@ class ICEBERG_EXPORT Table { const std::string& location() const; /// \brief Return the table's current snapshot, return null if not found - std::shared_ptr CurrentSnapshot() const; + std::shared_ptr current_snapshot() const; /// \brief Get the snapshot of this table with the given id, or null if there is no /// matching snapshot diff --git a/test/table_test.cc b/test/table_test.cc index 61c13334..8bab4e78 100644 --- a/test/table_test.cc +++ b/test/table_test.cc @@ -105,7 +105,7 @@ TEST(Table, TableV2) { // Check snapshot auto snapshots = table.snapshots(); ASSERT_EQ(2UL, snapshots.size()); - auto snapshot = table.CurrentSnapshot(); + auto snapshot = table.current_snapshot(); ASSERT_TRUE(snapshot != nullptr); auto invalid_snapshot_id = 9999; snapshot = table.SnapshotById(invalid_snapshot_id); From 43979f28b117bdb22d82a22683088ad54c2aec91 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Fri, 20 Jun 2025 17:27:36 +0800 Subject: [PATCH 17/22] feat: metadata access support for table --- src/iceberg/snapshot.h | 16 ------- src/iceberg/table.cc | 84 +++++++++++++---------------------- src/iceberg/table.h | 64 ++++++++++++-------------- src/iceberg/table_metadata.cc | 24 +++------- src/iceberg/table_metadata.h | 21 ++++++--- src/iceberg/type_fwd.h | 3 +- test/table_test.cc | 35 +++++++++------ 7 files changed, 104 insertions(+), 143 deletions(-) diff --git a/src/iceberg/snapshot.h b/src/iceberg/snapshot.h index 96f8a36c..2df6a44d 100644 --- a/src/iceberg/snapshot.h +++ b/src/iceberg/snapshot.h @@ -273,20 +273,4 @@ struct ICEBERG_EXPORT Snapshot { bool Equals(const Snapshot& other) const; }; -/// \brief Represents a snapshot log entry -struct ICEBERG_EXPORT SnapshotLogEntry { - /// The timestamp in milliseconds of the change - TimePointMs timestamp_ms; - /// ID of the snapshot - int64_t snapshot_id; - - friend bool operator==(const SnapshotLogEntry& lhs, const SnapshotLogEntry& rhs) { - return lhs.timestamp_ms == rhs.timestamp_ms && lhs.snapshot_id == rhs.snapshot_id; - } - - friend bool operator!=(const SnapshotLogEntry& lhs, const SnapshotLogEntry& rhs) { - return !(lhs == rhs); - } -}; - } // namespace iceberg diff --git a/src/iceberg/table.cc b/src/iceberg/table.cc index 69519a73..c79f3786 100644 --- a/src/iceberg/table.cc +++ b/src/iceberg/table.cc @@ -28,67 +28,51 @@ namespace iceberg { const std::string& Table::uuid() const { return metadata_->table_uuid; } -const std::shared_ptr& Table::schema() const { - if (!schema_) { - const static std::shared_ptr kEmptySchema = - std::make_shared(std::vector{}); - auto schema = metadata_->Schema(); - if (schema.has_value()) { - schema_ = schema.value(); - } else { - schema_ = kEmptySchema; - } - } - return schema_; -} +Result> Table::schema() const { return metadata_->Schema(); } -const std::unordered_map>& Table::schemas() const { - std::call_once(init_schemas_once_, [this]() { +const std::shared_ptr>>& +Table::schemas() const { + if (!schemas_map_) { + schemas_map_ = + std::make_shared>>(); for (const auto& schema : metadata_->schemas) { if (schema->schema_id()) { - schemas_map_.emplace(schema->schema_id().value(), schema); + schemas_map_->emplace(schema->schema_id().value(), schema); } } - }); + } return schemas_map_; } -const std::shared_ptr& Table::spec() const { - std::call_once(init_partition_spec_once_, [this]() { - auto partition_spec = metadata_->PartitionSpec(); - if (partition_spec.has_value()) { - partition_spec_ = partition_spec.value(); - } - }); - return partition_spec_; +Result> Table::spec() const { + return metadata_->PartitionSpec(); } -const std::unordered_map>& Table::specs() const { - std::call_once(init_partition_specs_once_, [this]() { +const std::shared_ptr>>& +Table::specs() const { + if (!partition_spec_map_) { + partition_spec_map_ = + std::make_shared>>(); for (const auto& spec : metadata_->partition_specs) { - partition_spec_map_[spec->spec_id()] = spec; + partition_spec_map_->emplace(spec->spec_id(), spec); } - }); + } return partition_spec_map_; } -const std::shared_ptr& Table::sort_order() const { - std::call_once(init_sort_order_once_, [this]() { - auto sort_order = metadata_->SortOrder(); - if (sort_order.has_value()) { - sort_order_ = sort_order.value(); - } - }); - return sort_order_; +Result> Table::sort_order() const { + return metadata_->SortOrder(); } -const std::unordered_map>& Table::sort_orders() - const { - std::call_once(init_sort_orders_once_, [this]() { +const std::shared_ptr>>& +Table::sort_orders() const { + if (!sort_orders_map_) { + sort_orders_map_ = + std::make_shared>>(); for (const auto& order : metadata_->sort_orders) { - sort_orders_map_[order->order_id()] = order; + sort_orders_map_->emplace(order->order_id(), order); } - }); + } return sort_orders_map_; } @@ -98,23 +82,17 @@ const std::unordered_map& Table::properties() const { const std::string& Table::location() const { return metadata_->location; } -std::shared_ptr Table::current_snapshot() const { - std::call_once(init_snapshot_once_, [this]() { - auto snapshot = metadata_->Snapshot(); - if (snapshot.has_value()) { - current_snapshot_ = snapshot.value(); - } - }); - return current_snapshot_; +Result> Table::current_snapshot() const { + return metadata_->Snapshot(); } -std::shared_ptr Table::SnapshotById(int64_t snapshot_id) const { +Result> Table::SnapshotById(int64_t snapshot_id) const { auto iter = std::ranges::find_if(metadata_->snapshots, [this, &snapshot_id](const auto& snapshot) { return snapshot->snapshot_id == snapshot_id; }); if (iter == metadata_->snapshots.end()) { - return nullptr; + return NotFound("Snapshot with ID {} is not found", snapshot_id); } return *iter; } @@ -127,4 +105,6 @@ const std::vector& Table::history() const { return metadata_->snapshot_log; } +const std::shared_ptr& Table::io() const { return io_; } + } // namespace iceberg diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 99c3b837..062890e5 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -41,7 +41,8 @@ class ICEBERG_EXPORT Table { /// \param[in] metadata The metadata for the table. /// \param[in] metadata_location The location of the table metadata file. /// \param[in] io The FileIO to read and write table data and metadata files. - /// \param[in] catalog The catalog that this table belongs to. + /// \param[in] catalog The catalog that this table belongs to. If null, the table will + /// be read-only. Table(TableIdentifier identifier, std::shared_ptr metadata, std::string metadata_location, std::shared_ptr io, std::shared_ptr catalog) @@ -57,25 +58,26 @@ class ICEBERG_EXPORT Table { /// \brief Returns the UUID of the table const std::string& uuid() const; - /// \brief Return the schema for this table, return empty schema if not found - const std::shared_ptr& schema() const; + /// \brief Return the schema for this table, return NotFoundError if not found + Result> schema() const; /// \brief Return a map of schema for this table - const std::unordered_map>& schemas() const; + const std::shared_ptr>>& schemas() + const; - /// \brief Return the partition spec for this table, return null if default spec is not - /// found - const std::shared_ptr& spec() const; + /// \brief Return the partition spec for this table, return NotFoundError if not found + Result> spec() const; /// \brief Return a map of partition specs for this table - const std::unordered_map>& specs() const; + const std::shared_ptr>>& + specs() const; - /// \brief Return the sort order for this table, return null if default sort order is - /// not found - const std::shared_ptr& sort_order() const; + /// \brief Return the sort order for this table, return NotFoundError if not found + Result> sort_order() const; /// \brief Return a map of sort order IDs to sort orders for this table - const std::unordered_map>& sort_orders() const; + const std::shared_ptr>>& + sort_orders() const; /// \brief Return a map of string properties for this table const std::unordered_map& properties() const; @@ -83,15 +85,14 @@ class ICEBERG_EXPORT Table { /// \brief Return the table's base location const std::string& location() const; - /// \brief Return the table's current snapshot, return null if not found - std::shared_ptr current_snapshot() const; + /// \brief Return the table's current snapshot, return NotFoundError if not found + Result> current_snapshot() const; - /// \brief Get the snapshot of this table with the given id, or null if there is no - /// matching snapshot + /// \brief Get the snapshot of this table with the given id /// /// \param snapshot_id the ID of the snapshot to get - /// \return the Snapshot with the given id - std::shared_ptr SnapshotById(int64_t snapshot_id) const; + /// \return the Snapshot with the given id, return NotFoundError if not found + Result> SnapshotById(int64_t snapshot_id) const; /// \brief Get the snapshots of this table const std::vector>& snapshots() const; @@ -101,6 +102,9 @@ class ICEBERG_EXPORT Table { /// \return a vector of history entries const std::vector& history() const; + /// \brief Returns a FileIO to read and write table data and metadata files + const std::shared_ptr& io() const; + private: const TableIdentifier identifier_; const std::shared_ptr metadata_; @@ -108,24 +112,12 @@ class ICEBERG_EXPORT Table { std::shared_ptr io_; std::shared_ptr catalog_; - mutable std::shared_ptr schema_; - mutable std::unordered_map> schemas_map_; - - mutable std::shared_ptr partition_spec_; - mutable std::unordered_map> partition_spec_map_; - - mutable std::shared_ptr sort_order_; - mutable std::unordered_map> sort_orders_map_; - - mutable std::shared_ptr current_snapshot_; - - // once_flags - mutable std::once_flag init_schemas_once_; - mutable std::once_flag init_partition_spec_once_; - mutable std::once_flag init_partition_specs_once_; - mutable std::once_flag init_sort_order_once_; - mutable std::once_flag init_sort_orders_once_; - mutable std::once_flag init_snapshot_once_; + mutable std::shared_ptr>> + schemas_map_; + mutable std::shared_ptr>> + partition_spec_map_; + mutable std::shared_ptr>> + sort_orders_map_; }; } // namespace iceberg diff --git a/src/iceberg/table_metadata.cc b/src/iceberg/table_metadata.cc index 17e24ad9..b820517b 100644 --- a/src/iceberg/table_metadata.cc +++ b/src/iceberg/table_metadata.cc @@ -47,24 +47,13 @@ std::string ToString(const MetadataLogEntry& entry) { } Result> TableMetadata::Schema() const { - std::call_once(init_schema_once, [this]() { - auto iter = std::ranges::find_if(schemas, [this](const auto& schema) { - return schema->schema_id() == current_schema_id; - }); - if (iter != schemas.end()) { - schema = *iter; - } - - // compatible with V1 table schema - if (!schema && schemas.size() == 1UL) { - schema = schemas.front(); - } + auto iter = std::ranges::find_if(schemas, [this](const auto& schema) { + return schema->schema_id() == current_schema_id; }); - - if (!schema) { - return NotFound("Current schema is not defined for this table"); + if (iter == schemas.end()) { + return NotFound("Current schema is not found"); } - return schema; + return *iter; } Result> TableMetadata::PartitionSpec() const { @@ -88,9 +77,6 @@ Result> TableMetadata::SortOrder() const { } Result> TableMetadata::Snapshot() const { - if (current_snapshot_id == Snapshot::kInvalidSnapshotId) { - return NotFound("Current snapshot is not defined for this table"); - } auto iter = std::ranges::find_if(snapshots, [this](const auto& snapshot) { return snapshot->snapshot_id == current_snapshot_id; }); diff --git a/src/iceberg/table_metadata.h b/src/iceberg/table_metadata.h index 3b143b45..e4c04633 100644 --- a/src/iceberg/table_metadata.h +++ b/src/iceberg/table_metadata.h @@ -36,6 +36,22 @@ namespace iceberg { +/// \brief Represents a snapshot log entry +struct ICEBERG_EXPORT SnapshotLogEntry { + /// The timestamp in milliseconds of the change + TimePointMs timestamp_ms; + /// ID of the snapshot + int64_t snapshot_id; + + friend bool operator==(const SnapshotLogEntry& lhs, const SnapshotLogEntry& rhs) { + return lhs.timestamp_ms == rhs.timestamp_ms && lhs.snapshot_id == rhs.snapshot_id; + } + + friend bool operator!=(const SnapshotLogEntry& lhs, const SnapshotLogEntry& rhs) { + return !(lhs == rhs); + } +}; + /// \brief Represents a metadata log entry struct ICEBERG_EXPORT MetadataLogEntry { /// The timestamp in milliseconds of the change @@ -80,8 +96,6 @@ struct ICEBERG_EXPORT TableMetadata { TimePointMs last_updated_ms; /// The highest assigned column ID for the table int32_t last_column_id; - /// The current schema for the table, or null if not set - mutable std::shared_ptr schema; /// A list of schemas std::vector> schemas; /// ID of the table's current schema @@ -117,9 +131,6 @@ struct ICEBERG_EXPORT TableMetadata { /// A `long` higher than all assigned row IDs int64_t next_row_id; - /// \brief Used for lazy initialization of schema - mutable std::once_flag init_schema_once; - /// \brief Get the current schema, return NotFoundError if not found Result> Schema() const; /// \brief Get the current partition spec, return NotFoundError if not found diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h index adbf25bf..cc5f0a7f 100644 --- a/src/iceberg/type_fwd.h +++ b/src/iceberg/type_fwd.h @@ -99,8 +99,9 @@ class TransformFunction; struct PartitionStatisticsFile; struct Snapshot; struct SnapshotRef; -struct SnapshotLogEntry; + struct MetadataLogEntry; +struct SnapshotLogEntry; struct StatisticsFile; struct TableMetadata; diff --git a/test/table_test.cc b/test/table_test.cc index 8bab4e78..56f475de 100644 --- a/test/table_test.cc +++ b/test/table_test.cc @@ -46,21 +46,22 @@ TEST(Table, TableV1) { // Check table schema auto schema = table.schema(); - ASSERT_EQ(schema->fields().size(), 3); + ASSERT_TRUE(schema.has_value()); + ASSERT_EQ(schema.value()->fields().size(), 3); auto schemas = table.schemas(); - ASSERT_TRUE(schemas.empty()); + ASSERT_TRUE(schemas->empty()); // Check table spec auto spec = table.spec(); - ASSERT_TRUE(spec != nullptr); + ASSERT_TRUE(spec.has_value()); auto specs = table.specs(); - ASSERT_EQ(1UL, specs.size()); + ASSERT_EQ(1UL, specs->size()); // Check table sort_order auto sort_order = table.sort_order(); - ASSERT_TRUE(sort_order != nullptr); + ASSERT_TRUE(sort_order.has_value()); auto sort_orders = table.sort_orders(); - ASSERT_EQ(1UL, sort_orders.size()); + ASSERT_EQ(1UL, sort_orders->size()); // Check table location auto location = table.location(); @@ -69,6 +70,9 @@ TEST(Table, TableV1) { // Check table snapshots auto snapshots = table.snapshots(); ASSERT_TRUE(snapshots.empty()); + + auto io = table.io(); + ASSERT_TRUE(io == nullptr); } TEST(Table, TableV2) { @@ -82,21 +86,22 @@ TEST(Table, TableV2) { // Check table schema auto schema = table.schema(); - ASSERT_EQ(schema->fields().size(), 3); + ASSERT_TRUE(schema.has_value()); + ASSERT_EQ(schema.value()->fields().size(), 3); auto schemas = table.schemas(); - ASSERT_FALSE(schemas.empty()); + ASSERT_FALSE(schemas->empty()); // Check partition spec auto spec = table.spec(); - ASSERT_TRUE(spec != nullptr); + ASSERT_TRUE(spec.has_value()); auto specs = table.specs(); - ASSERT_EQ(1UL, specs.size()); + ASSERT_EQ(1UL, specs->size()); // Check sort order auto sort_order = table.sort_order(); - ASSERT_TRUE(sort_order != nullptr); + ASSERT_TRUE(sort_order.has_value()); auto sort_orders = table.sort_orders(); - ASSERT_EQ(1UL, sort_orders.size()); + ASSERT_EQ(1UL, sort_orders->size()); // Check table location auto location = table.location(); @@ -106,10 +111,12 @@ TEST(Table, TableV2) { auto snapshots = table.snapshots(); ASSERT_EQ(2UL, snapshots.size()); auto snapshot = table.current_snapshot(); - ASSERT_TRUE(snapshot != nullptr); + ASSERT_TRUE(snapshot.has_value()); + snapshot = table.SnapshotById(snapshot.value()->snapshot_id); + ASSERT_TRUE(snapshot.has_value()); auto invalid_snapshot_id = 9999; snapshot = table.SnapshotById(invalid_snapshot_id); - ASSERT_TRUE(snapshot == nullptr); + ASSERT_FALSE(snapshot.has_value()); } } // namespace iceberg From 3fc237493eb94fd3adc98d1089657e140929008b Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Mon, 23 Jun 2025 17:02:32 +0800 Subject: [PATCH 18/22] feat: metadata access support for table --- src/iceberg/table.h | 3 ++- src/iceberg/table_metadata.h | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 062890e5..985743fa 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -107,11 +107,12 @@ class ICEBERG_EXPORT Table { private: const TableIdentifier identifier_; - const std::shared_ptr metadata_; + std::shared_ptr metadata_; const std::string metadata_location_; std::shared_ptr io_; std::shared_ptr catalog_; + // Cache lazy-initialized maps. mutable std::shared_ptr>> schemas_map_; mutable std::shared_ptr>> diff --git a/src/iceberg/table_metadata.h b/src/iceberg/table_metadata.h index e4c04633..da03297f 100644 --- a/src/iceberg/table_metadata.h +++ b/src/iceberg/table_metadata.h @@ -23,14 +23,12 @@ /// Table metadata for Iceberg tables. #include -#include #include #include #include #include #include "iceberg/iceberg_export.h" -#include "iceberg/snapshot.h" #include "iceberg/type_fwd.h" #include "iceberg/util/timepoint.h" From 3dd509662b1bf85738061b5d7df59a84c0ffba8c Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Mon, 23 Jun 2025 18:09:27 +0800 Subject: [PATCH 19/22] feat: metadata access support for table --- src/iceberg/table.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 985743fa..954af290 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -41,8 +41,8 @@ class ICEBERG_EXPORT Table { /// \param[in] metadata The metadata for the table. /// \param[in] metadata_location The location of the table metadata file. /// \param[in] io The FileIO to read and write table data and metadata files. - /// \param[in] catalog The catalog that this table belongs to. If null, the table will - /// be read-only. + /// \param[in] catalog The catalog that this table belongs to. If the catalog is null, + /// the table will be read-only. Table(TableIdentifier identifier, std::shared_ptr metadata, std::string metadata_location, std::shared_ptr io, std::shared_ptr catalog) From 84d82eb6e94fea43356ed4c1ce80bcbf1867d2dd Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Mon, 23 Jun 2025 23:09:46 +0800 Subject: [PATCH 20/22] feat: metadata access support for table --- src/iceberg/table.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 954af290..37c1c77e 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -19,7 +19,6 @@ #pragma once -#include #include #include #include From 13c411ff8dc8236a07f4ca23ca176c4fca807f86 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Tue, 24 Jun 2025 09:26:29 +0800 Subject: [PATCH 21/22] feat: metadata access support for table --- src/iceberg/table.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 37c1c77e..ebf554ae 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -40,8 +40,8 @@ class ICEBERG_EXPORT Table { /// \param[in] metadata The metadata for the table. /// \param[in] metadata_location The location of the table metadata file. /// \param[in] io The FileIO to read and write table data and metadata files. - /// \param[in] catalog The catalog that this table belongs to. If the catalog is null, - /// the table will be read-only. + /// \param[in] catalog The catalog that this table belongs to. If null, the table will + /// be read-only. Table(TableIdentifier identifier, std::shared_ptr metadata, std::string metadata_location, std::shared_ptr io, std::shared_ptr catalog) From e6ac508d6b670ba674af80cc44e485a622bf3b96 Mon Sep 17 00:00:00 2001 From: "shuxu.li" Date: Tue, 24 Jun 2025 09:51:33 +0800 Subject: [PATCH 22/22] feat: metadata access support for table --- src/iceberg/table.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/iceberg/table.h b/src/iceberg/table.h index ebf554ae..9db02b4b 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -61,6 +61,7 @@ class ICEBERG_EXPORT Table { Result> schema() const; /// \brief Return a map of schema for this table + /// \note This method is **not** thread-safe in the current implementation. const std::shared_ptr>>& schemas() const; @@ -68,6 +69,7 @@ class ICEBERG_EXPORT Table { Result> spec() const; /// \brief Return a map of partition specs for this table + /// \note This method is **not** thread-safe in the current implementation. const std::shared_ptr>>& specs() const; @@ -75,6 +77,7 @@ class ICEBERG_EXPORT Table { Result> sort_order() const; /// \brief Return a map of sort order IDs to sort orders for this table + /// \note This method is **not** thread-safe in the current implementation. const std::shared_ptr>>& sort_orders() const;