From ad12b3650d41ffe3dffb0965cfca090421da002e Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 14 Nov 2024 17:05:28 +0100 Subject: [PATCH 1/5] add table function tostring --- duckdb | 2 +- src/functions/delta_scan.cpp | 18 +++++++++++++----- src/include/functions/delta_scan.hpp | 1 + src/storage/delta_table_entry.cpp | 1 + 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/duckdb b/duckdb index 0ccf3c2..7fb238e 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 0ccf3c25ccbb25fb90616e77b38f6d138f82950d +Subproject commit 7fb238e1d2625fdc34a3057f3dffa7dfc32e0c5a diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index 4e35b17..3053fbf 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -980,11 +980,17 @@ bool DeltaMultiFileReader::ParseOption(const string &key, const Value &val, Mult return MultiFileReader::ParseOption(key, val, options, context); } -// -// DeltaMultiFileReaderBindData::DeltaMultiFileReaderBindData(DeltaSnapshot & delta_snapshot): -// current_snapshot(delta_snapshot){ -// -//} + +static InsertionOrderPreservingMap DeltaFunctionToString(TableFunctionToStringInput &input) { + InsertionOrderPreservingMap result; + + if (input.table_function.function_info) { + auto& table_info = input.table_function.function_info->Cast(); + result["Table"] = table_info.table_name; + } + + return result; +} TableFunctionSet DeltaFunctions::GetDeltaScanFunction(DatabaseInstance &instance) { // Parquet extension needs to be loaded for this to make sense @@ -1007,6 +1013,8 @@ TableFunctionSet DeltaFunctions::GetDeltaScanFunction(DatabaseInstance &instance function.table_scan_progress = nullptr; function.get_bind_info = nullptr; + function.to_string = DeltaFunctionToString; + // Schema param is just confusing here function.named_parameters.erase("schema"); diff --git a/src/include/functions/delta_scan.hpp b/src/include/functions/delta_scan.hpp index 32662a2..72636e8 100644 --- a/src/include/functions/delta_scan.hpp +++ b/src/include/functions/delta_scan.hpp @@ -17,6 +17,7 @@ struct DeltaSnapshot; struct DeltaFunctionInfo : public TableFunctionInfo { shared_ptr snapshot; string expected_path; + string table_name; }; struct DeltaFileMetaData { diff --git a/src/storage/delta_table_entry.cpp b/src/storage/delta_table_entry.cpp index 6f7f829..6dfb19d 100644 --- a/src/storage/delta_table_entry.cpp +++ b/src/storage/delta_table_entry.cpp @@ -47,6 +47,7 @@ TableFunction DeltaTableEntry::GetScanFunction(ClientContext &context, unique_pt auto function_info = make_shared_ptr(); function_info->snapshot = this->snapshot; + function_info->table_name = delta_catalog.GetName(); delta_scan_function.function_info = std::move(function_info); vector inputs = {delta_catalog.GetDBPath()}; From 1fe8e0d69ca222024aaee7c632ffd0f6a1ab9ff2 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 20 Nov 2024 09:59:43 +0100 Subject: [PATCH 2/5] bump duckdb to 1.1.4.dev2005 --- .github/workflows/MainDistributionPipeline.yml | 6 +++--- duckdb | 2 +- extension-ci-tools | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 22af8c2..3449ca6 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -16,8 +16,8 @@ jobs: name: Build extension binaries uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main with: - # pip install duckdb==1.1.4.dev1594 - duckdb_version: 0ccf3c25cc + # pip install duckdb==1.1.4.dev2005 + duckdb_version: b470dea7ee ci_tools_version: main extension_name: delta enable_rust: true @@ -33,5 +33,5 @@ jobs: with: extension_name: delta ci_tools_version: main - duckdb_version: 0ccf3c25cc + duckdb_version: b470dea7ee exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw' diff --git a/duckdb b/duckdb index 0ccf3c2..b470dea 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 0ccf3c25ccbb25fb90616e77b38f6d138f82950d +Subproject commit b470dea7ee47dc2debcc37a4e94976f8eff6670c diff --git a/extension-ci-tools b/extension-ci-tools index 3e987be..916d4ef 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit 3e987be862c95d0f7fc674fa242c97ce3a37ee04 +Subproject commit 916d4ef4371068ca98a007378b52582c3e46b4e5 From 35114aebc34e0824a38babb7fbcd1d15be18c0c5 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 20 Nov 2024 10:14:13 +0100 Subject: [PATCH 3/5] fix upstream MultiFileReader API changes --- src/functions/delta_scan.cpp | 16 ++++++++-------- src/include/functions/delta_scan.hpp | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index 3053fbf..8021a29 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -526,7 +526,7 @@ unique_ptr DeltaSnapshot::ComplexFilterPushdown(ClientContext &co for (const auto &filter : filters) { combiner.AddFilter(filter->Copy()); } - auto filterstmp = combiner.GenerateTableScanFilters(info.column_ids); + auto filterstmp = combiner.GenerateTableScanFilters(info.column_indexes); // TODO: can/should we figure out if this filtered anything? auto filtered_list = make_uniq(context, paths[0]); @@ -643,7 +643,7 @@ void DeltaMultiFileReader::BindOptions(MultiFileReaderOptions &options, MultiFil void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &options, const string &filename, const vector &local_names, const vector &global_types, - const vector &global_names, const vector &global_column_ids, + const vector &global_names, const vector &global_column_ids, MultiFileReaderData &reader_data, ClientContext &context, optional_ptr global_state) { MultiFileReader::FinalizeBind(file_options, options, filename, local_names, global_types, global_names, @@ -671,7 +671,7 @@ void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_optio if (!file_metadata->partition_map.empty()) { for (idx_t i = 0; i < global_column_ids.size(); i++) { - column_t col_id = global_column_ids[i]; + column_t col_id = global_column_ids[i].GetPrimaryIndex(); if (IsRowIdColumnId(col_id)) { continue; } @@ -749,14 +749,14 @@ unique_ptr DeltaMultiFileReader::InitializeGlobalSta duckdb::ClientContext &context, const duckdb::MultiFileReaderOptions &file_options, const duckdb::MultiFileReaderBindData &bind_data, const duckdb::MultiFileList &file_list, const vector &global_types, const vector &global_names, - const vector &global_column_ids) { + const vector &global_column_ids) { vector extra_columns; vector> mapped_columns; // Create a map of the columns that are in the projection case_insensitive_map_t selected_columns; for (idx_t i = 0; i < global_column_ids.size(); i++) { - auto global_id = global_column_ids[i]; + auto global_id = global_column_ids[i].GetPrimaryIndex(); if (IsRowIdColumnId(global_id)) { continue; } @@ -815,7 +815,7 @@ unique_ptr DeltaMultiFileReader::InitializeGlobalSta // in the parquet files, we just add null constant columns static void CustomMulfiFileNameMapping(const string &file_name, const vector &local_types, const vector &local_names, const vector &global_types, - const vector &global_names, const vector &global_column_ids, + const vector &global_names, const vector &global_column_ids, MultiFileReaderData &reader_data, const string &initial_file, optional_ptr global_state) { D_ASSERT(global_types.size() == global_names.size()); @@ -839,7 +839,7 @@ static void CustomMulfiFileNameMapping(const string &file_name, const vector= global_types.size()) { throw InternalException( "MultiFileReader::CreatePositionalMapping - global_id is out of range in global_types for this file"); @@ -880,7 +880,7 @@ static void CustomMulfiFileNameMapping(const string &file_name, const vector &local_types, const vector &local_names, const vector &global_types, const vector &global_names, - const vector &global_column_ids, + const vector &global_column_ids, MultiFileReaderData &reader_data, const string &initial_file, optional_ptr global_state) { // First call the base implementation to do most mapping diff --git a/src/include/functions/delta_scan.hpp b/src/include/functions/delta_scan.hpp index 72636e8..5bc981c 100644 --- a/src/include/functions/delta_scan.hpp +++ b/src/include/functions/delta_scan.hpp @@ -133,7 +133,7 @@ struct DeltaMultiFileReader : public MultiFileReader { void CreateNameMapping(const string &file_name, const vector &local_types, const vector &local_names, const vector &global_types, - const vector &global_names, const vector &global_column_ids, + const vector &global_names, const vector &global_column_ids, MultiFileReaderData &reader_data, const string &initial_file, optional_ptr global_state) override; @@ -141,12 +141,12 @@ struct DeltaMultiFileReader : public MultiFileReader { InitializeGlobalState(ClientContext &context, const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &bind_data, const MultiFileList &file_list, const vector &global_types, const vector &global_names, - const vector &global_column_ids) override; + const vector &global_column_ids) override; void FinalizeBind(const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &options, const string &filename, const vector &local_names, const vector &global_types, const vector &global_names, - const vector &global_column_ids, MultiFileReaderData &reader_data, + const vector &global_column_ids, MultiFileReaderData &reader_data, ClientContext &context, optional_ptr global_state) override; //! Override the FinalizeChunk method From 6f25451dc798521f0782f6e5b196ec54b2210f79 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 20 Nov 2024 10:17:38 +0100 Subject: [PATCH 4/5] add explain table name test --- test/sql/dat/attach.test | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/sql/dat/attach.test b/test/sql/dat/attach.test index de6615d..14ece4b 100644 --- a/test/sql/dat/attach.test +++ b/test/sql/dat/attach.test @@ -122,4 +122,10 @@ select utf8 from dt 1 2 3 -4 \ No newline at end of file +4 + +# Test that the explain output contains the table name +query II +explain from dt +---- +physical_plan :.*Table: dt.* \ No newline at end of file From 35e9d529482422e4213d452ac862ffa5f39fbee0 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 20 Nov 2024 10:18:02 +0100 Subject: [PATCH 5/5] format --- src/functions/delta_scan.cpp | 20 ++++++++++---------- src/include/functions/delta_scan.hpp | 2 +- src/storage/delta_table_entry.cpp | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index 8021a29..377c5c0 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -643,9 +643,9 @@ void DeltaMultiFileReader::BindOptions(MultiFileReaderOptions &options, MultiFil void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &options, const string &filename, const vector &local_names, const vector &global_types, - const vector &global_names, const vector &global_column_ids, - MultiFileReaderData &reader_data, ClientContext &context, - optional_ptr global_state) { + const vector &global_names, + const vector &global_column_ids, MultiFileReaderData &reader_data, + ClientContext &context, optional_ptr global_state) { MultiFileReader::FinalizeBind(file_options, options, filename, local_names, global_types, global_names, global_column_ids, reader_data, context, global_state); @@ -982,14 +982,14 @@ bool DeltaMultiFileReader::ParseOption(const string &key, const Value &val, Mult } static InsertionOrderPreservingMap DeltaFunctionToString(TableFunctionToStringInput &input) { - InsertionOrderPreservingMap result; + InsertionOrderPreservingMap result; - if (input.table_function.function_info) { - auto& table_info = input.table_function.function_info->Cast(); - result["Table"] = table_info.table_name; - } + if (input.table_function.function_info) { + auto &table_info = input.table_function.function_info->Cast(); + result["Table"] = table_info.table_name; + } - return result; + return result; } TableFunctionSet DeltaFunctions::GetDeltaScanFunction(DatabaseInstance &instance) { @@ -1013,7 +1013,7 @@ TableFunctionSet DeltaFunctions::GetDeltaScanFunction(DatabaseInstance &instance function.table_scan_progress = nullptr; function.get_bind_info = nullptr; - function.to_string = DeltaFunctionToString; + function.to_string = DeltaFunctionToString; // Schema param is just confusing here function.named_parameters.erase("schema"); diff --git a/src/include/functions/delta_scan.hpp b/src/include/functions/delta_scan.hpp index 5bc981c..e9e89da 100644 --- a/src/include/functions/delta_scan.hpp +++ b/src/include/functions/delta_scan.hpp @@ -17,7 +17,7 @@ struct DeltaSnapshot; struct DeltaFunctionInfo : public TableFunctionInfo { shared_ptr snapshot; string expected_path; - string table_name; + string table_name; }; struct DeltaFileMetaData { diff --git a/src/storage/delta_table_entry.cpp b/src/storage/delta_table_entry.cpp index 6dfb19d..be6ea58 100644 --- a/src/storage/delta_table_entry.cpp +++ b/src/storage/delta_table_entry.cpp @@ -47,7 +47,7 @@ TableFunction DeltaTableEntry::GetScanFunction(ClientContext &context, unique_pt auto function_info = make_shared_ptr(); function_info->snapshot = this->snapshot; - function_info->table_name = delta_catalog.GetName(); + function_info->table_name = delta_catalog.GetName(); delta_scan_function.function_info = std::move(function_info); vector inputs = {delta_catalog.GetDBPath()};