Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve table function to string #122

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/MainDistributionPipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ jobs:
name: Build extension binaries
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
with:
# pip install duckdb==1.1.4.dev1594
duckdb_version: 0ccf3c25cc
# pip install duckdb==1.1.4.dev2005
duckdb_version: b470dea7ee
ci_tools_version: main
extension_name: delta
enable_rust: true
Expand All @@ -33,5 +33,5 @@ jobs:
with:
extension_name: delta
ci_tools_version: main
duckdb_version: 0ccf3c25cc
duckdb_version: b470dea7ee
exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw'
2 changes: 1 addition & 1 deletion duckdb
Submodule duckdb updated 417 files
38 changes: 23 additions & 15 deletions src/functions/delta_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ unique_ptr<MultiFileList> DeltaSnapshot::ComplexFilterPushdown(ClientContext &co
for (const auto &filter : filters) {
combiner.AddFilter(filter->Copy());
}
auto filterstmp = combiner.GenerateTableScanFilters(info.column_ids);
auto filterstmp = combiner.GenerateTableScanFilters(info.column_indexes);

// TODO: can/should we figure out if this filtered anything?
auto filtered_list = make_uniq<DeltaSnapshot>(context, paths[0]);
Expand Down Expand Up @@ -643,9 +643,9 @@ void DeltaMultiFileReader::BindOptions(MultiFileReaderOptions &options, MultiFil
void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_options,
const MultiFileReaderBindData &options, const string &filename,
const vector<string> &local_names, const vector<LogicalType> &global_types,
const vector<string> &global_names, const vector<column_t> &global_column_ids,
MultiFileReaderData &reader_data, ClientContext &context,
optional_ptr<MultiFileReaderGlobalState> global_state) {
const vector<string> &global_names,
const vector<ColumnIndex> &global_column_ids, MultiFileReaderData &reader_data,
ClientContext &context, optional_ptr<MultiFileReaderGlobalState> global_state) {
MultiFileReader::FinalizeBind(file_options, options, filename, local_names, global_types, global_names,
global_column_ids, reader_data, context, global_state);

Expand All @@ -671,7 +671,7 @@ void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_optio

if (!file_metadata->partition_map.empty()) {
for (idx_t i = 0; i < global_column_ids.size(); i++) {
column_t col_id = global_column_ids[i];
column_t col_id = global_column_ids[i].GetPrimaryIndex();
if (IsRowIdColumnId(col_id)) {
continue;
}
Expand Down Expand Up @@ -749,14 +749,14 @@ unique_ptr<MultiFileReaderGlobalState> DeltaMultiFileReader::InitializeGlobalSta
duckdb::ClientContext &context, const duckdb::MultiFileReaderOptions &file_options,
const duckdb::MultiFileReaderBindData &bind_data, const duckdb::MultiFileList &file_list,
const vector<duckdb::LogicalType> &global_types, const vector<std::string> &global_names,
const vector<duckdb::column_t> &global_column_ids) {
const vector<ColumnIndex> &global_column_ids) {
vector<LogicalType> extra_columns;
vector<pair<string, idx_t>> mapped_columns;

// Create a map of the columns that are in the projection
case_insensitive_map_t<idx_t> selected_columns;
for (idx_t i = 0; i < global_column_ids.size(); i++) {
auto global_id = global_column_ids[i];
auto global_id = global_column_ids[i].GetPrimaryIndex();
if (IsRowIdColumnId(global_id)) {
continue;
}
Expand Down Expand Up @@ -815,7 +815,7 @@ unique_ptr<MultiFileReaderGlobalState> DeltaMultiFileReader::InitializeGlobalSta
// in the parquet files, we just add null constant columns
static void CustomMulfiFileNameMapping(const string &file_name, const vector<LogicalType> &local_types,
const vector<string> &local_names, const vector<LogicalType> &global_types,
const vector<string> &global_names, const vector<column_t> &global_column_ids,
const vector<string> &global_names, const vector<ColumnIndex> &global_column_ids,
MultiFileReaderData &reader_data, const string &initial_file,
optional_ptr<MultiFileReaderGlobalState> global_state) {
D_ASSERT(global_types.size() == global_names.size());
Expand All @@ -839,7 +839,7 @@ static void CustomMulfiFileNameMapping(const string &file_name, const vector<Log
continue;
}
// not constant - look up the column in the name map
auto global_id = global_column_ids[i];
auto global_id = global_column_ids[i].GetPrimaryIndex();
if (global_id >= global_types.size()) {
throw InternalException(
"MultiFileReader::CreatePositionalMapping - global_id is out of range in global_types for this file");
Expand Down Expand Up @@ -880,7 +880,7 @@ static void CustomMulfiFileNameMapping(const string &file_name, const vector<Log
void DeltaMultiFileReader::CreateNameMapping(const string &file_name, const vector<LogicalType> &local_types,
const vector<string> &local_names, const vector<LogicalType> &global_types,
const vector<string> &global_names,
const vector<column_t> &global_column_ids,
const vector<ColumnIndex> &global_column_ids,
MultiFileReaderData &reader_data, const string &initial_file,
optional_ptr<MultiFileReaderGlobalState> global_state) {
// First call the base implementation to do most mapping
Expand Down Expand Up @@ -980,11 +980,17 @@ bool DeltaMultiFileReader::ParseOption(const string &key, const Value &val, Mult

return MultiFileReader::ParseOption(key, val, options, context);
}
//
// DeltaMultiFileReaderBindData::DeltaMultiFileReaderBindData(DeltaSnapshot & delta_snapshot):
// current_snapshot(delta_snapshot){
//
//}

static InsertionOrderPreservingMap<string> DeltaFunctionToString(TableFunctionToStringInput &input) {
InsertionOrderPreservingMap<string> result;

if (input.table_function.function_info) {
auto &table_info = input.table_function.function_info->Cast<DeltaFunctionInfo>();
result["Table"] = table_info.table_name;
}

return result;
}

TableFunctionSet DeltaFunctions::GetDeltaScanFunction(DatabaseInstance &instance) {
// Parquet extension needs to be loaded for this to make sense
Expand All @@ -1007,6 +1013,8 @@ TableFunctionSet DeltaFunctions::GetDeltaScanFunction(DatabaseInstance &instance
function.table_scan_progress = nullptr;
function.get_bind_info = nullptr;

function.to_string = DeltaFunctionToString;

// Schema param is just confusing here
function.named_parameters.erase("schema");

Expand Down
7 changes: 4 additions & 3 deletions src/include/functions/delta_scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ struct DeltaSnapshot;
struct DeltaFunctionInfo : public TableFunctionInfo {
shared_ptr<DeltaSnapshot> snapshot;
string expected_path;
string table_name;
};

struct DeltaFileMetaData {
Expand Down Expand Up @@ -132,20 +133,20 @@ struct DeltaMultiFileReader : public MultiFileReader {

void CreateNameMapping(const string &file_name, const vector<LogicalType> &local_types,
const vector<string> &local_names, const vector<LogicalType> &global_types,
const vector<string> &global_names, const vector<column_t> &global_column_ids,
const vector<string> &global_names, const vector<ColumnIndex> &global_column_ids,
MultiFileReaderData &reader_data, const string &initial_file,
optional_ptr<MultiFileReaderGlobalState> global_state) override;

unique_ptr<MultiFileReaderGlobalState>
InitializeGlobalState(ClientContext &context, const MultiFileReaderOptions &file_options,
const MultiFileReaderBindData &bind_data, const MultiFileList &file_list,
const vector<LogicalType> &global_types, const vector<string> &global_names,
const vector<column_t> &global_column_ids) override;
const vector<ColumnIndex> &global_column_ids) override;

void FinalizeBind(const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &options,
const string &filename, const vector<string> &local_names,
const vector<LogicalType> &global_types, const vector<string> &global_names,
const vector<column_t> &global_column_ids, MultiFileReaderData &reader_data,
const vector<ColumnIndex> &global_column_ids, MultiFileReaderData &reader_data,
ClientContext &context, optional_ptr<MultiFileReaderGlobalState> global_state) override;

//! Override the FinalizeChunk method
Expand Down
1 change: 1 addition & 0 deletions src/storage/delta_table_entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ TableFunction DeltaTableEntry::GetScanFunction(ClientContext &context, unique_pt
auto function_info = make_shared_ptr<DeltaFunctionInfo>();

function_info->snapshot = this->snapshot;
function_info->table_name = delta_catalog.GetName();
delta_scan_function.function_info = std::move(function_info);

vector<Value> inputs = {delta_catalog.GetDBPath()};
Expand Down
8 changes: 7 additions & 1 deletion test/sql/dat/attach.test
Original file line number Diff line number Diff line change
Expand Up @@ -122,4 +122,10 @@ select utf8 from dt
1
2
3
4
4

# Test that the explain output contains the table name
query II
explain from dt
----
physical_plan <REGEX>:.*Table: dt.*
Loading