-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix truncate behavior #16
Changes from all commits
11e1684
6962015
624d171
7065709
70e5bcf
d5619ee
3cb0412
fe33f43
3c1bbfa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,7 @@ using duckdb::KeywordHelper; | |
|
||
// Utility | ||
|
||
std::string table_def::to_string() const { | ||
std::string table_def::to_escaped_string() const { | ||
std::ostringstream out; | ||
out << KeywordHelper::WriteQuoted(db_name, '"') << "." | ||
<< KeywordHelper::WriteQuoted(schema_name, '"') << "." | ||
|
@@ -65,7 +65,7 @@ bool table_exists(duckdb::Connection &con, const table_def &table) { | |
|
||
if (result->HasError()) { | ||
throw std::runtime_error("Could not find whether table <" + | ||
table.to_string() + | ||
table.to_escaped_string() + | ||
"> exists: " + result->GetError()); | ||
} | ||
auto materialized_result = duckdb::unique_ptr_cast< | ||
|
@@ -83,7 +83,7 @@ void create_schema(duckdb::Connection &con, const std::string &db_name, | |
void create_table(duckdb::Connection &con, const table_def &table, | ||
const std::vector<const column_def *> &columns_pk, | ||
const std::vector<column_def> &all_columns) { | ||
const std::string absolute_table_name = table.to_string(); | ||
const std::string absolute_table_name = table.to_escaped_string(); | ||
std::ostringstream ddl; | ||
ddl << "CREATE OR REPLACE TABLE " << absolute_table_name << " ("; | ||
|
||
|
@@ -129,7 +129,8 @@ std::vector<column_def> describe_table(duckdb::Connection &con, | |
auto result = statement->Execute(params, false); | ||
|
||
if (result->HasError()) { | ||
throw std::runtime_error("Could not describe table <" + table.to_string() + | ||
throw std::runtime_error("Could not describe table <" + | ||
table.to_escaped_string() + | ||
">:" + result->GetError()); | ||
} | ||
auto materialized_result = duckdb::unique_ptr_cast< | ||
|
@@ -148,7 +149,7 @@ std::vector<column_def> describe_table(duckdb::Connection &con, | |
void alter_table(duckdb::Connection &con, const table_def &table, | ||
const std::vector<column_def> &columns) { | ||
|
||
auto absolute_table_name = table.to_string(); | ||
auto absolute_table_name = table.to_escaped_string(); | ||
std::set<std::string> alter_types; | ||
std::set<std::string> added_columns; | ||
std::set<std::string> deleted_columns; | ||
|
@@ -233,7 +234,7 @@ void upsert(duckdb::Connection &con, const table_def &table, | |
const std::string &staging_table_name, | ||
std::vector<const column_def *> &columns_pk, | ||
std::vector<const column_def *> &columns_regular) { | ||
const std::string absolute_table_name = table.to_string(); | ||
const std::string absolute_table_name = table.to_escaped_string(); | ||
std::ostringstream sql; | ||
sql << "INSERT INTO " << absolute_table_name << " SELECT * FROM " | ||
<< staging_table_name; | ||
|
@@ -264,7 +265,7 @@ void update_values(duckdb::Connection &con, const table_def &table, | |
const std::string &unmodified_string) { | ||
|
||
std::ostringstream sql; | ||
auto absolute_table_name = table.to_string(); | ||
auto absolute_table_name = table.to_escaped_string(); | ||
|
||
sql << "UPDATE " << absolute_table_name << " SET "; | ||
|
||
|
@@ -299,7 +300,7 @@ void delete_rows(duckdb::Connection &con, const table_def &table, | |
const std::string &staging_table_name, | ||
std::vector<const column_def *> &columns_pk) { | ||
|
||
const std::string absolute_table_name = table.to_string(); | ||
const std::string absolute_table_name = table.to_escaped_string(); | ||
std::ostringstream sql; | ||
sql << "DELETE FROM " + absolute_table_name << " USING " << staging_table_name | ||
<< " WHERE "; | ||
|
@@ -319,13 +320,27 @@ void delete_rows(duckdb::Connection &con, const table_def &table, | |
} | ||
} | ||
|
||
void truncate_table(duckdb::Connection &con, const table_def &table) { | ||
const std::string absolute_table_name = table.to_string(); | ||
void truncate_table(duckdb::Connection &con, const table_def &table, | ||
const std::string &synced_column, | ||
std::chrono::nanoseconds &cutoff_ns, | ||
const std::string &deleted_column) { | ||
const std::string absolute_table_name = table.to_escaped_string(); | ||
std::ostringstream sql; | ||
sql << "DELETE FROM " + absolute_table_name; | ||
|
||
sql << "UPDATE " << absolute_table_name << " SET " | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (sorry if I already asked that before...) we can't quote the table name? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. perhaps I should rename it to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. renamed to |
||
<< KeywordHelper::WriteQuoted(deleted_column, '"') << " = true WHERE " | ||
<< KeywordHelper::WriteQuoted(synced_column, '"') | ||
<< " < make_timestamp(?)"; | ||
auto query = sql.str(); | ||
mdlog::info("truncate_table: " + query); | ||
auto result = con.Query(query); | ||
auto statement = con.Prepare(query); | ||
|
||
// DuckDB make_timestamp takes microseconds; Fivetran sends millisecond | ||
// precision -- safe to divide with truncation | ||
long cutoff_microseconds = cutoff_ns.count() / 1000; | ||
duckdb::vector<duckdb::Value> params = {duckdb::Value(cutoff_microseconds)}; | ||
|
||
auto result = statement->Execute(params, false); | ||
if (result->HasError()) { | ||
throw std::runtime_error("Error truncating table <" + absolute_table_name + | ||
">:" + result->GetError()); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
id,title,magic_number,_fivetran_deleted,_fivetran_synced | ||
3,"unmod-NcK9NIjPUutCsz4mjOQQztbnwnE1sY3",15,false,"2024-01-09T04:30:13.984276065Z" | ||
2,"The empire strikes back","unmod-NcK9NIjPUutCsz4mjOQQztbnwnE1sY3",false,"2024-01-09T04:30:13.984276065Z" | ||
3,"unmod-NcK9NIjPUutCsz4mjOQQztbnwnE1sY3",15,false,"2024-02-08T23:59:59.999999999Z" | ||
2,"The empire strikes back","unmod-NcK9NIjPUutCsz4mjOQQztbnwnE1sY3",false,"2024-02-09T00:00:00.000000000Z" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't it be converted to ns?
But also, doesn't `std::chrono::nanoseconds(request->utc_delete_before().nanos())` already give you what you need? Does it just give you the fractional seconds part?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adding seconds to nanoseconds converts the seconds to nanoseconds automatically (docs).
`nanos()` only contains the fractional part -- proto docs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A bit too magical for me, but I guess I'm not on the C++ design committee :-)
You've got to love these radically opposite approaches: the C++ stdlib makes it as nice and expressive as possible, while the proto complicates things and wants to split seconds and ns!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was actually grateful that for once C++ did not make me invent every basic utility from first principles, but yeah, it's not super intuitive either.
And I have no idea what the proto designers were thinking here. I guess it's space-saving for use cases where only second granularity is needed? But who uses second granularity anymore -- it's not 1999.