From 7730587accdd36ee4aa7389c6494d2fff0e2e96c Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Mon, 23 Sep 2024 13:02:19 +0200 Subject: [PATCH 1/4] Add support for unix epochs when parsing timestamps --- data/db/unix_timestamp.db | Bin 0 -> 12288 bytes src/sqlite_scanner.cpp | 30 +++++++++++++++++--- src/sqlite_stmt.cpp | 2 ++ test/sql/storage/attach_unix_timestamp.test | 20 +++++++++++++ 4 files changed, 48 insertions(+), 4 deletions(-) create mode 100644 data/db/unix_timestamp.db create mode 100644 test/sql/storage/attach_unix_timestamp.test diff --git a/data/db/unix_timestamp.db b/data/db/unix_timestamp.db new file mode 100644 index 0000000000000000000000000000000000000000..6311af3cce9f66a76d7224fe18bda1fb4a4271b3 GIT binary patch literal 12288 zcmeI#u}Z^09LMpyYa5{`=hm_3Ru>oPZj^vS7H#)()&zn{EA8PVu0D_8B)*8yv513< zZ{X2dO(8x&;CpbnU;at{IX>GBF5=83CY{a->&&s(7gFw?7$bye`PuMuGn5a_&5d7W zDgPH+;%M@+=btJa3;pCT3^=2f-E*<}fia-2AUa znR$ISUkd%Hm*qMc0R#|0009ILKmY**5I_I{1nMOaDAi2A4juxzTJOudU!VUMLVxH* sxlTp^0R#|0009ILKmY**5I_KddI+?Y+6muY&qt5%&u#gW&Gz2E0RbyjD*ylh literal 0 HcmV?d00001 diff --git a/src/sqlite_scanner.cpp b/src/sqlite_scanner.cpp index 391d821..70965ca 100644 --- a/src/sqlite_scanner.cpp +++ b/src/sqlite_scanner.cpp @@ -12,7 +12,7 @@ #include "duckdb/main/client_context.hpp" #include "duckdb/main/config.hpp" #include "duckdb/storage/storage_extension.hpp" - +#include "duckdb/common/operator/cast_operators.hpp" #include namespace duckdb { @@ -260,9 +260,31 @@ static void SqliteScan(ClientContext &context, TableFunctionInput &data, DataChu Date::FromCString((const char *)sqlite3_value_text(val), sqlite3_value_bytes(val)); break; case LogicalTypeId::TIMESTAMP: - stmt.CheckTypeMatches(bind_data, val, sqlite_column_type, SQLITE_TEXT, col_idx); - FlatVector::GetData(out_vec)[out_idx] = - Timestamp::FromCString((const char *)sqlite3_value_text(val), sqlite3_value_bytes(val)); + // SQLite does not have a timestamp type - but it has "conventions" + // See https://www.sqlite.org/lang_datefunc.html + // The conventions are: + // A text string that is an ISO 8601 date/time value + // The number of days including fractional days since -4713-11-24 12:00:00 + // The number of seconds including fractional seconds since 1970-01-01 00:00:00 + // for now we only support ISO-8601 and unix timestamps + if (sqlite_column_type == SQLITE_INTEGER) { + // unix timestamp + FlatVector::GetData(out_vec)[out_idx] = + Timestamp::FromEpochSeconds(sqlite3_value_int64(val)); + } else if (sqlite_column_type == SQLITE_FLOAT) { + int64_t timestamp_micros = + Cast::Operation(sqlite3_value_double(val) * 1000000.0); + FlatVector::GetData(out_vec)[out_idx] = + Timestamp::FromEpochMicroSeconds(timestamp_micros); + } else if (sqlite_column_type == SQLITE_TEXT) { + // ISO-8601 + FlatVector::GetData(out_vec)[out_idx] = + Timestamp::FromCString((const char *)sqlite3_value_text(val), sqlite3_value_bytes(val)); + } else { + throw NotImplementedException( + "Unimplemented SQLite type for column of type TIMESTAMP\n* SET sqlite_all_varchar=true to " + "load all columns as VARCHAR and skip type conversions"); + } break; case LogicalTypeId::BLOB: FlatVector::GetData(out_vec)[out_idx] = StringVector::AddStringOrBlob( diff --git a/src/sqlite_stmt.cpp b/src/sqlite_stmt.cpp index 474f7fc..6d76171 100644 --- a/src/sqlite_stmt.cpp +++ b/src/sqlite_stmt.cpp @@ -69,6 +69,7 @@ void SQLiteStatement::CheckTypeMatches(const SqliteBindData &bind_data, sqlite3_ auto message = "Invalid type in column \"" + column_name + "\": column was declared as " + SQLiteUtils::TypeToString(expected_type) + ", found \"" + value_as_text + "\" of type \"" + SQLiteUtils::TypeToString(sqlite_column_type) + "\" instead."; + message += "\n* SET sqlite_all_varchar=true to load all columns as VARCHAR and skip type conversions"; throw Exception(ExceptionType::MISMATCH_TYPE, message); } } @@ -79,6 +80,7 @@ void SQLiteStatement::CheckTypeIsFloatOrInteger(sqlite3_value *val, int sqlite_c auto value_as_text = string((const char *)sqlite3_value_text(val)); auto message = "Invalid type in column \"" + column_name + "\": expected float or integer, found \"" + value_as_text + "\" of type \"" + SQLiteUtils::TypeToString(sqlite_column_type) + "\" instead."; + message += "\n* SET sqlite_all_varchar=true to load all columns as VARCHAR and skip type conversions"; throw Exception(ExceptionType::MISMATCH_TYPE, message); } } diff --git a/test/sql/storage/attach_unix_timestamp.test b/test/sql/storage/attach_unix_timestamp.test new file mode 100644 index 0000000..0141d9e --- /dev/null +++ b/test/sql/storage/attach_unix_timestamp.test @@ -0,0 +1,20 @@ +# name: test/sql/storage/attach_unix_timestamp.test +# description: +# group: [sqlite_storage] + +require sqlite_scanner + +statement ok +ATTACH 'data/db/unix_timestamp.db' AS s (TYPE SQLITE, READONLY) + +query I +SELECT * FROM s.timestamp +---- +2024-09-23 08:06:20 +2024-09-23 08:06:22 + +query I +SELECT * FROM s.timestamp_fractional +---- +2024-09-23 08:06:20.5 +2024-09-23 08:06:22.123456 From bf970d3962b7c81b57285408bf73fd61cc35e87f Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Mon, 23 Sep 2024 13:06:44 +0200 Subject: [PATCH 2/4] Also support this for dates --- src/sqlite_scanner.cpp | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/src/sqlite_scanner.cpp b/src/sqlite_scanner.cpp index 70965ca..e5b50c3 100644 --- a/src/sqlite_scanner.cpp +++ b/src/sqlite_scanner.cpp @@ -204,6 +204,15 @@ static unique_ptr SqliteInitGlobalState(ClientContext return std::move(result); } +static timestamp_t ConvertTimestampInteger(sqlite3_value *val) { + return Timestamp::FromEpochSeconds(sqlite3_value_int64(val)); +} + +static timestamp_t ConvertTimestampFloat(sqlite3_value *val) { + int64_t timestamp_micros = Cast::Operation(sqlite3_value_double(val) * 1000000.0); + return Timestamp::FromEpochMicroSeconds(timestamp_micros); +} + static void SqliteScan(ClientContext &context, TableFunctionInput &data, DataChunk &output) { auto &state = data.local_state->Cast(); auto &gstate = data.global_state->Cast(); @@ -255,9 +264,20 @@ static void SqliteScan(ClientContext &context, TableFunctionInput &data, DataChu out_vec, (const char *)sqlite3_value_text(val), sqlite3_value_bytes(val)); break; case LogicalTypeId::DATE: - stmt.CheckTypeMatches(bind_data, val, sqlite_column_type, SQLITE_TEXT, col_idx); - FlatVector::GetData(out_vec)[out_idx] = - Date::FromCString((const char *)sqlite3_value_text(val), sqlite3_value_bytes(val)); + if (sqlite_column_type == SQLITE_INTEGER) { + // unix timestamp + FlatVector::GetData(out_vec)[out_idx] = + Timestamp::GetDate(ConvertTimestampInteger(val)); + } else if (sqlite_column_type == SQLITE_FLOAT) { + FlatVector::GetData(out_vec)[out_idx] = Timestamp::GetDate(ConvertTimestampFloat(val)); + } else if (sqlite_column_type == SQLITE_TEXT) { + FlatVector::GetData(out_vec)[out_idx] = + Date::FromCString((const char *)sqlite3_value_text(val), sqlite3_value_bytes(val)); + } else { + throw NotImplementedException( + "Unimplemented SQLite type for column of type DATE\n* SET sqlite_all_varchar=true to " + "load all columns as VARCHAR and skip type conversions"); + } break; case LogicalTypeId::TIMESTAMP: // SQLite does not have a timestamp type - but it has "conventions" @@ -269,13 +289,9 @@ static void SqliteScan(ClientContext &context, TableFunctionInput &data, DataChu // for now we only support ISO-8601 and unix timestamps if (sqlite_column_type == SQLITE_INTEGER) { // unix timestamp - FlatVector::GetData(out_vec)[out_idx] = - Timestamp::FromEpochSeconds(sqlite3_value_int64(val)); + FlatVector::GetData(out_vec)[out_idx] = ConvertTimestampInteger(val); } else if (sqlite_column_type == SQLITE_FLOAT) { - int64_t timestamp_micros = - Cast::Operation(sqlite3_value_double(val) * 1000000.0); - FlatVector::GetData(out_vec)[out_idx] = - Timestamp::FromEpochMicroSeconds(timestamp_micros); + FlatVector::GetData(out_vec)[out_idx] = ConvertTimestampFloat(val); } else if (sqlite_column_type == SQLITE_TEXT) { // ISO-8601 FlatVector::GetData(out_vec)[out_idx] = From 346752771eaf1b3bbdc7fef2029db86ecb442fd6 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Mon, 23 Sep 2024 13:15:20 +0200 Subject: [PATCH 3/4] Add borked value --- data/db/unix_timestamp.db | Bin 12288 -> 16384 bytes test/sql/storage/attach_unix_timestamp.test | 5 +++++ 2 files changed, 5 insertions(+) diff --git a/data/db/unix_timestamp.db b/data/db/unix_timestamp.db index 6311af3cce9f66a76d7224fe18bda1fb4a4271b3..c8cc6e367e923d4284cd3f52c8fd2b9a4c4233c6 100644 GIT binary patch delta 191 zcmZojXlP)ZAT7wnz`(!)#4x}-QO8)Ai$O1M7cWqVnePz;|3-cvzDFAi3;3)unAyZF zEg5r55|eULOEPm)i%Sx73*z%jOXBm>;)@dV(s78gOuoo3CWWF6*tpuioz%|85F6ae(u B3Ml{p diff --git a/test/sql/storage/attach_unix_timestamp.test b/test/sql/storage/attach_unix_timestamp.test index 0141d9e..2b6a713 100644 --- a/test/sql/storage/attach_unix_timestamp.test +++ b/test/sql/storage/attach_unix_timestamp.test @@ -18,3 +18,8 @@ SELECT * FROM s.timestamp_fractional ---- 2024-09-23 08:06:20.5 2024-09-23 08:06:22.123456 + +statement error +SELECT * FROM s.timestamp_out_of_range +---- +out of range From 8c5248d3b0ae044ee069d2823c5f5d8a435273b3 Mon Sep 17 00:00:00 2001 From: Mark Raasveldt Date: Mon, 23 Sep 2024 16:03:52 +0200 Subject: [PATCH 4/4] v1.1.0 --- .github/workflows/MainDistributionPipeline.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 605224d..b65d2f6 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -14,7 +14,7 @@ concurrency: jobs: duckdb-stable-build: name: Build extension binaries - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.1.0 with: duckdb_version: main extension_name: sqlite_scanner @@ -22,7 +22,7 @@ jobs: duckdb-stable-deploy: name: Deploy extension binaries needs: duckdb-stable-build - uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@main + uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@v1.1.0 secrets: inherit with: duckdb_version: main