Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add boundary check for converting duckdb date/timestamps to PG dates/timestamps #653

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions include/pgduckdb/pgduckdb_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,21 @@ constexpr int32_t PGDUCKDB_DUCK_DATE_OFFSET = 10957;
constexpr int64_t PGDUCKDB_DUCK_TIMESTAMP_OFFSET =
static_cast<int64_t>(PGDUCKDB_DUCK_DATE_OFFSET) * static_cast<int64_t>(86400000000) /* USECS_PER_DAY */;

// Check from regress/sql/date.sql
#define PG_MINYEAR (-4713)
#define PG_MINMONTH (11)
#define PG_MINDAY (24)
#define PG_MAXYEAR (5874897)
#define PG_MAXMONTH (12)
#define PG_MAXDAY (31)

const duckdb::date_t PGDUCKDB_PG_MIN_DATE_VALUE = duckdb::Date::FromDate(PG_MINYEAR, PG_MINMONTH, PG_MINDAY);
const duckdb::date_t PGDUCKDB_PG_MAX_DATE_VALUE = duckdb::Date::FromDate(PG_MAXYEAR, PG_MAXMONTH, PG_MAXDAY);

// Check ValidTimestampOrTimestampTz() for the logic, These values are counted from 1/1/1970
constexpr int64_t PGDUCKDB_MAX_TIMESTAMP_VALUE = 9223371244800000000;
constexpr int64_t PGDUCKDB_MIN_TIMESTAMP_VALUE = -210866803200000000;

duckdb::LogicalType ConvertPostgresToDuckColumnType(Form_pg_attribute &attribute);
Oid GetPostgresDuckDBType(const duckdb::LogicalType &type);
int32_t GetPostgresDuckDBTypemod(const duckdb::LogicalType &type);
Expand Down
153 changes: 144 additions & 9 deletions src/pgduckdb_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,25 @@ struct DecimalConversionDouble {
}
};

static inline bool
ValidDate(duckdb::date_t dt) {
if ((dt < pgduckdb::PGDUCKDB_PG_MIN_DATE_VALUE || dt > pgduckdb::PGDUCKDB_PG_MAX_DATE_VALUE) &&
!(dt == duckdb::date_t::infinity() || dt == duckdb::date_t::ninfinity()))
return false;
return true;
Comment on lines +161 to +164
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of making this a huge condition lets split them up:

Suggested change
if ((dt < pgduckdb::PGDUCKDB_PG_MIN_DATE_VALUE || dt > pgduckdb::PGDUCKDB_PG_MAX_DATE_VALUE) &&
!(dt == duckdb::date_t::infinity() || dt == duckdb::date_t::ninfinity()))
return false;
return true;
if (dt == duckdb::date_t::infinity() || dt == duckdb::date_t::ninfinity())
return true;
return dt >= pgduckdb::PGDUCKDB_PG_MIN_DATE_VALUE && dt <= pgduckdb::PGDUCKDB_PG_MAX_DATE_VALUE) &&

}

static inline bool
ValidTimestampOrTimestampTz(int64_t timestamp) {
// PG TIMESTAMP RANGE = 4714-11-24 00:00:00 (BC) <-> 294276-12-31 23:59:59
// DUCK TIMESTAMP RANGE = 290308-12-22 00:00:00 (BC) <-> 294247-01-10 04:00:54
// Taking Intersection of the ranges
// MIN TIMESTAMP = 4714-11-24 00:00:00 (BC)
// MAX TIMESTAMP = 294246-12-31 23:59:59 , To keep it capped to a specific year.. also coincidently this is EXACTLY
// 30 years less than PG max value.
return timestamp >= pgduckdb::PGDUCKDB_MIN_TIMESTAMP_VALUE && timestamp < pgduckdb::PGDUCKDB_MAX_TIMESTAMP_VALUE;
}

static inline Datum
ConvertBoolDatum(const duckdb::Value &value) {
return value.GetValue<bool>();
Expand Down Expand Up @@ -216,6 +235,20 @@ ConvertBinaryDatum(const duckdb::Value &value) {
inline Datum
ConvertDateDatum(const duckdb::Value &value) {
duckdb::date_t date = value.GetValue<duckdb::date_t>();
if (!ValidDate(date))
throw duckdb::OutOfRangeException("The value should be between min and max value (%s <-> %s)",
duckdb::Date::ToString(pgduckdb::PGDUCKDB_PG_MIN_DATE_VALUE),
duckdb::Date::ToString(pgduckdb::PGDUCKDB_PG_MAX_DATE_VALUE));

// Special Handling for +/-infinity date values
if (!duckdb::Date::IsFinite(date)) {
// -infinity value is different for PG date
if (date == duckdb::date_t::ninfinity())
return date.days - 1;
else
return date.days;
}
Comment on lines +244 to +250
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of doing this -1 calculation let's use the constants provided by PG/DuckDB. Same for other places in the new code.

Suggested change
if (!duckdb::Date::IsFinite(date)) {
// -infinity value is different for PG date
if (date == duckdb::date_t::ninfinity())
return date.days - 1;
else
return date.days;
}
if (date == duckdb::date_t::ninfinity())
return DATEVAL_NOEND;
else if (date == duckdb::date_t::infinity())
return DATEVAL_NOBEGIN;


return date.days - pgduckdb::PGDUCKDB_DUCK_DATE_OFFSET;
}

Expand Down Expand Up @@ -255,6 +288,14 @@ ConvertTimestampDatum(const duckdb::Value &value) {
// Extract raw int64_t value of timestamp
int64_t rawValue = value.GetValue<int64_t>();

// Early Return for +/-Inf
if (!duckdb::Timestamp::IsFinite(static_cast<duckdb::timestamp_t>(rawValue))) {
if (rawValue == static_cast<int64_t>(duckdb::timestamp_t::ninfinity()))
return Int64GetDatum(rawValue - 1);
else
return Int64GetDatum(rawValue);
}

// Handle specific Timestamp unit(sec, ms, ns) types
switch (value.type().id()) {
case duckdb::LogicalType::TIMESTAMP_MS:
Expand All @@ -273,7 +314,36 @@ ConvertTimestampDatum(const duckdb::Value &value) {
// Since we don't want to handle anything here
break;
}
return rawValue - pgduckdb::PGDUCKDB_DUCK_TIMESTAMP_OFFSET;

if (!ValidTimestampOrTimestampTz(rawValue))
throw duckdb::OutOfRangeException(
"The Timestamp value should be between min and max value (%s <-> %s)",
duckdb::Timestamp::ToString(static_cast<duckdb::timestamp_t>(PGDUCKDB_MIN_TIMESTAMP_VALUE)),
duckdb::Timestamp::ToString(static_cast<duckdb::timestamp_t>(PGDUCKDB_MAX_TIMESTAMP_VALUE)));

return Int64GetDatum(rawValue - pgduckdb::PGDUCKDB_DUCK_TIMESTAMP_OFFSET);
}

inline Datum
ConvertTimestampTzDatum(const duckdb::Value &value) {
duckdb::timestamp_tz_t timestamp = value.GetValue<duckdb::timestamp_tz_t>();
int64_t rawValue = timestamp.value;

// Early Return for +/-Inf
if (!duckdb::Timestamp::IsFinite(static_cast<duckdb::timestamp_t>(rawValue))) {
if (rawValue == static_cast<int64_t>(duckdb::timestamp_t::ninfinity()))
return Int64GetDatum(rawValue - 1);
else
return Int64GetDatum(rawValue);
}

if (!ValidTimestampOrTimestampTz(rawValue))
throw duckdb::OutOfRangeException(
"The TimestampTz value should be between min and max value (%s <-> %s)",
duckdb::Timestamp::ToString(static_cast<duckdb::timestamp_t>(PGDUCKDB_MIN_TIMESTAMP_VALUE)),
duckdb::Timestamp::ToString(static_cast<duckdb::timestamp_t>(PGDUCKDB_MAX_TIMESTAMP_VALUE)));

return Int64GetDatum(rawValue - pgduckdb::PGDUCKDB_DUCK_TIMESTAMP_OFFSET);
}

inline Datum
Expand Down Expand Up @@ -876,8 +946,7 @@ ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col
break;
}
case TIMESTAMPTZOID: {
duckdb::timestamp_tz_t timestamp = value.GetValue<duckdb::timestamp_tz_t>();
slot->tts_values[col] = timestamp.value - pgduckdb::PGDUCKDB_DUCK_TIMESTAMP_OFFSET;
slot->tts_values[col] = ConvertTimestampTzDatum(value);
break;
}
case INTERVALOID: {
Expand Down Expand Up @@ -1278,6 +1347,75 @@ AppendJsonb(duckdb::Vector &result, Datum value, idx_t offset) {
data[offset] = duckdb::StringVector::AddString(result, str);
}

static void
AppendDate(duckdb::Vector &result, Datum value, idx_t offset) {
auto date = DatumGetDateADT(value);
bool is_inf_val = false;
if (date == DATEVAL_NOBEGIN) {
is_inf_val = true;
// -infinity value is different between PG and duck
date += 1;
}
if (date == DATEVAL_NOEND)
is_inf_val = true;

if (is_inf_val) {
Append<duckdb::date_t>(result, duckdb::date_t(static_cast<int32_t>(date)), offset);
} else
Append<duckdb::date_t>(result, duckdb::date_t(static_cast<int32_t>(date + PGDUCKDB_DUCK_DATE_OFFSET)), offset);
Comment on lines +1352 to +1365
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this function becomes a lot easier to follow if there are some early returns in there. Same for the two other functions below (AppendTimestamp and AppendTimestampTz).

Suggested change
auto date = DatumGetDateADT(value);
bool is_inf_val = false;
if (date == DATEVAL_NOBEGIN) {
is_inf_val = true;
// -infinity value is different between PG and duck
date += 1;
}
if (date == DATEVAL_NOEND)
is_inf_val = true;
if (is_inf_val) {
Append<duckdb::date_t>(result, duckdb::date_t(static_cast<int32_t>(date)), offset);
} else
Append<duckdb::date_t>(result, duckdb::date_t(static_cast<int32_t>(date + PGDUCKDB_DUCK_DATE_OFFSET)), offset);
auto date = DatumGetDateADT(value);
if (date == DATEVAL_NOBEGIN) {
// -infinity value is different between PG and duck
Append<duckdb::date_t>(result, duckdb::date_t::ninfinity()), offset);
return;
}
if (date == DATEVAL_NOEND) {
Append<duckdb::date_t>(result, duckdb::date_t::infinity()), offset);
return;
}
Append<duckdb::date_t>(result, duckdb::date_t(static_cast<int32_t>(date + PGDUCKDB_DUCK_DATE_OFFSET)), offset);

}

static void
AppendTimestamp(duckdb::Vector &result, Datum value, idx_t offset) {
int64_t timestamp = static_cast<int64_t>(DatumGetTimestamp(value));
bool is_inf_val = false;
if (timestamp == TIMESTAMP_MINUS_INFINITY) {
is_inf_val = true;
// -infinity value is different between PG and duck
timestamp += 1;
}
if (timestamp == TIMESTAMP_INFINITY)
is_inf_val = true;

// Bounds Check
if (!is_inf_val && !ValidTimestampOrTimestampTz(timestamp + PGDUCKDB_DUCK_TIMESTAMP_OFFSET))
throw duckdb::OutOfRangeException(
"The Timestamp value should be between min and max value (%s <-> %s)",
duckdb::Timestamp::ToString(static_cast<duckdb::timestamp_t>(PGDUCKDB_MIN_TIMESTAMP_VALUE)),
duckdb::Timestamp::ToString(static_cast<duckdb::timestamp_t>(PGDUCKDB_MAX_TIMESTAMP_VALUE)));

if (is_inf_val) {
Append<duckdb::timestamp_t>(result, duckdb::timestamp_t(timestamp), offset);
} else
Append<duckdb::timestamp_t>(result, duckdb::timestamp_t(timestamp + PGDUCKDB_DUCK_TIMESTAMP_OFFSET), offset);
}

static void
AppendTimestampTz(duckdb::Vector &result, Datum value, idx_t offset) {
int64_t timestamp = static_cast<int64_t>(DatumGetTimestamp(value));
bool is_inf_val = false;
if (timestamp == TIMESTAMP_MINUS_INFINITY) {
is_inf_val = true;
// -infinity value is different between PG and duck
timestamp += 1;
}
if (timestamp == TIMESTAMP_INFINITY)
is_inf_val = true;

// Bounds Check
if (!is_inf_val && !ValidTimestampOrTimestampTz(timestamp + PGDUCKDB_DUCK_TIMESTAMP_OFFSET))
throw duckdb::OutOfRangeException(
"The TimestampTz value should be between min and max value (%s <-> %s)",
duckdb::Timestamp::ToString(static_cast<duckdb::timestamp_tz_t>(PGDUCKDB_MIN_TIMESTAMP_VALUE)),
duckdb::Timestamp::ToString(static_cast<duckdb::timestamp_tz_t>(PGDUCKDB_MAX_TIMESTAMP_VALUE)));

if (is_inf_val) {
Append<duckdb::timestamp_tz_t>(result, duckdb::timestamp_tz_t(timestamp), offset);
} else
Append<duckdb::timestamp_tz_t>(result, duckdb::timestamp_tz_t(timestamp + PGDUCKDB_DUCK_TIMESTAMP_OFFSET),
offset);
}

template <class T, class OP = DecimalConversionInteger>
T
ConvertDecimal(const NumericVar &numeric) {
Expand Down Expand Up @@ -1434,19 +1572,17 @@ ConvertPostgresToDuckValue(Oid attr_type, Datum value, duckdb::Vector &result, i
break;
}
case duckdb::LogicalTypeId::DATE:
Append<duckdb::date_t>(result, duckdb::date_t(static_cast<int32_t>(value + PGDUCKDB_DUCK_DATE_OFFSET)), offset);
AppendDate(result, value, offset);
break;

case duckdb::LogicalTypeId::TIMESTAMP_SEC:
case duckdb::LogicalTypeId::TIMESTAMP_MS:
case duckdb::LogicalTypeId::TIMESTAMP_NS:
case duckdb::LogicalTypeId::TIMESTAMP:
Append<duckdb::timestamp_t>(
result, duckdb::timestamp_t(static_cast<int64_t>(value + PGDUCKDB_DUCK_TIMESTAMP_OFFSET)), offset);
AppendTimestamp(result, value, offset);
break;
case duckdb::LogicalTypeId::TIMESTAMP_TZ:
Append<duckdb::timestamp_tz_t>(
result, duckdb::timestamp_tz_t(static_cast<int64_t>(value + PGDUCKDB_DUCK_TIMESTAMP_OFFSET)), offset);
AppendTimestampTz(result, value, offset); // Timestamp and Timestamptz are basically same in PG
break;
case duckdb::LogicalTypeId::INTERVAL:
Append<duckdb::interval_t>(result, DatumGetInterval(value), offset);
Expand Down Expand Up @@ -1645,5 +1781,4 @@ FromNumeric(Numeric num) {
dest.buf = NULL; /* digits array is not palloc'd */
return dest;
}

} // namespace pgduckdb
54 changes: 54 additions & 0 deletions test/regression/expected/date.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
-- Test +/- inf values
CREATE TABLE t(a DATE);
INSERT INTO t VALUES('Infinity'), ('-Infinity');
-- PG Execution
SELECT * from t;
a
-----------
infinity
-infinity
(2 rows)

SELECT isfinite(a) FROM t;
isfinite
----------
f
f
(2 rows)

set duckdb.force_execution = true;
-- DuckDB execution
SELECT * from t;
a
-----------
infinity
-infinity
(2 rows)

SELECT isfinite(a) FROM t;
isfinite
----------
f
f
(2 rows)

-- Cleanup
set duckdb.force_execution = false;
DROP TABLE t;
-- Check upper and lower limits of date range
SELECT * FROM duckdb.query($$ SELECT '4714-11-24 (BC)'::date as date $$);
date
---------------
11-24-4714 BC
(1 row)

SELECT * FROM duckdb.query($$ SELECT '4714-11-23 (BC)'::date as date $$); -- out of range
ERROR: (PGDuckDB/Duckdb_ExecCustomScan_Cpp) Out of Range Error: The value should be between min and max value (4714-11-24 (BC) <-> 5874897-12-31)
SELECT * FROM duckdb.query($$ SELECT '5874897-12-31'::date as date $$);
date
---------------
12-31-5874897
(1 row)

SELECT * FROM duckdb.query($$ SELECT '5874898-01-01'::date as date $$); -- out of range
ERROR: (PGDuckDB/Duckdb_ExecCustomScan_Cpp) Out of Range Error: The value should be between min and max value (4714-11-24 (BC) <-> 5874897-12-31)
28 changes: 8 additions & 20 deletions test/regression/expected/test_all_types.out
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ SELECT * exclude(
fixed_array_of_int_list,
list_of_fixed_int_array,
nested_int_array, -- The nested array has different lengths, which is not possible in PG
date,
timestamp,
timestamp_s,
timestamp_ms,
timestamp_ns,
timestamp_tz
)
$$)
-[ RECORD 1 ]---+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Expand All @@ -37,14 +43,8 @@ usmallint | 0
uint | 0
ubigint | 0
varint | -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368
date | 07-14-5881580
time | 00:00:00
timestamp | Sun Jan 10 08:01:49.551616 294247
timestamp_s | Sun Jan 10 08:01:49.551616 294247
timestamp_ms | Sun Jan 10 08:01:49.551616 294247
timestamp_ns | Wed Sep 22 00:00:00 1677
time_tz | 00:00:00+15:59:59
timestamp_tz | Sun Jan 10 00:01:49.551616 294247 PST
float | -3.4028235e+38
double | -1.7976931348623157e+308
dec_4_1 | -999.9
Expand Down Expand Up @@ -72,14 +72,8 @@ usmallint | 65535
uint | 4294967295
ubigint | 18446744073709551615
varint | 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368
date | 07-10-5881580
time | 24:00:00
timestamp | Sun Jan 10 04:00:54.775806 294247
timestamp_s | Sun Jan 10 04:00:54 294247
timestamp_ms | Sun Jan 10 04:00:54.775 294247
timestamp_ns | Fri Apr 11 23:47:16.854775 2262
time_tz | 24:00:00-15:59:59
timestamp_tz | Sat Jan 09 20:00:54.775806 294247 PST
float | 3.4028235e+38
double | 1.7976931348623157e+308
dec_4_1 | 999.9
Expand All @@ -92,8 +86,8 @@ varchar | goo
blob | \000\000\000a
int_array | {42,999,NULL,NULL,-42}
double_array | {42,NaN,Infinity,-Infinity,NULL,-42}
date_array | {01-01-1970,07-11-5881580,07-13-5881580,NULL,05-12-2022}
timestamp_array | {"Thu Jan 01 00:00:00 1970","Sun Jan 10 04:00:54.775807 294247","Sun Jan 10 04:00:54.775809 294247",NULL,"Thu May 12 16:23:45 2022"}
date_array | {01-01-1970,infinity,-infinity,NULL,05-12-2022}
timestamp_array | {"Thu Jan 01 00:00:00 1970",infinity,-infinity,NULL,"Thu May 12 16:23:45 2022"}
varchar_array | {🦆🦆🦆🦆🦆🦆,goose,NULL,""}
-[ RECORD 3 ]---+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
bool |
Expand All @@ -107,14 +101,8 @@ usmallint |
uint |
ubigint |
varint |
date |
time |
timestamp |
timestamp_s |
timestamp_ms |
timestamp_ns |
time_tz |
timestamp_tz |
float |
double |
dec_4_1 |
Expand Down
Loading
Loading