-
Notifications
You must be signed in to change notification settings - Fork 100
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(c/driver/postgresql): Support for writing DECIMAL types #1288
Changes from all commits
a24b046
dbddd8b
c5dfd05
a091bcd
61eb3cc
a76ab65
078de30
bf8ed7b
75cbd58
94bf657
4b49999
c046632
3957b6d
c5d19bb
ba44774
06b6349
bc19709
10e6e09
e9967a7
6a0d3c9
df7ba3e
ac733bf
0cf303f
59cdb22
759b0f1
9472ff5
0eba157
97c2d5c
443efed
5a93f9e
b629aca
f5100d0
7e7351d
dc1b735
cc252cd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -693,6 +693,72 @@ TEST(PostgresCopyUtilsTest, PostgresCopyReadNumeric) { | |
EXPECT_EQ(std::string(item.data, item.size_bytes), "inf"); | ||
} | ||
|
||
// This buffer is similar to the read variant above but removes special values | ||
// nan, ±inf as they are not supported via the Arrow Decimal types | ||
// COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (NULL), (-123.456), | ||
// ('0.00001234'), (1.0000), (123.456), (1000000)) AS drvd(col)) | ||
// TO STDOUT WITH (FORMAT binary); | ||
static uint8_t kTestPgCopyNumericWrite[] = { | ||
0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, | ||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, | ||
0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x40, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, | ||
0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, | ||
0x08, 0x04, 0xd2, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, | ||
0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, | ||
0x0a, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0xff, 0xff}; | ||
|
||
TEST(PostgresCopyUtilsTest, PostgresCopyWriteNumeric) { | ||
adbc_validation::Handle<struct ArrowSchema> schema; | ||
adbc_validation::Handle<struct ArrowArray> array; | ||
struct ArrowError na_error; | ||
constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; | ||
constexpr int32_t size = 128; | ||
constexpr int32_t precision = 38; | ||
constexpr int32_t scale = 8; | ||
|
||
struct ArrowDecimal decimal1; | ||
struct ArrowDecimal decimal2; | ||
struct ArrowDecimal decimal3; | ||
struct ArrowDecimal decimal4; | ||
struct ArrowDecimal decimal5; | ||
|
||
ArrowDecimalInit(&decimal1, size, 19, 8); | ||
ArrowDecimalSetInt(&decimal1, -12345600000); | ||
ArrowDecimalInit(&decimal2, size, 19, 8); | ||
ArrowDecimalSetInt(&decimal2, 1234); | ||
ArrowDecimalInit(&decimal3, size, 19, 8); | ||
ArrowDecimalSetInt(&decimal3, 100000000); | ||
ArrowDecimalInit(&decimal4, size, 19, 8); | ||
ArrowDecimalSetInt(&decimal4, 12345600000); | ||
ArrowDecimalInit(&decimal5, size, 19, 8); | ||
ArrowDecimalSetInt(&decimal5, 100000000000000); | ||
|
||
const std::vector<std::optional<ArrowDecimal*>> values = { | ||
std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5}; | ||
|
||
ArrowSchemaInit(&schema.value); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These 4 lines are essentially what There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've been meaning to overhaul this for a while now. (Or possibly give in and depend on arrow-cpp...) The current approach really only works for primitive types. |
||
ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); | ||
ASSERT_EQ(AdbcNsArrowSchemaSetTypeDecimal(schema.value.children[0], | ||
type, precision, scale), 0); | ||
ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0); | ||
ASSERT_EQ(adbc_validation::MakeBatch<ArrowDecimal*>(&schema.value, &array.value, | ||
&na_error, values), ADBC_STATUS_OK); | ||
|
||
PostgresCopyStreamWriteTester tester; | ||
ASSERT_EQ(tester.Init(&schema.value, &array.value), NANOARROW_OK); | ||
ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); | ||
|
||
const struct ArrowBuffer buf = tester.WriteBuffer(); | ||
// The last 2 bytes of a message can be transmitted via PQputCopyData | ||
// so no need to test those bytes from the Writer | ||
constexpr size_t buf_size = sizeof(kTestPgCopyNumericWrite) - 2; | ||
ASSERT_EQ(buf.size_bytes, buf_size); | ||
for (size_t i = 0; i < buf_size; i++) { | ||
ASSERT_EQ(buf.data[i], kTestPgCopyNumericWrite[i]) << " at position " << i; | ||
} | ||
} | ||
|
||
// COPY (SELECT CAST(col AS TIMESTAMP) FROM ( VALUES ('1900-01-01 12:34:56'), | ||
// ('2100-01-01 12:34:56'), (NULL)) AS drvd("col")) TO STDOUT WITH (FORMAT BINARY); | ||
static uint8_t kTestPgCopyTimestamp[] = { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
probably too small of an optimization to matter, but in principle you should be able to put an upper bound on the number of digits needed to represent an Arrow decimal, and then just stack-allocate?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yea that's true and actually how postgres does it internally.
https://github.com/postgres/postgres/blob/8680bae8463a0b213893ca6a1c5bb2c2530e823c/src/backend/utils/adt/numeric.c#L8026
If we wanted to stack allocate I guess would just expand that out to whatever is required to store up to 4 decimal words?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, decimal128 would be 38 digits and decimal256 would be 76
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK cool. Shouldn't be too hard to switch to that - just need to figure out how to handle once I get multi-word decimals supported