Skip to content

Commit

Permalink
working for positive decimal values
Browse files Browse the repository at this point in the history
  • Loading branch information
WillAyd committed Nov 29, 2023
1 parent 078de30 commit bf8ed7b
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 33 deletions.
84 changes: 60 additions & 24 deletions c/driver/postgresql/postgres_copy_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -1231,63 +1231,63 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter {
ArrowArrayViewGetDecimalUnsafe(array_view_, index, &decimal);
constexpr uint16_t kNumericPos = 0x0000;
constexpr uint16_t kNumericNeg = 0x4000;
constexpr int64_t kNBase = 10000;
// Number of decimal digits per Postgres digit
constexpr int kDecDigits = 4;

// This assumes that we are dealing with Decimal128
// more work required for 256 support
uint8_t bytes_tmp[32];
ArrowDecimalGetBytes(&decimal, bytes_tmp);
__int128 tmp;
std::memcpy(&tmp, bytes_tmp, sizeof(tmp));
uint64_t tmp[2];
std::memcpy(tmp, bytes_tmp, sizeof(tmp));

// TODO: is -INT64_MIN possible? If so how do we handle?
unsigned __int128 value = tmp < 0 ? -tmp : tmp;
std::vector<int16_t> pg_digits;

int16_t weight = -(scale / kDecDigits);
int16_t dscale = scale;
constexpr size_t nloops = (precision + kDecDigits - 1) / kDecDigits;
bool seen_decimal = scale == 0;
bool truncating_trailing_zeros = true;

for (size_t i = 0; i < nloops; i++) {
const unsigned __int128 rem = value % kNBase;
// TODO: postgres seems to pack records to the left of a decimal place
// internally, so 1000000.0 would be sent as one digit of 100 with
// a weight of 1 (there are weight + 1 pg digits to the left of a decimal place)
// Here we still send two digits of 100 and 0000
if (rem == 0) {
const std::string decimal_string = DecimalBytesToString(tmp, 2);
const std::string_view vw = decimal_string;
int digits_remaining = vw.size();
do {
const int start_pos = digits_remaining < kDecDigits ?
0 : digits_remaining - kDecDigits;
// TODO: would be great to use a string_view here but wasn't sure
// how to make that work with stoi
const size_t len = digits_remaining < 4 ? digits_remaining : kDecDigits;
std::string substr{vw.substr(start_pos, len)};
//size_t ndigits; TODO: maybe should use ndigits output
int16_t val = static_cast<int16_t>(std::stoi(substr.data()));

if (val == 0) {
if (!seen_decimal && truncating_trailing_zeros) {
dscale -= kDecDigits;
}
} else {
pg_digits.insert(pg_digits.begin(), rem);
pg_digits.insert(pg_digits.begin(), val);
if (!seen_decimal && truncating_trailing_zeros) {
if (rem % 1000 == 0) {
if (val % 1000 == 0) {
dscale -= 3;
} else if (rem % 100 == 0) {
} else if (val % 100 == 0) {
dscale -= 2;
} else if (rem % 10 == 0) {
} else if (val % 10 == 0) {
dscale -= 1;
}
}
truncating_trailing_zeros = false;
}

// TODO: how does pg deal with words when integer and decimal part are sent
// in same word?
value /= kNBase;
if (value == 0) {
digits_remaining -= kDecDigits;
if (digits_remaining <= 0) {
break;
}
weight++;

if (i >= scale / kDecDigits - 1) {
if (start_pos <= static_cast<int>(vw.size()) - scale) {
seen_decimal = true;
}
}
} while (true);

int16_t ndigits = pg_digits.size();
const int16_t sign = ArrowDecimalSign(&decimal) > 0 ? kNumericPos : kNumericNeg;
Expand All @@ -1310,6 +1310,42 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter {

return ADBC_STATUS_OK;
}

private:
// Converts an unsigned 128-bit integer, supplied as two 64-bit words, into its
// base-10 string representation without leading zeros ("0" for zero).
//
// decimal_bytes must point at two words: decimal_bytes[0] is the least
// significant word and decimal_bytes[1] the most significant. The words are
// treated as an unsigned magnitude, so any sign handling is up to the caller.
// size is accepted for interface compatibility but is currently ignored;
// exactly two words are always read (this only works with decimal128).
//
// Basic approach adopted from https://stackoverflow.com/a/8023862/621736:
// shift the binary value out one bit at a time from the top, and for each bit
// double the decimal digit string and add the bit that was shifted out.
std::string DecimalBytesToString(const uint64_t* decimal_bytes, size_t size) {
  static_cast<void>(size);

  // 2^128 - 1 has 39 decimal digits, so this never overflows for two words.
  constexpr size_t kMaxDigits = 39;
  constexpr uint64_t kHighBit = uint64_t{1} << 63;

  uint64_t words[2];
  std::memcpy(words, decimal_bytes, sizeof(words));

  std::string digits(kMaxDigits, '0');

  for (int bit = 0; bit < 128; ++bit) {
    // The bit leaving the top of the high word feeds the decimal doubling.
    int carry = (words[1] & kHighBit) != 0 ? 1 : 0;
    words[1] = (words[1] << 1) | (words[0] >> 63);
    words[0] <<= 1;

    // digits = digits * 2 + carry, processed from least significant digit.
    for (size_t pos = kMaxDigits; pos-- > 0;) {
      const int doubled = 2 * (digits[pos] - '0') + carry;
      carry = doubled >= 10 ? 1 : 0;
      digits[pos] = static_cast<char>('0' + (doubled - 10 * carry));
    }
  }

  // Strip leading zeros but always keep at least one digit.
  const size_t first_nonzero = digits.find_first_not_of('0');
  if (first_nonzero == std::string::npos) {
    return "0";
  }
  digits.erase(0, first_nonzero);
  return digits;
}
};

template <enum ArrowTimeUnit TU>
Expand Down
13 changes: 4 additions & 9 deletions c/validation/adbc_validation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1557,7 +1557,7 @@ void StatementTest::TestSqlIngestDecimal128() {
struct ArrowDecimal decimal6;

ArrowDecimalInit(&decimal1, size, 38, 8);
ArrowDecimalSetInt(&decimal1, -12345600000);
ArrowDecimalSetInt(&decimal1, 12345600000);
ArrowDecimalInit(&decimal2, size, 38, 8);
ArrowDecimalSetInt(&decimal2, 1234);
ArrowDecimalInit(&decimal3, size, 38, 8);
Expand All @@ -1567,16 +1567,11 @@ void StatementTest::TestSqlIngestDecimal128() {
ArrowDecimalInit(&decimal5, size, 38, 8);
ArrowDecimalSetInt(&decimal5, 100000000000000);
ArrowDecimalInit(&decimal6, size, 38, 8);
// 2342394230592.232349023094 in little endian
// 23423942305922323.49023094 in little endian
uint8_t le_data[16] = {
0x76, 0xbb, 0xc8, 0x2c, 0x1c, 0x2b, 0x18, 0x72,
0x05, 0xf0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00};
ArrowDecimalSetBytes(&decimal6, le_data);
//
const uint64_t large_decimal[2] = {1, 2345678901234567890};
uint8_t large_decimal_bytes[16];
std::memcpy(&large_decimal_bytes, large_decimal, sizeof(large_decimal_bytes));
ArrowDecimalSetBytes(&decimal6, large_decimal_bytes);

const std::vector<std::optional<ArrowDecimal*>> values = {
std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5, &decimal6};
Expand Down Expand Up @@ -1629,8 +1624,8 @@ void StatementTest::TestSqlIngestDecimal128() {
//}

const std::vector<std::optional<std::string>> str_values = {
std::nullopt, "-123.456", "0.00001234", "1", "123.456", "1000000",
"2342394230592.232349023094"};
std::nullopt, "0.00001234", "1", "123.456", "123.456", "1000000",
"23423942305922323.49023094"};
ASSERT_NO_FATAL_FAILURE(
CompareArray<std::string>(reader.array_view->children[0], str_values));

Expand Down

0 comments on commit bf8ed7b

Please sign in to comment.