Skip to content

Commit

Permalink
Update
Browse files: browse the repository at this point in the history
  • Loading branch information
rui-mo committed Sep 26, 2024
1 parent 31137a6 commit 709e15e
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 36 deletions.
25 changes: 5 additions & 20 deletions velox/dwio/common/IntDecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -453,27 +453,12 @@ inline T IntDecoder<isSigned>::readInt() {

template <bool isSigned>
inline int128_t IntDecoder<isSigned>::readInt96() {
int64_t offset = 0;
unsigned char ch;

// Read 8 unsigned bytes.
uint64_t nanos = 0;
for (uint32_t i = 0; i < 8; ++i) {
ch = readByte();
nanos |= (ch & BASE_256_MASK) << offset;
offset += 8;
int128_t result = 0;
for (int i = 0; i < 12; ++i) {
auto ch = readByte();
result |= static_cast<uint128_t>(ch & BASE_256_MASK) << (i * 8);
}

// Read 4 signed bytes.
int32_t days = 0;
offset = 0;
for (uint32_t i = 0; i < 4; ++i) {
ch = readByte();
days |= (ch & BASE_256_MASK) << offset;
offset += 8;
}
auto ts = Timestamp::fromDaysAndNanos(days, nanos);
return reinterpret_cast<int128_t&>(ts);
return result;
}

template <bool isSigned>
Expand Down
19 changes: 6 additions & 13 deletions velox/dwio/parquet/reader/PageReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ void PageReader::prepareDictionary(const PageHeader& pageHeader) {
break;
}
case thrift::Type::INT96: {
auto numVeloxBytes = dictionary_.numValues * sizeof(Timestamp);
auto numVeloxBytes = dictionary_.numValues * sizeof(int128_t);
dictionary_.values = AlignedBuffer::allocate<char>(numVeloxBytes, &pool_);
auto numBytes = dictionary_.numValues * sizeof(Int96Timestamp);
if (pageData_) {
Expand All @@ -392,23 +392,16 @@ void PageReader::prepareDictionary(const PageHeader& pageHeader) {
}
// Expand the Parquet type length values to Velox type length.
// We start from the end to allow in-place expansion.
auto values = dictionary_.values->asMutable<Timestamp>();
auto values = dictionary_.values->asMutable<int128_t>();
auto parquetValues = dictionary_.values->asMutable<char>();

for (auto i = dictionary_.numValues - 1; i >= 0; --i) {
// Convert the timestamp into seconds and nanos since the Unix epoch,
// 00:00:00.000000 on 1 January 1970.
int64_t nanos;
int128_t result = 0;
memcpy(
&nanos,
&result,
parquetValues + i * sizeof(Int96Timestamp),
sizeof(int64_t));
int32_t days;
memcpy(
&days,
parquetValues + i * sizeof(Int96Timestamp) + sizeof(int64_t),
sizeof(int32_t));
values[i] = Timestamp::fromDaysAndNanos(days, nanos);
sizeof(Int96Timestamp));
values[i] = result;
}
break;
}
Expand Down
8 changes: 6 additions & 2 deletions velox/dwio/parquet/reader/TimestampColumnReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,12 @@ class TimestampColumnReader : public IntegerColumnReader {
if (resultVector->isNullAt(i)) {
continue;
}
const auto timestamp = rawValues[i];
uint64_t nanos = timestamp.getNanos();
const int128_t encoded = reinterpret_cast<int128_t&>(rawValues[i]);
const int32_t days = static_cast<int32_t>(encoded >> 64);
uint64_t nanos = encoded & ((((1ULL << 63) - 1ULL) << 1) + 1);
const auto timestamp = Timestamp::fromDaysAndNanos(days, nanos);

nanos = timestamp.getNanos();
switch (timestampPrecision_) {
case TimestampPrecision::kMilliseconds:
nanos = nanos / 1'000'000 * 1'000'000;
Expand Down
4 changes: 3 additions & 1 deletion velox/type/Filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -1825,7 +1825,9 @@ class TimestampRange final : public Filter {
}

bool testInt128(int128_t value) const final {
const auto& ts = reinterpret_cast<const Timestamp&>(value);
const int32_t days = static_cast<int32_t>(value >> 64);
const uint64_t nanos = value & ((((1ULL << 63) - 1ULL) << 1) + 1);
const auto ts = Timestamp::fromDaysAndNanos(days, nanos);
return ts >= lower_ && ts <= upper_;
}

Expand Down

0 comments on commit 709e15e

Please sign in to comment.