Skip to content

Commit

Permalink
Add timestamp precision in DWRF reader and truncate to milliseconds b…
Browse files Browse the repository at this point in the history
…y default (facebookincubator#10019)

Summary:

For timestamp columns, Presto can only handle millisecond precision by default, so we should align with that behavior by truncating the values produced by the reader.

Differential Revision: D58085206
  • Loading branch information
Yuhta authored and facebook-github-bot committed Jun 3, 2024
1 parent 277d5c5 commit e2e3d03
Show file tree
Hide file tree
Showing 7 changed files with 63 additions and 7 deletions.
11 changes: 11 additions & 0 deletions velox/dwio/common/Options.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "velox/dwio/common/ScanSpec.h"
#include "velox/dwio/common/UnitLoader.h"
#include "velox/dwio/common/encryption/Encryption.h"
#include "velox/type/Timestamp.h"

namespace facebook::velox::dwio::common {

Expand Down Expand Up @@ -157,6 +158,8 @@ class RowReaderOptions {
uint64_t skipRows_ = 0;
std::shared_ptr<UnitLoaderFactory> unitLoaderFactory_;

TimestampPrecision timestampPrecision_ = TimestampPrecision::kMilliseconds;

public:
RowReaderOptions() noexcept
: dataStart(0),
Expand Down Expand Up @@ -412,6 +415,14 @@ class RowReaderOptions {
size_t getDecodingParallelismFactor() const {
return decodingParallelismFactor_;
}

/// Returns the timestamp precision stored in these options. The backing
/// member is initialized to TimestampPrecision::kMilliseconds unless
/// setTimestampPrecision() has been called.
TimestampPrecision timestampPrecision() const {
  return this->timestampPrecision_;
}

/// Overrides the timestamp precision stored in these options.
/// @param precision The precision later returned by timestampPrecision().
void setTimestampPrecision(TimestampPrecision precision) {
  this->timestampPrecision_ = precision;
}
};

/**
Expand Down
1 change: 1 addition & 0 deletions velox/dwio/common/tests/utils/E2EFilterTestBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ class E2EFilterTestBase : public testing::Test {
dwio::common::RowReaderOptions& opts,
const std::shared_ptr<ScanSpec>& spec) {
opts.setScanSpec(spec);
opts.setTimestampPrecision(TimestampPrecision::kNanoseconds);
}

void readWithoutFilter(
Expand Down
14 changes: 13 additions & 1 deletion velox/dwio/dwrf/reader/SelectiveTimestampColumnReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ SelectiveTimestampColumnReader::SelectiveTimestampColumnReader(
const std::shared_ptr<const TypeWithId>& fileType,
DwrfParams& params,
common::ScanSpec& scanSpec)
: SelectiveColumnReader(fileType->type(), fileType, params, scanSpec) {
: SelectiveColumnReader(fileType->type(), fileType, params, scanSpec),
precision_(
params.stripeStreams().getRowReaderOptions().timestampPrecision()) {
EncodingKey encodingKey{fileType_->id(), params.flatMapContext().sequence};
auto& stripe = params.stripeStreams();
version_ = convertRleVersion(stripe.getEncoding(encodingKey).kind());
Expand Down Expand Up @@ -148,6 +150,16 @@ void SelectiveTimestampColumnReader::readHelper(
if (seconds < 0 && nanos != 0) {
seconds -= 1;
}
switch (precision_) {
case TimestampPrecision::kMilliseconds:
nanos = nanos / 1'000'000 * 1'000'000;
break;
case TimestampPrecision::kMicroseconds:
nanos = nanos / 1'000 * 1'000;
break;
case TimestampPrecision::kNanoseconds:
break;
}
rawTs[i] = Timestamp(seconds, nanos);
}
}
Expand Down
2 changes: 2 additions & 0 deletions velox/dwio/dwrf/reader/SelectiveTimestampColumnReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ class SelectiveTimestampColumnReader
const RowSet rows,
const uint64_t* rawNulls);

const TimestampPrecision precision_;

std::unique_ptr<dwio::common::IntDecoder</*isSigned*/ true>> seconds_;
std::unique_ptr<dwio::common::IntDecoder</*isSigned*/ false>> nano_;

Expand Down
2 changes: 1 addition & 1 deletion velox/exec/fuzzer/AggregationFuzzerOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ struct AggregationFuzzerOptions {

/// Timestamp precision to use when generating inputs of type TIMESTAMP.
VectorFuzzer::Options::TimestampPrecision timestampPrecision{
VectorFuzzer::Options::TimestampPrecision::kNanoSeconds};
VectorFuzzer::Options::TimestampPrecision::kMilliSeconds};

/// A set of configuration properties to use when running query plans.
/// Could be used to specify timezone or enable/disable settings that
Expand Down
28 changes: 28 additions & 0 deletions velox/exec/tests/TableScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,34 @@ TEST_F(TableScanTest, timestamp) {
"SELECT c0 FROM tmp WHERE c1 < timestamp'1970-01-01 01:30:00'");
}

// Checks that a table scan which does not configure a timestamp precision
// returns timestamps truncated to whole milliseconds, both without a filter
// and with an equality filter on the timestamp column.
TEST_F(TableScanTest, timestampPrecisionDefaultMillisecond) {
  constexpr int kNumRows = 10;

  // Row i holds i seconds plus i * 1'001'001 nanoseconds, so every non-zero
  // row carries digits below the millisecond.
  auto input = makeRowVector({
      makeFlatVector<Timestamp>(
          kNumRows, [](auto row) { return Timestamp(row, row * 1'001'001); }),
  });
  auto rowType = asRowType(input->type());
  auto dataFile = TempFilePath::create();
  writeToFile(dataFile->getPath(), {input});
  auto split = makeHiveConnectorSplit(dataFile->getPath());

  // Unfiltered scan: only the whole-millisecond part of each value remains.
  {
    auto scanPlan = PlanBuilder().tableScan(rowType).planNode();
    auto truncated = makeRowVector({
        makeFlatVector<Timestamp>(
            kNumRows, [](auto row) { return Timestamp(row, row * 1'000'000); }),
    });
    AssertQueryBuilder(scanPlan).split(split).assertResults(truncated);
  }

  // Filtered scan: the millisecond literal is expected to match exactly the
  // row written as 1 second + 1'001'001 nanoseconds.
  {
    auto filteredPlan =
        PlanBuilder(pool_.get())
            .tableScan(rowType, {"c0 = timestamp '1970-01-01 00:00:01.001'"})
            .planNode();
    auto singleRow = makeRowVector({
        makeFlatVector<Timestamp>(
            1, [](auto) { return Timestamp(1, 1'000'000); }),
    });
    AssertQueryBuilder(filteredPlan).split(split).assertResults(singleRow);
  }
}

DEBUG_ONLY_TEST_F(TableScanTest, timeLimitInGetOutput) {
// Create two different row vectors: with some nulls and with no nulls.
vector_size_t numRows = 100;
Expand Down
12 changes: 7 additions & 5 deletions velox/type/Timestamp.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@ namespace date {
class time_zone;
}

/// Sub-second precision of a timestamp. Each enumerator's value is the
/// base-10 exponent of the number of such units in one second.
enum class TimestampPrecision : int8_t {
  /// 10^3 milliseconds are equal to one second.
  kMilliseconds = 3,
  /// 10^6 microseconds are equal to one second.
  kMicroseconds = 6,
  /// 10^9 nanoseconds are equal to one second.
  kNanoseconds = 9,
};

struct TimestampToStringOptions {
enum class Precision : int8_t {
kMilliseconds = 3, // 10^3 milliseconds are equal to one second.
kMicroseconds = 6, // 10^6 microseconds are equal to one second.
kNanoseconds = 9, // 10^9 nanoseconds are equal to one second.
};
using Precision = TimestampPrecision;

Precision precision = Precision::kNanoseconds;

Expand Down

0 comments on commit e2e3d03

Please sign in to comment.