Skip to content

Commit

Permalink
Add timestamp precision in selective DWRF reader and truncate to mill…
Browse files Browse the repository at this point in the history
…iseconds by default (facebookincubator#10019)

Summary:

For timestamp columns, Presto can only handle millisecond precision by default, so we should align with that behavior by truncating the values that come out of the reader.

Differential Revision: D58085206
  • Loading branch information
Yuhta authored and facebook-github-bot committed Jun 3, 2024
1 parent ab06a77 commit 9374be1
Show file tree
Hide file tree
Showing 9 changed files with 66 additions and 7 deletions.
11 changes: 11 additions & 0 deletions velox/dwio/common/Options.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "velox/dwio/common/ScanSpec.h"
#include "velox/dwio/common/UnitLoader.h"
#include "velox/dwio/common/encryption/Encryption.h"
#include "velox/type/Timestamp.h"

namespace facebook::velox::dwio::common {

Expand Down Expand Up @@ -157,6 +158,8 @@ class RowReaderOptions {
uint64_t skipRows_ = 0;
std::shared_ptr<UnitLoaderFactory> unitLoaderFactory_;

TimestampPrecision timestampPrecision_ = TimestampPrecision::kMilliseconds;

public:
RowReaderOptions() noexcept
: dataStart(0),
Expand Down Expand Up @@ -412,6 +415,14 @@ class RowReaderOptions {
/// Returns the currently configured value of decodingParallelismFactor_.
size_t getDecodingParallelismFactor() const {
return decodingParallelismFactor_;
}

/// Returns the precision that timestamp values should be truncated to when
/// they are read. Defaults to TimestampPrecision::kMilliseconds unless
/// changed via setTimestampPrecision(). The selective DWRF timestamp column
/// reader consults this value to drop the sub-precision part of the
/// nanoseconds field.
TimestampPrecision timestampPrecision() const {
return timestampPrecision_;
}

/// Sets the precision that timestamp values should be truncated to when they
/// are read. Pass TimestampPrecision::kNanoseconds to keep the values
/// untruncated.
///
/// @param precision The precision subsequently returned by
/// timestampPrecision().
void setTimestampPrecision(TimestampPrecision precision) {
timestampPrecision_ = precision;
}
};

/**
Expand Down
1 change: 1 addition & 0 deletions velox/dwio/common/tests/utils/E2EFilterTestBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ class E2EFilterTestBase : public testing::Test {
dwio::common::RowReaderOptions& opts,
const std::shared_ptr<ScanSpec>& spec) {
opts.setScanSpec(spec);
opts.setTimestampPrecision(TimestampPrecision::kNanoseconds);
}

void readWithoutFilter(
Expand Down
14 changes: 13 additions & 1 deletion velox/dwio/dwrf/reader/SelectiveTimestampColumnReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ SelectiveTimestampColumnReader::SelectiveTimestampColumnReader(
const std::shared_ptr<const TypeWithId>& fileType,
DwrfParams& params,
common::ScanSpec& scanSpec)
: SelectiveColumnReader(fileType->type(), fileType, params, scanSpec) {
: SelectiveColumnReader(fileType->type(), fileType, params, scanSpec),
precision_(
params.stripeStreams().getRowReaderOptions().timestampPrecision()) {
EncodingKey encodingKey{fileType_->id(), params.flatMapContext().sequence};
auto& stripe = params.stripeStreams();
version_ = convertRleVersion(stripe.getEncoding(encodingKey).kind());
Expand Down Expand Up @@ -148,6 +150,16 @@ void SelectiveTimestampColumnReader::readHelper(
if (seconds < 0 && nanos != 0) {
seconds -= 1;
}
switch (precision_) {
case TimestampPrecision::kMilliseconds:
nanos = nanos / 1'000'000 * 1'000'000;
break;
case TimestampPrecision::kMicroseconds:
nanos = nanos / 1'000 * 1'000;
break;
case TimestampPrecision::kNanoseconds:
break;
}
rawTs[i] = Timestamp(seconds, nanos);
}
}
Expand Down
2 changes: 2 additions & 0 deletions velox/dwio/dwrf/reader/SelectiveTimestampColumnReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ class SelectiveTimestampColumnReader
const RowSet rows,
const uint64_t* rawNulls);

const TimestampPrecision precision_;

std::unique_ptr<dwio::common::IntDecoder</*isSigned*/ true>> seconds_;
std::unique_ptr<dwio::common::IntDecoder</*isSigned*/ false>> nano_;

Expand Down
1 change: 1 addition & 0 deletions velox/dwio/dwrf/test/TestColumnReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ class ColumnReaderTestBase {
ColumnSelector cs(rowType, nodes, true);
auto options = RowReaderOptions();
options.setReturnFlatVector(returnFlatVector());
options.setTimestampPrecision(TimestampPrecision::kNanoseconds);

EXPECT_CALL(streams_, getColumnSelectorProxy())
.WillRepeatedly(testing::Return(&cs));
Expand Down
2 changes: 1 addition & 1 deletion velox/exec/fuzzer/AggregationFuzzerOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ struct AggregationFuzzerOptions {

/// Timestamp precision to use when generating inputs of type TIMESTAMP.
VectorFuzzer::Options::TimestampPrecision timestampPrecision{
VectorFuzzer::Options::TimestampPrecision::kNanoSeconds};
VectorFuzzer::Options::TimestampPrecision::kMilliSeconds};

/// A set of configuration properties to use when running query plans.
/// Could be used to specify timezone or enable/disable settings that
Expand Down
2 changes: 2 additions & 0 deletions velox/exec/fuzzer/JoinFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ class JoinFuzzer {
opts.stringVariableLength = true;
opts.stringLength = 100;
opts.nullRatio = FLAGS_null_ratio;
opts.timestampPrecision =
VectorFuzzer::Options::TimestampPrecision::kMilliSeconds;
return opts;
}

Expand Down
28 changes: 28 additions & 0 deletions velox/exec/tests/TableScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,34 @@ TEST_F(TableScanTest, timestamp) {
"SELECT c0 FROM tmp WHERE c1 < timestamp'1970-01-01 01:30:00'");
}

// Checks that, with default reader options, a table scan returns timestamps
// truncated to millisecond precision, and that an equality filter on the
// column matches against the truncated value.
TEST_F(TableScanTest, timestampPrecisionDefaultMillisecond) {
  constexpr int kSize = 10;

  // Row i holds i seconds plus i * 1'001'001 nanoseconds, so every non-zero
  // row carries digits below the millisecond position.
  auto input = makeRowVector({
      makeFlatVector<Timestamp>(
          kSize, [](auto row) { return Timestamp(row, row * 1'001'001); }),
  });
  auto rowType = asRowType(input->type());
  auto dataFile = TempFilePath::create();
  writeToFile(dataFile->getPath(), {input});
  auto split = makeHiveConnectorSplit(dataFile->getPath());

  // Unfiltered scan: only whole milliseconds are expected to survive.
  {
    auto scanPlan = PlanBuilder().tableScan(rowType).planNode();
    auto truncatedRows = makeRowVector({
        makeFlatVector<Timestamp>(
            kSize, [](auto row) { return Timestamp(row, row * 1'000'000); }),
    });
    AssertQueryBuilder(scanPlan).split(split).assertResults(truncatedRows);
  }

  // Filtered scan: the literal has millisecond precision and is expected to
  // select exactly the row written as 1s + 1'001'001ns.
  {
    auto filteredPlan =
        PlanBuilder(pool_.get())
            .tableScan(rowType, {"c0 = timestamp '1970-01-01 00:00:01.001'"})
            .planNode();
    auto matchingRow = makeRowVector({
        makeFlatVector<Timestamp>(
            1, [](auto) { return Timestamp(1, 1'000'000); }),
    });
    AssertQueryBuilder(filteredPlan).split(split).assertResults(matchingRow);
  }
}

DEBUG_ONLY_TEST_F(TableScanTest, timeLimitInGetOutput) {
// Create two different row vectors: with some nulls and with no nulls.
vector_size_t numRows = 100;
Expand Down
12 changes: 7 additions & 5 deletions velox/type/Timestamp.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@ namespace date {
class time_zone;
}

/// Sub-second precision of a timestamp. The underlying value of each
/// enumerator is the base-10 exponent of how many of that unit make up one
/// second (3 for milliseconds, 6 for microseconds, 9 for nanoseconds).
enum class TimestampPrecision : int8_t {
kMilliseconds = 3, // 10^3 milliseconds are equal to one second.
kMicroseconds = 6, // 10^6 microseconds are equal to one second.
kNanoseconds = 9, // 10^9 nanoseconds are equal to one second.
};

struct TimestampToStringOptions {
enum class Precision : int8_t {
kMilliseconds = 3, // 10^3 milliseconds are equal to one second.
kMicroseconds = 6, // 10^6 microseconds are equal to one second.
kNanoseconds = 9, // 10^9 nanoseconds are equal to one second.
};
using Precision = TimestampPrecision;

Precision precision = Precision::kNanoseconds;

Expand Down

0 comments on commit 9374be1

Please sign in to comment.