Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Add T-Digest data structure
Browse files Browse the repository at this point in the history
Summary:
Add the T-Digest data structure implementation to be used in T-Digest
related functions.  Also extract the `getRandomSeed` test utility that is used
in multiple unit tests.

Differential Revision: D66435741
Yuhta authored and facebook-github-bot committed Nov 26, 2024
1 parent c395c55 commit 446757a
Showing 8 changed files with 939 additions and 24 deletions.
3 changes: 2 additions & 1 deletion velox/common/testutil/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

velox_add_library(velox_test_util ScopedTestTime.cpp TestValue.cpp)
velox_add_library(velox_test_util ScopedTestTime.cpp TestValue.cpp
RandomSeed.cpp)
velox_link_libraries(velox_test_util PUBLIC velox_exception)

if(${VELOX_BUILD_TESTING})
37 changes: 37 additions & 0 deletions velox/common/testutil/RandomSeed.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/common/testutil/RandomSeed.h"

#include <folly/Conv.h>
#include <folly/Random.h>
#include <glog/logging.h>

#include <cstdlib>

namespace facebook::velox::common::testutil {

// Picks the seed a test should use for its random number generator.
//
// Returns `fixedValue` unless the environment variable
// VELOX_TEST_USE_RANDOM_SEED is present and folly::to<bool> converts its value
// to true. In that case a seed is drawn from folly::Random::secureRand32() and
// logged at INFO level before being returned.
unsigned getRandomSeed(unsigned fixedValue) {
  const char* const flag = std::getenv("VELOX_TEST_USE_RANDOM_SEED");
  const bool useRandom = flag != nullptr && folly::to<bool>(flag);
  if (useRandom) {
    const auto randomSeed = folly::Random::secureRand32();
    LOG(INFO) << "Random seed: " << randomSeed;
    return randomSeed;
  }
  return fixedValue;
}

} // namespace facebook::velox::common::testutil
27 changes: 27 additions & 0 deletions velox/common/testutil/RandomSeed.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

namespace facebook::velox::common::testutil {

/// Returns the seed a test should use for its random number generator.
///
/// If the environment variable VELOX_TEST_USE_RANDOM_SEED is set to a true
/// value, returns a truly random seed and logs it so that the run can be
/// reproduced later. Otherwise returns `fixedValue` so that test runs are
/// deterministic. An environment variable is used because `buck test` does
/// not allow passing in command line arguments.
unsigned getRandomSeed(unsigned fixedValue);

} // namespace facebook::velox::common::testutil
13 changes: 2 additions & 11 deletions velox/dwio/common/tests/utils/E2EFilterTestBase.h
Original file line number Diff line number Diff line change
@@ -16,6 +16,7 @@

#pragma once

#include "velox/common/testutil/RandomSeed.h"
#include "velox/common/time/Timer.h"
#include "velox/dwio/common/BufferedInput.h"
#include "velox/dwio/common/FileSink.h"
@@ -102,20 +103,10 @@ class E2EFilterTestBase : public testing::Test {
memory::MemoryManager::testingSetInstance({});
}

// Reports whether tests should run with a random seed. The switch is read
// from the environment variable VELOX_TEST_USE_RANDOM_SEED because `buck test`
// does not allow passing in command line arguments. An unset variable means
// false; otherwise the value is converted with folly::to<bool>.
static bool useRandomSeed() {
  const char* const flag = getenv("VELOX_TEST_USE_RANDOM_SEED");
  if (flag == nullptr) {
    return false;
  }
  return folly::to<bool>(flag);
}

// Test fixture setup: creates a root memory pool and a leaf child pool (both
// named "E2EFilterTestBase") and chooses the seed stored in seed_.
// NOTE(review): this view shows both an inline random-seed branch and a call
// to common::testutil::getRandomSeed(); presumably the latter replaces the
// former in the committed file -- confirm against the actual header.
void SetUp() override {
rootPool_ = memory::memoryManager()->addRootPool("E2EFilterTestBase");
leafPool_ = rootPool_->addLeafChild("E2EFilterTestBase");
if (useRandomSeed()) {
seed_ = folly::Random::secureRand32();
LOG(INFO) << "Random seed: " << seed_;
}
// Keeps the current seed_ as the fixed value unless a random seed is
// requested through the environment.
seed_ = common::testutil::getRandomSeed(seed_);
}

static bool typeKindSupportsValueHook(TypeKind kind) {
483 changes: 483 additions & 0 deletions velox/functions/lib/TDigest.h

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions velox/functions/lib/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -23,6 +23,7 @@ add_executable(
MapConcatTest.cpp
Re2FunctionsTest.cpp
RepeatTest.cpp
TDigestTest.cpp
Utf8Test.cpp
ZetaDistributionTest.cpp)

384 changes: 384 additions & 0 deletions velox/functions/lib/tests/TDigestTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,384 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/functions/lib/TDigest.h"
#include "velox/common/testutil/RandomSeed.h"

#include <folly/base64.h>
#include <gtest/gtest.h>

#include <random>

namespace facebook::velox::functions {
namespace {

// Absolute tolerance used when comparing a digest's sum() with the expected
// sum of the inserted values.
constexpr double kSumError = 1e-4;
// Absolute tolerance used when comparing the normalized rank of an estimated
// quantile with the requested quantile.
constexpr double kRankError = 0.01;

// Quantiles probed by the tests in this file; they cover both tails and the
// middle of the distribution.
constexpr double kQuantiles[] = {
0.0001, 0.0200, 0.0300, 0.04000, 0.0500, 0.1000, 0.2000,
0.3000, 0.4000, 0.5000, 0.6000, 0.7000, 0.8000, 0.9000,
0.9500, 0.9600, 0.9700, 0.9800, 0.9999,
};

// Verifies that `digest` is a faithful summary of the sorted sample `values`.
//
// Checks that digest.sum() equals the sum of `values` within kSumError, and
// that for every q in kQuantiles the estimate v = digest.estimateQuantile(q)
//   * lies within [values.front(), values.back()], and
//   * has a rank consistent with q: with l and r being the normalized ranks
//     (index / (size - 1)) of the sample elements bracketing v from below and
//     above, q must lie in [l, r] or within kRankError of that interval.
//
// `values` must be sorted (enforced with VELOX_CHECK) and non-empty. The
// function uses fatal gtest assertions, so it returns at the first failure;
// invoke it through CHECK_QUANTILES so the failure is traced to the caller.
void checkQuantiles(
    folly::Range<const double*> values,
    const TDigest<>& digest) {
  VELOX_CHECK(std::is_sorted(values.begin(), values.end()));
  // front() and back() below require at least one element.
  ASSERT_FALSE(values.empty());
  auto sum = std::accumulate(values.begin(), values.end(), 0.0);
  ASSERT_NEAR(digest.sum(), sum, kSumError);
  // With a single element this is 0 and both ranks become NaN, which makes the
  // rank comparisons below vacuous.
  const double maxRank = values.size() - 1.0;
  for (auto q : kQuantiles) {
    auto v = digest.estimateQuantile(q);
    ASSERT_LE(values.front(), v);
    ASSERT_LE(v, values.back());
    // Upper bracket: first element >= v. It exists because v <= back().
    auto hi = std::lower_bound(values.begin(), values.end(), v);
    // Lower bracket: last element <= v. When *hi == v it coincides with hi;
    // otherwise it is the element just before hi (which is < v). The previous
    // condition `v > *lo` could never hold here, so lo never moved.
    auto lo = hi;
    if (lo != values.begin() && v < *lo) {
      --lo;
    }
    // Extend the upper bracket over duplicates so r is the largest rank of
    // that value.
    while (std::next(hi) != values.end() && *hi == *std::next(hi)) {
      ++hi;
    }
    auto l = (lo - values.begin()) / maxRank;
    auto r = (hi - values.begin()) / maxRank;
    if (q < l) {
      ASSERT_NEAR(l, q, kRankError);
    } else if (q > r) {
      ASSERT_NEAR(r, q, kRankError);
    }
  }
}

// Calls checkQuantiles(_values, _digest) inside a SCOPED_TRACE so that a
// failure raised within the helper is reported together with the location of
// the invoking test. Wrapped in do/while(false) so the macro behaves as a
// single statement.
#define CHECK_QUANTILES(_values, _digest) \
do { \
SCOPED_TRACE("CHECK_QUANTILES"); \
checkQuantiles((_values), (_digest)); \
} while (false)

// Decodes the base64 text `input` and returns the decoded bytes. The output
// buffer is sized up front with folly::base64DecodedSize and filled in place.
std::string decodeBase64(std::string_view input) {
  const auto decodedSize = folly::base64DecodedSize(input);
  std::string bytes(decodedSize, '\0');
  folly::base64Decode(input, bytes.data());
  return bytes;
}

// Adds the integers 0..N-1 in increasing order and checks the sum and the
// estimated quantiles against the exact values.
TEST(TDigestTest, addElementsInOrder) {
  constexpr int kNumValues = 1e6;
  constexpr int kMaxValue = kNumValues - 1;
  std::vector<int16_t> positions;
  TDigest digest;
  ASSERT_EQ(digest.compression(), tdigest::kDefaultCompression);
  for (int value = 0; value <= kMaxValue; ++value) {
    digest.add(positions, value);
  }
  digest.compress(positions);
  // Sum of the arithmetic series 0 + 1 + ... + (N - 1).
  const double expectedSum = 1.0 * kNumValues * kMaxValue / 2;
  ASSERT_NEAR(digest.sum(), expectedSum, kSumError);
  for (const auto quantile : kQuantiles) {
    const auto estimate = digest.estimateQuantile(quantile);
    // The values are uniform on [0, N - 1], so value / (N - 1) is the rank.
    ASSERT_NEAR(estimate / kMaxValue, quantile, kRankError);
  }
}

// Adds N uniformly distributed random values and checks the digest against
// the exact sorted sample. The seed is fixed (42) unless a random seed is
// requested through common::testutil::getRandomSeed.
TEST(TDigestTest, addElementsRandomized) {
  constexpr int N = 1e5;
  // Kept on the heap: N doubles (800 KB) is too large for a stack array.
  std::vector<double> values;
  values.reserve(N);
  TDigest digest;
  std::vector<int16_t> positions;
  std::default_random_engine gen(common::testutil::getRandomSeed(42));
  std::uniform_real_distribution<> dist;
  for (int i = 0; i < N; ++i) {
    auto v = dist(gen);
    digest.add(positions, v);
    values.push_back(v);
  }
  digest.compress(positions);
  // checkQuantiles requires the reference sample to be sorted.
  std::sort(values.begin(), values.end());
  CHECK_QUANTILES(values, digest);
}

// Covers the degenerate sizes: an empty digest and a digest holding exactly
// one value.
TEST(TDigestTest, fewElements) {
  std::vector<int16_t> positions;
  TDigest digest;

  // Empty digest: zero sum and NaN for every quantile.
  digest.compress(positions);
  ASSERT_EQ(digest.sum(), 0);
  for (const auto quantile : kQuantiles) {
    const auto estimate = digest.estimateQuantile(quantile);
    ASSERT_TRUE(std::isnan(estimate));
  }

  // Single value: every quantile is that value.
  digest.add(positions, 1.0);
  digest.compress(positions);
  ASSERT_EQ(digest.sum(), 1);
  for (const auto quantile : kQuantiles) {
    const auto estimate = digest.estimateQuantile(quantile);
    ASSERT_EQ(estimate, 1.0);
  }
}

// IMPORTANT: None of the failures below can be caught by TRY in Presto, so
// they are deliberately not user errors. An engine in which these should be
// catchable must throw user errors in its own UDFs before the call reaches
// the TDigest implementation.
TEST(TDigestTest, invalid) {
  std::vector<int16_t> positions;
  TDigest digest;

  // Rejected compression values.
  ASSERT_THROW(digest.setCompression(NAN), VeloxRuntimeError);
  ASSERT_THROW(digest.setCompression(0), VeloxRuntimeError);
  ASSERT_THROW(digest.setCompression(1000.1), VeloxRuntimeError);

  // Rejected arguments to add().
  ASSERT_THROW(digest.add(positions, NAN), VeloxRuntimeError);
  ASSERT_THROW(digest.add(positions, 1, 0), VeloxRuntimeError);

  // Rejected quantile.
  ASSERT_THROW(digest.estimateQuantile(1.1), VeloxRuntimeError);
}

// Serializes a digest at byte offsets 0 and 1 of the same buffer and checks
// that reading it back from either address yields the same quantiles.
TEST(TDigestTest, unalignedSerialization) {
  constexpr int kNumValues = 1e4;
  constexpr int kMaxValue = kNumValues - 1;
  std::vector<int16_t> positions;
  TDigest source;
  for (int value = 0; value <= kMaxValue; ++value) {
    source.add(positions, value);
  }
  source.compress(positions);
  const double expectedSum = 1.0 * kNumValues * kMaxValue / 2;
  ASSERT_NEAR(source.sum(), expectedSum, kSumError);

  // One spare byte so the payload also fits when shifted by one.
  std::string buf(1 + source.serializedByteSize(), '\0');
  for (int offset = 0; offset < 2; ++offset) {
    SCOPED_TRACE(fmt::format("offset={}", offset));
    char* const payload = buf.data() + offset;
    source.serialize(payload);
    TDigest restored;
    restored.mergeDeserialized(positions, payload);
    restored.compress(positions);
    for (const auto quantile : kQuantiles) {
      const auto estimate = restored.estimateQuantile(quantile);
      ASSERT_NEAR(estimate / kMaxValue, quantile, kRankError);
    }
  }
}

// Merging a serialized empty digest must be a no-op, both into an empty
// digest and into a digest that already holds a value.
TEST(TDigestTest, mergeEmpty) {
  std::vector<int16_t> positions;
  TDigest<> target;
  TDigest<> empty;
  std::string serializedEmpty(empty.serializedByteSize(), '\0');
  empty.serialize(serializedEmpty.data());

  // Empty merged into empty stays empty.
  target.mergeDeserialized(positions, serializedEmpty.data());
  target.compress(positions);
  ASSERT_EQ(target.sum(), 0);
  ASSERT_TRUE(std::isnan(target.estimateQuantile(0.5)));

  // The digest is still usable afterwards.
  target.add(positions, 1.0);
  target.compress(positions);
  ASSERT_EQ(target.sum(), 1);
  ASSERT_EQ(target.estimateQuantile(0.5), 1);

  // Empty merged into non-empty leaves the content unchanged.
  target.mergeDeserialized(positions, serializedEmpty.data());
  target.compress(positions);
  ASSERT_EQ(target.sum(), 1);
  ASSERT_EQ(target.estimateQuantile(0.5), 1);
}

// Deserializes fixed base64-encoded digests and checks their content. Each
// case's SCOPED_TRACE holds the SQL query that, judging by the test name,
// produced the payload in the Java implementation -- the payloads must not be
// altered.
TEST(TDigestTest, deserializeJava) {
std::vector<int16_t> positions;
// Case 1: a single value 2.0 with default weight and compression.
{
SCOPED_TRACE(
"select to_base64(cast(tdigest_agg(x) as varbinary)) from (values (2.0)) as t(x)");
auto data = decodeBase64(
"AQAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAABZQAAAAAAAAPA/AQAAAAAAAAAAAPA/AAAAAAAAAEA=");
TDigest digest;
digest.mergeDeserialized(positions, data.data());
digest.compress(positions);
ASSERT_EQ(digest.compression(), tdigest::kDefaultCompression);
ASSERT_EQ(digest.sum(), 2.0);
for (auto q : kQuantiles) {
ASSERT_EQ(digest.estimateQuantile(q), 2.0);
}
}
// Case 2: value 2.0 with weight 2 and compression 200, so the sum is 4.
{
SCOPED_TRACE(
"select to_base64(cast(tdigest_agg(x, w, c) as varbinary)) from (values (2.0, 2, 200.0)) as t(x, w, c)");
auto data = decodeBase64(
"AQAAAAAAAAAAQAAAAAAAAABAAAAAAAAAEEAAAAAAAABpQAAAAAAAAABAAQAAAAAAAAAAAABAAAAAAAAAAEA=");
TDigest digest;
digest.setCompression(200);
digest.mergeDeserialized(positions, data.data());
digest.compress(positions);
ASSERT_EQ(digest.compression(), 200);
ASSERT_EQ(digest.sum(), 4.0);
for (auto q : kQuantiles) {
ASSERT_EQ(digest.estimateQuantile(q), 2.0);
}
}
// Case 3: the integers 0..1000, whose sum is 500500.
{
SCOPED_TRACE(
"select to_base64(cast(tdigest_agg(x) as varbinary)) from unnest(sequence(0, 1000)) as t(x)");
auto data = decodeBase64(
"AQAAAAAAAAAAAAAAAAAAQI9AAAAAAFCMHkEAAAAAAABZQAAAAAAASI9AMgAAAAAAAAAAAPA/AAAAAAAA8D8AAAAAAADwPwAAAAAAAPA/AAAAAAAA8D8AAAAAAADwPwAAAAAAAPA/AAAAAAAAAEAAAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAAUQAAAAAAAABxAAAAAAAAAIkAAAAAAAAAoQAAAAAAAADBAAAAAAAAANEAAAAAAAAA6QAAAAAAAgEBAAAAAAACAREAAAAAAAABJQAAAAAAAAE5AAAAAAABAUUAAAAAAAEBTQAAAAAAAgFRAAAAAAADAU0AAAAAAAABSQAAAAAAAAFBAAAAAAAAAS0AAAAAAAIBGQAAAAAAAAEJAAAAAAAAAPUAAAAAAAAA2QAAAAAAAADFAAAAAAAAAKkAAAAAAAAAkQAAAAAAAACBAAAAAAAAAGEAAAAAAAAAUQAAAAAAAAAhAAAAAAAAACEAAAAAAAAAAQAAAAAAAAPA/AAAAAAAA8D8AAAAAAADwPwAAAAAAAPA/AAAAAAAA8D8AAAAAAADwPwAAAAAAAPA/AAAAAAAA8D8AAAAAAAAAAAAAAAAAAPA/AAAAAAAAAEAAAAAAAAAIQAAAAAAAABBAAAAAAAAAFEAAAAAAAAAYQAAAAAAAAB5AAAAAAAAAI0AAAAAAAAAoQAAAAAAAAC9AAAAAAAAANEAAAAAAAAA6QAAAAAAAAEFAAAAAAABARkAAAAAAAEBNQAAAAAAAIFNAAAAAAADgWEAAAAAAACBgQAAAAAAAwGRAAAAAAABwakAAAAAAAKhwQAAAAAAAsHRAAAAAAABAeUAAAAAAADh+QAAAAAAAoIFAAAAAAAD8g0AAAAAAAByGQAAAAAAA9IdAAAAAAACAiUAAAAAAAMSKQAAAAAAAyItAAAAAAACUjEAAAAAAADCNQAAAAAAAqI1AAAAAAAAEjkAAAAAAAEyOQAAAAAAAhI5AAAAAAACwjkAAAAAAANCOQAAAAAAA6I5AAAAAAAD8jkAAAAAAAAiPQAAAAAAAEI9AAAAAAAAYj0AAAAAAACCPQAAAAAAAKI9AAAAAAAAwj0AAAAAAADiPQAAAAAAAQI9A");
TDigest digest;
digest.mergeDeserialized(positions, data.data());
digest.compress(positions);
ASSERT_NEAR(digest.sum(), 500500, kSumError);
for (auto q : kQuantiles) {
auto v = digest.estimateQuantile(q);
ASSERT_NEAR(v / 1000, q, kRankError);
}
}
// Case 4: value 0.0 with weight 1 and value 1.0 with weight 100; the
// reference sample is one 0 followed by one hundred 1s.
{
SCOPED_TRACE(
"select to_base64(cast(tdigest_agg(x, w) as varbinary)) from (values (0.0, 1), (1.0, 100)) as t(x, w)");
auto data = decodeBase64(
"AQAAAAAAAAAAAAAAAAAAAPA/AAAAAAAAWUAAAAAAAABZQAAAAAAAQFlAAgAAAAAAAAAAAPA/AAAAAAAAWUAAAAAAAAAAAAAAAAAAAPA/");
TDigest digest;
digest.mergeDeserialized(positions, data.data());
digest.compress(positions);
double values[101];
values[0] = 0;
std::fill(values + 1, values + 101, 1);
CHECK_QUANTILES(folly::Range(values, 101), digest);
}
// Case 5: values 1..1000 where value x carries weight 1001 - x; the
// reference sample repeats each i exactly (1001 - i) times, 500500 in total.
{
SCOPED_TRACE(
"select to_base64(cast(tdigest_agg(cast(x as double), 1001 - x) as varbinary)) from unnest(sequence(1, 1000)) as t(x)");
auto data = decodeBase64(
"AQAAAAAAAADwPwAAAAAAQI9AAAAAMIjto0EAAAAAAABZQAAAAABQjB5BLAAAAAAAAAAAQI9AAAAAAAA4j0AAAAAAADCPQAAAAAAAKI9AAAAAAAAcn0AAAAAAAEanQAAAAAAAUbNAAAAAAADhukAAAAAAAO3EQAAAAAAA4M9AAAAAAEDU10AAAAAAwEDhQAAAAABA4edAAAAAAEB57kAAAAAAwELxQAAAAABgiu1AAAAAAKAE50AAAAAA4LzgQAAAAADAANdAAAAAAABYz0AAAAAAAHzEQAAAAAAAqbpAAAAAAAD0sEAAAAAAAOClQAAAAAAA+JtAAAAAAADgkUAAAAAAAJCFQAAAAAAAEH1AAAAAAADAckAAAAAAAOBmQAAAAAAAQF9AAAAAAACAVEAAAAAAAIBJQAAAAAAAAEVAAAAAAAAAN0AAAAAAAAAzQAAAAAAAACBAAAAAAAAAHEAAAAAAAAAYQAAAAAAAABRAAAAAAAAAEEAAAAAAAAAIQAAAAAAAAABAAAAAAAAA8D8AAAAAAADwPwAAAAAAAABAAAAAAAAACEAAAAAAAAAQQCT88Sq+/xVAQP1fAVD/H0C9Xrrw9v4nQHwxjlL1/jFAoIdSJV/9OkDMzMzMzHxEQM6g0QNUOE9AN9zhXw23V0AB/5K759VhQNPiJszvSmpATJIkSZK8ckDxSWWlSwl5QMvdEa6CIH9A8tiKoOFUgkCutAVRS7qEQHt4eHh4v4ZAwq5fKvlsiECiNVqjNcqJQLdt27Zt44pAJEmSJEnEi0CGUZ0mB3mMQN/NI1SfCY1ACis2j1d6jUBTSimllNKNQHsUrkfhGo5Aul0rJzxTjkCQwvUoXH+OQPQxOB+Do45AsK+vr6+/jkB6nud5nteOQOpNb3rT645AvYbyGsr7jkAAAAAAAAiPQAAAAAAAEI9AAAAAAAAYj0AAAAAAACCPQAAAAAAAKI9AAAAAAAAwj0AAAAAAADiPQAAAAAAAQI9A");
TDigest digest;
digest.mergeDeserialized(positions, data.data());
digest.compress(positions);
std::vector<double> values;
values.reserve(500500);
for (int i = 1; i <= 1000; ++i) {
values.insert(values.end(), 1001 - i, i);
}
CHECK_QUANTILES(values, digest);
}
}

// Merges two digests whose value ranges are disjoint ([0, N) and [N, 2N)) and
// checks the result against the exact values for the combined range [0, 2N).
TEST(TDigestTest, mergeNoOverlap) {
  constexpr int kHalf = 1e5;
  TDigest<> lower;
  TDigest<> upper;
  std::vector<int16_t> positions;
  // The adds stay interleaved because both digests share the `positions`
  // scratch vector.
  for (int value = 0; value < kHalf; ++value) {
    lower.add(positions, value);
    upper.add(positions, value + kHalf);
  }
  upper.compress(positions);
  std::string serializedUpper(upper.serializedByteSize(), '\0');
  upper.serialize(serializedUpper.data());
  lower.mergeDeserialized(positions, serializedUpper.data());
  lower.compress(positions);
  // Sum of 0..2N-1 is N * (2N - 1).
  ASSERT_NEAR(lower.sum(), kHalf * (2.0 * kHalf - 1), kSumError);
  for (const auto quantile : kQuantiles) {
    const auto estimate = lower.estimateQuantile(quantile);
    ASSERT_NEAR(estimate / (2 * kHalf - 1), quantile, kRankError);
  }
}

// Merges a digest with a serialized copy of itself, so every value ends up
// represented twice, and checks the result against that doubled sample.
TEST(TDigestTest, mergeOverlap) {
  constexpr int kNumValues = 1e5;
  std::vector<int16_t> positions;
  std::vector<double> expected;
  expected.reserve(2 * kNumValues);
  TDigest digest;
  for (int value = 0; value < kNumValues; ++value) {
    digest.add(positions, value);
    // Two copies: one for the original, one for the merged-in duplicate.
    expected.push_back(value);
    expected.push_back(value);
  }
  digest.compress(positions);
  std::string serialized(digest.serializedByteSize(), '\0');
  digest.serialize(serialized.data());
  digest.mergeDeserialized(positions, serialized.data());
  digest.compress(positions);
  CHECK_QUANTILES(expected, digest);
}

// Adds N normally distributed values (standard deviation 1) for two different
// means and checks each digest against the exact sorted sample. The random
// engine is shared by both rounds; its seed is fixed (42) unless a random seed
// is requested through common::testutil::getRandomSeed.
TEST(TDigestTest, normalDistribution) {
  constexpr int N = 1e5;
  std::vector<int16_t> positions;
  // Kept on the heap: N doubles (800 KB) is too large for a stack array. The
  // buffer is reused (fully overwritten) by each round.
  std::vector<double> values(N);
  std::default_random_engine gen(common::testutil::getRandomSeed(42));
  for (double mean : {0, 1000}) {
    SCOPED_TRACE(fmt::format("mean={}", mean));
    std::normal_distribution<> dist(mean, 1);
    TDigest digest;
    for (int i = 0; i < N; ++i) {
      auto v = dist(gen);
      digest.add(positions, v);
      values[i] = v;
    }
    digest.compress(positions);
    // checkQuantiles requires the reference sample to be sorted.
    std::sort(values.begin(), values.end());
    CHECK_QUANTILES(values, digest);
  }
}

// Adds each value i in 1..100 with weight i and checks the digest against a
// reference sample in which i appears i times (5050 elements in total).
TEST(TDigestTest, addWeighed) {
  std::vector<double> expected;
  expected.reserve(5050);
  std::vector<int16_t> positions;
  TDigest digest;
  for (int i = 1; i <= 100; ++i) {
    const int weight = i;
    digest.add(positions, i, weight);
    expected.insert(expected.end(), weight, i);
  }
  digest.compress(positions);
  CHECK_QUANTILES(expected, digest);
}

// Builds several digests from normally distributed values, merges them one by
// one through their serialized form and checks the merged digest against the
// exact sorted sample. Exercised with few large, many medium and very many
// tiny digests.
TEST(TDigestTest, merge) {
  std::vector<int16_t> positions;
  std::default_random_engine gen(common::testutil::getRandomSeed(42));
  std::vector<double> expected;
  std::string serialized;
  const auto runCase =
      [&](int numDigests, int size, double mean, double stddev) {
        SCOPED_TRACE(fmt::format(
            "numDigests={} size={} mean={} stddev={}",
            numDigests,
            size,
            mean,
            stddev));
        expected.clear();
        expected.reserve(numDigests * size);
        std::normal_distribution<> dist(mean, stddev);
        TDigest merged;
        for (int d = 0; d < numDigests; ++d) {
          TDigest part;
          for (int k = 0; k < size; ++k) {
            const auto sample = dist(gen);
            part.add(positions, sample);
            expected.push_back(sample);
          }
          part.compress(positions);
          // The buffer is reused across parts; only its size is adjusted.
          serialized.resize(part.serializedByteSize());
          part.serialize(serialized.data());
          merged.mergeDeserialized(positions, serialized.data());
        }
        merged.compress(positions);
        std::sort(expected.begin(), expected.end());
        CHECK_QUANTILES(expected, merged);
      };
  runCase(2, 50000, 0, 50);
  runCase(100, 1000, 500, 20);
  runCase(10000, 10, 500, 20);
}

// A digest holding -infinity, 0 and +infinity: the sum is NaN and each third
// of the quantile range maps to the corresponding value.
TEST(TDigestTest, infinity) {
  struct Expectation {
    double quantile;
    double value;
  };
  const Expectation expectations[] = {
      {0, -INFINITY},
      {0.3, -INFINITY},
      {0.4, 0.0},
      {0.5, 0.0},
      {0.6, 0.0},
      {0.7, INFINITY},
      {1, INFINITY},
  };
  std::vector<int16_t> positions;
  TDigest digest;
  digest.add(positions, 0.0);
  digest.add(positions, INFINITY);
  digest.add(positions, -INFINITY);
  digest.compress(positions);
  // +inf plus -inf is NaN.
  ASSERT_TRUE(std::isnan(digest.sum()));
  for (const auto& expected : expectations) {
    ASSERT_EQ(digest.estimateQuantile(expected.quantile), expected.value);
  }
}

} // namespace
} // namespace facebook::velox::functions
15 changes: 3 additions & 12 deletions velox/type/tests/TimestampTest.cpp
Original file line number Diff line number Diff line change
@@ -18,6 +18,7 @@
#include <random>

#include "velox/common/base/tests/GTestUtils.h"
#include "velox/common/testutil/RandomSeed.h"
#include "velox/type/Timestamp.h"
#include "velox/type/tz/TimeZoneMap.h"

@@ -259,16 +260,6 @@ TEST(TimestampTest, toStringPrestoCastBehavior) {

namespace {

// Returns the seed for the randomized timestamp tests: 42 unless the
// environment variable VELOX_TEST_USE_RANDOM_SEED is set. Only the presence of
// the variable is checked, so any value (even an empty one) enables a seed
// from std::random_device, which is logged before being returned.
uint64_t randomSeed() {
  if (getenv("VELOX_TEST_USE_RANDOM_SEED") == nullptr) {
    return 42;
  }
  const auto seed = std::random_device{}();
  LOG(INFO) << "Random seed: " << seed;
  return seed;
}

std::string toStringAlt(
const Timestamp& t,
TimestampToStringOptions::Precision precision) {
@@ -308,7 +299,7 @@ bool checkUtcToEpoch(int year, int mon, int mday, int hour, int min, int sec) {
} // namespace

TEST(TimestampTest, compareWithToStringAlt) {
std::default_random_engine gen(randomSeed());
std::default_random_engine gen(common::testutil::getRandomSeed(42));
std::uniform_int_distribution<int64_t> distSec(
Timestamp::kMinSeconds, Timestamp::kMaxSeconds);
std::uniform_int_distribution<uint64_t> distNano(0, Timestamp::kMaxNanos);
@@ -349,7 +340,7 @@ TEST(TimestampTest, utcToEpoch) {
}

TEST(TimestampTest, utcToEpochRandomInputs) {
std::default_random_engine gen(randomSeed());
std::default_random_engine gen(common::testutil::getRandomSeed(42));
std::uniform_int_distribution<int32_t> dist(INT32_MIN, INT32_MAX);
for (int i = 0; i < 10'000; ++i) {
checkUtcToEpoch(

0 comments on commit 446757a

Please sign in to comment.