From 9da75985f28e71591f7411192f29fd9a2839064d Mon Sep 17 00:00:00 2001 From: Deepak Majeti Date: Fri, 25 Oct 2024 11:22:28 -0400 Subject: [PATCH] more cleanup --- .../gcs/tests/GCSFileSystemTest.cpp | 85 ++++++------------- .../gcs/tests/GCSInsertTest.cpp | 27 ++---- .../tests/{GcsTestbench.h => GCSTestbench.h} | 35 +++++--- .../s3fs/tests/S3MultipleEndpointsTest.cpp | 2 +- 4 files changed, 58 insertions(+), 91 deletions(-) rename velox/connectors/hive/storage_adapters/gcs/tests/{GcsTestbench.h => GCSTestbench.h} (79%) diff --git a/velox/connectors/hive/storage_adapters/gcs/tests/GCSFileSystemTest.cpp b/velox/connectors/hive/storage_adapters/gcs/tests/GCSFileSystemTest.cpp index cd52f927aeb60..0900f9502ae97 100644 --- a/velox/connectors/hive/storage_adapters/gcs/tests/GCSFileSystemTest.cpp +++ b/velox/connectors/hive/storage_adapters/gcs/tests/GCSFileSystemTest.cpp @@ -16,10 +16,9 @@ #include "velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.h" #include "velox/common/base/tests/GTestUtils.h" -#include "velox/common/config/Config.h" #include "velox/common/file/File.h" #include "velox/connectors/hive/storage_adapters/gcs/GCSUtil.h" -#include "velox/connectors/hive/storage_adapters/gcs/tests/GcsTestbench.h" +#include "velox/connectors/hive/storage_adapters/gcs/tests/GCSTestbench.h" #include "velox/exec/tests/utils/TempFilePath.h" #include "gtest/gtest.h" @@ -27,33 +26,20 @@ namespace facebook::velox::filesystems { namespace { class GCSFileSystemTest : public testing::Test { - protected: - static void SetUpTestSuite() { - if (testbench_ == nullptr) { - testbench_ = std::make_shared(); - testbench_->bootstrap(); - } + public: + void SetUp() { + testbench_ = std::make_shared(); + testbench_->bootstrap(); } - std::shared_ptr testGcsOptions() const { - std::unordered_map configOverride = {}; - - configOverride["hive.gcs.scheme"] = "http"; - configOverride["hive.gcs.endpoint"] = "localhost:" + testbench_->port(); - return std::make_shared( - std::move(configOverride)); - } - - static std::shared_ptr testbench_; + std::shared_ptr testbench_; }; -std::shared_ptr GCSFileSystemTest::testbench_ = nullptr; - TEST_F(GCSFileSystemTest, readFile) { const auto gcsFile = gcsURI( testbench_->preexistingBucketName(), testbench_->preexistingObjectName()); - filesystems::GCSFileSystem gcfs(testGcsOptions()); + filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); auto readFile = gcfs.openFileForRead(gcsFile); std::int64_t size = readFile->size(); @@ -90,7 +76,7 @@ TEST_F(GCSFileSystemTest, writeAndReadFile) { const std::string_view newFile = "readWriteFile.txt"; const auto gcsFile = gcsURI(testbench_->preexistingBucketName(), newFile); - filesystems::GCSFileSystem gcfs(testGcsOptions()); + filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); auto writeFile = gcfs.openFileForWrite(gcsFile); std::string dataContent = @@ -114,15 +100,11 @@ TEST_F(GCSFileSystemTest, writeAndReadFile) { std::int64_t size = readFile->size(); EXPECT_EQ(readFile->size(), contentSize); EXPECT_EQ(readFile->pread(0, size), dataContent); -} - -TEST_F(GCSFileSystemTest, openExistingFileForWrite) { - const std::string_view newFile = "readWriteFile.txt"; - const auto gcsFile = gcsURI(testbench_->preexistingBucketName(), newFile); - filesystems::GCSFileSystem gcfs(testGcsOptions()); - gcfs.initializeClient(); - VELOX_ASSERT_THROW(gcfs.openFileForWrite(gcsFile), "File already exists"); + // Opening an existing file for write must be an error. + filesystems::GCSFileSystem newGcfs(testbench_->hiveConfig()); + newGcfs.initializeClient(); + VELOX_ASSERT_THROW(newGcfs.openFileForWrite(gcsFile), "File already exists"); } TEST_F(GCSFileSystemTest, renameNotImplemented) { @@ -130,7 +112,7 @@ TEST_F(GCSFileSystemTest, renameNotImplemented) { const auto gcsExistingFile = gcsURI( testbench_->preexistingBucketName(), testbench_->preexistingObjectName()); const auto gcsNewFile = gcsURI(testbench_->preexistingBucketName(), file); - filesystems::GCSFileSystem gcfs(testGcsOptions()); + filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); gcfs.openFileForRead(gcsExistingFile); VELOX_ASSERT_THROW( @@ -141,7 +123,7 @@ TEST_F(GCSFileSystemTest, renameNotImplemented) { TEST_F(GCSFileSystemTest, mkdirNotImplemented) { const std::string_view dir = "newDirectory"; const auto gcsNewDirectory = gcsURI(testbench_->preexistingBucketName(), dir); - filesystems::GCSFileSystem gcfs(testGcsOptions()); + filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); VELOX_ASSERT_THROW( gcfs.mkdir(gcsNewDirectory), "mkdir for GCS not implemented"); @@ -150,7 +132,7 @@ TEST_F(GCSFileSystemTest, mkdirNotImplemented) { TEST_F(GCSFileSystemTest, rmdirNotImplemented) { const std::string_view dir = "Directory"; const auto gcsDirectory = gcsURI(testbench_->preexistingBucketName(), dir); - filesystems::GCSFileSystem gcfs(testGcsOptions()); + filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); VELOX_ASSERT_THROW(gcfs.rmdir(gcsDirectory), "rmdir for GCS not implemented"); } @@ -158,7 +140,7 @@ TEST_F(GCSFileSystemTest, rmdirNotImplemented) { TEST_F(GCSFileSystemTest, missingFile) { const std::string_view file = "newTest.txt"; const auto gcsFile = gcsURI(testbench_->preexistingBucketName(), file); - filesystems::GCSFileSystem gcfs(testGcsOptions()); + filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); VELOX_ASSERT_RUNTIME_THROW_CODE( gcfs.openFileForRead(gcsFile), @@ -167,7 +149,7 @@ TEST_F(GCSFileSystemTest, missingFile) { } TEST_F(GCSFileSystemTest, missingBucket) { - filesystems::GCSFileSystem gcfs(testGcsOptions()); + filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); const std::string_view gcsFile = "gs://dummy/foo.txt"; VELOX_ASSERT_RUNTIME_THROW_CODE( @@ -177,14 +159,12 @@ TEST_F(GCSFileSystemTest, missingBucket) { } TEST_F(GCSFileSystemTest, credentialsConfig) { - std::unordered_map configOverride = {}; - // credentials from arrow gcsfs test case // While this service account key has the correct format, it cannot be used // for authentication because the key has been deactivated on the server-side, // *and* the account(s) involved are deleted *and* they are not the accounts // or projects do not match its contents. - auto creds = R"""({ + const std::string_view kCreds = R"""({ "type": "service_account", "project_id": "foo-project", "private_key_id": "a1a111aa1111a11a11a11aa111a111a1a1111111", @@ -220,28 +200,19 @@ TEST_F(GCSFileSystemTest, credentialsConfig) { })"""; auto jsonFile = exec::test::TempFilePath::create(); std::ofstream credsOut(jsonFile->getPath()); - credsOut << creds; + credsOut << kCreds; credsOut.close(); - configOverride["hive.gcs.json-key-file-path"] = jsonFile->getPath(); - configOverride["hive.gcs.scheme"] = "http"; - configOverride["hive.gcs.endpoint"] = "localhost:" + testbench_->port(); - std::shared_ptr conf = - std::make_shared(std::move(configOverride)); - filesystems::GCSFileSystem gcfs(conf); + std::unordered_map configOverride = { + {"hive.gcs.json-key-file-path", jsonFile->getPath()}}; + auto hiveConfig = testbench_->hiveConfig(configOverride); + + filesystems::GCSFileSystem gcfs(hiveConfig); gcfs.initializeClient(); - try { - const std::string gcsFile = gcsURI( - testbench_->preexistingBucketName(), - testbench_->preexistingObjectName()); - gcfs.openFileForRead(gcsFile); - FAIL() << "Expected VeloxException"; - } catch (VeloxException const& err) { - EXPECT_THAT( - err.message(), testing::HasSubstr("gs://test1-gcs/test-object-name")); - EXPECT_THAT( - err.message(), testing::HasSubstr("Invalid ServiceAccountCredentials")); - } + const auto gcsFile = gcsURI( + testbench_->preexistingBucketName(), testbench_->preexistingObjectName()); + VELOX_ASSERT_THROW( + gcfs.openFileForRead(gcsFile), "Invalid ServiceAccountCredentials"); } } // namespace } // namespace facebook::velox::filesystems diff --git a/velox/connectors/hive/storage_adapters/gcs/tests/GCSInsertTest.cpp b/velox/connectors/hive/storage_adapters/gcs/tests/GCSInsertTest.cpp index f9db097b100bc..ef34e7c2f76c1 100644 --- a/velox/connectors/hive/storage_adapters/gcs/tests/GCSInsertTest.cpp +++ b/velox/connectors/hive/storage_adapters/gcs/tests/GCSInsertTest.cpp @@ -18,7 +18,7 @@ #include #include "velox/connectors/hive/storage_adapters/gcs/RegisterGCSFileSystem.h" -#include "velox/connectors/hive/storage_adapters/gcs/tests/GcsTestbench.h" +#include "velox/connectors/hive/storage_adapters/gcs/tests/GCSTestbench.h" #include "velox/connectors/hive/storage_adapters/test_common/InsertTest.h" using namespace facebook::velox::exec::test; @@ -31,20 +31,20 @@ class GCSInsertTest : public testing::Test, public test::InsertTest { static void SetUpTestSuite() { registerGCSFileSystem(); memory::MemoryManager::testingSetInstance({}); - if (testbench_ == nullptr) { - testbench_ = std::make_shared(); - testbench_->bootstrap(); - } } void SetUp() override { connector::registerConnectorFactory( std::make_shared()); + testbench_ = std::make_shared(); + testbench_->bootstrap(); auto hiveConnector = connector::getConnectorFactory( connector::hive::HiveConnectorFactory::kHiveConnectorName) ->newConnector( - exec::test::kHiveConnectorId, gcsOptions(), ioExecutor_.get()); + exec::test::kHiveConnectorId, + testbench_->hiveConfig(), + ioExecutor_.get()); connector::registerConnector(hiveConnector); parquet::registerParquetReaderFactory(); parquet::registerParquetWriterFactory(); @@ -59,25 +59,14 @@ class GCSInsertTest : public testing::Test, public test::InsertTest { connector::unregisterConnector(exec::test::kHiveConnectorId); } - std::shared_ptr gcsOptions() const { - static std::unordered_map configOverride = {}; - - configOverride["hive.gcs.scheme"] = "http"; - configOverride["hive.gcs.endpoint"] = "localhost:" + testbench_->port(); - return std::make_shared( - std::move(configOverride)); - } - - static std::shared_ptr testbench_; + std::shared_ptr testbench_; std::unique_ptr ioExecutor_; }; - -std::shared_ptr GCSInsertTest::testbench_ = nullptr; } // namespace TEST_F(GCSInsertTest, gcsInsertTest) { const int64_t kExpectedRows = 1'000; - const auto gcsBucket = gcsURI(testbench_->preexistingBucketName()); + const auto gcsBucket = gcsURI(testbench_->preexistingBucketName(), ""); runInsertTest(gcsBucket, kExpectedRows, pool()); } } // namespace facebook::velox::filesystems diff --git a/velox/connectors/hive/storage_adapters/gcs/tests/GcsTestbench.h b/velox/connectors/hive/storage_adapters/gcs/tests/GCSTestbench.h similarity index 79% rename from velox/connectors/hive/storage_adapters/gcs/tests/GcsTestbench.h rename to velox/connectors/hive/storage_adapters/gcs/tests/GCSTestbench.h index 38e5bd561bf90..f9bcffcc8c465 100644 --- a/velox/connectors/hive/storage_adapters/gcs/tests/GcsTestbench.h +++ b/velox/connectors/hive/storage_adapters/gcs/tests/GCSTestbench.h @@ -21,6 +21,7 @@ #include #include "gtest/gtest.h" +#include "velox/common/config/Config.h" #include "velox/connectors/hive/storage_adapters/gcs/GCSUtil.h" #include "velox/exec/tests/utils/PortUtil.h" @@ -30,11 +31,11 @@ namespace gcs = google::cloud::storage; namespace facebook::velox::filesystems { -class GcsTestbench : public testing::Environment { +class GCSTestbench : public testing::Environment { public: - GcsTestbench() { - auto port = facebook::velox::exec::test::getFreePorts(1); - port_ = std::to_string(port[0]); + GCSTestbench() { + auto port = std::to_string(facebook::velox::exec::test::getFreePorts(1)[0]); + endpoint_ = "http://localhost:" + port; std::vector names{"python3", "python"}; // If the build script or application developer provides a value in the // PYTHON environment variable, then just use that. @@ -58,7 +59,7 @@ class GcsTestbench : public testing::Environment { "-m", "testbench", "--port", - port_, + port, group_); if (serverProcess_.valid() && serverProcess_.running()) break; @@ -71,7 +72,7 @@ class GcsTestbench : public testing::Environment { error_ = std::move(error); } - ~GcsTestbench() override { + ~GCSTestbench() override { // Brutal shutdown, kill the full process group because the GCS testbench // may launch additional children. group_.terminate(); @@ -80,12 +81,18 @@ class GcsTestbench : public testing::Environment { } } - const std::string& port() const { - return port_; - } + std::shared_ptr hiveConfig( + const std::unordered_map configOverride = {}) + const { + std::unordered_map config( + {{"hive.gcs.endpoint", endpoint_}}); + + // Update the default config map with the supplied configOverride map + for (const auto& [configName, configValue] : configOverride) { + config[configName] = configValue; + } - const std::string& error() const { - return error_; + return std::make_shared(std::move(config)); } std::string_view preexistingBucketName() { @@ -98,13 +105,13 @@ class GcsTestbench : public testing::Environment { void bootstrap() { ASSERT_THAT(this, ::testing::NotNull()); - ASSERT_THAT(this->error(), ::testing::IsEmpty()); + ASSERT_THAT(this->error_, ::testing::IsEmpty()); // Create a bucket and a small file in the testbench. This makes it easier // to bootstrap GcsFileSystem and its tests. auto client = gcs::Client( google::cloud::Options{} - .set("http://localhost:" + this->port()) + .set(this->endpoint_) .set(gc::MakeInsecureCredentials())); bucketName_ = "test1-gcs"; @@ -122,7 +129,7 @@ class GcsTestbench : public testing::Environment { } private: - std::string port_; + std::string endpoint_; bp::child serverProcess_; bp::group group_; std::string error_; diff --git a/velox/connectors/hive/storage_adapters/s3fs/tests/S3MultipleEndpointsTest.cpp b/velox/connectors/hive/storage_adapters/s3fs/tests/S3MultipleEndpointsTest.cpp index c0ce3af684048..5472905238077 100644 --- a/velox/connectors/hive/storage_adapters/s3fs/tests/S3MultipleEndpointsTest.cpp +++ b/velox/connectors/hive/storage_adapters/s3fs/tests/S3MultipleEndpointsTest.cpp @@ -33,7 +33,7 @@ using namespace facebook::velox::exec::test; namespace facebook::velox { namespace { -class S3MultipleEndpoints : public S3Test { +class S3MultipleEndpoints : public S3Test, public ::test::VectorTestBase { public: static void SetUpTestCase() { memory::MemoryManager::testingSetInstance({});