From 731f492e4648fd73ef4ee378f38b79e86ca77b78 Mon Sep 17 00:00:00 2001 From: Deepak Majeti Date: Sat, 26 Oct 2024 07:29:16 -0400 Subject: [PATCH] address review comments and Gcs naming --- velox/benchmarks/filesystem/ReadBenchmark.cpp | 4 +- velox/connectors/hive/HiveConfig.cpp | 8 +- velox/connectors/hive/HiveConfig.h | 8 +- .../hive/storage_adapters/gcs/CMakeLists.txt | 4 +- .../{GCSFileSystem.cpp => GcsFileSystem.cpp} | 88 +++++++++---------- .../gcs/{GCSFileSystem.h => GcsFileSystem.h} | 4 +- .../gcs/{GCSUtil.cpp => GcsUtil.cpp} | 4 +- .../gcs/{GCSUtil.h => GcsUtil.h} | 24 ++--- ...leSystem.cpp => RegisterGcsFileSystem.cpp} | 20 ++--- ...CSFileSystem.h => RegisterGcsFileSystem.h} | 2 +- .../gcs/examples/CMakeLists.txt | 2 +- ...emExample.cpp => GcsFileSystemExample.cpp} | 4 +- .../storage_adapters/gcs/tests/CMakeLists.txt | 4 +- .../tests/{GCSTestbench.h => GcsEmulator.h} | 20 +++-- ...leSystemTest.cpp => GcsFileSystemTest.cpp} | 47 +++++----- .../{GCSInsertTest.cpp => GcsInsertTest.cpp} | 14 +-- .../{GCSUtilTest.cpp => GcsUtilTest.cpp} | 16 ++-- .../connectors/hive/tests/HiveConfigTest.cpp | 4 +- velox/tool/trace/QueryReplayer.cpp | 4 +- 19 files changed, 141 insertions(+), 140 deletions(-) rename velox/connectors/hive/storage_adapters/gcs/{GCSFileSystem.cpp => GcsFileSystem.cpp} (85%) rename velox/connectors/hive/storage_adapters/gcs/{GCSFileSystem.h => GcsFileSystem.h} (97%) rename velox/connectors/hive/storage_adapters/gcs/{GCSUtil.cpp => GcsUtil.cpp} (88%) rename velox/connectors/hive/storage_adapters/gcs/{GCSUtil.h => GcsUtil.h} (59%) rename velox/connectors/hive/storage_adapters/gcs/{RegisterGCSFileSystem.cpp => RegisterGcsFileSystem.cpp} (82%) rename velox/connectors/hive/storage_adapters/gcs/{RegisterGCSFileSystem.h => RegisterGcsFileSystem.h} (96%) rename velox/connectors/hive/storage_adapters/gcs/examples/{GCSFileSystemExample.cpp => GcsFileSystemExample.cpp} (94%) rename velox/connectors/hive/storage_adapters/gcs/tests/{GCSTestbench.h => GcsEmulator.h} (84%) rename velox/connectors/hive/storage_adapters/gcs/tests/{GCSFileSystemTest.cpp => GcsFileSystemTest.cpp} (87%) rename velox/connectors/hive/storage_adapters/gcs/tests/{GCSInsertTest.cpp => GcsInsertTest.cpp} (86%) rename velox/connectors/hive/storage_adapters/gcs/tests/{GCSUtilTest.cpp => GcsUtilTest.cpp} (69%) diff --git a/velox/benchmarks/filesystem/ReadBenchmark.cpp b/velox/benchmarks/filesystem/ReadBenchmark.cpp index f99639b8c9663..844f84c34ad60 100644 --- a/velox/benchmarks/filesystem/ReadBenchmark.cpp +++ b/velox/benchmarks/filesystem/ReadBenchmark.cpp @@ -18,7 +18,7 @@ #include "velox/common/config/Config.h" #include "velox/connectors/hive/storage_adapters/abfs/RegisterAbfsFileSystem.h" -#include "velox/connectors/hive/storage_adapters/gcs/RegisterGCSFileSystem.h" +#include "velox/connectors/hive/storage_adapters/gcs/RegisterGcsFileSystem.h" #include "velox/connectors/hive/storage_adapters/hdfs/RegisterHdfsFileSystem.h" #include "velox/connectors/hive/storage_adapters/s3fs/RegisterS3FileSystem.h" @@ -105,7 +105,7 @@ void ReadBenchmark::initialize() { } else { filesystems::registerLocalFileSystem(); filesystems::registerS3FileSystem(); - filesystems::registerGCSFileSystem(); + filesystems::registerGcsFileSystem(); filesystems::registerHdfsFileSystem(); filesystems::abfs::registerAbfsFileSystem(); std::shared_ptr config; diff --git a/velox/connectors/hive/HiveConfig.cpp b/velox/connectors/hive/HiveConfig.cpp index 837d18aeeb4bf..f93fbd760dc1f 100644 --- a/velox/connectors/hive/HiveConfig.cpp +++ b/velox/connectors/hive/HiveConfig.cpp @@ -136,20 +136,20 @@ std::optional HiveConfig::s3RetryMode() const { } std::string HiveConfig::gcsEndpoint() const { - return config_->get(kGCSEndpoint, std::string("")); + return config_->get(kGcsEndpoint, std::string("")); } std::string HiveConfig::gcsCredentialsPath() const { - return config_->get(kGCSCredentialsPath, std::string("")); + return config_->get(kGcsCredentialsPath, std::string("")); } std::optional HiveConfig::gcsMaxRetryCount() const { - return static_cast>(config_->get(kGCSMaxRetryCount)); + return static_cast>(config_->get(kGcsMaxRetryCount)); } std::optional HiveConfig::gcsMaxRetryTime() const { return static_cast>( - config_->get(kGCSMaxRetryTime)); + config_->get(kGcsMaxRetryTime)); } bool HiveConfig::isOrcUseColumnNames(const config::ConfigBase* session) const { diff --git a/velox/connectors/hive/HiveConfig.h b/velox/connectors/hive/HiveConfig.h index 800bf498ac0bc..b894fbc29c583 100644 --- a/velox/connectors/hive/HiveConfig.h +++ b/velox/connectors/hive/HiveConfig.h @@ -105,17 +105,17 @@ class HiveConfig { static constexpr const char* kS3RetryMode = "hive.s3.retry-mode"; /// The GCS storage endpoint server. - static constexpr const char* kGCSEndpoint = "hive.gcs.endpoint"; + static constexpr const char* kGcsEndpoint = "hive.gcs.endpoint"; /// The GCS service account configuration JSON key file. - static constexpr const char* kGCSCredentialsPath = + static constexpr const char* kGcsCredentialsPath = "hive.gcs.json-key-file-path"; /// The GCS maximum retry counter of transient errors. - static constexpr const char* kGCSMaxRetryCount = "hive.gcs.max-retry-count"; + static constexpr const char* kGcsMaxRetryCount = "hive.gcs.max-retry-count"; /// The GCS maximum time allowed to retry transient errors. - static constexpr const char* kGCSMaxRetryTime = "hive.gcs.max-retry-time"; + static constexpr const char* kGcsMaxRetryTime = "hive.gcs.max-retry-time"; /// Maps table field names to file field names using names, not indices. // TODO: remove hive_orc_use_column_names since it doesn't exist in presto, diff --git a/velox/connectors/hive/storage_adapters/gcs/CMakeLists.txt b/velox/connectors/hive/storage_adapters/gcs/CMakeLists.txt index 57ee434d50f43..e9f5f98520c04 100644 --- a/velox/connectors/hive/storage_adapters/gcs/CMakeLists.txt +++ b/velox/connectors/hive/storage_adapters/gcs/CMakeLists.txt @@ -14,10 +14,10 @@ # for generated headers -velox_add_library(velox_gcs RegisterGCSFileSystem.cpp) +velox_add_library(velox_gcs RegisterGcsFileSystem.cpp) if(VELOX_ENABLE_GCS) - velox_sources(velox_gcs PRIVATE GCSFileSystem.cpp GCSUtil.cpp) + velox_sources(velox_gcs PRIVATE GcsFileSystem.cpp GcsUtil.cpp) velox_link_libraries(velox_gcs velox_dwio_common Folly::folly google-cloud-cpp::storage) diff --git a/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp b/velox/connectors/hive/storage_adapters/gcs/GcsFileSystem.cpp similarity index 85% rename from velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp rename to velox/connectors/hive/storage_adapters/gcs/GcsFileSystem.cpp index 07d85f5746b50..6bc7aaf0cb82a 100644 --- a/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp +++ b/velox/connectors/hive/storage_adapters/gcs/GcsFileSystem.cpp @@ -14,12 +14,12 @@ * limitations under the License. */ -#include "velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.h" +#include "velox/connectors/hive/storage_adapters/gcs/GcsFileSystem.h" #include "velox/common/base/Exceptions.h" #include "velox/common/config/Config.h" #include "velox/common/file/File.h" #include "velox/connectors/hive/HiveConfig.h" -#include "velox/connectors/hive/storage_adapters/gcs/GCSUtil.h" +#include "velox/connectors/hive/storage_adapters/gcs/GcsUtil.h" #include "velox/core/QueryConfig.h" #include @@ -43,7 +43,7 @@ namespace gc = ::google::cloud; // upload this buffer with zero copies if possible. auto constexpr kUploadBufferSize = 256 * 1024; -inline void checkGCSStatus( +inline void checkGcsStatus( const gc::Status outcome, const std::string_view& errorMsgPrefix, const std::string& bucket, @@ -54,7 +54,7 @@ inline void checkGCSStatus( errorMsgPrefix, gcsURI(bucket, key), outcome.error_info().domain(), - getErrorStringFromGCSError(outcome.code()), + getErrorStringFromGcsError(outcome.code()), outcome.message()); if (outcome.code() == gc::StatusCode::kNotFound) { VELOX_FILE_NOT_FOUND_ERROR(errMsg); @@ -63,12 +63,12 @@ inline void checkGCSStatus( } } -class GCSReadFile final : public ReadFile { +class GcsReadFile final : public ReadFile { public: - GCSReadFile(const std::string& path, std::shared_ptr client) + GcsReadFile(const std::string& path, std::shared_ptr client) : client_(std::move(client)) { // assumption it's a proper path - setBucketAndKeyFromGCSPath(path, bucket_, key_); + setBucketAndKeyFromGcsPath(path, bucket_, key_); } // Gets the length of the file. @@ -87,7 +87,7 @@ class GCSReadFile final : public ReadFile { // get metadata and initialize length auto metadata = client_->GetObjectMetadata(bucket_, key_); if (!metadata.ok()) { - checkGCSStatus( + checkGcsStatus( metadata.status(), "Failed to get metadata for GCS object", bucket_, @@ -138,7 +138,7 @@ class GCSReadFile final : public ReadFile { } uint64_t memoryUsage() const override { - return sizeof(GCSReadFile) // this class + return sizeof(GcsReadFile) // this class + sizeof(gcs::Client) // pointee + kUploadBufferSize; // buffer size } @@ -162,13 +162,13 @@ class GCSReadFile final : public ReadFile { gcs::ObjectReadStream stream = client_->ReadObject( bucket_, key_, gcs::ReadRange(offset, offset + length)); if (!stream) { - checkGCSStatus( + checkGcsStatus( stream.status(), "Failed to get GCS object", bucket_, key_); } stream.read(position, length); if (!stream) { - checkGCSStatus( + checkGcsStatus( stream.status(), "Failed to get read object", bucket_, key_); } bytesRead_ += length; @@ -180,16 +180,16 @@ class GCSReadFile final : public ReadFile { std::atomic length_ = -1; }; -class GCSWriteFile final : public WriteFile { +class GcsWriteFile final : public WriteFile { public: - explicit GCSWriteFile( + explicit GcsWriteFile( const std::string& path, std::shared_ptr client) : client_(client) { - setBucketAndKeyFromGCSPath(path, bucket_, key_); + setBucketAndKeyFromGcsPath(path, bucket_, key_); } - ~GCSWriteFile() { + ~GcsWriteFile() { close(); } @@ -204,7 +204,7 @@ class GCSWriteFile final : public WriteFile { VELOX_CHECK(!object_metadata.ok(), "File already exists"); auto stream = client_->WriteObject(bucket_, key_); - checkGCSStatus( + checkGcsStatus( stream.last_status(), "Failed to open GCS object for writing", bucket_, @@ -254,9 +254,9 @@ class GCSWriteFile final : public WriteFile { namespace filesystems { using namespace connector::hive; -auto constexpr kGCSInvalidPath = "File {} is not a valid gcs file"; +auto constexpr kGcsInvalidPath = "File {} is not a valid gcs file"; -class GCSFileSystem::Impl { +class GcsFileSystem::Impl { public: Impl(const config::ConfigBase* config) : hiveConfig_(std::make_shared( @@ -264,7 +264,7 @@ class GCSFileSystem::Impl { ~Impl() = default; - // Use the input Config parameters and initialize the GCSClient. + // Use the input Config parameters and initialize the GcsClient. void initializeClient() { constexpr std::string_view kHttpsScheme{"https://"}; auto options = gc::Options{}; @@ -329,48 +329,48 @@ class GCSFileSystem::Impl { std::shared_ptr client_; }; -GCSFileSystem::GCSFileSystem(std::shared_ptr config) +GcsFileSystem::GcsFileSystem(std::shared_ptr config) : FileSystem(config) { impl_ = std::make_shared(config.get()); } -void GCSFileSystem::initializeClient() { +void GcsFileSystem::initializeClient() { impl_->initializeClient(); } -std::unique_ptr GCSFileSystem::openFileForRead( +std::unique_ptr GcsFileSystem::openFileForRead( std::string_view path, const FileOptions& options) { const auto gcspath = gcsPath(path); - auto gcsfile = std::make_unique(gcspath, impl_->getClient()); + auto gcsfile = std::make_unique(gcspath, impl_->getClient()); gcsfile->initialize(options); return gcsfile; } -std::unique_ptr GCSFileSystem::openFileForWrite( +std::unique_ptr GcsFileSystem::openFileForWrite( std::string_view path, const FileOptions& /*unused*/) { const auto gcspath = gcsPath(path); - auto gcsfile = std::make_unique(gcspath, impl_->getClient()); + auto gcsfile = std::make_unique(gcspath, impl_->getClient()); gcsfile->initialize(); return gcsfile; } -void GCSFileSystem::remove(std::string_view path) { - if (!isGCSFile(path)) { - VELOX_FAIL(kGCSInvalidPath, path); +void GcsFileSystem::remove(std::string_view path) { + if (!isGcsFile(path)) { + VELOX_FAIL(kGcsInvalidPath, path); } // We assume 'path' is well-formed here. std::string bucket; std::string object; const auto file = gcsPath(path); - setBucketAndKeyFromGCSPath(file, bucket, object); + setBucketAndKeyFromGcsPath(file, bucket, object); if (!object.empty()) { auto stat = impl_->getClient()->GetObjectMetadata(bucket, object); if (!stat.ok()) { - checkGCSStatus( + checkGcsStatus( stat.status(), "Failed to get metadata for GCS object", bucket, @@ -379,21 +379,21 @@ void GCSFileSystem::remove(std::string_view path) { } auto ret = impl_->getClient()->DeleteObject(bucket, object); if (!ret.ok()) { - checkGCSStatus( + checkGcsStatus( ret, "Failed to get metadata for GCS object", bucket, object); } } -bool GCSFileSystem::exists(std::string_view path) { +bool GcsFileSystem::exists(std::string_view path) { std::vector result; - if (!isGCSFile(path)) - VELOX_FAIL(kGCSInvalidPath, path); + if (!isGcsFile(path)) + VELOX_FAIL(kGcsInvalidPath, path); // We assume 'path' is well-formed here. const auto file = gcsPath(path); std::string bucket; std::string object; - setBucketAndKeyFromGCSPath(file, bucket, object); + setBucketAndKeyFromGcsPath(file, bucket, object); using ::google::cloud::StatusOr; StatusOr metadata = impl_->getClient()->GetBucketMetadata(bucket); @@ -401,19 +401,19 @@ bool GCSFileSystem::exists(std::string_view path) { return metadata.ok(); } -std::vector GCSFileSystem::list(std::string_view path) { +std::vector GcsFileSystem::list(std::string_view path) { std::vector result; - if (!isGCSFile(path)) - VELOX_FAIL(kGCSInvalidPath, path); + if (!isGcsFile(path)) + VELOX_FAIL(kGcsInvalidPath, path); // We assume 'path' is well-formed here. const auto file = gcsPath(path); std::string bucket; std::string object; - setBucketAndKeyFromGCSPath(file, bucket, object); + setBucketAndKeyFromGcsPath(file, bucket, object); for (auto&& metadata : impl_->getClient()->ListObjects(bucket)) { if (!metadata.ok()) { - checkGCSStatus( + checkGcsStatus( metadata.status(), "Failed to get metadata for GCS object", bucket, @@ -425,19 +425,19 @@ std::vector GCSFileSystem::list(std::string_view path) { return result; } -std::string GCSFileSystem::name() const { +std::string GcsFileSystem::name() const { return "GCS"; } -void GCSFileSystem::rename(std::string_view, std::string_view, bool) { +void GcsFileSystem::rename(std::string_view, std::string_view, bool) { VELOX_UNSUPPORTED("rename for GCS not implemented"); } -void GCSFileSystem::mkdir(std::string_view path) { +void GcsFileSystem::mkdir(std::string_view path) { VELOX_UNSUPPORTED("mkdir for GCS not implemented"); } -void GCSFileSystem::rmdir(std::string_view path) { +void GcsFileSystem::rmdir(std::string_view path) { VELOX_UNSUPPORTED("rmdir for GCS not implemented"); } diff --git a/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.h b/velox/connectors/hive/storage_adapters/gcs/GcsFileSystem.h similarity index 97% rename from velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.h rename to velox/connectors/hive/storage_adapters/gcs/GcsFileSystem.h index 0d80cacd9df13..99e4f94ab99ef 100644 --- a/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.h +++ b/velox/connectors/hive/storage_adapters/gcs/GcsFileSystem.h @@ -24,9 +24,9 @@ namespace facebook::velox::filesystems { /// We provide a registration method for read and write files so the appropriate /// type of file can be constructed based on a filename. See the /// (register|generate)ReadFile and (register|generate)WriteFile functions. -class GCSFileSystem : public FileSystem { +class GcsFileSystem : public FileSystem { public: - explicit GCSFileSystem(std::shared_ptr config); + explicit GcsFileSystem(std::shared_ptr config); /// Initialize the google::cloud::storage::Client from the input Config /// parameters. diff --git a/velox/connectors/hive/storage_adapters/gcs/GCSUtil.cpp b/velox/connectors/hive/storage_adapters/gcs/GcsUtil.cpp similarity index 88% rename from velox/connectors/hive/storage_adapters/gcs/GCSUtil.cpp rename to velox/connectors/hive/storage_adapters/gcs/GcsUtil.cpp index 5fefe0c18be7c..8035e4a1e29fc 100644 --- a/velox/connectors/hive/storage_adapters/gcs/GCSUtil.cpp +++ b/velox/connectors/hive/storage_adapters/gcs/GcsUtil.cpp @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "velox/connectors/hive/storage_adapters/gcs/GCSUtil.h" +#include "velox/connectors/hive/storage_adapters/gcs/GcsUtil.h" namespace facebook::velox { -std::string getErrorStringFromGCSError(const google::cloud::StatusCode& code) { +std::string getErrorStringFromGcsError(const google::cloud::StatusCode& code) { using ::google::cloud::StatusCode; switch (code) { diff --git a/velox/connectors/hive/storage_adapters/gcs/GCSUtil.h b/velox/connectors/hive/storage_adapters/gcs/GcsUtil.h similarity index 59% rename from velox/connectors/hive/storage_adapters/gcs/GCSUtil.h rename to velox/connectors/hive/storage_adapters/gcs/GcsUtil.h index c40df22fd5a6d..e16736fb938f1 100644 --- a/velox/connectors/hive/storage_adapters/gcs/GCSUtil.h +++ b/velox/connectors/hive/storage_adapters/gcs/GcsUtil.h @@ -22,25 +22,17 @@ namespace facebook::velox { namespace { constexpr const char* kSep{"/"}; -constexpr std::string_view kGCSScheme{"gs://"}; +constexpr std::string_view kGcsScheme{"gs://"}; } // namespace -static std::string_view kLoremIpsum = - "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor" - "incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis " - "nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat." - "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu" - "fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in" - "culpa qui officia deserunt mollit anim id est laborum."; +std::string getErrorStringFromGcsError(const google::cloud::StatusCode& error); -std::string getErrorStringFromGCSError(const google::cloud::StatusCode& error); - -inline bool isGCSFile(const std::string_view filename) { - return (filename.substr(0, kGCSScheme.size()) == kGCSScheme); +inline bool isGcsFile(const std::string_view filename) { + return (filename.substr(0, kGcsScheme.size()) == kGcsScheme); } -inline void setBucketAndKeyFromGCSPath( +inline void setBucketAndKeyFromGcsPath( const std::string& path, std::string& bucket, std::string& key) { @@ -51,19 +43,19 @@ inline void setBucketAndKeyFromGCSPath( inline std::string gcsURI(std::string_view bucket) { std::stringstream ss; - ss << kGCSScheme << bucket; + ss << kGcsScheme << bucket; return ss.str(); } inline std::string gcsURI(std::string_view bucket, std::string_view key) { std::stringstream ss; - ss << kGCSScheme << bucket << kSep << key; + ss << kGcsScheme << bucket << kSep << key; return ss.str(); } inline std::string gcsPath(const std::string_view& path) { // Remove the prefix gcs:// from the given path - return std::string(path.substr(kGCSScheme.length())); + return std::string(path.substr(kGcsScheme.length())); } } // namespace facebook::velox diff --git a/velox/connectors/hive/storage_adapters/gcs/RegisterGCSFileSystem.cpp b/velox/connectors/hive/storage_adapters/gcs/RegisterGcsFileSystem.cpp similarity index 82% rename from velox/connectors/hive/storage_adapters/gcs/RegisterGCSFileSystem.cpp rename to velox/connectors/hive/storage_adapters/gcs/RegisterGcsFileSystem.cpp index cb70a1a6087c5..1622e43f60856 100644 --- a/velox/connectors/hive/storage_adapters/gcs/RegisterGCSFileSystem.cpp +++ b/velox/connectors/hive/storage_adapters/gcs/RegisterGcsFileSystem.cpp @@ -16,15 +16,15 @@ #ifdef VELOX_ENABLE_GCS #include "velox/common/config/Config.h" -#include "velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.h" // @manual -#include "velox/connectors/hive/storage_adapters/gcs/GCSUtil.h" // @manual +#include "velox/connectors/hive/storage_adapters/gcs/GcsFileSystem.h" // @manual +#include "velox/connectors/hive/storage_adapters/gcs/GcsUtil.h" // @manual #include "velox/dwio/common/FileSink.h" #endif namespace facebook::velox::filesystems { #ifdef VELOX_ENABLE_GCS -folly::once_flag GCSInstantiationFlag; +folly::once_flag GcsInstantiationFlag; std::function(std::shared_ptr, std::string_view)> @@ -37,12 +37,12 @@ gcsFileSystemGenerator() { // TODO: Support multiple GCSFileSystem instances using a cache // Initialize on first access and reuse after that. static std::shared_ptr gcsfs; - folly::call_once(GCSInstantiationFlag, [&properties]() { - std::shared_ptr fs; + folly::call_once(GcsInstantiationFlag, [&properties]() { + std::shared_ptr fs; if (properties != nullptr) { - fs = std::make_shared(properties); + fs = std::make_shared(properties); } else { - fs = std::make_shared( + fs = std::make_shared( std::make_shared( std::unordered_map())); } @@ -57,7 +57,7 @@ gcsFileSystemGenerator() { std::unique_ptr gcsWriteFileSinkGenerator( const std::string& fileURI, const velox::dwio::common::FileSink::Options& options) { - if (isGCSFile(fileURI)) { + if (isGcsFile(fileURI)) { auto fileSystem = filesystems::getFileSystem(fileURI, options.connectorProperties); return std::make_unique( @@ -70,9 +70,9 @@ std::unique_ptr gcsWriteFileSinkGenerator( } #endif -void registerGCSFileSystem() { +void registerGcsFileSystem() { #ifdef VELOX_ENABLE_GCS - registerFileSystem(isGCSFile, gcsFileSystemGenerator()); + registerFileSystem(isGcsFile, gcsFileSystemGenerator()); dwio::common::FileSink::registerFactory( std::function(gcsWriteFileSinkGenerator)); #endif diff --git a/velox/connectors/hive/storage_adapters/gcs/RegisterGCSFileSystem.h b/velox/connectors/hive/storage_adapters/gcs/RegisterGcsFileSystem.h similarity index 96% rename from velox/connectors/hive/storage_adapters/gcs/RegisterGCSFileSystem.h rename to velox/connectors/hive/storage_adapters/gcs/RegisterGcsFileSystem.h index 806c987bad9b1..b0f668d6f413d 100644 --- a/velox/connectors/hive/storage_adapters/gcs/RegisterGCSFileSystem.h +++ b/velox/connectors/hive/storage_adapters/gcs/RegisterGcsFileSystem.h @@ -19,6 +19,6 @@ namespace facebook::velox::filesystems { // Register the GCS filesystem. -void registerGCSFileSystem(); +void registerGcsFileSystem(); } // namespace facebook::velox::filesystems diff --git a/velox/connectors/hive/storage_adapters/gcs/examples/CMakeLists.txt b/velox/connectors/hive/storage_adapters/gcs/examples/CMakeLists.txt index 29f9342874604..1363d688da446 100644 --- a/velox/connectors/hive/storage_adapters/gcs/examples/CMakeLists.txt +++ b/velox/connectors/hive/storage_adapters/gcs/examples/CMakeLists.txt @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -add_executable(velox_gcsfile_example GCSFileSystemExample.cpp) +add_executable(velox_gcsfile_example GcsFileSystemExample.cpp) target_link_libraries( velox_gcsfile_example Folly::folly diff --git a/velox/connectors/hive/storage_adapters/gcs/examples/GCSFileSystemExample.cpp b/velox/connectors/hive/storage_adapters/gcs/examples/GcsFileSystemExample.cpp similarity index 94% rename from velox/connectors/hive/storage_adapters/gcs/examples/GCSFileSystemExample.cpp rename to velox/connectors/hive/storage_adapters/gcs/examples/GcsFileSystemExample.cpp index ee026a86e0db9..08d9b83a01525 100644 --- a/velox/connectors/hive/storage_adapters/gcs/examples/GCSFileSystemExample.cpp +++ b/velox/connectors/hive/storage_adapters/gcs/examples/GcsFileSystemExample.cpp @@ -15,7 +15,7 @@ */ #include "velox/common/config/Config.h" #include "velox/common/file/File.h" -#include "velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.h" +#include "velox/connectors/hive/storage_adapters/gcs/GcsFileSystem.h" #include #include @@ -45,7 +45,7 @@ int main(int argc, char** argv) { gflags::ShowUsageWithFlags(argv[0]); return 1; } - filesystems::GCSFileSystem gcfs(newConfiguration()); + filesystems::GcsFileSystem gcfs(newConfiguration()); gcfs.initializeClient(); std::cout << "Opening file for read " << FLAGS_gcs_path << std::endl; std::unique_ptr file_read = gcfs.openFileForRead(FLAGS_gcs_path); diff --git a/velox/connectors/hive/storage_adapters/gcs/tests/CMakeLists.txt b/velox/connectors/hive/storage_adapters/gcs/tests/CMakeLists.txt index 3f3971981d717..6a775a72f07d3 100644 --- a/velox/connectors/hive/storage_adapters/gcs/tests/CMakeLists.txt +++ b/velox/connectors/hive/storage_adapters/gcs/tests/CMakeLists.txt @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -add_executable(velox_gcsfile_test GCSUtilTest.cpp GCSFileSystemTest.cpp) +add_executable(velox_gcsfile_test GcsUtilTest.cpp GcsFileSystemTest.cpp) add_test(velox_gcsfile_test velox_gcsfile_test) target_link_libraries( velox_gcsfile_test @@ -28,7 +28,7 @@ target_link_libraries( GTest::gtest GTest::gtest_main) -add_executable(velox_gcs_insert_test GCSInsertTest.cpp) +add_executable(velox_gcs_insert_test GcsInsertTest.cpp) add_test(velox_gcs_insert_test velox_gcs_insert_test) target_link_libraries( velox_gcs_insert_test diff --git a/velox/connectors/hive/storage_adapters/gcs/tests/GCSTestbench.h b/velox/connectors/hive/storage_adapters/gcs/tests/GcsEmulator.h similarity index 84% rename from velox/connectors/hive/storage_adapters/gcs/tests/GCSTestbench.h rename to velox/connectors/hive/storage_adapters/gcs/tests/GcsEmulator.h index f9bcffcc8c465..1a33326850d28 100644 --- a/velox/connectors/hive/storage_adapters/gcs/tests/GCSTestbench.h +++ b/velox/connectors/hive/storage_adapters/gcs/tests/GcsEmulator.h @@ -22,7 +22,7 @@ #include "gtest/gtest.h" #include "velox/common/config/Config.h" -#include "velox/connectors/hive/storage_adapters/gcs/GCSUtil.h" +#include "velox/connectors/hive/storage_adapters/gcs/GcsUtil.h" #include "velox/exec/tests/utils/PortUtil.h" namespace bp = boost::process; @@ -31,10 +31,18 @@ namespace gcs = google::cloud::storage; namespace facebook::velox::filesystems { -class GCSTestbench : public testing::Environment { +static std::string_view kLoremIpsum = + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor" + "incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis " + "nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat." + "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu" + "fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in" + "culpa qui officia deserunt mollit anim id est laborum."; + +class GcsEmulator : public testing::Environment { public: - GCSTestbench() { - auto port = std::to_string(facebook::velox::exec::test::getFreePorts(1)[0]); + GcsEmulator() { + auto port = std::to_string(exec::test::getFreePort()); endpoint_ = "http://localhost:" + port; std::vector names{"python3", "python"}; // If the build script or application developer provides a value in the @@ -61,7 +69,7 @@ class GCSTestbench : public testing::Environment { "--port", port, group_); - if (serverProcess_.valid() && serverProcess_.running()) + if (serverProcess_.valid()) break; error += " (failed to start)"; serverProcess_.terminate(); @@ -72,7 +80,7 @@ class GCSTestbench : public testing::Environment { error_ = std::move(error); } - ~GCSTestbench() override { + ~GcsEmulator() override { // Brutal shutdown, kill the full process group because the GCS testbench // may launch additional children. group_.terminate(); diff --git a/velox/connectors/hive/storage_adapters/gcs/tests/GCSFileSystemTest.cpp b/velox/connectors/hive/storage_adapters/gcs/tests/GcsFileSystemTest.cpp similarity index 87% rename from velox/connectors/hive/storage_adapters/gcs/tests/GCSFileSystemTest.cpp rename to velox/connectors/hive/storage_adapters/gcs/tests/GcsFileSystemTest.cpp index 0900f9502ae97..69202142340a7 100644 --- a/velox/connectors/hive/storage_adapters/gcs/tests/GCSFileSystemTest.cpp +++ b/velox/connectors/hive/storage_adapters/gcs/tests/GcsFileSystemTest.cpp @@ -14,32 +14,33 @@ * limitations under the License. */ -#include "velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.h" +#include "velox/connectors/hive/storage_adapters/gcs/GcsFileSystem.h" #include "velox/common/base/tests/GTestUtils.h" #include "velox/common/file/File.h" -#include "velox/connectors/hive/storage_adapters/gcs/GCSUtil.h" -#include "velox/connectors/hive/storage_adapters/gcs/tests/GCSTestbench.h" +#include "velox/connectors/hive/storage_adapters/gcs/GcsUtil.h" +#include "velox/connectors/hive/storage_adapters/gcs/tests/GcsEmulator.h" #include "velox/exec/tests/utils/TempFilePath.h" #include "gtest/gtest.h" namespace facebook::velox::filesystems { namespace { -class GCSFileSystemTest : public testing::Test { + +class GcsFileSystemTest : public testing::Test { public: void SetUp() { - testbench_ = std::make_shared(); + testbench_ = std::make_shared(); testbench_->bootstrap(); } - std::shared_ptr testbench_; + std::shared_ptr testbench_; }; -TEST_F(GCSFileSystemTest, readFile) { +TEST_F(GcsFileSystemTest, readFile) { const auto gcsFile = gcsURI( testbench_->preexistingBucketName(), testbench_->preexistingObjectName()); - filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); + filesystems::GcsFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); auto readFile = gcfs.openFileForRead(gcsFile); std::int64_t size = readFile->size(); @@ -72,11 +73,11 @@ TEST_F(GCSFileSystemTest, readFile) { ASSERT_EQ(std::string_view(buff3, sizeof(buff3)), kLoremIpsum.substr(80, 30)); } -TEST_F(GCSFileSystemTest, writeAndReadFile) { +TEST_F(GcsFileSystemTest, writeAndReadFile) { const std::string_view newFile = "readWriteFile.txt"; const auto gcsFile = gcsURI(testbench_->preexistingBucketName(), newFile); - filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); + filesystems::GcsFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); auto writeFile = gcfs.openFileForWrite(gcsFile); std::string dataContent = @@ -102,17 +103,17 @@ TEST_F(GCSFileSystemTest, writeAndReadFile) { EXPECT_EQ(readFile->pread(0, size), dataContent); // Opening an existing file for write must be an error. - filesystems::GCSFileSystem newGcfs(testbench_->hiveConfig()); + filesystems::GcsFileSystem newGcfs(testbench_->hiveConfig()); newGcfs.initializeClient(); VELOX_ASSERT_THROW(newGcfs.openFileForWrite(gcsFile), "File already exists"); } -TEST_F(GCSFileSystemTest, renameNotImplemented) { +TEST_F(GcsFileSystemTest, renameNotImplemented) { const std::string_view file = "newTest.txt"; const auto gcsExistingFile = gcsURI( testbench_->preexistingBucketName(), testbench_->preexistingObjectName()); const auto gcsNewFile = gcsURI(testbench_->preexistingBucketName(), file); - filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); + filesystems::GcsFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); gcfs.openFileForRead(gcsExistingFile); VELOX_ASSERT_THROW( @@ -120,27 +121,27 @@ TEST_F(GCSFileSystemTest, renameNotImplemented) { "rename for GCS not implemented"); } -TEST_F(GCSFileSystemTest, mkdirNotImplemented) { +TEST_F(GcsFileSystemTest, mkdirNotImplemented) { const std::string_view dir = "newDirectory"; const auto gcsNewDirectory = gcsURI(testbench_->preexistingBucketName(), dir); - filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); + filesystems::GcsFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); VELOX_ASSERT_THROW( gcfs.mkdir(gcsNewDirectory), "mkdir for GCS not implemented"); } -TEST_F(GCSFileSystemTest, rmdirNotImplemented) { +TEST_F(GcsFileSystemTest, rmdirNotImplemented) { const std::string_view dir = "Directory"; const auto gcsDirectory = gcsURI(testbench_->preexistingBucketName(), dir); - filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); + filesystems::GcsFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); VELOX_ASSERT_THROW(gcfs.rmdir(gcsDirectory), "rmdir for GCS not implemented"); } -TEST_F(GCSFileSystemTest, missingFile) { +TEST_F(GcsFileSystemTest, missingFile) { const std::string_view file = "newTest.txt"; const auto gcsFile = gcsURI(testbench_->preexistingBucketName(), file); - filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); + filesystems::GcsFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); VELOX_ASSERT_RUNTIME_THROW_CODE( gcfs.openFileForRead(gcsFile), @@ -148,8 +149,8 @@ TEST_F(GCSFileSystemTest, missingFile) { "\\\"message\\\": \\\"Live version of object test1-gcs/newTest.txt does not exist.\\\""); } -TEST_F(GCSFileSystemTest, missingBucket) { - filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); +TEST_F(GcsFileSystemTest, missingBucket) { + filesystems::GcsFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); const std::string_view gcsFile = "gs://dummy/foo.txt"; VELOX_ASSERT_RUNTIME_THROW_CODE( @@ -158,7 +159,7 @@ TEST_F(GCSFileSystemTest, missingBucket) { "\\\"message\\\": \\\"Bucket dummy does not exist.\\\""); } -TEST_F(GCSFileSystemTest, credentialsConfig) { +TEST_F(GcsFileSystemTest, credentialsConfig) { // credentials from arrow gcsfs test case // While this service account key has the correct format, it cannot be used // for authentication because the key has been deactivated on the server-side, @@ -207,7 +208,7 @@ TEST_F(GCSFileSystemTest, credentialsConfig) { {"hive.gcs.json-key-file-path", jsonFile->getPath()}}; auto hiveConfig = testbench_->hiveConfig(configOverride); - filesystems::GCSFileSystem gcfs(hiveConfig); + filesystems::GcsFileSystem gcfs(hiveConfig); gcfs.initializeClient(); const auto gcsFile = gcsURI( testbench_->preexistingBucketName(), testbench_->preexistingObjectName()); diff --git a/velox/connectors/hive/storage_adapters/gcs/tests/GCSInsertTest.cpp b/velox/connectors/hive/storage_adapters/gcs/tests/GcsInsertTest.cpp similarity index 86% rename from velox/connectors/hive/storage_adapters/gcs/tests/GCSInsertTest.cpp rename to velox/connectors/hive/storage_adapters/gcs/tests/GcsInsertTest.cpp index ef34e7c2f76c1..04ad6eedd5d00 100644 --- a/velox/connectors/hive/storage_adapters/gcs/tests/GCSInsertTest.cpp +++ b/velox/connectors/hive/storage_adapters/gcs/tests/GcsInsertTest.cpp @@ -17,8 +17,8 @@ #include #include -#include "velox/connectors/hive/storage_adapters/gcs/RegisterGCSFileSystem.h" -#include "velox/connectors/hive/storage_adapters/gcs/tests/GCSTestbench.h" +#include "velox/connectors/hive/storage_adapters/gcs/RegisterGcsFileSystem.h" +#include "velox/connectors/hive/storage_adapters/gcs/tests/GcsEmulator.h" #include "velox/connectors/hive/storage_adapters/test_common/InsertTest.h" using namespace facebook::velox::exec::test; @@ -26,17 +26,17 @@ using namespace facebook::velox::exec::test; namespace facebook::velox::filesystems { namespace { -class GCSInsertTest : public testing::Test, public test::InsertTest { +class GcsInsertTest : public testing::Test, public test::InsertTest { protected: static void SetUpTestSuite() { - registerGCSFileSystem(); + registerGcsFileSystem(); memory::MemoryManager::testingSetInstance({}); } void SetUp() override { connector::registerConnectorFactory( std::make_shared()); - testbench_ = std::make_shared(); + testbench_ = std::make_shared(); testbench_->bootstrap(); auto hiveConnector = connector::getConnectorFactory( @@ -59,12 +59,12 @@ class GCSInsertTest : public testing::Test, public test::InsertTest { connector::unregisterConnector(exec::test::kHiveConnectorId); } - std::shared_ptr testbench_; + std::shared_ptr testbench_; std::unique_ptr ioExecutor_; }; } // namespace -TEST_F(GCSInsertTest, gcsInsertTest) { +TEST_F(GcsInsertTest, gcsInsertTest) { const int64_t kExpectedRows = 1'000; const auto gcsBucket = gcsURI(testbench_->preexistingBucketName(), ""); runInsertTest(gcsBucket, kExpectedRows, pool()); diff --git a/velox/connectors/hive/storage_adapters/gcs/tests/GCSUtilTest.cpp b/velox/connectors/hive/storage_adapters/gcs/tests/GcsUtilTest.cpp similarity index 69% rename from velox/connectors/hive/storage_adapters/gcs/tests/GCSUtilTest.cpp rename to velox/connectors/hive/storage_adapters/gcs/tests/GcsUtilTest.cpp index 2e3896f99f7e1..fd31b692e4e89 100644 --- a/velox/connectors/hive/storage_adapters/gcs/tests/GCSUtilTest.cpp +++ b/velox/connectors/hive/storage_adapters/gcs/tests/GcsUtilTest.cpp @@ -14,23 +14,23 @@ * limitations under the License. */ -#include "velox/connectors/hive/storage_adapters/gcs/GCSUtil.h" +#include "velox/connectors/hive/storage_adapters/gcs/GcsUtil.h" #include "gtest/gtest.h" using namespace facebook::velox; -TEST(GCSUtilTest, isGCSFile) { - EXPECT_FALSE(isGCSFile("gs:")); - EXPECT_FALSE(isGCSFile("gs::/bucket")); - EXPECT_FALSE(isGCSFile("gs:/bucket")); - EXPECT_TRUE(isGCSFile("gs://bucket/file.txt")); +TEST(GcsUtilTest, isGcsFile) { + EXPECT_FALSE(isGcsFile("gs:")); + EXPECT_FALSE(isGcsFile("gs::/bucket")); + EXPECT_FALSE(isGcsFile("gs:/bucket")); + EXPECT_TRUE(isGcsFile("gs://bucket/file.txt")); } -TEST(GCSUtilTest, setBucketAndKeyFromGCSPath) { +TEST(GcsUtilTest, setBucketAndKeyFromGcsPath) { std::string bucket, key; auto path = "bucket/file.txt"; - setBucketAndKeyFromGCSPath(path, bucket, key); + setBucketAndKeyFromGcsPath(path, bucket, key); EXPECT_EQ(bucket, "bucket"); EXPECT_EQ(key, "file.txt"); } diff --git a/velox/connectors/hive/tests/HiveConfigTest.cpp b/velox/connectors/hive/tests/HiveConfigTest.cpp index 875002dbd0b13..fef2a6d8c1b94 100644 --- a/velox/connectors/hive/tests/HiveConfigTest.cpp +++ b/velox/connectors/hive/tests/HiveConfigTest.cpp @@ -94,8 +94,8 @@ TEST(HiveConfigTest, overrideConfig) { {HiveConfig::kS3AwsSecretKey, "hello"}, {HiveConfig::kS3IamRole, "hello"}, {HiveConfig::kS3IamRoleSessionName, "velox"}, - {HiveConfig::kGCSEndpoint, "hey"}, - {HiveConfig::kGCSCredentialsPath, "hey"}, + {HiveConfig::kGcsEndpoint, "hey"}, + {HiveConfig::kGcsCredentialsPath, "hey"}, {HiveConfig::kOrcUseColumnNames, "true"}, {HiveConfig::kFileColumnNamesReadAsLowerCase, "true"}, {HiveConfig::kAllowNullPartitionKeys, "false"}, diff --git a/velox/tool/trace/QueryReplayer.cpp b/velox/tool/trace/QueryReplayer.cpp index a0680d30a387a..a153d168657ae 100644 --- a/velox/tool/trace/QueryReplayer.cpp +++ b/velox/tool/trace/QueryReplayer.cpp @@ -24,7 +24,7 @@ #include "velox/connectors/hive/HiveDataSink.h" #include "velox/connectors/hive/TableHandle.h" #include "velox/connectors/hive/storage_adapters/abfs/RegisterAbfsFileSystem.h" -#include "velox/connectors/hive/storage_adapters/gcs/RegisterGCSFileSystem.h" +#include "velox/connectors/hive/storage_adapters/gcs/RegisterGcsFileSystem.h" #include "velox/connectors/hive/storage_adapters/hdfs/RegisterHdfsFileSystem.h" #include "velox/connectors/hive/storage_adapters/s3fs/RegisterS3FileSystem.h" #include "velox/core/PlanNode.h" @@ -82,7 +82,7 @@ void init() { filesystems::registerLocalFileSystem(); filesystems::registerS3FileSystem(); filesystems::registerHdfsFileSystem(); - filesystems::registerGCSFileSystem(); + filesystems::registerGcsFileSystem(); filesystems::abfs::registerAbfsFileSystem(); dwio::common::registerFileSinks();