Skip to content

Commit

Permalink
Fix GCS credential config and rename to hive.gcs.json-key-file-path (#…
Browse files Browse the repository at this point in the history
…11086)

Summary:
Current `hive.gcs.credentials` expects a JSON string with the GCS service account configuration.
However, this JSON string is expected to be unescaped by the google C++ SDK. It is not possible to pass an unescaped JSON string via a config file.
The norm seems to specify a file path with the service account configuration.
Presto uses `hive.gcs.json-key-file-path`. Velox will use the same.
https://github.com/prestodb/presto/blob/62ca0801fba8448b162fe89f8dab10e0c14158e1/presto-docs/src/main/sphinx/release/release-0.225.rst#hive-changes

Pull Request resolved: #11086

Reviewed By: xiaoxmeng, DanielHunte

Differential Revision: D63647134

Pulled By: kagamiori

fbshipit-source-id: 92ae01a348f51a2283a583b6e708b03d4ee87efa
majetideepak authored and facebook-github-bot committed Oct 1, 2024
1 parent b682500 commit fcc1b1c
Showing 7 changed files with 36 additions and 21 deletions.
4 changes: 2 additions & 2 deletions velox/connectors/hive/HiveConfig.cpp
Original file line number Diff line number Diff line change
@@ -143,8 +143,8 @@ std::string HiveConfig::gcsScheme() const {
return config_->get<std::string>(kGCSScheme, std::string("https"));
}

std::string HiveConfig::gcsCredentials() const {
return config_->get<std::string>(kGCSCredentials, std::string(""));
std::string HiveConfig::gcsCredentialsPath() const {
return config_->get<std::string>(kGCSCredentialsPath, std::string(""));
}

std::optional<int> HiveConfig::gcsMaxRetryCount() const {
7 changes: 4 additions & 3 deletions velox/connectors/hive/HiveConfig.h
Original file line number Diff line number Diff line change
@@ -110,8 +110,9 @@ class HiveConfig {
/// The GCS storage scheme, https for default credentials.
static constexpr const char* kGCSScheme = "hive.gcs.scheme";

/// The GCS service account configuration as json string
static constexpr const char* kGCSCredentials = "hive.gcs.credentials";
/// The GCS service account configuration JSON key file.
static constexpr const char* kGCSCredentialsPath =
"hive.gcs.json-key-file-path";

/// The GCS maximum retry counter of transient errors.
static constexpr const char* kGCSMaxRetryCount = "hive.gcs.max-retry-count";
@@ -284,7 +285,7 @@ class HiveConfig {

std::string gcsScheme() const;

std::string gcsCredentials() const;
std::string gcsCredentialsPath() const;

std::optional<int> gcsMaxRetryCount() const;

19 changes: 14 additions & 5 deletions velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp
Original file line number Diff line number Diff line change
@@ -295,12 +295,21 @@ class GCSFileSystem::Impl {
options.set<gcs::RestEndpointOption>(scheme + "://" + endpointOverride);
}

auto cred = hiveConfig_->gcsCredentials();
if (!cred.empty()) {
auto credentials = gc::MakeServiceAccountCredentials(cred);
options.set<gc::UnifiedCredentialsOption>(credentials);
auto credFile = hiveConfig_->gcsCredentialsPath();
if (!credFile.empty() && std::filesystem::exists(credFile)) {
std::ifstream jsonFile(credFile, std::ios::in);
if (!jsonFile.is_open()) {
LOG(WARNING) << "Error opening file " << credFile;
} else {
std::stringstream credsBuffer;
credsBuffer << jsonFile.rdbuf();
auto creds = credsBuffer.str();
auto credentials = gc::MakeServiceAccountCredentials(std::move(creds));
options.set<gc::UnifiedCredentialsOption>(credentials);
}
} else {
LOG(WARNING) << "Config::gcsCredentials is empty";
LOG(WARNING)
<< "Config hive.gcs.json-key-file-path is empty or key file path not found";
}

client_ = std::make_shared<gcs::Client>(options);
Original file line number Diff line number Diff line change
@@ -16,12 +16,13 @@ add_executable(velox_gcsfile_test GCSUtilTest.cpp GCSFileSystemTest.cpp)
add_test(velox_gcsfile_test velox_gcsfile_test)
target_link_libraries(
velox_gcsfile_test
velox_file
velox_gcs
velox_core
velox_hive_connector
velox_dwio_common_exception
velox_exec
velox_file
velox_gcs
velox_hive_connector
velox_temp_path
GTest::gmock
GTest::gtest
GTest::gtest_main)
Original file line number Diff line number Diff line change
@@ -310,7 +310,7 @@ TEST_F(GCSFileSystemTest, credentialsConfig) {
// for authentication because the key has been deactivated on the server-side,
// *and* the account(s) involved are deleted *and* they are not the accounts
// or projects do not match its contents.
configOverride["hive.gcs.credentials"] = R"""({
auto creds = R"""({
"type": "service_account",
"project_id": "foo-project",
"private_key_id": "a1a111aa1111a11a11a11aa111a111a1a1111111",
@@ -344,13 +344,17 @@ TEST_F(GCSFileSystemTest, credentialsConfig) {
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/foo-email%40foo-project.iam.gserviceaccount.com"
})""";
auto jsonFile = exec::test::TempFilePath::create();
std::ofstream credsOut(jsonFile->getPath());
credsOut << creds;
credsOut.close();
configOverride["hive.gcs.json-key-file-path"] = jsonFile->getPath();
configOverride["hive.gcs.scheme"] = "http";
configOverride["hive.gcs.endpoint"] = "localhost:" + testbench_->port();
std::shared_ptr<const config::ConfigBase> conf =
std::make_shared<const config::ConfigBase>(std::move(configOverride));

filesystems::GCSFileSystem gcfs(conf);

gcfs.initializeClient();
try {
const std::string gcsFile =
8 changes: 4 additions & 4 deletions velox/connectors/hive/tests/HiveConfigTest.cpp
Original file line number Diff line number Diff line change
@@ -44,7 +44,7 @@ TEST(HiveConfigTest, defaultConfig) {
ASSERT_EQ(hiveConfig.s3IAMRoleSessionName(), "velox-session");
ASSERT_EQ(hiveConfig.gcsEndpoint(), "");
ASSERT_EQ(hiveConfig.gcsScheme(), "https");
ASSERT_EQ(hiveConfig.gcsCredentials(), "");
ASSERT_EQ(hiveConfig.gcsCredentialsPath(), "");
ASSERT_EQ(hiveConfig.isOrcUseColumnNames(emptySession.get()), false);
ASSERT_EQ(
hiveConfig.isFileColumnNamesReadAsLowerCase(emptySession.get()), false);
@@ -95,7 +95,7 @@ TEST(HiveConfigTest, overrideConfig) {
{HiveConfig::kS3IamRoleSessionName, "velox"},
{HiveConfig::kGCSEndpoint, "hey"},
{HiveConfig::kGCSScheme, "http"},
{HiveConfig::kGCSCredentials, "hey"},
{HiveConfig::kGCSCredentialsPath, "hey"},
{HiveConfig::kOrcUseColumnNames, "true"},
{HiveConfig::kFileColumnNamesReadAsLowerCase, "true"},
{HiveConfig::kAllowNullPartitionKeys, "false"},
@@ -134,7 +134,7 @@ TEST(HiveConfigTest, overrideConfig) {
ASSERT_EQ(hiveConfig.s3IAMRoleSessionName(), "velox");
ASSERT_EQ(hiveConfig.gcsEndpoint(), "hey");
ASSERT_EQ(hiveConfig.gcsScheme(), "http");
ASSERT_EQ(hiveConfig.gcsCredentials(), "hey");
ASSERT_EQ(hiveConfig.gcsCredentialsPath(), "hey");
ASSERT_EQ(hiveConfig.isOrcUseColumnNames(emptySession.get()), true);
ASSERT_EQ(
hiveConfig.isFileColumnNamesReadAsLowerCase(emptySession.get()), true);
@@ -206,7 +206,7 @@ TEST(HiveConfigTest, overrideSession) {
ASSERT_EQ(hiveConfig.s3IAMRoleSessionName(), "velox-session");
ASSERT_EQ(hiveConfig.gcsEndpoint(), "");
ASSERT_EQ(hiveConfig.gcsScheme(), "https");
ASSERT_EQ(hiveConfig.gcsCredentials(), "");
ASSERT_EQ(hiveConfig.gcsCredentialsPath(), "");
ASSERT_EQ(hiveConfig.isOrcUseColumnNames(session.get()), true);
ASSERT_EQ(hiveConfig.isFileColumnNamesReadAsLowerCase(session.get()), true);

4 changes: 2 additions & 2 deletions velox/docs/configs.rst
Original file line number Diff line number Diff line change
@@ -641,10 +641,10 @@ Each query can override the config by setting corresponding query session proper
- string
-
- The GCS storage scheme, https for default credentials.
* - hive.gcs.credentials
* - hive.gcs.json-key-file-path
- string
-
- The GCS service account configuration as json string.
- The GCS service account configuration JSON key file.
* - hive.gcs.max-retry-count
- integer
-

0 comments on commit fcc1b1c

Please sign in to comment.