diff --git a/cpp/velox/compute/VeloxBackend.cc b/cpp/velox/compute/VeloxBackend.cc index c453b9981fab..3beda675398b 100644 --- a/cpp/velox/compute/VeloxBackend.cc +++ b/cpp/velox/compute/VeloxBackend.cc @@ -256,18 +256,6 @@ void VeloxBackend::initConnector() { connectorConfMap[k] = v; } -#ifdef ENABLE_ABFS - const auto& confValue = backendConf_->rawConfigs(); - for (auto& [k, v] : confValue) { - if (k.find("fs.azure.account.key") == 0) { - connectorConfMap[k] = v; - } else if (k.find("spark.hadoop.fs.azure.account.key") == 0) { - constexpr int32_t accountKeyPrefixLength = 13; - connectorConfMap[k.substr(accountKeyPrefixLength)] = v; - } - } -#endif - connectorConfMap[velox::connector::hive::HiveConfig::kEnableFileHandleCache] = backendConf_->get(kVeloxFileHandleCacheEnabled, kVeloxFileHandleCacheEnabledDefault) ? "true" : "false"; diff --git a/cpp/velox/utils/ConfigExtractor.cc b/cpp/velox/utils/ConfigExtractor.cc index 9f2f8e0a950f..16531b6b60a9 100644 --- a/cpp/velox/utils/ConfigExtractor.cc +++ b/cpp/velox/utils/ConfigExtractor.cc @@ -52,8 +52,8 @@ std::shared_ptr getHiveConfig( #ifdef ENABLE_S3 using namespace facebook::velox::filesystems; - std::string_view kSparkHadoopPrefix = "spark.hadoop.fs.s3a."; - std::string_view kSparkHadoopBucketPrefix = "spark.hadoop.fs.s3a.bucket."; + std::string_view kSparkHadoopS3Prefix = "spark.hadoop.fs.s3a."; + std::string_view kSparkHadoopS3BucketPrefix = "spark.hadoop.fs.s3a.bucket."; // Log granularity of AWS C++ SDK const std::string kVeloxAwsSdkLogLevel = "spark.gluten.velox.awsSdkLogLevel"; @@ -87,7 +87,7 @@ std::shared_ptr getHiveConfig( auto sparkBaseConfigValue = [&](S3Config::Keys key) { std::stringstream ss; auto keyValue = sparkSuffixes.find(key)->second; - ss << kSparkHadoopPrefix << keyValue.first; + ss << kSparkHadoopS3Prefix << keyValue.first; auto sparkKey = ss.str(); if (conf->valueExists(sparkKey)) { return static_cast>(conf->get(sparkKey)); @@ -131,9 +131,9 @@ std::shared_ptr getHiveConfig( // Convert all Spark bucket configs to Velox bucket configs. for (const auto& [key, value] : conf->rawConfigs()) { - if (key.find(kSparkHadoopBucketPrefix) == 0) { + if (key.find(kSparkHadoopS3BucketPrefix) == 0) { std::string_view skey = key; - auto remaining = skey.substr(kSparkHadoopBucketPrefix.size()); + auto remaining = skey.substr(kSparkHadoopS3BucketPrefix.size()); int dot = remaining.find("."); auto bucketName = remaining.substr(0, dot); auto suffix = remaining.substr(dot + 1); @@ -189,6 +189,17 @@ std::shared_ptr getHiveConfig( } #endif +#ifdef ENABLE_ABFS + std::string_view kSparkHadoopPrefix = "spark.hadoop."; + std::string_view kSparkHadoopAbfsPrefix = "spark.hadoop.fs.azure."; + for (const auto& [key, value] : conf->rawConfigs()) { + if (key.find(kSparkHadoopAbfsPrefix) == 0) { + // Remove the SparkHadoopPrefix + hiveConfMap[key.substr(kSparkHadoopPrefix.size())] = value; + } + } +#endif + hiveConfMap[facebook::velox::connector::hive::HiveConfig::kEnableFileHandleCache] = conf->get(kVeloxFileHandleCacheEnabled, kVeloxFileHandleCacheEnabledDefault) ? "true" : "false"; diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala index 2c03ecf3e824..2a5aee07b390 100644 --- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala +++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala @@ -544,9 +544,9 @@ object GlutenConfig { // Hardware acceleraters backend val GLUTEN_SHUFFLE_CODEC_BACKEND = "spark.gluten.sql.columnar.shuffle.codecBackend" + // ABFS config - val ABFS_ACCOUNT_KEY = "hadoop.fs.azure.account.key" - val SPARK_ABFS_ACCOUNT_KEY: String = "spark." + ABFS_ACCOUNT_KEY + val ABFS_PREFIX = "fs.azure." // GCS config val GCS_PREFIX = "fs.gs." @@ -854,7 +854,7 @@ object GlutenConfig { // handle ABFS config conf - .filter(_._1.startsWith(SPARK_ABFS_ACCOUNT_KEY)) + .filter(_._1.startsWith(HADOOP_PREFIX + ABFS_PREFIX)) .foreach(entry => nativeConfMap.put(entry._1, entry._2)) // put in all GCS configs