From a1817a9ad035aa4a65110b86c5d9e85a4e4a225b Mon Sep 17 00:00:00 2001 From: "w.montaz" Date: Wed, 11 Sep 2024 14:24:41 +0200 Subject: [PATCH 1/2] [CRITEO] Add a property to force canonicalization of hostname with WebHdfsFileSystem WebHdfsFileSystem does not enforce SPNEGO when using connectionFactory because the JDK automatically performs SPNEGO when a response is received with 401 + header 'WWW-Authenticate: Negotiate'. This part actually works fine: WebHdfsFileSystem gets a delegation token with SPNEGO and continues with this token. However, if we expect hostname canonicalization, the JDK has some restrictions: it requires the canonical hostname to be a longer form of the original hostname, otherwise the canonical hostname is ignored. This behavior can be found in class sun.security.krb5.PrincipalName, in the constructor: // RFC4120 does not recommend canonicalizing a hostname. // However, for compatibility reason, we will try // canonicalize it and see if the output looks better. String canonicalized = (InetAddress.getByName(hostName)). getCanonicalHostName(); // Looks if canonicalized is a longer format of hostName, // we accept cases like // bunny -> bunny.rabbit.hole if (canonicalized.toLowerCase(Locale.ENGLISH).startsWith( hostName.toLowerCase(Locale.ENGLISH)+".")) { hostName = canonicalized; } This means that when reaching namenodes via Consul, for instance (e.g. hadoop-hdfs-namenode-active-root.query.consul.preprod.crto.in), the canonicalization is simply ignored by the JDK because the canonicalized hostname is something like {something}.{dc}.hpc.criteo.(pre)prod, which does not start with the requested hostname. This commit makes it possible to canonicalize namenode addresses in WebHdfsFileSystem to overcome this issue. 
This behavior is activated by the property `dfs.webhdfs.host.canonicalize.enabled` (default: false) --- .../hadoop/hdfs/client/HdfsClientConfigKeys.java | 3 +++ .../apache/hadoop/hdfs/web/WebHdfsFileSystem.java | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java index 407462c6e757d..b7fdf26b60e37 100755 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java @@ -48,6 +48,9 @@ public interface HdfsClientConfigKeys { String DFS_WEBHDFS_OAUTH_ENABLED_KEY = "dfs.webhdfs.oauth2.enabled"; boolean DFS_WEBHDFS_OAUTH_ENABLED_DEFAULT = false; + String DFS_WEBHDFS_URI_CANONICALIZE = "dfs.webhdfs.host.canonicalize.enabled"; + boolean DFS_WEBHDFS_URI_CANONICALIZE_DEFAULT = false; + String DFS_WEBHDFS_REST_CSRF_ENABLED_KEY = "dfs.webhdfs.rest-csrf.enabled"; boolean DFS_WEBHDFS_REST_CSRF_ENABLED_DEFAULT = false; String DFS_WEBHDFS_REST_CSRF_CUSTOM_HEADER_KEY = diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java index dea58d6309016..e5fead7a9b831 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java @@ -250,6 +250,21 @@ public synchronized void initialize(URI uri, Configuration conf this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority()); this.nnAddrs = resolveNNAddr(); + boolean canonicalizeWebHdfsUri = conf.getBoolean( + 
HdfsClientConfigKeys.DFS_WEBHDFS_URI_CANONICALIZE, + HdfsClientConfigKeys.DFS_WEBHDFS_URI_CANONICALIZE_DEFAULT + ); + + if (canonicalizeWebHdfsUri) { + for (int i = 0; i < nnAddrs.length; i++) { + InetSocketAddress nonCanonicalizedAddr = nnAddrs[i]; + nnAddrs[i] = new InetSocketAddress( + nonCanonicalizedAddr.getAddress().getCanonicalHostName(), + nonCanonicalizedAddr.getPort() + ); + } + } + boolean isHA = HAUtilClient.isClientFailoverConfigured(conf, this.uri); boolean isLogicalUri = isHA && HAUtilClient.isLogicalUri(conf, this.uri); // In non-HA or non-logical URI case, the code needs to call From 5cbf219c7693271bcc41022c1737797c87095658 Mon Sep 17 00:00:00 2001 From: "w.montaz" Date: Wed, 11 Sep 2024 14:50:26 +0200 Subject: [PATCH 2/2] Change constant names --- .../org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java | 4 ++-- .../java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java index b7fdf26b60e37..f5b25cd9b07fc 100755 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java @@ -48,8 +48,8 @@ public interface HdfsClientConfigKeys { String DFS_WEBHDFS_OAUTH_ENABLED_KEY = "dfs.webhdfs.oauth2.enabled"; boolean DFS_WEBHDFS_OAUTH_ENABLED_DEFAULT = false; - String DFS_WEBHDFS_URI_CANONICALIZE = "dfs.webhdfs.host.canonicalize.enabled"; - boolean DFS_WEBHDFS_URI_CANONICALIZE_DEFAULT = false; + String DFS_WEBHDFS_HOST_CANONICALIZE_ENABLED_KEY = "dfs.webhdfs.host.canonicalize.enabled"; + boolean DFS_WEBHDFS_HOST_CANONICALIZE_ENABLED_DEFAULT = false; String DFS_WEBHDFS_REST_CSRF_ENABLED_KEY = 
"dfs.webhdfs.rest-csrf.enabled"; boolean DFS_WEBHDFS_REST_CSRF_ENABLED_DEFAULT = false; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java index e5fead7a9b831..0b16a1278e00d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java @@ -251,8 +251,8 @@ public synchronized void initialize(URI uri, Configuration conf this.nnAddrs = resolveNNAddr(); boolean canonicalizeWebHdfsUri = conf.getBoolean( - HdfsClientConfigKeys.DFS_WEBHDFS_URI_CANONICALIZE, - HdfsClientConfigKeys.DFS_WEBHDFS_URI_CANONICALIZE_DEFAULT + HdfsClientConfigKeys.DFS_WEBHDFS_HOST_CANONICALIZE_ENABLED_KEY, + HdfsClientConfigKeys.DFS_WEBHDFS_HOST_CANONICALIZE_ENABLED_DEFAULT ); if (canonicalizeWebHdfsUri) {