Skip to content

Commit

Permalink
HADOOP-19120. ApacheHttpClient adaptation in ABFS. (#6633)
Browse files Browse the repository at this point in the history
Apache HttpClient 4.5.x is the new default implementation for HTTP connections;
it supports a large, configurable pool of connections along with
the ability to limit their lifespan.

The networking library can be chosen using the configuration
option fs.azure.networking.library

The supported values are
- APACHE_HTTP_CLIENT : Use Apache HttpClient [Default]
- JDK_HTTP_URL_CONNECTION : Use JDK networking library

Important: unless the networking library is switched back to
the JDK, the Apache httpcore and httpclient libraries must be on the classpath.

Contributed by Pranav Saxena
  • Loading branch information
saxenapranav authored Jul 22, 2024
1 parent e48cd0e commit b60497f
Show file tree
Hide file tree
Showing 50 changed files with 3,934 additions and 443 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

/**
 * Signals that an operation was attempted on an underlying stream, cache or
 * other closable resource that is already closed.
 */
@InterfaceAudience.Public
@InterfaceStability.Unstable
public class ClosedIOException extends PathIOException {

  /**
   * Creates the exception for the given path; the caller-supplied message is
   * handed to {@link PathIOException} so that it is appended to the default
   * error message.
   *
   * @param path path that encountered the closed resource.
   * @param message custom error message.
   */
  public ClosedIOException(final String path, final String message) {
    super(path, message);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.fs.azurebfs.services.FixedSASTokenProvider;
import org.apache.hadoop.fs.azurebfs.constants.HttpOperationType;
import org.apache.hadoop.fs.azurebfs.utils.MetricFormat;
import org.apache.hadoop.util.Preconditions;

Expand Down Expand Up @@ -390,6 +391,20 @@ public class AbfsConfiguration{
FS_AZURE_ENABLE_PAGINATED_DELETE, DefaultValue = DEFAULT_ENABLE_PAGINATED_DELETE)
private boolean isPaginatedDeleteEnabled;

/**
 * Maximum number of IOException retries for a single server call made through
 * ApacheHttpClient, configured via
 * {@value org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys#FS_AZURE_APACHE_HTTP_CLIENT_MAX_IO_EXCEPTION_RETRIES}
 * with default of
 * {@value org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations#DEFAULT_APACHE_HTTP_CLIENT_MAX_IO_EXCEPTION_RETRIES}.
 */
@IntegerConfigurationValidatorAnnotation(ConfigurationKey =
FS_AZURE_APACHE_HTTP_CLIENT_MAX_IO_EXCEPTION_RETRIES, DefaultValue = DEFAULT_APACHE_HTTP_CLIENT_MAX_IO_EXCEPTION_RETRIES)
private int maxApacheHttpClientIoExceptionsRetries;

/**
 * Maximum idle time for an ApacheHttpClient connection, configured via
 * {@value org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys#FS_AZURE_APACHE_HTTP_CLIENT_IDLE_CONNECTION_TTL}
 * with default of
 * {@value org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations#DEFAULT_HTTP_CLIENT_CONN_MAX_IDLE_TIME}.
 * NOTE(review): the time unit is not stated here - confirm against the code
 * that consumes this value.
 */
@LongConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_APACHE_HTTP_CLIENT_IDLE_CONNECTION_TTL,
DefaultValue = DEFAULT_HTTP_CLIENT_CONN_MAX_IDLE_TIME)
private long maxApacheHttpClientConnectionIdleTime;

private String clientProvidedEncryptionKey;
private String clientProvidedEncryptionKeySHA;

Expand Down Expand Up @@ -491,6 +506,17 @@ public long getLong(String key, long defaultValue) {
return rawConfig.getLong(accountConf(key), rawConfig.getLong(key, defaultValue));
}

/**
 * Resolves an integer setting by precedence: the account-specific value
 * first, then the account-agnostic value, and lastly the supplied default.
 * @param key Account-agnostic configuration key
 * @param defaultValue Value returned if none is configured
 * @return the configured value if one exists, else the default value
 */
public int getInt(String key, int defaultValue) {
  final String accountSpecificKey = accountConf(key);
  // The account-agnostic lookup doubles as the fallback for the
  // account-specific lookup below.
  final int accountAgnosticValue = rawConfig.getInt(key, defaultValue);
  return rawConfig.getInt(accountSpecificKey, accountAgnosticValue);
}

/**
* Returns the account-specific password in string form if it exists, then
* looks for an account-agnostic value.
Expand Down Expand Up @@ -889,6 +915,24 @@ public DelegatingSSLSocketFactory.SSLChannelMode getPreferredSSLFactoryOption()
return getEnum(FS_AZURE_SSL_CHANNEL_MODE_KEY, DEFAULT_FS_AZURE_SSL_CHANNEL_MODE);
}

/**
 * Looks up the networking library under {@code FS_AZURE_NETWORKING_LIBRARY},
 * passing {@code DEFAULT_NETWORKING_LIBRARY} as the default.
 * @return Config to select netlib for server communication.
 */
public HttpOperationType getPreferredHttpOperationType() {
  final HttpOperationType preferredLibrary
      = getEnum(FS_AZURE_NETWORKING_LIBRARY, DEFAULT_NETWORKING_LIBRARY);
  return preferredLibrary;
}

/**
 * @return {@link #maxApacheHttpClientIoExceptionsRetries}.
 */
public int getMaxApacheHttpClientIoExceptionsRetries() {
  return this.maxApacheHttpClientIoExceptionsRetries;
}

/**
 * Accessor for the configured idle-time limit of ApacheHttpClient connections.
 * @return {@link #maxApacheHttpClientConnectionIdleTime}.
 */
public long getMaxApacheHttpClientConnectionIdleTime() {
  return this.maxApacheHttpClientConnectionIdleTime;
}

/**
* Enum config to allow user to pick format of x-ms-client-request-id header
* @return tracingContextFormat config if valid, else default ALL_ID_FORMAT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -750,7 +750,8 @@ public synchronized void close() throws IOException {
IOSTATISTICS_LOGGING_LEVEL_DEFAULT);
logIOStatisticsAtLevel(LOG, iostatisticsLoggingLevel, getIOStatistics());
}
IOUtils.cleanupWithLogger(LOG, abfsStore, delegationTokenManager);
IOUtils.cleanupWithLogger(LOG, abfsStore, delegationTokenManager,
getAbfsClient());
this.isClosed = true;
if (LOG.isDebugEnabled()) {
LOG.debug("Closing Abfs: {}", toString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation;
import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider;
import org.apache.hadoop.fs.azurebfs.security.ContextProviderEncryptionAdapter;
import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter;
Expand Down Expand Up @@ -106,7 +107,6 @@
import org.apache.hadoop.fs.azurebfs.services.AbfsClientContextBuilder;
import org.apache.hadoop.fs.azurebfs.services.AbfsClientRenameResult;
import org.apache.hadoop.fs.azurebfs.services.AbfsCounters;
import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation;
import org.apache.hadoop.fs.azurebfs.services.AbfsInputStream;
import org.apache.hadoop.fs.azurebfs.services.AbfsInputStreamContext;
import org.apache.hadoop.fs.azurebfs.services.AbfsInputStreamStatisticsImpl;
Expand Down Expand Up @@ -694,7 +694,7 @@ public OutputStream createFile(final Path path,
populateAbfsOutputStreamContext(
isAppendBlob,
lease,
client,
getClient(),
statistics,
relativePath,
0,
Expand Down Expand Up @@ -933,7 +933,7 @@ public AbfsInputStream openFileForRead(Path path,
perfInfo.registerSuccess(true);

// Add statistics for InputStream
return new AbfsInputStream(client, statistics, relativePath,
return new AbfsInputStream(getClient(), statistics, relativePath,
contentLength, populateAbfsInputStreamContext(
parameters.map(OpenFileParameters::getOptions),
contextEncryptionAdapter),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,5 +199,16 @@ public static ApiVersion getCurrentVersion() {
+ "non-hierarchical-namespace account:"
+ CPK_CONFIG_LIST;

/**
 * System property that defines the maximum number of cached connections per
 * fileSystem for ApacheHttpClient. The JDK network library uses the same
 * property to define the maximum number of cached connections at JVM level.
 */
public static final String HTTP_MAX_CONN_SYS_PROP = "http.maxConnections";
// NOTE(review): the three labels below presumably identify which networking
// implementation served a request (JDK, Apache, or JDK used as a fallback);
// their usage sites are not visible here - confirm before relying on this.
public static final String JDK_IMPL = "JDK";
public static final String APACHE_IMPL = "Apache";
public static final String JDK_FALLBACK = "JDK_fallback";
// Message text for a closed KeepAliveCache; presumably used as an exception
// message - verify against the caller.
public static final String KEEP_ALIVE_CACHE_CLOSED = "KeepAliveCache is closed";

private AbfsHttpConstants() {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -321,5 +321,17 @@ public static String accountProperty(String property, String account) {
* @see FileSystem#openFile(org.apache.hadoop.fs.Path)
*/
public static final String FS_AZURE_BUFFERED_PREAD_DISABLE = "fs.azure.buffered.pread.disable";
/** Defines which networking library to use for server IO calls: {@value}. */
public static final String FS_AZURE_NETWORKING_LIBRARY = "fs.azure.networking.library";
/**
 * Maximum number of IOException retries for a single server call on ApacheHttpClient.
 * Exceeding this count turns off further use of the ApacheHttpClient library
 * for the rest of the JVM lifecycle: {@value}.
 */
public static final String FS_AZURE_APACHE_HTTP_CLIENT_MAX_IO_EXCEPTION_RETRIES = "fs.azure.apache.http.client.max.io.exception.retries";
/** Maximum ApacheHttpClient connection-cache size at filesystem level: {@value}. */
public static final String FS_AZURE_APACHE_HTTP_CLIENT_MAX_CACHE_CONNECTION_SIZE = "fs.azure.apache.http.client.max.cache.connection.size";
/** Maximum idle time for an ApacheHttpClient connection: {@value}. */
public static final String FS_AZURE_APACHE_HTTP_CLIENT_IDLE_CONNECTION_TTL = "fs.azure.apache.http.client.idle.connection.ttl";
private ConfigurationKeys() {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -167,5 +167,14 @@ public final class FileSystemConfigurations {
public static final int HUNDRED = 100;
public static final long THOUSAND = 1000L;

/** Networking library used when none is configured: {@value}. */
public static final HttpOperationType DEFAULT_NETWORKING_LIBRARY
= HttpOperationType.APACHE_HTTP_CLIENT;

/** Default maximum number of IOException retries for a single ApacheHttpClient server call: {@value}. */
public static final int DEFAULT_APACHE_HTTP_CLIENT_MAX_IO_EXCEPTION_RETRIES = 3;

/**
 * Default maximum idle time for an ApacheHttpClient connection: {@value}.
 * NOTE(review): presumably milliseconds - the unit is not stated here, confirm.
 */
public static final long DEFAULT_HTTP_CLIENT_CONN_MAX_IDLE_TIME = 5_000L;

/**
 * Default maximum number of cached ApacheHttpClient connections: {@value}.
 * NOTE(review): presumably the default for the max-cache-connection-size
 * setting; the binding is not visible here - confirm.
 */
public static final int DEFAULT_HTTP_CLIENT_CONN_MAX_CACHED_CONNECTIONS = 5;

private FileSystemConfigurations() {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.fs.azurebfs.constants;

/**
 * Networking libraries that can be chosen for server communication.
 */
public enum HttpOperationType {
  /** Use the JDK networking library. */
  JDK_HTTP_URL_CONNECTION,

  /** Use Apache HttpClient. */
  APACHE_HTTP_CLIENT
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.fs.azurebfs.contracts.exceptions;

import org.apache.http.HttpResponse;

import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EXPECT_100_JDK_ERROR;

/**
 * Signals an expect100 handshake error: raised when the handshake fails
 * because the ADLS server answered with a 4xx or 5xx status code.
 */
public class AbfsApacheHttpExpect100Exception extends HttpResponseException {

  /**
   * Builds the exception with the {@code EXPECT_100_JDK_ERROR} message.
   *
   * @param httpResponse response passed on to {@link HttpResponseException}.
   */
  public AbfsApacheHttpExpect100Exception(final HttpResponse httpResponse) {
    super(EXPECT_100_JDK_ERROR, httpResponse);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.fs.azurebfs.contracts.exceptions;

import java.io.IOException;
import java.util.Objects;

import org.apache.http.HttpResponse;

/**
 * IOException that wraps a failure raised while parsing an ApacheHttpClient
 * response, keeping hold of the response it was created with.
 */
public class HttpResponseException extends IOException {

  private final HttpResponse httpResponse;

  /**
   * @param s exception message.
   * @param httpResponse response to carry with the exception; must be non-null.
   * @throws NullPointerException if {@code httpResponse} is null.
   */
  public HttpResponseException(final String s, final HttpResponse httpResponse) {
    super(s);
    this.httpResponse = Objects.requireNonNull(httpResponse,
        "httpResponse should be non-null");
  }

  /**
   * @return the response supplied at construction time.
   */
  public HttpResponse getHttpResponse() {
    return this.httpResponse;
  }
}
Loading

0 comments on commit b60497f

Please sign in to comment.