Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-5124 configurable http timeouts #5125

Merged
merged 3 commits into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.apache.http.client.utils.HttpClientUtils;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.protocol.HttpContext;
import org.eclipse.rdf4j.http.client.util.HttpClientBuilders;
import org.slf4j.Logger;
Expand All @@ -53,6 +54,163 @@ public class SharedHttpClientSessionManager implements HttpClientSessionManager,

private static final AtomicLong threadCount = new AtomicLong();

// System property constants for regular timeouts

/**
* Configurable system property {@code org.eclipse.rdf4j.client.http.connectionTimeout} for specifying the HTTP
* connection timeout in milliseconds for general use. Default is 1 hour.
*
* <p>
* The connection timeout determines the maximum time the client will wait to establish a TCP connection to the
* server. A default of 1 hour is set to allow for potential network delays without causing unnecessary timeouts.
* </p>
*/
public static final String CONNECTION_TIMEOUT_PROPERTY = "org.eclipse.rdf4j.client.http.connectionTimeout";

/**
* Configurable system property {@code org.eclipse.rdf4j.client.http.connectionRequestTimeout} for specifying the
* HTTP connection request timeout in milliseconds for general use. Default is 10 days.
*
* <p>
* The connection request timeout defines how long the client will wait for a connection from the connection pool. A
* longer timeout is acceptable here since operations like large file uploads may need to wait for an available
* connection.
* </p>
*/
public static final String CONNECTION_REQUEST_TIMEOUT_PROPERTY = "org.eclipse.rdf4j.client.http.connectionRequestTimeout";

/**
* Configurable system property {@code org.eclipse.rdf4j.client.http.socketTimeout} for specifying the HTTP socket
* timeout in milliseconds for general use. Default is 10 days.
*
* <p>
* The socket timeout controls the maximum period of inactivity between data packets during data transfer. A longer
* timeout is appropriate for large data transfers, ensuring that operations are not interrupted prematurely.
* </p>
*/
public static final String SOCKET_TIMEOUT_PROPERTY = "org.eclipse.rdf4j.client.http.socketTimeout";

// System property constants for SPARQL SERVICE timeouts

/**
* Configurable system property {@code org.eclipse.rdf4j.client.sparql.http.connectionTimeout} for specifying the
* HTTP connection timeout in milliseconds when used in SPARQL SERVICE calls. Default is 10 minutes.
*
* <p>
* A shorter connection timeout is set for SPARQL SERVICE calls to quickly detect unresponsive endpoints in
* federated queries, improving overall query performance by avoiding long waits for unreachable servers.
* </p>
*/
public static final String SPARQL_CONNECTION_TIMEOUT_PROPERTY = "org.eclipse.rdf4j.client.sparql.http.connectionTimeout";

/**
* Configurable system property {@code org.eclipse.rdf4j.client.sparql.http.connectionRequestTimeout} for specifying
* the HTTP connection request timeout in milliseconds when used in SPARQL SERVICE calls. Default is 6 hours.
*
* <p>
* This timeout controls how long the client waits for a connection from the pool when making SPARQL SERVICE calls.
* A shorter timeout than general use ensures that queries fail fast if resources are constrained, maintaining
* responsiveness.
* </p>
*/
public static final String SPARQL_CONNECTION_REQUEST_TIMEOUT_PROPERTY = "org.eclipse.rdf4j.client.sparql.http.connectionRequestTimeout";

/**
* Configurable system property {@code org.eclipse.rdf4j.client.sparql.http.socketTimeout} for specifying the HTTP
* socket timeout in milliseconds when used in SPARQL SERVICE calls. Default is 6 hours.
*
* <p>
* The socket timeout for SPARQL SERVICE calls is set to a shorter duration to detect unresponsive servers during
* data transfer, ensuring that the client does not wait indefinitely for data that may never arrive.
* </p>
*/
public static final String SPARQL_SOCKET_TIMEOUT_PROPERTY = "org.eclipse.rdf4j.client.sparql.http.socketTimeout";

// Default timeout values for general use

/**
* Default HTTP connection timeout in milliseconds for general use. Set to 1 hour.
*/
public static final int DEFAULT_CONNECTION_TIMEOUT = 60 * 60 * 1000; // 1 hour
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the connection timeout should be typically rather small (in the range of seconds): this is the timeout for establishing the first connection. In our application we have set the default to 5 seconds

Maybe you can explain the different timeouts also better in the javadoc?

https://www.baeldung.com/httpclient-timeout#properties

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is an explanation on the timeout variables, but not on the default constants.

I'm essentially afraid of breaking existing code, since the default timeouts for the Apache HttpClient is infinite.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about having the default be 60 seconds?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about having the default be 60 seconds?

I think for 99% of practical cases this is very long: this timeout is only for establishing the first connection (which is usually really fast or does not work at all). As a user you would want to know rather fast if the connection cannot be established (i.e. if the packets go into the nirvana). See also my comment w.r.t query timeout below

60s is definitely better than 1 hour though


/**
* Default HTTP connection request timeout in milliseconds for general use. Set to 10 days.
*/
public static final int DEFAULT_CONNECTION_REQUEST_TIMEOUT = 10 * 24 * 60 * 60 * 1000; // 10 days

/**
* Default HTTP socket timeout in milliseconds for general use. Set to 10 days.
*/
public static final int DEFAULT_SOCKET_TIMEOUT = 10 * 24 * 60 * 60 * 1000; // 10 days
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would suggest to define this value in the range of expected query timeouts, 10 days sounds unreasonable.

What the value does:

the time waiting for data – after establishing the connection; maximum time of inactivity between two data packets

so, in essence how long the connection remains open waiting between two packets in an established transfer. We have configured default values of 60 seconds for this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I'm worried about here is what happens if you have some computationally heavy operation on the server side, SHACL validation or maybe inference. If the users actions trigger this then I'm curious what happens if the timeout is low.

I know we have some sort of ping thread that makes sure that one or maybe both parties are still alive during a transaction, but if that thread is sharing the same http client pool then it might be a bit brittle.

It's essentially the same situation as for the other timeouts. The default in the Apache HttpClient is infinite, and I don't want to break any existing code.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, so the main use-case is to distinguish interactive from long-running queries. I am not sure whether this is possible, but can the read timeout be co-related with the query timeout on the query that is using the specific connection?


// Default timeout values for SPARQL SERVICE calls

/**
* Default HTTP connection timeout in milliseconds for SPARQL SERVICE calls. Set to 10 minutes.
*/
public static final int DEFAULT_SPARQL_CONNECTION_TIMEOUT = 10 * 60 * 1000; // 10 minutes

/**
* Default HTTP connection request timeout in milliseconds for SPARQL SERVICE calls. Set to 6 hours.
*/
public static final int DEFAULT_SPARQL_CONNECTION_REQUEST_TIMEOUT = 6 * 60 * 60 * 1000; // 6 hours

/**
* Default HTTP socket timeout in milliseconds for SPARQL SERVICE calls. Set to 6 hours.
*/
public static final int DEFAULT_SPARQL_SOCKET_TIMEOUT = 6 * 60 * 60 * 1000; // 6 hours

// Timeout values as read from system properties or defaults

/**
* HTTP connection timeout in milliseconds for general use.
*/
public static final int CONNECTION_TIMEOUT = Integer.parseInt(
System.getProperty(CONNECTION_TIMEOUT_PROPERTY, String.valueOf(DEFAULT_CONNECTION_TIMEOUT))
);

/**
* HTTP connection request timeout in milliseconds for general use.
*/
public static final int CONNECTION_REQUEST_TIMEOUT = Integer.parseInt(
System.getProperty(CONNECTION_REQUEST_TIMEOUT_PROPERTY, String.valueOf(DEFAULT_CONNECTION_REQUEST_TIMEOUT))
);

/**
* HTTP socket timeout in milliseconds for general use.
*/
public static final int SOCKET_TIMEOUT = Integer.parseInt(
System.getProperty(SOCKET_TIMEOUT_PROPERTY, String.valueOf(DEFAULT_SOCKET_TIMEOUT))
);

/**
* HTTP connection timeout in milliseconds for SPARQL SERVICE calls.
*/
public static final int SPARQL_CONNECTION_TIMEOUT = Integer.parseInt(
System.getProperty(SPARQL_CONNECTION_TIMEOUT_PROPERTY, String.valueOf(DEFAULT_SPARQL_CONNECTION_TIMEOUT))
);

/**
* HTTP connection request timeout in milliseconds for SPARQL SERVICE calls.
*/
public static final int SPARQL_CONNECTION_REQUEST_TIMEOUT = Integer.parseInt(
System.getProperty(SPARQL_CONNECTION_REQUEST_TIMEOUT_PROPERTY,
String.valueOf(DEFAULT_SPARQL_CONNECTION_REQUEST_TIMEOUT))
);

/**
* HTTP socket timeout in milliseconds for SPARQL SERVICE calls.
*/
public static final int SPARQL_SOCKET_TIMEOUT = Integer.parseInt(
System.getProperty(SPARQL_SOCKET_TIMEOUT_PROPERTY, String.valueOf(DEFAULT_SPARQL_SOCKET_TIMEOUT))
);

// Variables for the currently used timeouts

private int currentConnectionTimeout = CONNECTION_TIMEOUT;
private int currentConnectionRequestTimeout = CONNECTION_REQUEST_TIMEOUT;
private int currentSocketTimeout = SOCKET_TIMEOUT;

private final Logger logger = LoggerFactory.getLogger(SharedHttpClientSessionManager.class);

/**
Expand Down Expand Up @@ -267,6 +425,7 @@ public void close() {
@Override
public void shutDown() {
try {
// Close open sessions
openSessions.keySet().forEach(session -> {
try {
session.close();
Expand All @@ -280,11 +439,11 @@ public void shutDown() {
HttpClientUtils.closeQuietly(toCloseDependentClient);
}
} finally {
// Shutdown the executor
try {
executor.shutdown();
executor.awaitTermination(10, TimeUnit.SECONDS);
} catch (InterruptedException e) {
// Preserve the interrupt status so others can check it as necessary
Thread.currentThread().interrupt();
} finally {
if (!executor.isTerminated()) {
Expand Down Expand Up @@ -314,17 +473,67 @@ protected final ExecutorService getExecutorService() {
}

private CloseableHttpClient createHttpClient() {

HttpClientBuilder nextHttpClientBuilder = httpClientBuilder;
if (nextHttpClientBuilder != null) {
return nextHttpClientBuilder.build();
}

RequestConfig requestConfig = getDefaultRequestConfig();

return HttpClientBuilder.create()
.evictExpiredConnections()
.evictIdleConnections(30, TimeUnit.MINUTES)
.setRetryHandler(retryHandlerStale)
.setServiceUnavailableRetryStrategy(serviceUnavailableRetryHandler)
.useSystemProperties()
.setDefaultRequestConfig(RequestConfig.custom().setCookieSpec(CookieSpecs.STANDARD).build())
.setDefaultRequestConfig(requestConfig)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we are also using a configurable value for the max number of connections

.setMaxConnPerRoute(maxConnections)
.setMaxConnTotal(maxConnections)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a very good idea. What are your defaults?

I think that part of the issue is simply that the default number of connections is too low, maybe even 1, which is quite terrible for the way that RDF4J typically does joins.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have currently 25 connections as default value (for quite some time). Note that we haven't done systematic benchmarks on the impact of adjusting the number of connections.

.build();
}

/**
* Returns the default {@link RequestConfig} using the currently set timeout values.
*
* @return a configured {@link RequestConfig} with the current timeouts.
*/
public RequestConfig getDefaultRequestConfig() {
return RequestConfig.custom()
.setConnectTimeout(currentConnectionTimeout)
.setConnectionRequestTimeout(currentConnectionRequestTimeout)
.setSocketTimeout(currentSocketTimeout)
.setCookieSpec(CookieSpecs.STANDARD)
.build();
}

/**
* Switches the current timeout settings to use the SPARQL-specific timeouts. This method should be called when
* making SPARQL SERVICE calls to apply shorter timeout values.
*
* <p>
* The SPARQL-specific timeouts are shorter to ensure that unresponsive or slow SPARQL endpoints do not cause long
* delays in federated query processing. Quick detection of such issues improves the responsiveness and reliability
* of SPARQL queries.
* </p>
*/
public void setDefaultSparqlServiceTimeouts() {
this.currentConnectionTimeout = SPARQL_CONNECTION_TIMEOUT;
this.currentConnectionRequestTimeout = SPARQL_CONNECTION_REQUEST_TIMEOUT;
this.currentSocketTimeout = SPARQL_SOCKET_TIMEOUT;
}

/**
* Resets the current timeout settings to the general timeouts. This method should be called to revert any changes
* made by {@link #setDefaultSparqlServiceTimeouts()} and apply the general timeout values.
*
* <p>
* The general timeouts are longer to accommodate operations that may take more time, such as large data transfers
* or extensive processing, without causing premature timeouts.
* </p>
*/
public void setDefaultTimeouts() {
this.currentConnectionTimeout = CONNECTION_TIMEOUT;
this.currentConnectionRequestTimeout = CONNECTION_REQUEST_TIMEOUT;
this.currentSocketTimeout = SOCKET_TIMEOUT;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,19 @@ public void setHttpClientSessionManager(HttpClientSessionManager client) {

@Override
public HttpClient getHttpClient() {
return getHttpClientSessionManager().getHttpClient();
HttpClientSessionManager httpClientSessionManager = getHttpClientSessionManager();

try {
if (httpClientSessionManager instanceof SharedHttpClientSessionManager) {
((SharedHttpClientSessionManager) httpClientSessionManager).setDefaultSparqlServiceTimeouts();
}
return getHttpClientSessionManager().getHttpClient();
} finally {
if (httpClientSessionManager instanceof SharedHttpClientSessionManager) {
((SharedHttpClientSessionManager) httpClientSessionManager).setDefaultTimeouts();
}
}

}

@Override
Expand All @@ -86,6 +98,7 @@ public void setHttpClient(HttpClient httpClient) {
getHttpClientSessionManager();
toSetDependentClient = dependentClient;
}

// The strange lifecycle results in the possibility that the
// dependentClient will be null due to a call to setSesameClient, so add
// a null guard here for that possibility
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
*******************************************************************************/
package org.eclipse.rdf4j.federated.endpoint.provider;

import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.eclipse.rdf4j.federated.endpoint.Endpoint;
Expand Down Expand Up @@ -40,13 +42,19 @@ public Endpoint loadEndpoint(RemoteRepositoryRepositoryInformation repoInfo)
}

try {

HTTPRepository repo = new HTTPRepository(repositoryServer, repositoryName);
SharedHttpClientSessionManager httpClientSessionManager = (SharedHttpClientSessionManager) repo
.getHttpClientSessionManager();

HttpClientBuilder httpClientBuilder = HttpClients.custom()
.useSystemProperties()
.setDefaultRequestConfig(httpClientSessionManager.getDefaultRequestConfig())
.setMaxConnTotal(20)
.setMaxConnPerRoute(20);
((SharedHttpClientSessionManager) repo.getHttpClientSessionManager())
.setHttpClientBuilder(httpClientBuilder);

httpClientSessionManager.setHttpClientBuilder(httpClientBuilder);

try {
repo.init();
} finally {
Expand Down
Loading