-
Notifications
You must be signed in to change notification settings - Fork 165
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH-5124 configurable http timeouts #5125
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,7 @@ | |
import org.apache.http.client.utils.HttpClientUtils; | ||
import org.apache.http.impl.client.CloseableHttpClient; | ||
import org.apache.http.impl.client.HttpClientBuilder; | ||
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; | ||
import org.apache.http.protocol.HttpContext; | ||
import org.eclipse.rdf4j.http.client.util.HttpClientBuilders; | ||
import org.slf4j.Logger; | ||
|
@@ -53,6 +54,163 @@ public class SharedHttpClientSessionManager implements HttpClientSessionManager, | |
|
||
private static final AtomicLong threadCount = new AtomicLong(); | ||
|
||
// System property constants for regular timeouts | ||
|
||
/** | ||
* Configurable system property {@code org.eclipse.rdf4j.client.http.connectionTimeout} for specifying the HTTP | ||
* connection timeout in milliseconds for general use. Default is 1 hour. | ||
* | ||
* <p> | ||
* The connection timeout determines the maximum time the client will wait to establish a TCP connection to the | ||
* server. A default of 1 hour is set to allow for potential network delays without causing unnecessary timeouts. | ||
* </p> | ||
*/ | ||
public static final String CONNECTION_TIMEOUT_PROPERTY = "org.eclipse.rdf4j.client.http.connectionTimeout"; | ||
|
||
/** | ||
* Configurable system property {@code org.eclipse.rdf4j.client.http.connectionRequestTimeout} for specifying the | ||
* HTTP connection request timeout in milliseconds for general use. Default is 10 days. | ||
* | ||
* <p> | ||
* The connection request timeout defines how long the client will wait for a connection from the connection pool. A | ||
* longer timeout is acceptable here since operations like large file uploads may need to wait for an available | ||
* connection. | ||
* </p> | ||
*/ | ||
public static final String CONNECTION_REQUEST_TIMEOUT_PROPERTY = "org.eclipse.rdf4j.client.http.connectionRequestTimeout"; | ||
|
||
/** | ||
* Configurable system property {@code org.eclipse.rdf4j.client.http.socketTimeout} for specifying the HTTP socket | ||
* timeout in milliseconds for general use. Default is 10 days. | ||
* | ||
* <p> | ||
* The socket timeout controls the maximum period of inactivity between data packets during data transfer. A longer | ||
* timeout is appropriate for large data transfers, ensuring that operations are not interrupted prematurely. | ||
* </p> | ||
*/ | ||
public static final String SOCKET_TIMEOUT_PROPERTY = "org.eclipse.rdf4j.client.http.socketTimeout"; | ||
|
||
// System property constants for SPARQL SERVICE timeouts | ||
|
||
/** | ||
* Configurable system property {@code org.eclipse.rdf4j.client.sparql.http.connectionTimeout} for specifying the | ||
* HTTP connection timeout in milliseconds when used in SPARQL SERVICE calls. Default is 10 minutes. | ||
* | ||
* <p> | ||
* A shorter connection timeout is set for SPARQL SERVICE calls to quickly detect unresponsive endpoints in | ||
* federated queries, improving overall query performance by avoiding long waits for unreachable servers. | ||
* </p> | ||
*/ | ||
public static final String SPARQL_CONNECTION_TIMEOUT_PROPERTY = "org.eclipse.rdf4j.client.sparql.http.connectionTimeout"; | ||
|
||
/** | ||
* Configurable system property {@code org.eclipse.rdf4j.client.sparql.http.connectionRequestTimeout} for specifying | ||
* the HTTP connection request timeout in milliseconds when used in SPARQL SERVICE calls. Default is 6 hours. | ||
* | ||
* <p> | ||
* This timeout controls how long the client waits for a connection from the pool when making SPARQL SERVICE calls. | ||
* A shorter timeout than general use ensures that queries fail fast if resources are constrained, maintaining | ||
* responsiveness. | ||
* </p> | ||
*/ | ||
public static final String SPARQL_CONNECTION_REQUEST_TIMEOUT_PROPERTY = "org.eclipse.rdf4j.client.sparql.http.connectionRequestTimeout"; | ||
|
||
/** | ||
* Configurable system property {@code org.eclipse.rdf4j.client.sparql.http.socketTimeout} for specifying the HTTP | ||
* socket timeout in milliseconds when used in SPARQL SERVICE calls. Default is 6 hours. | ||
* | ||
* <p> | ||
* The socket timeout for SPARQL SERVICE calls is set to a shorter duration to detect unresponsive servers during | ||
* data transfer, ensuring that the client does not wait indefinitely for data that may never arrive. | ||
* </p> | ||
*/ | ||
public static final String SPARQL_SOCKET_TIMEOUT_PROPERTY = "org.eclipse.rdf4j.client.sparql.http.socketTimeout"; | ||
|
||
// Default timeout values for general use | ||
|
||
/** | ||
* Default HTTP connection timeout in milliseconds for general use. Set to 1 hour. | ||
*/ | ||
public static final int DEFAULT_CONNECTION_TIMEOUT = 60 * 60 * 1000; // 1 hour | ||
|
||
/** | ||
* Default HTTP connection request timeout in milliseconds for general use. Set to 10 days. | ||
*/ | ||
public static final int DEFAULT_CONNECTION_REQUEST_TIMEOUT = 10 * 24 * 60 * 60 * 1000; // 10 days | ||
|
||
/** | ||
* Default HTTP socket timeout in milliseconds for general use. Set to 10 days. | ||
*/ | ||
public static final int DEFAULT_SOCKET_TIMEOUT = 10 * 24 * 60 * 60 * 1000; // 10 days | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would suggest to define this value in the range of expected query timeouts, 10 days sounds unreasonable. What the value does:
so, in essence how long the connection remains open waiting between two packets in an established transfer. We have configured default values of 60 seconds for this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What I'm worried about here is what happens if you have some computationally heavy operation on the server side, SHACL validation or maybe inference. If the users actions trigger this then I'm curious what happens if the timeout is low. I know we have some sort of ping thread that makes sure that one or maybe both parties are still alive during a transaction, but if that thread is sharing the same http client pool then it might be a bit brittle. It's essentially the same situation as for the other timeouts. The default in the Apache HttpClient is infinite, and I don't want to break any existing code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, so the main use-case is to distinguish interactive from long-running queries. I am not sure whether this is possible, but can the read timeout be co-related with the query timeout on the query that is using the specific connection? |
||
|
||
// Default timeout values for SPARQL SERVICE calls | ||
|
||
/** | ||
* Default HTTP connection timeout in milliseconds for SPARQL SERVICE calls. Set to 10 minutes. | ||
*/ | ||
public static final int DEFAULT_SPARQL_CONNECTION_TIMEOUT = 10 * 60 * 1000; // 10 minutes | ||
|
||
/** | ||
* Default HTTP connection request timeout in milliseconds for SPARQL SERVICE calls. Set to 6 hours. | ||
*/ | ||
public static final int DEFAULT_SPARQL_CONNECTION_REQUEST_TIMEOUT = 6 * 60 * 60 * 1000; // 6 hours | ||
|
||
/** | ||
* Default HTTP socket timeout in milliseconds for SPARQL SERVICE calls. Set to 6 hours. | ||
*/ | ||
public static final int DEFAULT_SPARQL_SOCKET_TIMEOUT = 6 * 60 * 60 * 1000; // 6 hours | ||
|
||
// Timeout values as read from system properties or defaults | ||
|
||
/** | ||
* HTTP connection timeout in milliseconds for general use. | ||
*/ | ||
public static final int CONNECTION_TIMEOUT = Integer.parseInt( | ||
System.getProperty(CONNECTION_TIMEOUT_PROPERTY, String.valueOf(DEFAULT_CONNECTION_TIMEOUT)) | ||
); | ||
|
||
/** | ||
* HTTP connection request timeout in milliseconds for general use. | ||
*/ | ||
public static final int CONNECTION_REQUEST_TIMEOUT = Integer.parseInt( | ||
System.getProperty(CONNECTION_REQUEST_TIMEOUT_PROPERTY, String.valueOf(DEFAULT_CONNECTION_REQUEST_TIMEOUT)) | ||
); | ||
|
||
/** | ||
* HTTP socket timeout in milliseconds for general use. | ||
*/ | ||
public static final int SOCKET_TIMEOUT = Integer.parseInt( | ||
System.getProperty(SOCKET_TIMEOUT_PROPERTY, String.valueOf(DEFAULT_SOCKET_TIMEOUT)) | ||
); | ||
|
||
/** | ||
* HTTP connection timeout in milliseconds for SPARQL SERVICE calls. | ||
*/ | ||
public static final int SPARQL_CONNECTION_TIMEOUT = Integer.parseInt( | ||
System.getProperty(SPARQL_CONNECTION_TIMEOUT_PROPERTY, String.valueOf(DEFAULT_SPARQL_CONNECTION_TIMEOUT)) | ||
); | ||
|
||
/** | ||
* HTTP connection request timeout in milliseconds for SPARQL SERVICE calls. | ||
*/ | ||
public static final int SPARQL_CONNECTION_REQUEST_TIMEOUT = Integer.parseInt( | ||
System.getProperty(SPARQL_CONNECTION_REQUEST_TIMEOUT_PROPERTY, | ||
String.valueOf(DEFAULT_SPARQL_CONNECTION_REQUEST_TIMEOUT)) | ||
); | ||
|
||
/** | ||
* HTTP socket timeout in milliseconds for SPARQL SERVICE calls. | ||
*/ | ||
public static final int SPARQL_SOCKET_TIMEOUT = Integer.parseInt( | ||
System.getProperty(SPARQL_SOCKET_TIMEOUT_PROPERTY, String.valueOf(DEFAULT_SPARQL_SOCKET_TIMEOUT)) | ||
); | ||
|
||
// Variables for the currently used timeouts | ||
|
||
private int currentConnectionTimeout = CONNECTION_TIMEOUT; | ||
private int currentConnectionRequestTimeout = CONNECTION_REQUEST_TIMEOUT; | ||
private int currentSocketTimeout = SOCKET_TIMEOUT; | ||
|
||
private final Logger logger = LoggerFactory.getLogger(SharedHttpClientSessionManager.class); | ||
|
||
/** | ||
|
@@ -267,6 +425,7 @@ public void close() { | |
@Override | ||
public void shutDown() { | ||
try { | ||
// Close open sessions | ||
openSessions.keySet().forEach(session -> { | ||
try { | ||
session.close(); | ||
|
@@ -280,11 +439,11 @@ public void shutDown() { | |
HttpClientUtils.closeQuietly(toCloseDependentClient); | ||
} | ||
} finally { | ||
// Shutdown the executor | ||
try { | ||
executor.shutdown(); | ||
executor.awaitTermination(10, TimeUnit.SECONDS); | ||
} catch (InterruptedException e) { | ||
// Preserve the interrupt status so others can check it as necessary | ||
Thread.currentThread().interrupt(); | ||
} finally { | ||
if (!executor.isTerminated()) { | ||
|
@@ -314,17 +473,67 @@ protected final ExecutorService getExecutorService() { | |
} | ||
|
||
private CloseableHttpClient createHttpClient() { | ||
|
||
HttpClientBuilder nextHttpClientBuilder = httpClientBuilder; | ||
if (nextHttpClientBuilder != null) { | ||
return nextHttpClientBuilder.build(); | ||
} | ||
|
||
RequestConfig requestConfig = getDefaultRequestConfig(); | ||
|
||
return HttpClientBuilder.create() | ||
.evictExpiredConnections() | ||
.evictIdleConnections(30, TimeUnit.MINUTES) | ||
.setRetryHandler(retryHandlerStale) | ||
.setServiceUnavailableRetryStrategy(serviceUnavailableRetryHandler) | ||
.useSystemProperties() | ||
.setDefaultRequestConfig(RequestConfig.custom().setCookieSpec(CookieSpecs.STANDARD).build()) | ||
.setDefaultRequestConfig(requestConfig) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we are also using a configurable value for the max number of connections
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's a very good idea. What are your defaults? I think that part of the issue is simply that the default number of connections is too low, maybe even 1, which is quite terrible for the way that RDF4J typically does joins. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We have currently 25 connections as default value (for quite some time). Note that we haven't done systematic benchmarks on the impact of adjusting the number of connections. |
||
.build(); | ||
} | ||
|
||
/** | ||
* Returns the default {@link RequestConfig} using the currently set timeout values. | ||
* | ||
* @return a configured {@link RequestConfig} with the current timeouts. | ||
*/ | ||
public RequestConfig getDefaultRequestConfig() { | ||
return RequestConfig.custom() | ||
.setConnectTimeout(currentConnectionTimeout) | ||
.setConnectionRequestTimeout(currentConnectionRequestTimeout) | ||
.setSocketTimeout(currentSocketTimeout) | ||
.setCookieSpec(CookieSpecs.STANDARD) | ||
.build(); | ||
} | ||
|
||
/** | ||
* Switches the current timeout settings to use the SPARQL-specific timeouts. This method should be called when | ||
* making SPARQL SERVICE calls to apply shorter timeout values. | ||
* | ||
* <p> | ||
* The SPARQL-specific timeouts are shorter to ensure that unresponsive or slow SPARQL endpoints do not cause long | ||
* delays in federated query processing. Quick detection of such issues improves the responsiveness and reliability | ||
* of SPARQL queries. | ||
* </p> | ||
*/ | ||
public void setDefaultSparqlServiceTimeouts() { | ||
this.currentConnectionTimeout = SPARQL_CONNECTION_TIMEOUT; | ||
this.currentConnectionRequestTimeout = SPARQL_CONNECTION_REQUEST_TIMEOUT; | ||
this.currentSocketTimeout = SPARQL_SOCKET_TIMEOUT; | ||
} | ||
|
||
/** | ||
* Resets the current timeout settings to the general timeouts. This method should be called to revert any changes | ||
* made by {@link #setDefaultSparqlServiceTimeouts()} and apply the general timeout values. | ||
* | ||
* <p> | ||
* The general timeouts are longer to accommodate operations that may take more time, such as large data transfers | ||
* or extensive processing, without causing premature timeouts. | ||
* </p> | ||
*/ | ||
public void setDefaultTimeouts() { | ||
this.currentConnectionTimeout = CONNECTION_TIMEOUT; | ||
this.currentConnectionRequestTimeout = CONNECTION_REQUEST_TIMEOUT; | ||
this.currentSocketTimeout = SOCKET_TIMEOUT; | ||
} | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the connection timeout should be typically rather small (in the range of seconds): this is the timeout for establishing the first connection. In our application we have set the default to 5 seconds
Maybe you can explain the different timeouts also better in the javadoc?
https://www.baeldung.com/httpclient-timeout#properties
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is an explanation on the timeout variables, but not on the default constants.
I'm essentially afraid of breaking existing code, since the default timeouts for the Apache HttpClient is infinite.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about having the default be 60 seconds?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think for 99% of practical cases this is very long: this timeout is only for establishing the first connection (which is usually really fast or does not work at all). As a user you would want to know rather fast if the connection cannot be established (i.e. if the packets go into the nirvana). See also my comment w.r.t query timeout below
60s is definitely better than 1 hour though