From de01c3c9b04a712591f774295bc84978ded80bea Mon Sep 17 00:00:00 2001 From: Manfred Baedke Date: Mon, 6 Jan 2025 17:01:25 +0100 Subject: [PATCH 1/5] OAK-11284: Greedy Reuse of cluster IDs may lead to synchronous LastRevRecovery executions slowing down startup Added documentation. --- oak-doc/src/site/markdown/nodestore/documentmk.md | 14 ++++++++++++++ .../oak/plugins/document/LastRevRecoveryAgent.java | 2 ++ 2 files changed, 16 insertions(+) diff --git a/oak-doc/src/site/markdown/nodestore/documentmk.md b/oak-doc/src/site/markdown/nodestore/documentmk.md index df6db50da0a..60eb72f990e 100644 --- a/oak-doc/src/site/markdown/nodestore/documentmk.md +++ b/oak-doc/src/site/markdown/nodestore/documentmk.md @@ -773,6 +773,20 @@ the `machine` and `instance` fields. This behaviour is new and was introduced with Oak 1.10. Previous versions ignore entries that do not match the environment and would create a new entry. +Note that while this behavior is usually beneficial, there are circumstances +under which it may lead to very slow startup times for cluster nodes that try +to acquire a node ID that has not been shut down gracefully and has been +inactive for a long time. This is due to synchronous recovery operations that +are necessary to guarantee the consistency of the cluster (for details see +[Recovery for a cluster node ID](#recovery-for-a-cluster-node-id)). + +To avoid that, the maximum duration of the synchronous recovery may be +limited using the system property `oak.documentMK.syncRecoveryTimeoutMillis`. +A positive value will specify this maximum duration in milliseconds, while a +negative value doesn't limit the recovery time. The default is `-1`. +If the duration is exceeded, the node will no longer try to reuse the ID +and pick one that doesn't need recovery. + ### Update lease for a cluster node ID Each running cluster node updates the `leaseEnd` time of the cluster node ID diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/LastRevRecoveryAgent.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/LastRevRecoveryAgent.java index b3b44582db7..4dee0c44671 100644 --- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/LastRevRecoveryAgent.java +++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/LastRevRecoveryAgent.java @@ -81,6 +81,8 @@ public class LastRevRecoveryAgent { private final Consumer afterRecovery; + //OAK-11284: optionally limit the maximum duration of a synchronous recovery operation that may occur when + //inactive node IDs are reused. private static final long SYNC_RECOVERY_TIMEOUT_MILLIS = SystemPropertySupplier .create("oak.documentMK.syncRecoveryTimeoutMillis", -1) From 130bfdb12bd7ec4b299beec0a21b21fc66cc527b Mon Sep 17 00:00:00 2001 From: mbaedke Date: Wed, 8 Jan 2025 11:13:27 +0100 Subject: [PATCH 2/5] Added documentation --- oak-doc/src/site/markdown/nodestore/documentmk.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/oak-doc/src/site/markdown/nodestore/documentmk.md b/oak-doc/src/site/markdown/nodestore/documentmk.md index 60eb72f990e..f3be34ce510 100644 --- a/oak-doc/src/site/markdown/nodestore/documentmk.md +++ b/oak-doc/src/site/markdown/nodestore/documentmk.md @@ -785,7 +785,9 @@ limited using the system property `oak.documentMK.syncRecoveryTimeoutMillis`. A positive value will specify this maximum duration in milliseconds, while a negative value doesn't limit the recovery time. The default is `-1`. If the duration is exceeded, the node will no longer try to reuse the ID -and pick one that doesn't need recovery. +and pick one that doesn't need recovery. Note that this feature has been +specifically designed for a certain unusual Oak deployment and is not +recommended for general use. ### Update lease for a cluster node ID From 916f68b1dbe186deccc817b806fb98858ee07689 Mon Sep 17 00:00:00 2001 From: mbaedke Date: Wed, 8 Jan 2025 12:57:43 +0100 Subject: [PATCH 3/5] Update oak-doc/src/site/markdown/nodestore/documentmk.md Co-authored-by: Julian Reschke --- oak-doc/src/site/markdown/nodestore/documentmk.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/oak-doc/src/site/markdown/nodestore/documentmk.md b/oak-doc/src/site/markdown/nodestore/documentmk.md index f3be34ce510..cc95626a8fa 100644 --- a/oak-doc/src/site/markdown/nodestore/documentmk.md +++ b/oak-doc/src/site/markdown/nodestore/documentmk.md @@ -785,7 +785,9 @@ limited using the system property `oak.documentMK.syncRecoveryTimeoutMillis`. A positive value will specify this maximum duration in milliseconds, while a negative value doesn't limit the recovery time. The default is `-1`. If the duration is exceeded, the node will no longer try to reuse the ID -and pick one that doesn't need recovery. Note that this feature has been +and pick one that doesn't need recovery. + +Note that this feature has been specifically designed for a certain unusual Oak deployment and is not recommended for general use. From 722f8215762b94b7989be850e01b78eec4078f43 Mon Sep 17 00:00:00 2001 From: mbaedke Date: Wed, 8 Jan 2025 12:58:01 +0100 Subject: [PATCH 4/5] Update oak-doc/src/site/markdown/nodestore/documentmk.md Co-authored-by: Julian Reschke --- oak-doc/src/site/markdown/nodestore/documentmk.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oak-doc/src/site/markdown/nodestore/documentmk.md b/oak-doc/src/site/markdown/nodestore/documentmk.md index cc95626a8fa..1a8e10f7e92 100644 --- a/oak-doc/src/site/markdown/nodestore/documentmk.md +++ b/oak-doc/src/site/markdown/nodestore/documentmk.md @@ -788,7 +788,7 @@ If the duration is exceeded, the node will no longer try to reuse the ID and pick one that doesn't need recovery. Note that this feature has been -specifically designed for a certain unusual Oak deployment and is not +specifically designed for unusual Oak deployments (requiring significantly longer lease timeouts) and is not recommended for general use. ### Update lease for a cluster node ID From f727d4f874ec621a436bbd75ce72232b71524a2d Mon Sep 17 00:00:00 2001 From: mbaedke Date: Wed, 8 Jan 2025 13:12:46 +0100 Subject: [PATCH 5/5] Update documentmk.md --- oak-doc/src/site/markdown/nodestore/documentmk.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/oak-doc/src/site/markdown/nodestore/documentmk.md b/oak-doc/src/site/markdown/nodestore/documentmk.md index 1a8e10f7e92..c3a504b63b4 100644 --- a/oak-doc/src/site/markdown/nodestore/documentmk.md +++ b/oak-doc/src/site/markdown/nodestore/documentmk.md @@ -787,8 +787,8 @@ negative value doesn't limit the recovery time. The default is `-1`. If the duration is exceeded, the node will no longer try to reuse the ID and pick one that doesn't need recovery. -Note that this feature has been -specifically designed for unusual Oak deployments (requiring significantly longer lease timeouts) and is not +Note that this feature has been specifically designed for unusual Oak +deployments (requiring significantly longer lease timeouts) and is not recommended for general use. ### Update lease for a cluster node ID