From 0eacaf833fbad897670a22d9a9dd265ee5a80474 Mon Sep 17 00:00:00 2001 From: Stefan Vodita Date: Tue, 1 Jul 2025 15:08:45 +0000 Subject: [PATCH 1/3] Decrease minimum deletes percentage in TMP --- lucene/CHANGES.txt | 3 +++ .../src/java/org/apache/lucene/index/TieredMergePolicy.java | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index d6ae584e44e4..80a696a9f375 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -100,6 +100,9 @@ API Changes * GITHUB#14426: Support determining desired off-heap memory requirements through KnnVectorsReader::getOffHeapByteSize (Chris Hegarty) +* GITHUB#?????: TieredMergePolicy minimum deletes percentage decreased from 5% inclusive to 0% exclusive. + (Stefan Vodita) + New Features --------------------- * GITHUB#14404: Introducing DocValuesMultiRangeQuery.SortedNumericStabbingBuilder into sandbox. diff --git a/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java index 3c5f7ea72efa..08b45bb7b51b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java @@ -130,7 +130,7 @@ public double getMaxMergedSegmentMB() { /** * Sets the maximum percentage of doc id space taken by deleted docs. The denominator includes * both active and deleted documents. Lower values make the index more space efficient at the - * expense of increased CPU and I/O activity. Values must be between 5 and 50. Default value is + * expense of increased CPU and I/O activity. Values must be in range (0, 50]. Default value is * 20. * *

When the maximum delete percentage is lowered, the indexing thread will call for merges more @@ -139,9 +139,9 @@ public double getMaxMergedSegmentMB() { * amplification factor will lead to higher CPU and I/O activity as indicated above. */ public TieredMergePolicy setDeletesPctAllowed(double v) { - if (v < 5 || v > 50) { + if (v <= 0 || v > 50) { throw new IllegalArgumentException( - "indexPctDeletedTarget must be >= 5.0 and <= 50 (got " + v + ")"); + "indexPctDeletedTarget must be > 0 and <= 50 (got " + v + ")"); } deletesPctAllowed = v; return this; From e65e8a27862f0c0dd6e61a701e3a58ea4d7e60a1 Mon Sep 17 00:00:00 2001 From: Stefan Vodita Date: Wed, 2 Jul 2025 07:56:58 +0000 Subject: [PATCH 2/3] Add PR ID to CHANGES --- lucene/CHANGES.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 80a696a9f375..574f2f946819 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -100,7 +100,7 @@ API Changes * GITHUB#14426: Support determining desired off-heap memory requirements through KnnVectorsReader::getOffHeapByteSize (Chris Hegarty) -* GITHUB#?????: TieredMergePolicy minimum deletes percentage decreased from 5% inclusive to 0% exclusive. +* GITHUB#14893: TieredMergePolicy minimum deletes percentage decreased from 5% inclusive to 0% exclusive. 
(Stefan Vodita) New Features From 4188f26b4b329edafba32580aee391d644c68f2e Mon Sep 17 00:00:00 2001 From: Stefan Vodita Date: Mon, 7 Jul 2025 13:50:02 +0000 Subject: [PATCH 3/3] Expand javadoc with a warning --- .../java/org/apache/lucene/index/TieredMergePolicy.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java index 08b45bb7b51b..400423df569b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java @@ -137,6 +137,13 @@ public double getMaxMergedSegmentMB() { * often, meaning that write amplification factor will be increased. Write amplification factor * measures the number of times each document in the index is written. A higher write * amplification factor will lead to higher CPU and I/O activity as indicated above. + * + *

Values below 5% can lead to exceptionally high merge cost where indexing will continuously + * merge nearly all segments, and select newly merged segments immediately for merging again, + * often forcing degenerate merge selection like singleton merges. If you venture into this dark + * forest, consider limiting the maximum number of concurrent merges and threads (see {@link + * ConcurrentMergeScheduler#setMaxMergesAndThreads}) as a coarse attempt to bound the otherwise + * pathological indexing behavior. */ public TieredMergePolicy setDeletesPctAllowed(double v) { if (v <= 0 || v > 50) {