diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index d6ae584e44e4..574f2f946819 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -100,6 +100,9 @@ API Changes * GITHUB#14426: Support determining desired off-heap memory requirements through KnnVectorsReader::getOffHeapByteSize (Chris Hegarty) +* GITHUB#14893: TieredMergePolicy minimum deletes percentage decreased from 5% inclusive to 0% exclusive. + (Stefan Vodita) + New Features --------------------- * GITHUB#14404: Introducing DocValuesMultiRangeQuery.SortedNumericStabbingBuilder into sandbox. diff --git a/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java index 3c5f7ea72efa..400423df569b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java @@ -130,18 +130,25 @@ public double getMaxMergedSegmentMB() { /** * Sets the maximum percentage of doc id space taken by deleted docs. The denominator includes * both active and deleted documents. Lower values make the index more space efficient at the - * expense of increased CPU and I/O activity. Values must be between 5 and 50. Default value is + * expense of increased CPU and I/O activity. Values must be in (0, 50]. Default value is * 20. * *

<p>When the maximum delete percentage is lowered, the indexing thread will call for merges more * often, meaning that write amplification factor will be increased. Write amplification factor * measures the number of times each document in the index is written. A higher write * amplification factor will lead to higher CPU and I/O activity as indicated above. + * + *

<p>Values below 5% can lead to exceptionally high merge cost where indexing will continuously + * merge nearly all segments, and select newly merged segments immediately for merging again, + * often forcing degenerate merge selection like singleton merges. If you venture into this dark + * forest, consider limiting the maximum number of concurrent merges and threads (see {@link + * ConcurrentMergeScheduler#setMaxMergesAndThreads}) as a coarse attempt to bound the otherwise + * pathological indexing behavior. */ public TieredMergePolicy setDeletesPctAllowed(double v) { - if (v < 5 || v > 50) { + if (v <= 0 || v > 50) { throw new IllegalArgumentException( - "indexPctDeletedTarget must be >= 5.0 and <= 50 (got " + v + ")"); + "indexPctDeletedTarget must be > 0 and <= 50 (got " + v + ")"); } deletesPctAllowed = v; return this;