Skip to content

Commit

Permalink
Update Apache Lucene to 9.12.0 (opensearch-project#15333)
Browse files Browse the repository at this point in the history
* Update Apache Lucene to 9.12.0

Signed-off-by: Andriy Redko <[email protected]>

* Change to IOContext READONCE in locations where the file is not expected to be read multiple times.

Signed-off-by: Marc Handalian <[email protected]>

* Use READ IOContext for all non-Segment* files when copying node-to-node

Signed-off-by: Marc Handalian <[email protected]>

* Fixing more test failures

Signed-off-by: Andriy Redko <[email protected]>

* Move Composite912Codec under org.opensearch.index.codec.composite.composite912 package

Signed-off-by: Andriy Redko <[email protected]>

---------

Signed-off-by: Andriy Redko <[email protected]>
Signed-off-by: Marc Handalian <[email protected]>
Co-authored-by: Marc Handalian <[email protected]>
  • Loading branch information
2 people authored and dk2k committed Oct 21, 2024
1 parent 71b0255 commit 6654b52
Show file tree
Hide file tree
Showing 84 changed files with 231 additions and 148 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Bump `dnsjava:dnsjava` from 3.6.1 to 3.6.2 ([#16041](https://github.com/opensearch-project/OpenSearch/pull/16041))
- Bump `com.maxmind.geoip2:geoip2` from 4.2.0 to 4.2.1 ([#16042](https://github.com/opensearch-project/OpenSearch/pull/16042))
- Bump `com.maxmind.db:maxmind-db` from 3.1.0 to 3.1.1 ([#16137](https://github.com/opensearch-project/OpenSearch/pull/16137))
- Bump Apache Lucene from 9.11.1 to 9.12.0 ([#15333](https://github.com/opensearch-project/OpenSearch/pull/15333))
- Bump `com.azure:azure-core-http-netty` from 1.15.3 to 1.15.4 ([#16133](https://github.com/opensearch-project/OpenSearch/pull/16133))
- Bump `org.jline:jline` from 3.26.3 to 3.27.0 ([#16135](https://github.com/opensearch-project/OpenSearch/pull/16135))
- Bump `netty` from 4.1.112.Final to 4.1.114.Final ([#16182](https://github.com/opensearch-project/OpenSearch/pull/16182))
Expand Down
2 changes: 1 addition & 1 deletion buildSrc/version.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
opensearch = 3.0.0
lucene = 9.12.0-snapshot-847316d
lucene = 9.12.0

bundled_jdk_vendor = adoptium
bundled_jdk = 21.0.4+7
Expand Down

This file was deleted.

1 change: 1 addition & 0 deletions libs/core/licenses/lucene-core-9.12.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
fdb055d569bb20bfce9618fe2b01c29bab7f290c

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
5ba843374a0aab3dfe0b11cb28b251844d85bf5b

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
a9232b6a4882979118d3281b98dfdb6e0e1cb5ca

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
a3a6950ffc22e76a082e1b3cefb022b9f7870d29

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
e71f85b72ed3939039ba8897b28b065dd11918b9

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
6baa3ae7ab20d6e644cf0bedb271c50a44c0e259

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
f183e1e8b1eaaa4dec444774a285bb8b66518522

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
b81a609934e65d12ab9d2d84bc2ea6f56a360e57

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bec069f286b45f20b743c81e84202369cd0467e7
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@

package org.opensearch.plugin.correlation.core.index.codec;

import org.apache.lucene.backward_codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.plugin.correlation.core.index.codec.correlation990.CorrelationCodec;
import org.opensearch.plugin.correlation.core.index.codec.correlation990.PerFieldCorrelationVectorsFormat;
Expand Down

This file was deleted.

1 change: 1 addition & 0 deletions server/licenses/lucene-analysis-common-9.12.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
4c2503cfaba37249e20ea877555cb52ee89d1ae1

This file was deleted.

1 change: 1 addition & 0 deletions server/licenses/lucene-backward-codecs-9.12.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
68fe98c94e9644a584ea1bf525e68d9406fc61ec

This file was deleted.

1 change: 1 addition & 0 deletions server/licenses/lucene-core-9.12.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
fdb055d569bb20bfce9618fe2b01c29bab7f290c

This file was deleted.

1 change: 1 addition & 0 deletions server/licenses/lucene-grouping-9.12.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ccf99f8db57aa97b2c1f95c5cc2a11156a043921

This file was deleted.

1 change: 1 addition & 0 deletions server/licenses/lucene-highlighter-9.12.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
e93429f66fbcd3b58d81f01223d6ce5688047296

This file was deleted.

1 change: 1 addition & 0 deletions server/licenses/lucene-join-9.12.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
14c802d6955eaf11107375a2ada8fe8ec53b3e01

This file was deleted.

1 change: 1 addition & 0 deletions server/licenses/lucene-memory-9.12.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ffe090339540876b40df792aee51a42af6b3f37f

This file was deleted.

1 change: 1 addition & 0 deletions server/licenses/lucene-misc-9.12.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ad17704ee90eb926b6d3105f7027485cdadbecd9

This file was deleted.

1 change: 1 addition & 0 deletions server/licenses/lucene-queries-9.12.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3ac2a62b0b55c5725bb65f0c5454f9f8a401cf43

This file was deleted.

1 change: 1 addition & 0 deletions server/licenses/lucene-queryparser-9.12.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
55959399373876f4c184944315458dc6b88fbd81

This file was deleted.

1 change: 1 addition & 0 deletions server/licenses/lucene-sandbox-9.12.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
f65882536d681c11a1cbc920e5679201101e3603

This file was deleted.

1 change: 1 addition & 0 deletions server/licenses/lucene-spatial-extras-9.12.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
9d00cc7cc2279822ef6740f0677cafacfb439fa8

This file was deleted.

1 change: 1 addition & 0 deletions server/licenses/lucene-spatial3d-9.12.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
e3092632ca1d4427d3ebb2c866ac89d90f5b61ec

This file was deleted.

1 change: 1 addition & 0 deletions server/licenses/lucene-suggest-9.12.0.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
e1c6636499317ebe498f3490a1ec8b86b8a363dd
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ public void testConcurrentSearchTaskTracking() {
Map<Long, List<ThreadResourceInfo>> threadStats = getThreadStats(SearchAction.NAME + "[*]", taskInfo.getTaskId());
// Concurrent search forks each slice of 5 segments to a different thread (please see
// https://github.com/apache/lucene/issues/12498)
assertEquals((int) Math.ceil(getSegmentCount(INDEX_NAME) / 5.0) + 1, threadStats.size());
assertEquals((int) Math.ceil(getSegmentCount(INDEX_NAME) / 5.0), threadStats.size());

// assert that all task descriptions have non-zero length
MatcherAssert.assertThat(taskInfo.getDescription().length(), greaterThan(0));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

import static org.opensearch.indices.recovery.RecoverySettings.INDICES_RECOVERY_CHUNK_SIZE_SETTING;
Expand Down Expand Up @@ -184,7 +185,7 @@ public void testCancelRecoveryAndResume() throws Exception {
)
.get();

latch.await();
latch.await(5, TimeUnit.MINUTES);

// at this point we got some truncated left overs on the replica on the unlucky node
// now we are allowing the recovery to allocate again and finish to see if we wipe the truncated files
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ public LegacyBM25Similarity(float k1, float b) {
* within the range {@code [0..1]}
*/
public LegacyBM25Similarity(float k1, float b, boolean discountOverlaps) {
super(discountOverlaps);
this.bm25Similarity = new BM25Similarity(k1, b, discountOverlaps);
}

Expand Down Expand Up @@ -101,15 +102,6 @@ public final float getB() {
return bm25Similarity.getB();
}

/**
* Returns true if overlap tokens are discounted from the document's length.
*
* @see #LegacyBM25Similarity(float, float, boolean)
*/
public boolean getDiscountOverlaps() {
return bm25Similarity.getDiscountOverlaps();
}

@Override
public String toString() {
return bm25Similarity.toString();
Expand Down
26 changes: 17 additions & 9 deletions server/src/main/java/org/apache/lucene/util/CombinedBitSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,7 @@ public int prevSetBit(int index) {

@Override
public int nextSetBit(int index) {
assert index >= 0 && index < length : "index=" + index + " numBits=" + length();
int next = first.nextSetBit(index);
while (next != DocIdSetIterator.NO_MORE_DOCS && second.get(next) == false) {
if (next == length() - 1) {
return DocIdSetIterator.NO_MORE_DOCS;
}
next = first.nextSetBit(next + 1);
}
return next;
return nextSetBit(index, length() - 1);
}

@Override
Expand Down Expand Up @@ -132,4 +124,20 @@ public void clear(int startIndex, int endIndex) {
public boolean getAndSet(int i) {
throw new UnsupportedOperationException("not implemented");
}

/**
 * Returns the index of the first bit at or after {@code start} that is set in both
 * {@code first} and {@code second}, without ever returning an index beyond {@code end}.
 *
 * <p>{@code end} is treated as an inclusive upper bound here: a bit located exactly at
 * {@code end} is still reported if it is set in both sets.
 * NOTE(review): Lucene's {@code BitSet#nextSetBit(int, int)} appears to document {@code end}
 * as exclusive, and callers may pass {@code end == length()} - confirm, and if so align the
 * assertion and bound together with {@code nextSetBit(int)}.
 *
 * @param start first index to inspect (inclusive); must be in {@code [0, length())}
 * @param end last index to inspect; must be in {@code [start, length())}
 * @return the matching index, or {@link DocIdSetIterator#NO_MORE_DOCS} if there is none
 *         within the requested range
 */
@Override
public int nextSetBit(int start, int end) {
    assert start >= 0 && start < length() : "start=" + start + " numBits=" + length();
    assert end >= start && end < length() : "end=" + end + " numBits=" + length();

    int candidate = first.nextSetBit(start);
    // Stop as soon as the candidate leaves the range. Previously the bound was only checked
    // when the candidate was missing from 'second', so a bit beyond 'end' that happened to be
    // set in both sets was returned even though it lies outside the requested range.
    while (candidate != DocIdSetIterator.NO_MORE_DOCS && candidate <= end) {
        if (second.get(candidate)) {
            return candidate;
        }
        if (candidate == end) {
            // Do not probe 'first' past 'end'; 'end' may be the last valid index.
            break;
        }
        candidate = first.nextSetBit(candidate + 1);
    }
    return DocIdSetIterator.NO_MORE_DOCS;
}
}
4 changes: 2 additions & 2 deletions server/src/main/java/org/opensearch/common/lucene/Lucene.java
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@
* @opensearch.internal
*/
public class Lucene {
public static final String LATEST_CODEC = "Lucene99";
public static final String LATEST_CODEC = "Lucene912";

public static final String SOFT_DELETES_FIELD = "__soft_deletes";

Expand Down Expand Up @@ -272,7 +272,7 @@ public static void checkSegmentInfoIntegrity(final Directory directory) throws I

@Override
protected Object doBody(String segmentFileName) throws IOException {
try (IndexInput input = directory.openInput(segmentFileName, IOContext.READ)) {
try (IndexInput input = directory.openInput(segmentFileName, IOContext.READONCE)) {
CodecUtil.checksumEntireFile(input);
}
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@

import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene99.Lucene99Codec.Mode;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.apache.lucene.codecs.lucene912.Lucene912Codec.Mode;
import org.opensearch.common.Nullable;
import org.opensearch.common.collect.MapBuilder;
import org.opensearch.index.IndexSettings;
Expand Down Expand Up @@ -70,10 +70,10 @@ public CodecService(@Nullable MapperService mapperService, IndexSettings indexSe
final MapBuilder<String, Codec> codecs = MapBuilder.<String, Codec>newMapBuilder();
assert null != indexSettings;
if (mapperService == null) {
codecs.put(DEFAULT_CODEC, new Lucene99Codec());
codecs.put(LZ4, new Lucene99Codec());
codecs.put(BEST_COMPRESSION_CODEC, new Lucene99Codec(Mode.BEST_COMPRESSION));
codecs.put(ZLIB, new Lucene99Codec(Mode.BEST_COMPRESSION));
codecs.put(DEFAULT_CODEC, new Lucene912Codec());
codecs.put(LZ4, new Lucene912Codec());
codecs.put(BEST_COMPRESSION_CODEC, new Lucene912Codec(Mode.BEST_COMPRESSION));
codecs.put(ZLIB, new Lucene912Codec(Mode.BEST_COMPRESSION));
} else {
// CompositeCodec still delegates to PerFieldMappingPostingFormatCodec
// We can still support all the compression codecs when composite index is present
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.opensearch.common.lucene.Lucene;
import org.opensearch.index.codec.fuzzy.FuzzyFilterPostingsFormat;
import org.opensearch.index.codec.fuzzy.FuzzySetFactory;
Expand All @@ -59,7 +59,7 @@
*
* @opensearch.internal
*/
public class PerFieldMappingPostingFormatCodec extends Lucene99Codec {
public class PerFieldMappingPostingFormatCodec extends Lucene912Codec {
private final Logger logger;
private final MapperService mapperService;
private final DocValuesFormat dvFormat = new Lucene90DocValuesFormat();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@

import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.index.codec.composite.composite912.Composite912Codec;
import org.opensearch.index.codec.composite.composite99.Composite99Codec;
import org.opensearch.index.mapper.MapperService;

Expand All @@ -38,10 +39,10 @@ public CompositeCodecFactory() {}

public Map<String, Codec> getCompositeIndexCodecs(MapperService mapperService, Logger logger) {
Map<String, Codec> codecs = new HashMap<>();
codecs.put(DEFAULT_CODEC, new Composite99Codec(Lucene99Codec.Mode.BEST_SPEED, mapperService, logger));
codecs.put(LZ4, new Composite99Codec(Lucene99Codec.Mode.BEST_SPEED, mapperService, logger));
codecs.put(BEST_COMPRESSION_CODEC, new Composite99Codec(Lucene99Codec.Mode.BEST_COMPRESSION, mapperService, logger));
codecs.put(ZLIB, new Composite99Codec(Lucene99Codec.Mode.BEST_COMPRESSION, mapperService, logger));
codecs.put(DEFAULT_CODEC, new Composite912Codec(Lucene912Codec.Mode.BEST_SPEED, mapperService, logger));
codecs.put(LZ4, new Composite912Codec(Lucene912Codec.Mode.BEST_SPEED, mapperService, logger));
codecs.put(BEST_COMPRESSION_CODEC, new Composite912Codec(Lucene912Codec.Mode.BEST_COMPRESSION, mapperService, logger));
codecs.put(ZLIB, new Composite912Codec(Lucene912Codec.Mode.BEST_COMPRESSION, mapperService, logger));
return codecs;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec.composite.composite912;

import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.index.codec.PerFieldMappingPostingFormatCodec;
import org.opensearch.index.codec.composite.composite99.Composite99DocValuesFormat;
import org.opensearch.index.mapper.MapperService;

/**
 * A {@link FilterCodec} that adds support for the file formats of composite indices
 * (for example the star tree index), as determined by the index mappings. All codec
 * components are taken from the wrapped delegate except the doc values format, which
 * this class supplies itself.
 *
 * @opensearch.experimental
 */
@ExperimentalApi
public class Composite912Codec extends FilterCodec {
    public static final String COMPOSITE_INDEX_CODEC_NAME = "Composite912Codec";

    private final MapperService mapperService;

    /**
     * Sole constructor; the public constructors funnel into it. Subclasses should expose a
     * no-arg constructor and hand their delegate codec plus a unique name to this one.
     *
     * @param name name of the codec
     * @param delegate codec delegate
     * @param mapperService mapper service instance
     */
    protected Composite912Codec(String name, Codec delegate, MapperService mapperService) {
        super(name, delegate);
        this.mapperService = mapperService;
    }

    /**
     * No-arg constructor required for SPI; used on the reader path. Wraps a plain
     * {@link Lucene912Codec} and carries no mapper service.
     */
    public Composite912Codec() {
        this(COMPOSITE_INDEX_CODEC_NAME, new Lucene912Codec(), null);
    }

    /**
     * Creates the codec on top of a {@link PerFieldMappingPostingFormatCodec} delegate.
     *
     * @param compressionMode compression mode forwarded to the delegate codec
     * @param mapperService mapper service instance, also forwarded to the delegate
     * @param logger logger forwarded to the delegate codec
     */
    public Composite912Codec(Lucene912Codec.Mode compressionMode, MapperService mapperService, Logger logger) {
        this(COMPOSITE_INDEX_CODEC_NAME, new PerFieldMappingPostingFormatCodec(compressionMode, mapperService, logger), mapperService);
    }

    /**
     * Supplies the composite doc values format instead of the delegate's own format.
     *
     * @return a new {@link Composite99DocValuesFormat} built with this codec's mapper service
     */
    @Override
    public DocValuesFormat docValuesFormat() {
        return new Composite99DocValuesFormat(this.mapperService);
    }
}
Loading

0 comments on commit 6654b52

Please sign in to comment.