Skip to content

Commit 3ff7761

Browse files
committed
Add ordinal range encode for tsid
1 parent 62c84a4 commit 3ff7761

File tree

6 files changed

+267
-158
lines changed

6 files changed

+267
-158
lines changed

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java

Lines changed: 59 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,14 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
6262
final int maxDoc;
6363
private byte[] termsDictBuffer;
6464
private final int skipIndexIntervalSize;
65+
private final int minDocsPerOrdinalForOrdinalRangeEncoding;
6566
final boolean enableOptimizedMerge;
67+
private int primarySortField = -1;
6668

6769
ES819TSDBDocValuesConsumer(
6870
SegmentWriteState state,
6971
int skipIndexIntervalSize,
72+
int minDocsPerOrdinalForOrdinalRangeEncoding,
7073
boolean enableOptimizedMerge,
7174
String dataCodec,
7275
String dataExtension,
@@ -75,6 +78,7 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
7578
) throws IOException {
7679
this.termsDictBuffer = new byte[1 << 14];
7780
this.dir = state.directory;
81+
this.minDocsPerOrdinalForOrdinalRangeEncoding = minDocsPerOrdinalForOrdinalRangeEncoding;
7882
this.context = state.context;
7983
boolean success = false;
8084
try {
@@ -99,6 +103,13 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
99103
maxDoc = state.segmentInfo.maxDoc();
100104
this.skipIndexIntervalSize = skipIndexIntervalSize;
101105
this.enableOptimizedMerge = enableOptimizedMerge;
106+
final var indexSort = state.segmentInfo.getIndexSort();
107+
if (indexSort != null && indexSort.getSort().length > 0 && indexSort.getSort()[0].getReverse() == false) {
108+
var sortField = state.fieldInfos.fieldInfo(indexSort.getSort()[0].getField());
109+
if (sortField != null) {
110+
primarySortField = sortField.number;
111+
}
112+
}
102113
success = true;
103114
} finally {
104115
if (success == false) {
@@ -124,6 +135,10 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti
124135
writeField(field, producer, -1, null);
125136
}
126137

138+
private boolean shouldEncodeOrdinalRange(FieldInfo field, long maxOrd, int numDocsWithValue) {
139+
return maxDoc > 1 && field.number == primarySortField && (numDocsWithValue / maxOrd) >= minDocsPerOrdinalForOrdinalRangeEncoding;
140+
}
141+
127142
private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, long maxOrd, OffsetsAccumulator offsetsAccumulator)
128143
throws IOException {
129144
int numDocsWithValue = 0;
@@ -149,19 +164,52 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer,
149164
try {
150165
if (numValues > 0) {
151166
assert numDocsWithValue > 0;
152-
// Special case for maxOrd of 1, signal -1 that no blocks will be written
153-
meta.writeInt(maxOrd != 1 ? ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT : -1);
154167
final ByteBuffersDataOutput indexOut = new ByteBuffersDataOutput();
155-
final DirectMonotonicWriter indexWriter = DirectMonotonicWriter.getInstance(
156-
meta,
157-
new ByteBuffersIndexOutput(indexOut, "temp-dv-index", "temp-dv-index"),
158-
1L + ((numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT),
159-
ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
160-
);
168+
DirectMonotonicWriter indexWriter = null;
161169

162170
final long valuesDataOffset = data.getFilePointer();
163-
// Special case for maxOrd of 1, skip writing the blocks
164-
if (maxOrd != 1) {
171+
if (maxOrd == 1) {
172+
// Special case for maxOrd of 1, signal -1 that no blocks will be written
173+
meta.writeInt(-1);
174+
} else if (shouldEncodeOrdinalRange(field, maxOrd, numDocsWithValue)) {
175+
// When a field is sorted, use ordinal range encoding for long runs of the same ordinal.
176+
meta.writeInt(-2);
177+
meta.writeVInt(Math.toIntExact(maxOrd));
178+
values = valuesProducer.getSortedNumeric(field);
179+
if (enableOptimizedMerge && numDocsWithValue < maxDoc) {
180+
disiAccumulator = new DISIAccumulator(dir, context, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
181+
}
182+
DirectMonotonicWriter startDocs = DirectMonotonicWriter.getInstance(
183+
meta,
184+
data,
185+
maxOrd + 1,
186+
ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
187+
);
188+
long lastOrd = 0;
189+
startDocs.add(0);
190+
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
191+
if (disiAccumulator != null) {
192+
disiAccumulator.addDocId(doc);
193+
}
194+
if (offsetsAccumulator != null) {
195+
offsetsAccumulator.addDoc(1);
196+
}
197+
final long nextOrd = values.nextValue();
198+
if (nextOrd != lastOrd) {
199+
lastOrd = nextOrd;
200+
startDocs.add(doc);
201+
}
202+
}
203+
startDocs.add(maxDoc);
204+
startDocs.finish();
205+
} else {
206+
indexWriter = DirectMonotonicWriter.getInstance(
207+
meta,
208+
new ByteBuffersIndexOutput(indexOut, "temp-dv-index", "temp-dv-index"),
209+
1L + ((numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT),
210+
ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
211+
);
212+
meta.writeInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
165213
final long[] buffer = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
166214
int bufferSize = 0;
167215
final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
@@ -204,8 +252,7 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer,
204252
}
205253

206254
final long valuesDataLength = data.getFilePointer() - valuesDataOffset;
207-
if (maxOrd != 1) {
208-
// Special case for maxOrd of 1, indexWriter isn't really used, so no need to invoke finish() method.
255+
if (indexWriter != null) {
209256
indexWriter.finish();
210257
}
211258
final long indexDataOffset = data.getFilePointer();

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,20 +105,22 @@ private static boolean getOptimizedMergeEnabledDefault() {
105105
}
106106

107107
final int skipIndexIntervalSize;
108+
final int minDocsPerOrdinalForOrdinalRangeEncoding;
108109
private final boolean enableOptimizedMerge;
109110

110111
/** Default constructor. */
111112
public ES819TSDBDocValuesFormat() {
112-
this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, OPTIMIZED_MERGE_ENABLE_DEFAULT);
113+
this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, NUMERIC_BLOCK_SIZE, OPTIMIZED_MERGE_ENABLE_DEFAULT);
113114
}
114115

115116
/** Doc values fields format with specified skipIndexIntervalSize, minDocsPerOrdinalForOrdinalRangeEncoding and enableOptimizedMerge. */
116-
public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, boolean enableOptimizedMerge) {
117+
public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinalForOrdinalRangeEncoding, boolean enableOptimizedMerge) {
117118
super(CODEC_NAME);
118119
// Reject skip index intervals smaller than 2.
if (skipIndexIntervalSize < 2) {
119120
throw new IllegalArgumentException("skipIndexIntervalSize must be > 1, got [" + skipIndexIntervalSize + "]");
120121
}
121122
this.skipIndexIntervalSize = skipIndexIntervalSize;
123+
// NOTE(review): unlike skipIndexIntervalSize, this value is stored without any range check;
// zero or negative values are accepted here. Confirm whether a lower bound should be enforced.
this.minDocsPerOrdinalForOrdinalRangeEncoding = minDocsPerOrdinalForOrdinalRangeEncoding;
122124
this.enableOptimizedMerge = enableOptimizedMerge;
123125
}
124126

@@ -127,6 +129,7 @@ public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOExcept
127129
return new ES819TSDBDocValuesConsumer(
128130
state,
129131
skipIndexIntervalSize,
132+
minDocsPerOrdinalForOrdinalRangeEncoding,
130133
enableOptimizedMerge,
131134
DATA_CODEC,
132135
DATA_EXTENSION,

0 commit comments

Comments
 (0)