@@ -62,11 +62,14 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
62
62
// Number of documents in the segment being written; assigned from state.segmentInfo.maxDoc() in the constructor.
final int maxDoc ;
63
63
// Scratch buffer, allocated as 1 << 14 (16 KiB) bytes in the constructor. It is not final, so presumably it is
// swapped for a larger array on demand -- NOTE(review): confirm against the terms-dictionary writing code.
private byte [] termsDictBuffer ;
64
64
// Copied from the constructor argument of the same name; its use is outside this excerpt.
private final int skipIndexIntervalSize ;
65
// Threshold used by shouldEncodeOrdinalRange: ordinal range encoding is only chosen when
// (numDocsWithValue / maxOrd) is at least this value.
+ private final int minDocsPerOrdinalForOrdinalRangeEncoding ;
65
66
// Copied from the constructor argument; in writeField it gates creation of a DISIAccumulator
// when not every document has a value.
final boolean enableOptimizedMerge ;
67
// Field number of the first index-sort field. The constructor sets it only when an index sort exists, its first
// sort field is not reversed, and that field is present in state.fieldInfos; otherwise it stays -1.
+ private int primarySortField = -1 ;
66
68
67
69
ES819TSDBDocValuesConsumer (
68
70
SegmentWriteState state ,
69
71
int skipIndexIntervalSize ,
72
+ int minDocsPerOrdinalForOrdinalRangeEncoding ,
70
73
boolean enableOptimizedMerge ,
71
74
String dataCodec ,
72
75
String dataExtension ,
@@ -75,6 +78,7 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
75
78
) throws IOException {
76
79
this .termsDictBuffer = new byte [1 << 14 ];
77
80
this .dir = state .directory ;
81
+ this .minDocsPerOrdinalForOrdinalRangeEncoding = minDocsPerOrdinalForOrdinalRangeEncoding ;
78
82
this .context = state .context ;
79
83
boolean success = false ;
80
84
try {
@@ -99,6 +103,13 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
99
103
maxDoc = state .segmentInfo .maxDoc ();
100
104
this .skipIndexIntervalSize = skipIndexIntervalSize ;
101
105
this .enableOptimizedMerge = enableOptimizedMerge ;
106
+ final var indexSort = state .segmentInfo .getIndexSort ();
107
+ if (indexSort != null && indexSort .getSort ().length > 0 && indexSort .getSort ()[0 ].getReverse () == false ) {
108
+ var sortField = state .fieldInfos .fieldInfo (indexSort .getSort ()[0 ].getField ());
109
+ if (sortField != null ) {
110
+ primarySortField = sortField .number ;
111
+ }
112
+ }
102
113
success = true ;
103
114
} finally {
104
115
if (success == false ) {
@@ -124,6 +135,10 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti
124
135
writeField (field , producer , -1 , null );
125
136
}
126
137
138
+ private boolean shouldEncodeOrdinalRange (FieldInfo field , long maxOrd , int numDocsWithValue ) {
139
+ return maxDoc > 1 && field .number == primarySortField && (numDocsWithValue / maxOrd ) >= minDocsPerOrdinalForOrdinalRangeEncoding ;
140
+ }
141
+
127
142
private long [] writeField (FieldInfo field , TsdbDocValuesProducer valuesProducer , long maxOrd , OffsetsAccumulator offsetsAccumulator )
128
143
throws IOException {
129
144
int numDocsWithValue = 0 ;
@@ -149,19 +164,52 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer,
149
164
try {
150
165
if (numValues > 0 ) {
151
166
assert numDocsWithValue > 0 ;
152
- // Special case for maxOrd of 1, signal -1 that no blocks will be written
153
- meta .writeInt (maxOrd != 1 ? ES819TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT : -1 );
154
167
final ByteBuffersDataOutput indexOut = new ByteBuffersDataOutput ();
155
- final DirectMonotonicWriter indexWriter = DirectMonotonicWriter .getInstance (
156
- meta ,
157
- new ByteBuffersIndexOutput (indexOut , "temp-dv-index" , "temp-dv-index" ),
158
- 1L + ((numValues - 1 ) >>> ES819TSDBDocValuesFormat .NUMERIC_BLOCK_SHIFT ),
159
- ES819TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT
160
- );
168
+ DirectMonotonicWriter indexWriter = null ;
161
169
162
170
final long valuesDataOffset = data .getFilePointer ();
163
- // Special case for maxOrd of 1, skip writing the blocks
164
- if (maxOrd != 1 ) {
171
+ if (maxOrd == 1 ) {
172
+ // Special case for maxOrd of 1, signal -1 that no blocks will be written
173
+ meta .writeInt (-1 );
174
+ } else if (shouldEncodeOrdinalRange (field , maxOrd , numDocsWithValue )) {
175
+ // When a field is sorted, use ordinal range encode for long runs of the same ordinal.
176
+ meta .writeInt (-2 );
177
+ meta .writeVInt (Math .toIntExact (maxOrd ));
178
+ values = valuesProducer .getSortedNumeric (field );
179
+ if (enableOptimizedMerge && numDocsWithValue < maxDoc ) {
180
+ disiAccumulator = new DISIAccumulator (dir , context , data , IndexedDISI .DEFAULT_DENSE_RANK_POWER );
181
+ }
182
+ DirectMonotonicWriter startDocs = DirectMonotonicWriter .getInstance (
183
+ meta ,
184
+ data ,
185
+ maxOrd + 1 ,
186
+ ES819TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT
187
+ );
188
+ long lastOrd = 0 ;
189
+ startDocs .add (0 );
190
+ for (int doc = values .nextDoc (); doc != DocIdSetIterator .NO_MORE_DOCS ; doc = values .nextDoc ()) {
191
+ if (disiAccumulator != null ) {
192
+ disiAccumulator .addDocId (doc );
193
+ }
194
+ if (offsetsAccumulator != null ) {
195
+ offsetsAccumulator .addDoc (1 );
196
+ }
197
+ final long nextOrd = values .nextValue ();
198
+ if (nextOrd != lastOrd ) {
199
+ lastOrd = nextOrd ;
200
+ startDocs .add (doc );
201
+ }
202
+ }
203
+ startDocs .add (maxDoc );
204
+ startDocs .finish ();
205
+ } else {
206
+ indexWriter = DirectMonotonicWriter .getInstance (
207
+ meta ,
208
+ new ByteBuffersIndexOutput (indexOut , "temp-dv-index" , "temp-dv-index" ),
209
+ 1L + ((numValues - 1 ) >>> ES819TSDBDocValuesFormat .NUMERIC_BLOCK_SHIFT ),
210
+ ES819TSDBDocValuesFormat .DIRECT_MONOTONIC_BLOCK_SHIFT
211
+ );
212
+ meta .writeInt (DIRECT_MONOTONIC_BLOCK_SHIFT );
165
213
final long [] buffer = new long [ES819TSDBDocValuesFormat .NUMERIC_BLOCK_SIZE ];
166
214
int bufferSize = 0 ;
167
215
final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder (ES819TSDBDocValuesFormat .NUMERIC_BLOCK_SIZE );
@@ -204,8 +252,7 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer,
204
252
}
205
253
206
254
final long valuesDataLength = data .getFilePointer () - valuesDataOffset ;
207
- if (maxOrd != 1 ) {
208
- // Special case for maxOrd of 1, indexWriter isn't really used, so no need to invoke finish() method.
255
+ if (indexWriter != null ) {
209
256
indexWriter .finish ();
210
257
}
211
258
final long indexDataOffset = data .getFilePointer ();
0 commit comments