Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion src/hybrid_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -443,11 +443,28 @@ void HybridIterator_Free(struct indexIterator *self) {
rm_free(it);
}

static IndexIterator* HybridIteratorReducer(HybridIteratorParams *hParams) {
IndexIterator* ret = NULL;
if (hParams->childIt && hParams->childIt->type == EMPTY_ITERATOR) {
ret = hParams->childIt;
} else if (hParams->childIt && hParams->childIt->type == WILDCARD_ITERATOR) {
//TODO: When new Iterator API (consider READER_ITERATOR with isWildcard flag)
hParams->childIt->Free(hParams->childIt);
hParams->qParams.searchMode = VECSIM_STANDARD_KNN;
hParams->childIt = NULL;
}
Comment on lines +450 to +455

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[BestPractice]

This check for WILDCARD_ITERATOR does not handle optimized wildcard iterators, which are of type READ_ITERATOR with a special flag. This can lead to missed optimizations for hybrid queries.

You've introduced IsWildcardIterator() to solve this, and it's used in the other new reducer functions. This reducer should also use IsWildcardIterator(hParams->childIt) to correctly handle all types of wildcard iterators. You'll also need to include iterators/wildcard_iterator.h at the top of the file.

Suggested change
} else if (hParams->childIt && hParams->childIt->type == WILDCARD_ITERATOR) {
//TODO: When new Iterator API (consider READER_ITERATOR with isWildcard flag)
hParams->childIt->Free(hParams->childIt);
hParams->qParams.searchMode = VECSIM_STANDARD_KNN;
hParams->childIt = NULL;
}
} else if (hParams->childIt && IsWildcardIterator(hParams->childIt)) {
//TODO: When new Iterator API (consider READER_ITERATOR with isWildcard flag)
hParams->childIt->Free(hParams->childIt);
hParams->qParams.searchMode = VECSIM_STANDARD_KNN;
hParams->childIt = NULL;
}

Committable suggestion

Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation.

Copy Context

return ret;
}

IndexIterator *NewHybridVectorIterator(HybridIteratorParams hParams, QueryError *status) {
// If searchMode is out of the expected range.
if (hParams.qParams.searchMode < 0 || hParams.qParams.searchMode >= VECSIM_LAST_SEARCHMODE) {
QueryError_SetError(status, QUERY_EGENERIC, "Creating new hybrid vector iterator has failed");
}
IndexIterator* ri = HybridIteratorReducer(&hParams);
if (ri) {
return ri;
}

HybridIterator *hi = rm_new(HybridIterator);
hi->lastDocId = 0;
Expand Down Expand Up @@ -503,7 +520,7 @@ IndexIterator *NewHybridVectorIterator(HybridIteratorParams hParams, QueryError
hi->returnedResults = array_new(RSIndexResult *, hParams.query.k);
}

IndexIterator *ri = &hi->base;
ri = &hi->base;
ri->ctx = hi;
// This will be changed later to a valid RLookupKey if there is no syntax error in the query,
// by the creation of the metrics loader results processor.
Expand Down
112 changes: 84 additions & 28 deletions src/iterators/intersection_iterator.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "empty_iterator.h"
#include "union_iterator.h"
#include "index_result.h"
#include "wildcard_iterator.h"

/**************************** Read + SkipTo Helpers ****************************/

Expand Down Expand Up @@ -230,66 +231,121 @@ static int cmpIter(QueryIterator **it1, QueryIterator **it2) {
return (int)(est1 - est2);
}

// Set estimation for number of results. Returns false if the query is empty (some of the iterators are NULL)
static bool II_SetEstimation(IntersectionIterator *it) {
// Set estimation for number of results.
static void II_SetEstimation(IntersectionIterator *it) {
// Set the expected number of results to the minimum of all iterators.
// If any of the iterators is NULL, we set the expected number to 0
RS_ASSERT(it->num_its); // Ensure there is at least one iterator, so we can set num_expected to SIZE_MAX temporarily
it->num_expected = SIZE_MAX;
for (uint32_t i = 0; i < it->num_its; ++i) {
QueryIterator *cur = it->its[i];
if (!cur) {
// If the current iterator is empty, then the entire query will fail
it->num_expected = 0;
return false;
}
size_t amount = cur->NumEstimated(cur);
if (amount < it->num_expected) {
it->num_expected = amount;
}
}
return true;
}

/**
* Reduce the intersection iterator by applying these rules:
* 1. If any of the iterators is an empty iterator, return the empty iterator and update the number of children
* 2. Remove all wildcard iterators since they would not contribute to the intersection (Return one of them if all are wildcards)
* 3. If there is only one left child iterator, return it
* 4. Otherwise, return NULL and let the caller create the intersection iterator
*/
static QueryIterator *IntersectionIteratorReducer(QueryIterator **its, size_t *num) {
QueryIterator *ret = NULL;

// Remove all wildcard iterators from the array
size_t current_size = *num;
size_t write_idx = 0;
bool all_wildcards = true;
for (size_t read_idx = 0; read_idx < current_size; read_idx++) {
if (IsWildcardIterator(its[read_idx])) {
if (!all_wildcards || all_wildcards && read_idx != current_size - 1) {
// remove all the wildcards in case there are other non-wildcard iterators
// avoid removing it in case it's the last one and all are wildcards
its[read_idx]->Free(its[read_idx]);
}
} else {
all_wildcards = false;
its[write_idx++] = its[read_idx];
}
}
*num = write_idx;

// Check for empty iterators
for (size_t ii = 0; ii < write_idx; ++ii) {
if (!its[ii] || its[ii]->type == EMPTY_ITERATOR) {
ret = its[ii] ? its[ii] : IT_V2(NewEmptyIterator)();
its[ii] = NULL; // Mark as taken
break;
}
}

if (ret) {
// Free all non-NULL iterators
for (size_t ii = 0; ii < write_idx; ++ii) {
if (its[ii]) {
its[ii]->Free(its[ii]);
}
}
} else {
// Handle edge cases after wildcard removal
if (write_idx == 0) {
// All iterators were wildcards, return the last one which was not Freed
ret = its[current_size - 1];
} else if (write_idx == 1) {
// Only one iterator left, return it directly
ret = its[0];
}
}

if (ret != NULL) {
rm_free(its);
}

return ret;
}

QueryIterator *NewIntersectionIterator(QueryIterator **its, size_t num, int max_slop, bool in_order, double weight) {
RS_ASSERT(its && num > 0);
QueryIterator *ret = IntersectionIteratorReducer(its, &num);
if (ret != NULL) {
return ret;
}
IntersectionIterator *it = rm_calloc(1, sizeof(*it));
it->its = its;
it->num_its = num;
// If max_slop is negative, we set it to INT_MAX in case `in_order` is true
it->max_slop = max_slop < 0 ? INT_MAX : max_slop;
it->in_order = in_order;

bool allValid = II_SetEstimation(it);
II_SetEstimation(it);

// Sort children iterators from low count to high count which reduces the number of iterations.
if (!in_order && allValid) {
if (!in_order) {
qsort(its, num, sizeof(*its), (CompareFunc)cmpIter);
}

// bind the iterator calls
QueryIterator *base = &it->base;
base->type = INTERSECT_ITERATOR;
base->atEOF = false;
base->lastDocId = 0;
base->current = NewIntersectResult(num, weight);
base->NumEstimated = II_NumEstimated;
ret = &it->base;
ret->type = INTERSECT_ITERATOR;
ret->atEOF = false;
ret->lastDocId = 0;
ret->current = NewIntersectResult(num, weight);
ret->NumEstimated = II_NumEstimated;
if (max_slop < 0 && !in_order) {
// No slop and no order means every result is relevant, so we can use the fast path
base->Read = II_Read;
base->SkipTo = II_SkipTo;
ret->Read = II_Read;
ret->SkipTo = II_SkipTo;
} else {
// Otherwise, we need to check relevancy
base->Read = II_Read_CheckRelevancy;
base->SkipTo = II_SkipTo_CheckRelevancy;
ret->Read = II_Read_CheckRelevancy;
ret->SkipTo = II_SkipTo_CheckRelevancy;
}
base->Free = II_Free;
base->Rewind = II_Rewind;
ret->Free = II_Free;
ret->Rewind = II_Rewind;

if (!allValid) {
// Some of the iterators are NULL, so the intersection will always be empty.
base->Free(base);
base = IT_V2(NewEmptyIterator)();
}
return base;
return ret;
}
5 changes: 3 additions & 2 deletions src/iterators/inverted_index_iterator.c
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ static QueryIterator *NewInvIndIterator(InvertedIndex *idx, RSIndexResult *res,
it->skipMulti = skipMulti;
it->sctx = sctx;
it->filterCtx = *filterCtx;
it->isWildcard = false;
SetCurrentBlockReader(it);

it->base.current = res;
Expand Down Expand Up @@ -361,13 +362,13 @@ QueryIterator *NewInvIndIterator_TermQuery(InvertedIndex *idx, const RedisSearch
}

QueryIterator *NewInvIndIterator_GenericQuery(InvertedIndex *idx, const RedisSearchCtx *sctx, t_fieldIndex fieldIndex,
enum FieldExpirationPredicate predicate) {
enum FieldExpirationPredicate predicate, double weight) {
FieldFilterContext fieldCtx = {
.field = {.isFieldMask = false, .value = {.index = fieldIndex}},
.predicate = predicate,
};
IndexDecoderCtx decoderCtx = {.wideMask = RS_FIELDMASK_ALL}; // Also covers the case of a non-wide schema
RSIndexResult *record = NewVirtualResult(1, RS_FIELDMASK_ALL);
RSIndexResult *record = NewVirtualResult(weight, RS_FIELDMASK_ALL);
record->freq = (predicate == FIELD_EXPIRATION_MISSING) ? 0 : 1; // TODO: is this required?
return NewInvIndIterator(idx, record, &fieldCtx, true, sctx, &decoderCtx);
}
5 changes: 4 additions & 1 deletion src/iterators/inverted_index_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ typedef struct InvIndIterator {
// Whether to skip multi values from the same doc
bool skipMulti;

// Whether this iterator is result of a wildcard query
bool isWildcard;

union {
struct {
double rangeMin;
Expand Down Expand Up @@ -70,7 +73,7 @@ QueryIterator *NewInvIndIterator_TermQuery(InvertedIndex *idx, const RedisSearch
// The returned iterator will yield "virtual" records. For term/numeric indexes, it is best to use
// the specific functions NewInvIndIterator_TermQuery/NewInvIndIterator_NumericQuery
QueryIterator *NewInvIndIterator_GenericQuery(InvertedIndex *idx, const RedisSearchCtx *sctx, t_fieldIndex fieldIndex,
enum FieldExpirationPredicate predicate);
enum FieldExpirationPredicate predicate, double weight);

#ifdef __cplusplus
}
Expand Down
34 changes: 30 additions & 4 deletions src/iterators/not_iterator.c
Original file line number Diff line number Diff line change
Expand Up @@ -226,14 +226,40 @@ static IteratorStatus NI_SkipTo_Optimized(QueryIterator *base, t_docId docId) {
return rc;
}

/*
* Reduce the not iterator by applying these rules:
* 1. If the child is an empty iterator or NULL, return a wildcard iterator
* 2. If the child is a wildcard iterator, return an empty iterator
* 3. Otherwise, return NULL and let the caller create the not iterator
*/
static QueryIterator* NotIteratorReducer(QueryIterator *it, t_docId maxDocId, double weight, struct timespec timeout, QueryEvalCtx *q) {
RS_ASSERT(q);
QueryIterator *ret = NULL;
if (!it || it->type == EMPTY_ITERATOR) {
ret = IT_V2(NewWildcardIterator)(q, weight);
} else if (IsWildcardIterator(it)) {
ret = IT_V2(NewEmptyIterator)();
}
if (ret != NULL) {
if (it) {
it->Free(it);
}
}
return ret;
}

QueryIterator *IT_V2(NewNotIterator)(QueryIterator *it, t_docId maxDocId, double weight, struct timespec timeout, QueryEvalCtx *q) {
QueryIterator *ret = NotIteratorReducer(it, maxDocId, weight, timeout, q);
if (ret != NULL) {
return ret;
}
NotIterator *ni = rm_calloc(1, sizeof(*ni));
QueryIterator *ret = &ni->base;
bool optimized = q && q->sctx->spec->rule && q->sctx->spec->rule->index_all;
ret = &ni->base;
bool optimized = q && q->sctx && q->sctx->spec && q->sctx->spec->rule && q->sctx->spec->rule->index_all;
if (optimized) {
ni->wcii = IT_V2(NewWildcardIterator_Optimized)(q->sctx);
ni->wcii = IT_V2(NewWildcardIterator_Optimized)(q->sctx, weight);
}
ni->child = it ? it : IT_V2(NewEmptyIterator)();
ni->child = it;
ni->maxDocId = maxDocId; // Valid for the optimized case as well, since this is the maxDocId of the embedded wildcard iterator
ni->timeoutCtx = (TimeoutCtx){ .timeout = timeout, .counter = 0 };

Expand Down
31 changes: 28 additions & 3 deletions src/iterators/optional_iterator.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include "optional_iterator.h"
#include "wildcard_iterator.h"
#include "inverted_index_iterator.h"

static void OI_Free(QueryIterator *base) {
OptionalIterator *oi = (OptionalIterator *)base;
Expand Down Expand Up @@ -173,22 +174,46 @@ static IteratorStatus OI_Read_NotOptimized(QueryIterator *base) {
return ITERATOR_OK;
}

/**
* Reduce the optional iterator by applying these rules:
* 1. If the child is an empty iterator or NULL, return a wildcard iterator
* 2. If the child is a wildcard iterator, return it
* 3. Otherwise, return NULL and let the caller create the optional iterator
*/
static QueryIterator* OptionalIteratorReducer(QueryIterator *it, QueryEvalCtx *q, double weight) {
QueryIterator *ret = NULL;
if (!it || it->type == EMPTY_ITERATOR) {
// If the child is NULL, we return a wildcard iterator. All will be virtual hits
ret = IT_V2(NewWildcardIterator)(q, weight);
if (it) {
it->Free(it);
}
} else if (IsWildcardIterator(it)) {
// All will be real hits
ret = it;
}
return ret;
}

// Create a new OPTIONAL iterator - Non-Optimized version.
QueryIterator *IT_V2(NewOptionalIterator)(QueryIterator *it, QueryEvalCtx *q, double weight) {
RS_ASSERT(it != NULL);
RS_ASSERT(q && q->sctx && q->sctx->spec && q->docTable);
QueryIterator *ret = OptionalIteratorReducer(it, q, weight);
if (ret != NULL) {
return ret;
}
OptionalIterator *oi = rm_calloc(1, sizeof(*oi));
bool optimized = q->sctx->spec->rule && q->sctx->spec->rule->index_all;
if (optimized) {
oi->wcii = IT_V2(NewWildcardIterator_Optimized)(q->sctx);
oi->wcii = IT_V2(NewWildcardIterator_Optimized)(q->sctx, weight);
}
oi->child = it;
oi->virt = NewVirtualResult(weight, RS_FIELDMASK_ALL);
oi->maxDocId = q->docTable->maxDocId;
oi->virt->freq = 1;
oi->weight = weight;

QueryIterator *ret = &oi->base;
ret = &oi->base;
ret->type = OPTIONAL_ITERATOR;
ret->atEOF = false;
ret->lastDocId = 0;
Expand Down
Loading