diff --git a/src/hybrid_reader.c b/src/hybrid_reader.c
index ca720240eb2..e858159549e 100644
--- a/src/hybrid_reader.c
+++ b/src/hybrid_reader.c
@@ -443,11 +443,28 @@ void HybridIterator_Free(struct indexIterator *self) {
   rm_free(it);
 }
 
+static IndexIterator* HybridIteratorReducer(HybridIteratorParams *hParams) {
+  IndexIterator* ret = NULL;
+  if (hParams->childIt && hParams->childIt->type == EMPTY_ITERATOR) {
+    ret = hParams->childIt;
+  } else if (hParams->childIt && hParams->childIt->type == WILDCARD_ITERATOR) {
+    // TODO: revisit with the new Iterator API (consider READER_ITERATOR with an isWildcard flag)
+    hParams->childIt->Free(hParams->childIt);
+    hParams->qParams.searchMode = VECSIM_STANDARD_KNN;
+    hParams->childIt = NULL;
+  }
+  return ret;
+}
+
 IndexIterator *NewHybridVectorIterator(HybridIteratorParams hParams, QueryError *status) {
   // If searchMode is out of the expected range.
   if (hParams.qParams.searchMode < 0 || hParams.qParams.searchMode >= VECSIM_LAST_SEARCHMODE) {
     QueryError_SetError(status, QUERY_EGENERIC, "Creating new hybrid vector iterator has failed");
   }
+  IndexIterator* ri = HybridIteratorReducer(&hParams);
+  if (ri) {
+    return ri;
+  }
 
   HybridIterator *hi = rm_new(HybridIterator);
   hi->lastDocId = 0;
@@ -503,7 +520,7 @@ IndexIterator *NewHybridVectorIterator(HybridIteratorParams hParams, QueryError
     hi->returnedResults = array_new(RSIndexResult *, hParams.query.k);
   }
 
-  IndexIterator *ri = &hi->base;
+  ri = &hi->base;
   ri->ctx = hi;
   // This will be changed later to a valid RLookupKey if there is no syntax error in the query,
   // by the creation of the metrics loader results processor.
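
The reducer above short-circuits NewHybridVectorIterator before any allocation: an EMPTY child filter makes the whole hybrid query empty, while a WILDCARD child filters nothing, so the query degenerates to a standard KNN search. Below is a minimal sketch of the observable behaviour; `baseParams`, `emptyChild` and `wildcardChild` are illustrative placeholders (not identifiers from this change), and the usual parameter setup from hybrid_reader.c is assumed to be in scope.

    // Sketch only - the two short-circuit paths of HybridIteratorReducer.
    QueryError status = {0};
    HybridIteratorParams params = baseParams;   // vector index, query vector, k, ... (elided)

    params.childIt = emptyChild;                // any child whose type is EMPTY_ITERATOR
    IndexIterator *it = NewHybridVectorIterator(params, &status);
    // `it` is emptyChild itself; no HybridIterator was allocated.

    params.childIt = wildcardChild;             // any child whose type is WILDCARD_ITERATOR
    it = NewHybridVectorIterator(params, &status);
    // The wildcard child is freed and, inside the constructor, the search mode is
    // switched to VECSIM_STANDARD_KNN, so a plain (unfiltered) KNN iterator is built.
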
diff --git a/src/iterators/intersection_iterator.c b/src/iterators/intersection_iterator.c
index c1ea0a6ba4c..289ab126204 100644
--- a/src/iterators/intersection_iterator.c
+++ b/src/iterators/intersection_iterator.c
@@ -11,6 +11,7 @@
 #include "empty_iterator.h"
 #include "union_iterator.h"
 #include "index_result.h"
+#include "wildcard_iterator.h"
 
 /**************************** Read + SkipTo Helpers ****************************/
 
@@ -230,29 +231,89 @@ static int cmpIter(QueryIterator **it1, QueryIterator **it2) {
   return (int)(est1 - est2);
 }
 
-// Set estimation for number of results. Returns false if the query is empty (some of the iterators are NULL)
-static bool II_SetEstimation(IntersectionIterator *it) {
+// Set estimation for number of results.
+static void II_SetEstimation(IntersectionIterator *it) {
   // Set the expected number of results to the minimum of all iterators.
   // If any of the iterators is NULL, we set the expected number to 0
   RS_ASSERT(it->num_its); // Ensure there is at least one iterator, so we can set num_expected to SIZE_MAX temporarily
   it->num_expected = SIZE_MAX;
   for (uint32_t i = 0; i < it->num_its; ++i) {
     QueryIterator *cur = it->its[i];
-    if (!cur) {
-      // If the current iterator is empty, then the entire query will fail
-      it->num_expected = 0;
-      return false;
-    }
     size_t amount = cur->NumEstimated(cur);
     if (amount < it->num_expected) {
       it->num_expected = amount;
     }
   }
-  return true;
+}
+
+/**
+ * Reduce the intersection iterator by applying these rules:
+ * 1. If any of the iterators is an empty iterator, return the empty iterator and update the number of children
+ * 2. Remove all wildcard iterators, since they would not contribute to the intersection (return one of them if all are wildcards)
+ * 3. If there is only one child iterator left, return it
+ * 4. Otherwise, return NULL and let the caller create the intersection iterator
+*/
+static QueryIterator *IntersectionIteratorReducer(QueryIterator **its, size_t *num) {
+  QueryIterator *ret = NULL;
+
+  // Remove all wildcard iterators from the array
+  size_t current_size = *num;
+  size_t write_idx = 0;
+  bool all_wildcards = true;
+  for (size_t read_idx = 0; read_idx < current_size; read_idx++) {
+    if (IsWildcardIterator(its[read_idx])) {
+      if (!all_wildcards || (all_wildcards && read_idx != current_size - 1)) {
+        // free the wildcard when there are other, non-wildcard iterators;
+        // keep it only when it is the last one and all iterators are wildcards
+        its[read_idx]->Free(its[read_idx]);
+      }
+    } else {
+      all_wildcards = false;
+      its[write_idx++] = its[read_idx];
+    }
+  }
+  *num = write_idx;
+
+  // Check for empty iterators
+  for (size_t ii = 0; ii < write_idx; ++ii) {
+    if (!its[ii] || its[ii]->type == EMPTY_ITERATOR) {
+      ret = its[ii] ? its[ii] : IT_V2(NewEmptyIterator)();
+      its[ii] = NULL; // Mark as taken
+      break;
+    }
+  }
+
+  if (ret) {
+    // Free all non-NULL iterators
+    for (size_t ii = 0; ii < write_idx; ++ii) {
+      if (its[ii]) {
+        its[ii]->Free(its[ii]);
+      }
+    }
+  } else {
+    // Handle edge cases after wildcard removal
+    if (write_idx == 0) {
+      // All iterators were wildcards, return the last one, which was not freed
+      ret = its[current_size - 1];
+    } else if (write_idx == 1) {
+      // Only one iterator left, return it directly
+      ret = its[0];
+    }
+  }
+
+  if (ret != NULL) {
+    rm_free(its);
+  }
+
+  return ret;
 }
 
 QueryIterator *NewIntersectionIterator(QueryIterator **its, size_t num, int max_slop, bool in_order, double weight) {
   RS_ASSERT(its && num > 0);
+  QueryIterator *ret = IntersectionIteratorReducer(its, &num);
+  if (ret != NULL) {
+    return ret;
+  }
   IntersectionIterator *it = rm_calloc(1, sizeof(*it));
   it->its = its;
   it->num_its = num;
@@ -260,36 +321,31 @@ QueryIterator *NewIntersectionIterator(QueryIterator **its, size_t num, int max_
   it->max_slop = max_slop < 0 ? INT_MAX : max_slop;
   it->in_order = in_order;
 
-  bool allValid = II_SetEstimation(it);
+  II_SetEstimation(it);
 
   // Sort children iterators from low count to high count which reduces the number of iterations.
-  if (!in_order && allValid) {
+  if (!in_order) {
     qsort(its, num, sizeof(*its), (CompareFunc)cmpIter);
   }
 
   // bind the iterator calls
-  QueryIterator *base = &it->base;
-  base->type = INTERSECT_ITERATOR;
-  base->atEOF = false;
-  base->lastDocId = 0;
-  base->current = NewIntersectResult(num, weight);
-  base->NumEstimated = II_NumEstimated;
+  ret = &it->base;
+  ret->type = INTERSECT_ITERATOR;
+  ret->atEOF = false;
+  ret->lastDocId = 0;
+  ret->current = NewIntersectResult(num, weight);
+  ret->NumEstimated = II_NumEstimated;
   if (max_slop < 0 && !in_order) {
     // No slop and no order means every result is relevant, so we can use the fast path
-    base->Read = II_Read;
-    base->SkipTo = II_SkipTo;
+    ret->Read = II_Read;
+    ret->SkipTo = II_SkipTo;
   } else {
     // Otherwise, we need to check relevancy
-    base->Read = II_Read_CheckRelevancy;
-    base->SkipTo = II_SkipTo_CheckRelevancy;
+    ret->Read = II_Read_CheckRelevancy;
+    ret->SkipTo = II_SkipTo_CheckRelevancy;
   }
-  base->Free = II_Free;
-  base->Rewind = II_Rewind;
+  ret->Free = II_Free;
+  ret->Rewind = II_Rewind;
 
-  if (!allValid) {
-    // Some of the iterators are NULL, so the intersection will always be empty.
-    base->Free(base);
-    base = IT_V2(NewEmptyIterator)();
-  }
-  return base;
+  return ret;
 }
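
IntersectionIteratorReducer amounts to a set of algebraic rewrites applied before an intersection is built: wildcards drop out of an AND, an empty operand collapses the whole AND, and a single surviving child is returned as-is. A condensed sketch of the outcomes follows; `a`, `b`, `w`, `w2`, `e` are placeholder iterators (ordinary, wildcard and empty respectively) and `its_of(...)` is a hypothetical helper that allocates the QueryIterator* array handed over to the constructor, as at the real call sites.

    // max_slop = -1, in_order = false, weight = 1.0 in all calls below.
    NewIntersectionIterator(its_of(a, w, b), 3, -1, false, 1.0); // intersection of {a, b}; w is freed
    NewIntersectionIterator(its_of(w, w2),   2, -1, false, 1.0); // returns w2; w and the array are freed
    NewIntersectionIterator(its_of(a, e),    2, -1, false, 1.0); // returns e; a and the array are freed
    NewIntersectionIterator(its_of(a),       1, -1, false, 1.0); // returns a; the array is freed
    NewIntersectionIterator(its_of(a, b),    2, -1, false, 1.0); // builds a real IntersectionIterator
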
diff --git a/src/iterators/inverted_index_iterator.c b/src/iterators/inverted_index_iterator.c
index ab7411c83ba..58d83c5d402 100644
--- a/src/iterators/inverted_index_iterator.c
+++ b/src/iterators/inverted_index_iterator.c
@@ -278,6 +278,7 @@ static QueryIterator *NewInvIndIterator(InvertedIndex *idx, RSIndexResult *res,
   it->skipMulti = skipMulti;
   it->sctx = sctx;
   it->filterCtx = *filterCtx;
+  it->isWildcard = false;
   SetCurrentBlockReader(it);
 
   it->base.current = res;
@@ -361,13 +362,13 @@ QueryIterator *NewInvIndIterator_TermQuery(InvertedIndex *idx, const RedisSearch
 }
 
 QueryIterator *NewInvIndIterator_GenericQuery(InvertedIndex *idx, const RedisSearchCtx *sctx, t_fieldIndex fieldIndex,
-                                              enum FieldExpirationPredicate predicate) {
+                                              enum FieldExpirationPredicate predicate, double weight) {
   FieldFilterContext fieldCtx = {
     .field = {.isFieldMask = false, .value = {.index = fieldIndex}},
     .predicate = predicate,
   };
   IndexDecoderCtx decoderCtx = {.wideMask = RS_FIELDMASK_ALL}; // Also covers the case of a non-wide schema
-  RSIndexResult *record = NewVirtualResult(1, RS_FIELDMASK_ALL);
+  RSIndexResult *record = NewVirtualResult(weight, RS_FIELDMASK_ALL);
   record->freq = (predicate == FIELD_EXPIRATION_MISSING) ? 0 : 1; // TODO: is this required?
   return NewInvIndIterator(idx, record, &fieldCtx, true, sctx, &decoderCtx);
 }
diff --git a/src/iterators/inverted_index_iterator.h b/src/iterators/inverted_index_iterator.h
index 4555347c446..5249f15aed9 100644
--- a/src/iterators/inverted_index_iterator.h
+++ b/src/iterators/inverted_index_iterator.h
@@ -40,6 +40,9 @@ typedef struct InvIndIterator {
   // Whether to skip multi values from the same doc
   bool skipMulti;
 
+  // Whether this iterator is the result of a wildcard query
+  bool isWildcard;
+
   union {
     struct {
       double rangeMin;
@@ -70,7 +73,7 @@ QueryIterator *NewInvIndIterator_TermQuery(InvertedIndex *idx, const RedisSearch
 // The returned iterator will yield "virtual" records. For term/numeric indexes, it is best to use
 // the specific functions NewInvIndIterator_TermQuery/NewInvIndIterator_NumericQuery
 QueryIterator *NewInvIndIterator_GenericQuery(InvertedIndex *idx, const RedisSearchCtx *sctx, t_fieldIndex fieldIndex,
-                                              enum FieldExpirationPredicate predicate);
+                                              enum FieldExpirationPredicate predicate, double weight);
 
 #ifdef __cplusplus
 }
diff --git a/src/iterators/not_iterator.c b/src/iterators/not_iterator.c
index 9ff19a393bd..a74018e3101 100644
--- a/src/iterators/not_iterator.c
+++ b/src/iterators/not_iterator.c
@@ -226,14 +226,40 @@ static IteratorStatus NI_SkipTo_Optimized(QueryIterator *base, t_docId docId) {
   return rc;
 }
 
+/*
+ * Reduce the not iterator by applying these rules:
+ * 1. If the child is an empty iterator or NULL, return a wildcard iterator
+ * 2. If the child is a wildcard iterator, return an empty iterator
+ * 3. Otherwise, return NULL and let the caller create the not iterator
+ */
+static QueryIterator* NotIteratorReducer(QueryIterator *it, t_docId maxDocId, double weight, struct timespec timeout, QueryEvalCtx *q) {
+  RS_ASSERT(q);
+  QueryIterator *ret = NULL;
+  if (!it || it->type == EMPTY_ITERATOR) {
+    ret = IT_V2(NewWildcardIterator)(q, weight);
+  } else if (IsWildcardIterator(it)) {
+    ret = IT_V2(NewEmptyIterator)();
+  }
+  if (ret != NULL) {
+    if (it) {
+      it->Free(it);
+    }
+  }
+  return ret;
+}
+
 QueryIterator *IT_V2(NewNotIterator)(QueryIterator *it, t_docId maxDocId, double weight, struct timespec timeout, QueryEvalCtx *q) {
+  QueryIterator *ret = NotIteratorReducer(it, maxDocId, weight, timeout, q);
+  if (ret != NULL) {
+    return ret;
+  }
   NotIterator *ni = rm_calloc(1, sizeof(*ni));
-  QueryIterator *ret = &ni->base;
-  bool optimized = q && q->sctx->spec->rule && q->sctx->spec->rule->index_all;
+  ret = &ni->base;
+  bool optimized = q && q->sctx && q->sctx->spec && q->sctx->spec->rule && q->sctx->spec->rule->index_all;
   if (optimized) {
-    ni->wcii = IT_V2(NewWildcardIterator_Optimized)(q->sctx);
+    ni->wcii = IT_V2(NewWildcardIterator_Optimized)(q->sctx, weight);
   }
-  ni->child = it ? it : IT_V2(NewEmptyIterator)();
+  ni->child = it;
   ni->maxDocId = maxDocId;        // Valid for the optimized case as well, since this is the maxDocId of the embedded wildcard iterator
   ni->timeoutCtx = (TimeoutCtx){ .timeout = timeout, .counter = 0 };
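
NotIteratorReducer encodes the complement rules - NOT of nothing is everything, NOT of everything is nothing - so degenerate NOT nodes never allocate a NotIterator, and a real NotIterator is now guaranteed a non-NULL child (which is why `ni->child = it` no longer needs an empty-iterator fallback). A sketch of the three cases; `q`, `maxDocId`, `timeout`, `wildcardChild` and `termChild` are placeholders for values available at the call site.

    QueryIterator *it = IT_V2(NewNotIterator)(NULL, maxDocId, 1.0, timeout, q);
    // -> a wildcard iterator (same for an EMPTY_ITERATOR child, which is freed).

    it = IT_V2(NewNotIterator)(wildcardChild, maxDocId, 1.0, timeout, q);
    // -> an empty iterator; wildcardChild is freed.

    it = IT_V2(NewNotIterator)(termChild, maxDocId, 1.0, timeout, q);
    // -> a real NotIterator wrapping termChild, as before.
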
diff --git a/src/iterators/optional_iterator.c b/src/iterators/optional_iterator.c
index 3c8ee422ef9..38f6008f1f7 100644
--- a/src/iterators/optional_iterator.c
+++ b/src/iterators/optional_iterator.c
@@ -9,6 +9,7 @@
 
 #include "optional_iterator.h"
 #include "wildcard_iterator.h"
+#include "inverted_index_iterator.h"
 
 static void OI_Free(QueryIterator *base) {
   OptionalIterator *oi = (OptionalIterator *)base;
@@ -173,14 +174,38 @@ static IteratorStatus OI_Read_NotOptimized(QueryIterator *base) {
   return ITERATOR_OK;
 }
 
+/**
+ * Reduce the optional iterator by applying these rules:
+ * 1. If the child is an empty iterator or NULL, return a wildcard iterator
+ * 2. If the child is a wildcard iterator, return it
+ * 3. Otherwise, return NULL and let the caller create the optional iterator
+ */
+static QueryIterator* OptionalIteratorReducer(QueryIterator *it, QueryEvalCtx *q, double weight) {
+  QueryIterator *ret = NULL;
+  if (!it || it->type == EMPTY_ITERATOR) {
+    // If the child is NULL or empty, return a wildcard iterator. All hits will be virtual
+    ret = IT_V2(NewWildcardIterator)(q, weight);
+    if (it) {
+      it->Free(it);
+    }
+  } else if (IsWildcardIterator(it)) {
+    // All will be real hits
+    ret = it;
+  }
+  return ret;
+}
+
 // Create a new OPTIONAL iterator - Non-Optimized version.
 QueryIterator *IT_V2(NewOptionalIterator)(QueryIterator *it, QueryEvalCtx *q, double weight) {
-  RS_ASSERT(it != NULL);
   RS_ASSERT(q && q->sctx && q->sctx->spec && q->docTable);
+  QueryIterator *ret = OptionalIteratorReducer(it, q, weight);
+  if (ret != NULL) {
+    return ret;
+  }
   OptionalIterator *oi = rm_calloc(1, sizeof(*oi));
 
   bool optimized = q->sctx->spec->rule && q->sctx->spec->rule->index_all;
   if (optimized) {
-    oi->wcii = IT_V2(NewWildcardIterator_Optimized)(q->sctx);
+    oi->wcii = IT_V2(NewWildcardIterator_Optimized)(q->sctx, weight);
   }
   oi->child = it;
   oi->virt = NewVirtualResult(weight, RS_FIELDMASK_ALL);
@@ -188,7 +213,7 @@ QueryIterator *IT_V2(NewOptionalIterator)(QueryIterator *it, QueryEvalCtx *q, do
   oi->virt->freq = 1;
   oi->weight = weight;
 
-  QueryIterator *ret = &oi->base;
+  ret = &oi->base;
   ret->type = OPTIONAL_ITERATOR;
   ret->atEOF = false;
   ret->lastDocId = 0;
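
The OPTIONAL reducer follows the same pattern: an optional clause never restricts the result set, so a missing or empty child degenerates to a wildcard (every document becomes a virtual hit), and a wildcard child can be returned directly (every document is a real hit). A brief sketch, with `q`, `emptyChild`, `wildcardChild` and `termChild` as placeholders.

    QueryIterator *it = IT_V2(NewOptionalIterator)(NULL, q, 1.0);   // -> wildcard iterator, virtual hits
    it = IT_V2(NewOptionalIterator)(emptyChild, q, 1.0);            // -> wildcard iterator; emptyChild is freed
    it = IT_V2(NewOptionalIterator)(wildcardChild, q, 1.0);         // -> wildcardChild itself, real hits
    it = IT_V2(NewOptionalIterator)(termChild, q, 1.0);             // -> a real OptionalIterator around termChild
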
diff --git a/src/iterators/union_iterator.c b/src/iterators/union_iterator.c
index 4b2b441a3c0..98b27c93388 100644
--- a/src/iterators/union_iterator.c
+++ b/src/iterators/union_iterator.c
@@ -8,6 +8,8 @@
  */
 
 #include "union_iterator.h"
+#include "wildcard_iterator.h"
+#include "empty_iterator.h"
 
 static inline int cmpLastDocId(const void *e1, const void *e2, const void *udata) {
   const QueryIterator *it1 = e1, *it2 = e2;
@@ -372,8 +374,59 @@ static void UI_Free(QueryIterator *base) {
   rm_free(ui);
 }
 
+/**
+ * Reduce the union iterator by applying these rules:
+ * 1. Remove all empty iterators
+ * 2. If in quick exit mode and any of the iterators is a wildcard iterator, return it and free the rest
+ * 3. If only one iterator is left, return it; if none are left, return an empty iterator
+ * 4. Otherwise, return NULL and let the caller create the union iterator
+ */
+static QueryIterator *UnionIteratorReducer(QueryIterator **its, int *num, bool quickExit, double weight, QueryNodeType type, const char *q_str, IteratorsConfig *config) {
+  QueryIterator *ret = NULL;
+  // Remove all the empty iterators from the list
+  size_t current_size = *num;
+  size_t write_idx = 0;
+  for (size_t i = 0; i < current_size; ++i) {
+    if (its[i]) {
+      if (its[i]->type != EMPTY_ITERATOR) {
+        its[write_idx++] = its[i];
+      } else {
+        its[i]->Free(its[i]);
+      }
+    }
+  }
+  *num = write_idx;
+  if (quickExit) {
+    for (size_t i = 0; i < write_idx; ++i) {
+      if (IsWildcardIterator(its[i])) {
+        ret = its[i];
+        for (size_t j = 0; j < write_idx; ++j) {
+          if (i != j && its[j]) {
+            its[j]->Free(its[j]);
+          }
+        }
+        break;
+      }
+    }
+  }
+
+  if (write_idx == 1) {
+    ret = its[0];
+  } else if (write_idx == 0) {
+    ret = IT_V2(NewEmptyIterator)();
+  }
+  if (ret != NULL) {
+    rm_free(its);
+  }
+  return ret;
+}
+
 QueryIterator *IT_V2(NewUnionIterator)(QueryIterator **its, int num, bool quickExit,
-                                       double weight, QueryNodeType type, const char *q_str, IteratorsConfig *config) {
+                                      double weight, QueryNodeType type, const char *q_str, IteratorsConfig *config) {
+
+  QueryIterator* ret = UnionIteratorReducer(its, &num, quickExit, weight, type, q_str, config);
+  if (ret != NULL) {
+    return ret;
+  }
   // create union context
   UnionIterator *ui = rm_calloc(1, sizeof(UnionIterator));
   ui->its_orig = its;
@@ -384,14 +437,14 @@ QueryIterator *IT_V2(NewUnionIterator)(QueryIterator **its, int num, bool quickE
   ui->q_str = q_str;
 
   // bind the union iterator calls
-  QueryIterator *base = &ui->base;
-  base->type = UNION_ITERATOR;
-  base->atEOF = false;
-  base->lastDocId = 0;
-  base->current = NewUnionResult(num, weight);
-  base->NumEstimated = UI_NumEstimated;
-  base->Free = UI_Free;
-  base->Rewind = UI_Rewind;
+  ret = &ui->base;
+  ret->type = UNION_ITERATOR;
+  ret->atEOF = false;
+  ret->lastDocId = 0;
+  ret->current = NewUnionResult(num, weight);
+  ret->NumEstimated = UI_NumEstimated;
+  ret->Free = UI_Free;
+  ret->Rewind = UI_Rewind;
 
   // Choose `Read` and `SkipTo` implementations.
   // We have 2 factors for the choice:
@@ -401,15 +454,15 @@ QueryIterator *IT_V2(NewUnionIterator)(QueryIterator **its, int num, bool quickE
   // Each implementation if fine-tuned for the best performance in its scenario, and relies on the current state
   // of the iterator and how it was left by previous API calls, so we can't change implementation mid-execution.
   if (num > config->minUnionIterHeap) {
-    base->Read = quickExit ? UI_Read_Quick_Heap : UI_Read_Full_Heap;
-    base->SkipTo = quickExit ? UI_Skip_Quick_Heap : UI_Skip_Full_Heap;
+    ret->Read = quickExit ? UI_Read_Quick_Heap : UI_Read_Full_Heap;
+    ret->SkipTo = quickExit ? UI_Skip_Quick_Heap : UI_Skip_Full_Heap;
     ui->heap_min_id = rm_malloc(heap_sizeof(num));
     heap_init(ui->heap_min_id, cmpLastDocId, NULL, num);
   } else {
-    base->Read = quickExit ? UI_Read_Quick_Flat : UI_Read_Full_Flat;
-    base->SkipTo = quickExit ? UI_Skip_Quick_Flat : UI_Skip_Full_Flat;
+    ret->Read = quickExit ? UI_Read_Quick_Flat : UI_Read_Full_Flat;
+    ret->SkipTo = quickExit ? UI_Skip_Quick_Flat : UI_Skip_Full_Flat;
   }
   UI_SyncIterList(ui);
-  return base;
+  return ret;
 }
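
UnionIteratorReducer strips empty operands from an OR and, when only the set of matching ids matters (quickExit), lets a wildcard operand absorb the whole union. A sketch of the outcomes; `a`, `b`, `w`, `e` are placeholder iterators, `its_of(...)` a hypothetical helper that allocates the array owned by the constructor, and `config` the IteratorsConfig from the evaluation context.

    IT_V2(NewUnionIterator)(its_of(a, e, b), 3, false, 1.0, QN_UNION, NULL, config);
    // -> union over {a, b}; the empty child is freed.

    IT_V2(NewUnionIterator)(its_of(a, w, b), 3, true, 1.0, QN_UNION, NULL, config);
    // -> w itself (quick exit); a, b and the array are freed.

    IT_V2(NewUnionIterator)(its_of(a), 1, false, 1.0, QN_UNION, NULL, config);
    // -> a itself; the array is freed.

    IT_V2(NewUnionIterator)(its_of(e), 1, true, 1.0, QN_UNION, NULL, config);
    // -> a fresh empty iterator; e and the array are freed.
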
diff --git a/src/iterators/wildcard_iterator.c b/src/iterators/wildcard_iterator.c
index 659d69a2af9..fba50f7a4d9 100644
--- a/src/iterators/wildcard_iterator.c
+++ b/src/iterators/wildcard_iterator.c
@@ -55,14 +55,25 @@ static void WI_Rewind(QueryIterator *base) {
   base->lastDocId = 0;
 }
 
+bool IsWildcardIterator(QueryIterator *it) {
+  if (it && it->type == WILDCARD_ITERATOR) {
+    return true;
+  }
+  if (it && it->type == READ_ITERATOR) {
+    InvIndIterator *invIdxIt = (InvIndIterator *)it;
+    return invIdxIt->isWildcard;
+  }
+  return false;
+}
+
 /* Create a new wildcard iterator */
-QueryIterator *IT_V2(NewWildcardIterator_NonOptimized)(t_docId maxId, size_t numDocs) {
+QueryIterator *IT_V2(NewWildcardIterator_NonOptimized)(t_docId maxId, size_t numDocs, double weight) {
   WildcardIterator *wi = rm_calloc(1, sizeof(*wi));
   wi->currentId = 0;
   wi->topId = maxId;
   wi->numDocs = numDocs;
   QueryIterator *ret = &wi->base;
-  ret->current = NewVirtualResult(1, RS_FIELDMASK_ALL);
+  ret->current = NewVirtualResult(weight, RS_FIELDMASK_ALL);
   ret->current->freq = 1;
   ret->atEOF = false;
   ret->lastDocId = 0;
@@ -75,24 +86,29 @@ QueryIterator *IT_V2(NewWildcardIterator_NonOptimized)(t_docId maxId, size_t num
   return ret;
 }
 
-QueryIterator *IT_V2(NewWildcardIterator_Optimized)(const RedisSearchCtx *sctx) {
+QueryIterator *IT_V2(NewWildcardIterator_Optimized)(const RedisSearchCtx *sctx, double weight) {
   RS_ASSERT(sctx->spec->rule->index_all);
+  QueryIterator *ret = NULL;
   if (sctx->spec->existingDocs) {
-    return NewInvIndIterator_GenericQuery(sctx->spec->existingDocs, sctx,
-                                          RS_INVALID_FIELD_INDEX, FIELD_EXPIRATION_DEFAULT);
+    ret = NewInvIndIterator_GenericQuery(sctx->spec->existingDocs, sctx,
+                                          RS_INVALID_FIELD_INDEX, FIELD_EXPIRATION_DEFAULT, weight);
+    InvIndIterator *it = (InvIndIterator *)ret;
+    it->isWildcard = true;
  } else {
-    return IT_V2(NewEmptyIterator)(); // Index all and no index, means the spec is currently empty.
+    ret = IT_V2(NewEmptyIterator)(); // Index all and no index, means the spec is currently empty.
  }
+  return ret;
 }
 
 // Returns a new wildcard iterator.
 // If the spec tracks all existing documents, it will return an iterator over those documents.
 // Otherwise, it will return a non-optimized wildcard iterator
-QueryIterator *IT_V2(NewWildcardIterator)(const QueryEvalCtx *q) {
+QueryIterator *IT_V2(NewWildcardIterator)(const QueryEvalCtx *q, double weight) {
+  QueryIterator *ret = NULL;
   if (q->sctx->spec->rule->index_all == true) {
-    return IT_V2(NewWildcardIterator_Optimized)(q->sctx);
+    return IT_V2(NewWildcardIterator_Optimized)(q->sctx, weight);
   } else {
     // Non-optimized wildcard iterator, using a simple doc-id increment as its base.
-    return IT_V2(NewWildcardIterator_NonOptimized)(q->docTable->maxDocId, q->docTable->size);
+    return IT_V2(NewWildcardIterator_NonOptimized)(q->docTable->maxDocId, q->docTable->size, weight);
   }
 }
diff --git a/src/iterators/wildcard_iterator.h b/src/iterators/wildcard_iterator.h
index 23bde009872..5e9852b2a9a 100644
--- a/src/iterators/wildcard_iterator.h
+++ b/src/iterators/wildcard_iterator.h
@@ -27,22 +27,25 @@ typedef struct {
  * @param maxId - The maxID to return
  * @param numDocs - the number of docs to return
  */
-QueryIterator *IT_V2(NewWildcardIterator_NonOptimized)(t_docId maxId, size_t numDocs);
+QueryIterator *IT_V2(NewWildcardIterator_NonOptimized)(t_docId maxId, size_t numDocs, double weight);
 
 /**
  * Create a new optimized wildcard iterator.
  * This iterator can only be used when the index is configured to index all documents.
 * @param sctx - The search context
 */
-QueryIterator *IT_V2(NewWildcardIterator_Optimized)(const RedisSearchCtx *sctx);
+QueryIterator *IT_V2(NewWildcardIterator_Optimized)(const RedisSearchCtx *sctx, double weight);
 
 /**
 * Create a new wildcard iterator.
 * If possible, it will use the optimized wildcard iterator,
 * otherwise it will fall back to the non-optimized version.
 * @param q - The query evaluation context
+ * @param weight - The weight of the iterator
 */
-QueryIterator *IT_V2(NewWildcardIterator)(const QueryEvalCtx *q);
+QueryIterator *IT_V2(NewWildcardIterator)(const QueryEvalCtx *q, double weight);
+
+bool IsWildcardIterator(QueryIterator *it);
 
 #ifdef __cplusplus
 }
diff --git a/src/query.c b/src/query.c
index 51eda4a7721..cc4ed5ddd72 100644
--- a/src/query.c
+++ b/src/query.c
@@ -912,10 +912,13 @@ static IndexIterator *Query_EvalWildcardNode(QueryEvalCtx *q, QueryNode *qn) {
 
 static IndexIterator *Query_EvalNotNode(QueryEvalCtx *q, QueryNode *qn) {
   RS_LOG_ASSERT(qn->type == QN_NOT, "query node type should be not")
+  IndexIterator *child = NULL;
+  bool currently_notSubtree = q->notSubtree;
+  q->notSubtree = true;
+  child = Query_EvalNode(q, qn->children[0]);
+  q->notSubtree = currently_notSubtree;
 
-  return NewNotIterator(qn ? Query_EvalNode(q, qn->children[0]) : NULL,
-                        q->docTable->maxDocId, qn->opts.weight, q->sctx->time.timeout,
-                        q);
+  return NewNotIterator(child, q->docTable->maxDocId, qn->opts.weight, q->sctx->time.timeout, q);
 }
 
 static IndexIterator *Query_EvalOptionalNode(QueryEvalCtx *q, QueryNode *qn) {
@@ -1089,8 +1092,8 @@ static IndexIterator *Query_EvalUnionNode(QueryEvalCtx *q, QueryNode *qn) {
     rm_free(iters);
     return ret;
   }
-
-  IndexIterator *ret = NewUnionIterator(iters, n, 0, qn->opts.weight, QN_UNION, NULL, q->config);
+  bool quickExit = q->notSubtree;
+  IndexIterator *ret = NewUnionIterator(iters, n, quickExit, qn->opts.weight, QN_UNION, NULL, q->config);
 
   return ret;
 }
@@ -1477,8 +1480,10 @@ static IndexIterator *Query_EvalTagNode(QueryEvalCtx *q, QueryNode *qn) {
       array_free(total_its);
     }
   }
-
-  ret = NewUnionIterator(iters, n, 0, qn->opts.weight, QN_TAG, NULL, q->config);
+  // We want to get results with all the matching children (`quickExit == false`), unless:
+  // 1. We are a `Not` sub-tree, so we only care about the set of IDs
+  bool quickExit = q->notSubtree;
+  ret = NewUnionIterator(iters, n, quickExit, qn->opts.weight, QN_TAG, NULL, q->config);
 
 done:
   return ret;
@@ -1602,6 +1607,7 @@ IndexIterator *QAST_Iterate(QueryAST *qast, const RSSearchOptions *opts, RedisSe
     .metricRequestsP = &qast->metricRequests,
     .reqFlags = reqflags,
     .config = &qast->config,
+    .notSubtree = false,
   };
   IndexIterator *root = Query_EvalNode(&qectx, qast->root);
   if (!root) {
diff --git a/src/query_ctx.h b/src/query_ctx.h
index fe09e61da79..0e20c03fffd 100644
--- a/src/query_ctx.h
+++ b/src/query_ctx.h
@@ -24,4 +24,5 @@ typedef struct QueryEvalCtx {
   DocTable *docTable;
   uint32_t reqFlags;
   IteratorsConfig *config;
+  bool notSubtree;
 } QueryEvalCtx;
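
The query-evaluation changes tie the pieces together: while the child of a NOT node is being evaluated, notSubtree is set on the QueryEvalCtx, and union/tag nodes built in that context pass quickExit to the union constructor, because under a negation only the set of matching ids matters. A hypothetical trace for a query of the form `-@t:{x | y}` (the exact query syntax is illustrative):

    // Query_EvalNotNode       saves q->notSubtree, sets it to true, evaluates the child,
    //                         then restores the saved value for sibling subtrees.
    // Query_EvalTagNode       builds the per-value iterators for x and y and calls
    //                         NewUnionIterator(iters, 2, /*quickExit=*/ true, ...).
    // UnionIteratorReducer    drops empty value iterators; with quickExit, a wildcard
    //                         value iterator would absorb the whole union.
    // NewNotIterator          wraps the (possibly reduced) union; if the union reduced
    //                         to a wildcard, NotIteratorReducer returns an empty
    //                         iterator instead, and the NOT clause costs nothing to read.
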
diff --git a/src/query_parser/v1/parser.c b/src/query_parser/v1/parser.c
index 49495f13a55..ed1757007f2 100644
--- a/src/query_parser/v1/parser.c
+++ b/src/query_parser/v1/parser.c
@@ -76,6 +76,27 @@ static int one_not_null(void *a, void *b, void *out) {
   }
 }
 
+// optimize NOT nodes: NOT(NOT(A)) = A
+// if the child is a NOT node, return its child instead of creating a double negation
+static inline struct RSQueryNode* not_step(struct RSQueryNode* child) {
+  if (!child) {
+    return NULL;
+  }
+
+  // If the child is a NOT node, return its child (double negation elimination)
+  if (child->type == QN_NOT) {
+    struct RSQueryNode* grandchild = child->children[0];
+    // Detach the grandchild from its parent to prevent it from being freed
+    child->children[0] = NULL;
+    // Free the NOT node (the parent)
+    QueryNode_Free(child);
+    return grandchild;
+  }
+
+  // Otherwise, create a new NOT node
+  return NewNotNode(child);
+}
+
 /**************** End of %include directives **********************************/
 /* These constants specify the various numeric values for terminal symbols.
 ***************** Begin token definitions *************************************/
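
not_step gives the v1 parser double-negation elimination, NOT(NOT(A)) = A, at parse time, so the iterator tree (and the reducers above) never see the inner NOT node. A small illustration; `a` stands for any already-parsed sub-tree.

    struct RSQueryNode *n1 = not_step(a);   // a is not a NOT node -> wraps it: NewNotNode(a)
    struct RSQueryNode *n2 = not_step(n1);  // n1 is a NOT node    -> detaches and returns a,
                                            //                        freeing the wrapping NOT
    // A doubly negated clause therefore parses to the same tree as the clause itself.
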
@@ -134,7 +155,7 @@ static int one_not_null(void *a, void *b, void *out) {
 ** the minor type might be the name of the identifier.
 ** Each non-terminal can have a different minor type.
 ** Terminal symbols all have the same minor type, though.
-** This macros defines the minor type for terminal 
+** This macros defines the minor type for terminal
 ** symbols.
 ** YYMINORTYPE is the data type used for all minor types.
 ** This is typically a union of many types, one of
@@ -184,8 +205,8 @@ typedef union {
 #define YYSTACKDEPTH 100
 #endif
 #define RSQueryParser_v1_ARG_SDECL QueryParseCtx *ctx ;
-#define RSQueryParser_v1_ARG_PDECL , QueryParseCtx *ctx 
-#define RSQueryParser_v1_ARG_PARAM ,ctx 
+#define RSQueryParser_v1_ARG_PDECL , QueryParseCtx *ctx
+#define RSQueryParser_v1_ARG_PARAM ,ctx
 #define RSQueryParser_v1_ARG_FETCH QueryParseCtx *ctx =yypParser->ctx ;
 #define RSQueryParser_v1_ARG_STORE yypParser->ctx =ctx ;
 #define RSQueryParser_v1_CTX_SDECL
@@ -224,7 +245,7 @@ typedef union {
 /* Next are the tables used to determine what action to take based on the
 ** current state and lookahead token. These tables are used to implement
 ** functions that take a state number and lookahead value and return an
-** action integer. 
+** action integer.
 **
 ** Suppose the action integer is N. Then the action is determined as
 ** follows
@@ -368,9 +389,9 @@ static const YYACTIONTYPE yy_default[] = {
 };
 /********** End of lemon-generated parsing tables *****************************/
 
-/* The next table maps tokens (terminal symbols) into fallback tokens. 
+/* The next table maps tokens (terminal symbols) into fallback tokens.
 ** If a construct like the following:
-** 
+**
 ** %fallback ID X Y Z.
 **
 ** appears in the grammar, then ID becomes a fallback token for X, Y,
@@ -443,10 +464,10 @@ static char *yyTracePrompt = 0;
 #endif /* NDEBUG */
 
 #ifndef NDEBUG
-/* 
+/*
 ** Turn parser tracing on by giving a stream to which to write the trace
 ** and a prompt to preface each trace message. Tracing is turned off
-** by making either argument NULL 
+** by making either argument NULL
 **
 ** Inputs:
 **