propel-gtm · tonyd3 · Jul 8, 2025 · propel-code-bot · Aug 5, 2025
diff --git a/src/hybrid_reader.c b/src/hybrid_reader.c
@@ -443,11 +443,28 @@ void HybridIterator_Free(struct indexIterator *self) {
   rm_free(it);
 }
 
+static IndexIterator* HybridIteratorReducer(HybridIteratorParams *hParams) {
+  IndexIterator* ret = NULL;
+  if (hParams->childIt && hParams->childIt->type == EMPTY_ITERATOR) {
+    ret = hParams->childIt;
+  } else if (hParams->childIt && hParams->childIt->type == WILDCARD_ITERATOR) {
+    //TODO: When new Iterator API (consider READER_ITERATOR with isWildcard flag)
+    hParams->childIt->Free(hParams->childIt);
+    hParams->qParams.searchMode = VECSIM_STANDARD_KNN;
+    hParams->childIt = NULL;
+  }
-  } else if (hParams->childIt && hParams->childIt->type == WILDCARD_ITERATOR) {
-    //TODO: When new Iterator API (consider READER_ITERATOR with isWildcard flag)
-    hParams->childIt->Free(hParams->childIt);
-    hParams->qParams.searchMode = VECSIM_STANDARD_KNN;
-    hParams->childIt = NULL;
-  }
+  } else if (hParams->childIt && IsWildcardIterator(hParams->childIt)) {
+    //TODO: When new Iterator API (consider READER_ITERATOR with isWildcard flag)
+    hParams->childIt->Free(hParams->childIt);
+    hParams->qParams.searchMode = VECSIM_STANDARD_KNN;
+    hParams->childIt = NULL;
+  }
-  } else if (hParams->childIt && hParams->childIt->type == WILDCARD_ITERATOR) {
-    //TODO: When new Iterator API (consider READER_ITERATOR with isWildcard flag)
-    hParams->childIt->Free(hParams->childIt);
-    hParams->qParams.searchMode = VECSIM_STANDARD_KNN;
-    hParams->childIt = NULL;
-  }
+  } else if (hParams->childIt && IsWildcardIterator(hParams->childIt)) {
+    //TODO: When new Iterator API (consider READER_ITERATOR with isWildcard flag)
+    hParams->childIt->Free(hParams->childIt);
+    hParams->qParams.searchMode = VECSIM_STANDARD_KNN;
+    hParams->childIt = NULL;
+  }
+  return ret;
+}
+
 IndexIterator *NewHybridVectorIterator(HybridIteratorParams hParams, QueryError *status) {
   // If searchMode is out of the expected range.
   if (hParams.qParams.searchMode < 0 || hParams.qParams.searchMode >= VECSIM_LAST_SEARCHMODE) {
     QueryError_SetError(status, QUERY_EGENERIC, "Creating new hybrid vector iterator has failed");
   }
+  IndexIterator* ri = HybridIteratorReducer(&hParams);
+  if (ri) {
+    return ri;
+  }
 
   HybridIterator *hi = rm_new(HybridIterator);
   hi->lastDocId = 0;
@@ -503,7 +520,7 @@ IndexIterator *NewHybridVectorIterator(HybridIteratorParams hParams, QueryError
     hi->returnedResults = array_new(RSIndexResult *, hParams.query.k);
   }
 
-  IndexIterator *ri = &hi->base;
+  ri = &hi->base;
   ri->ctx = hi;
   // This will be changed later to a valid RLookupKey if there is no syntax error in the query,
   // by the creation of the metrics loader results processor.

diff --git a/src/iterators/intersection_iterator.c b/src/iterators/intersection_iterator.c
@@ -11,6 +11,7 @@
 #include "empty_iterator.h"
 #include "union_iterator.h"
 #include "index_result.h"
+#include "wildcard_iterator.h"
 
 /**************************** Read + SkipTo Helpers ****************************/
 
@@ -230,66 +231,121 @@ static int cmpIter(QueryIterator **it1, QueryIterator **it2) {
   return (int)(est1 - est2);
 }
 
-// Set estimation for number of results. Returns false if the query is empty (some of the iterators are NULL)
-static bool II_SetEstimation(IntersectionIterator *it) {
+// Set estimation for number of results.
+static void II_SetEstimation(IntersectionIterator *it) {
   // Set the expected number of results to the minimum of all iterators.
   // If any of the iterators is NULL, we set the expected number to 0
   RS_ASSERT(it->num_its); // Ensure there is at least one iterator, so we can set num_expected to SIZE_MAX temporarily
   it->num_expected = SIZE_MAX;
   for (uint32_t i = 0; i < it->num_its; ++i) {
     QueryIterator *cur = it->its[i];
-    if (!cur) {
-      // If the current iterator is empty, then the entire query will fail
-      it->num_expected = 0;
-      return false;
-    }
     size_t amount = cur->NumEstimated(cur);
     if (amount < it->num_expected) {
       it->num_expected = amount;
     }
   }
-  return true;
+}
+
+/**
+ * Reduce the intersection iterator by applying these rules:
+ * 1. If any of the iterators is an empty iterator, return the empty iterator and update the number of children
+ * 2. Remove all wildcard iterators since they would not contribute to the intersection (Return one of them if all are wildcards)
+ * 3. If there is only one left child iterator, return it
+ * 4. Otherwise, return NULL and let the caller create the intersection iterator
+*/
+static QueryIterator *IntersectionIteratorReducer(QueryIterator **its, size_t *num) {
+  QueryIterator *ret = NULL;
+
+  // Remove all wildcard iterators from the array
+  size_t current_size = *num;
+  size_t write_idx = 0;
+  bool all_wildcards = true;
+  for (size_t read_idx = 0; read_idx < current_size; read_idx++) {
+    if (IsWildcardIterator(its[read_idx])) {
+      if (!all_wildcards || all_wildcards && read_idx != current_size - 1) {
+        // remove all the wildcards in case there are other non-wildcard iterators
+        // avoid removing it in case it's the last one and all are wildcards
+        its[read_idx]->Free(its[read_idx]);
+      }
+    } else {
+      all_wildcards = false;
+      its[write_idx++] = its[read_idx];
+    }
+  }
+  *num = write_idx;
+
+  // Check for empty iterators
+  for (size_t ii = 0; ii < write_idx; ++ii) {
+    if (!its[ii] || its[ii]->type == EMPTY_ITERATOR) {
+      ret = its[ii] ? its[ii] : IT_V2(NewEmptyIterator)();
+      its[ii] = NULL; // Mark as taken
+      break;
+    }
+  }
+
+  if (ret) {
+    // Free all non-NULL iterators
+    for (size_t ii = 0; ii < write_idx; ++ii) {
+      if (its[ii]) {
+        its[ii]->Free(its[ii]);
+      }
+    }
+  } else {
+    // Handle edge cases after wildcard removal
+    if (write_idx == 0) {
+      // All iterators were wildcards, return the last one which was not Freed
+      ret = its[current_size - 1];
+    } else if (write_idx == 1) {
+      // Only one iterator left, return it directly
+      ret = its[0];
+    }
+  }
+
+  if (ret != NULL) {
+    rm_free(its);
+  }
+
+  return ret;
 }
 
 QueryIterator *NewIntersectionIterator(QueryIterator **its, size_t num, int max_slop, bool in_order, double weight) {
   RS_ASSERT(its && num > 0);
+  QueryIterator *ret = IntersectionIteratorReducer(its, &num);
+  if (ret != NULL) {
+    return ret;
+  }
   IntersectionIterator *it = rm_calloc(1, sizeof(*it));
   it->its = its;
   it->num_its = num;
   // If max_slop is negative, we set it to INT_MAX in case `in_order` is true
   it->max_slop = max_slop < 0 ? INT_MAX : max_slop;
   it->in_order = in_order;
 
-  bool allValid = II_SetEstimation(it);
+  II_SetEstimation(it);
 
   // Sort children iterators from low count to high count which reduces the number of iterations.
-  if (!in_order && allValid) {
+  if (!in_order) {
     qsort(its, num, sizeof(*its), (CompareFunc)cmpIter);
   }
 
   // bind the iterator calls
-  QueryIterator *base = &it->base;
-  base->type = INTERSECT_ITERATOR;
-  base->atEOF = false;
-  base->lastDocId = 0;
-  base->current = NewIntersectResult(num, weight);
-  base->NumEstimated = II_NumEstimated;
+  ret = &it->base;
+  ret->type = INTERSECT_ITERATOR;
+  ret->atEOF = false;
+  ret->lastDocId = 0;
+  ret->current = NewIntersectResult(num, weight);
+  ret->NumEstimated = II_NumEstimated;
   if (max_slop < 0 && !in_order) {
     // No slop and no order means every result is relevant, so we can use the fast path
-    base->Read = II_Read;
-    base->SkipTo = II_SkipTo;
+    ret->Read = II_Read;
+    ret->SkipTo = II_SkipTo;
   } else {
     // Otherwise, we need to check relevancy
-    base->Read = II_Read_CheckRelevancy;
-    base->SkipTo = II_SkipTo_CheckRelevancy;
+    ret->Read = II_Read_CheckRelevancy;
+    ret->SkipTo = II_SkipTo_CheckRelevancy;
   }
-  base->Free = II_Free;
-  base->Rewind = II_Rewind;
+  ret->Free = II_Free;
+  ret->Rewind = II_Rewind;
 
-  if (!allValid) {
-    // Some of the iterators are NULL, so the intersection will always be empty.
-    base->Free(base);
-    base = IT_V2(NewEmptyIterator)();
-  }
-  return base;
+  return ret;
 }
diff --git a/src/iterators/inverted_index_iterator.c b/src/iterators/inverted_index_iterator.c
@@ -278,6 +278,7 @@ static QueryIterator *NewInvIndIterator(InvertedIndex *idx, RSIndexResult *res,
   it->skipMulti = skipMulti;
   it->sctx = sctx;
   it->filterCtx = *filterCtx;
+  it->isWildcard = false;
   SetCurrentBlockReader(it);
 
   it->base.current = res;
@@ -361,13 +362,13 @@ QueryIterator *NewInvIndIterator_TermQuery(InvertedIndex *idx, const RedisSearch
 }
 
 QueryIterator *NewInvIndIterator_GenericQuery(InvertedIndex *idx, const RedisSearchCtx *sctx, t_fieldIndex fieldIndex,
-                                              enum FieldExpirationPredicate predicate) {
+                                              enum FieldExpirationPredicate predicate, double weight) {
   FieldFilterContext fieldCtx = {
     .field = {.isFieldMask = false, .value = {.index = fieldIndex}},
     .predicate = predicate,
   };
   IndexDecoderCtx decoderCtx = {.wideMask = RS_FIELDMASK_ALL}; // Also covers the case of a non-wide schema
-  RSIndexResult *record = NewVirtualResult(1, RS_FIELDMASK_ALL);
+  RSIndexResult *record = NewVirtualResult(weight, RS_FIELDMASK_ALL);
   record->freq = (predicate == FIELD_EXPIRATION_MISSING) ? 0 : 1; // TODO: is this required?
   return NewInvIndIterator(idx, record, &fieldCtx, true, sctx, &decoderCtx);
 }
diff --git a/src/iterators/inverted_index_iterator.h b/src/iterators/inverted_index_iterator.h
@@ -40,6 +40,9 @@ typedef struct InvIndIterator {
   // Whether to skip multi values from the same doc
   bool skipMulti;
 
+  // Whether this iterator is result of a wildcard query
+  bool isWildcard;
+
   union {
     struct {
       double rangeMin;
@@ -70,7 +73,7 @@ QueryIterator *NewInvIndIterator_TermQuery(InvertedIndex *idx, const RedisSearch
 // The returned iterator will yield "virtual" records. For term/numeric indexes, it is best to use
 // the specific functions NewInvIndIterator_TermQuery/NewInvIndIterator_NumericQuery
 QueryIterator *NewInvIndIterator_GenericQuery(InvertedIndex *idx, const RedisSearchCtx *sctx, t_fieldIndex fieldIndex,
-                                              enum FieldExpirationPredicate predicate);
+                                              enum FieldExpirationPredicate predicate, double weight);
 
 #ifdef __cplusplus
 }

diff --git a/src/iterators/not_iterator.c b/src/iterators/not_iterator.c
@@ -226,14 +226,40 @@ static IteratorStatus NI_SkipTo_Optimized(QueryIterator *base, t_docId docId) {
   return rc;
 }
 
+/*
+ * Reduce the not iterator by applying these rules:
+ * 1. If the child is an empty iterator or NULL, return a wildcard iterator
+ * 2. If the child is a wildcard iterator, return an empty iterator
+ * 3. Otherwise, return NULL and let the caller create the not iterator
+ */
+static QueryIterator* NotIteratorReducer(QueryIterator *it, t_docId maxDocId, double weight, struct timespec timeout, QueryEvalCtx *q) {
+  RS_ASSERT(q);
+  QueryIterator *ret = NULL;
+  if (!it || it->type == EMPTY_ITERATOR) {
+    ret = IT_V2(NewWildcardIterator)(q, weight);
+  } else if (IsWildcardIterator(it)) {
+    ret = IT_V2(NewEmptyIterator)();
+  }
+  if (ret != NULL) {
+    if (it) {
+      it->Free(it);
+    }
+  }
+  return ret;
+}
+
 QueryIterator *IT_V2(NewNotIterator)(QueryIterator *it, t_docId maxDocId, double weight, struct timespec timeout, QueryEvalCtx *q) {
+  QueryIterator *ret = NotIteratorReducer(it, maxDocId, weight, timeout, q);
+  if (ret != NULL) {
+    return ret;
+  }
   NotIterator *ni = rm_calloc(1, sizeof(*ni));
-  QueryIterator *ret = &ni->base;
-  bool optimized = q && q->sctx->spec->rule && q->sctx->spec->rule->index_all;
+  ret = &ni->base;
+  bool optimized = q && q->sctx && q->sctx->spec && q->sctx->spec->rule && q->sctx->spec->rule->index_all;
   if (optimized) {
-    ni->wcii = IT_V2(NewWildcardIterator_Optimized)(q->sctx);
+    ni->wcii = IT_V2(NewWildcardIterator_Optimized)(q->sctx, weight);
   }
-  ni->child = it ? it : IT_V2(NewEmptyIterator)();
+  ni->child = it;
   ni->maxDocId = maxDocId;          // Valid for the optimized case as well, since this is the maxDocId of the embedded wildcard iterator
   ni->timeoutCtx = (TimeoutCtx){ .timeout = timeout, .counter = 0 };
 

diff --git a/src/iterators/optional_iterator.c b/src/iterators/optional_iterator.c
@@ -9,6 +9,7 @@
 
 #include "optional_iterator.h"
 #include "wildcard_iterator.h"
+#include "inverted_index_iterator.h"
 
 static void OI_Free(QueryIterator *base) {
   OptionalIterator *oi = (OptionalIterator *)base;
@@ -173,22 +174,46 @@ static IteratorStatus OI_Read_NotOptimized(QueryIterator *base) {
   return ITERATOR_OK;
 }
 
+/**
+ * Reduce the optional iterator by applying these rules:
+ * 1. If the child is an empty iterator or NULL, return a wildcard iterator
+ * 2. If the child is a wildcard iterator, return it
+ * 3. Otherwise, return NULL and let the caller create the optional iterator
+ */
+static QueryIterator* OptionalIteratorReducer(QueryIterator *it, QueryEvalCtx *q, double weight) {
+  QueryIterator *ret = NULL;
+  if (!it || it->type == EMPTY_ITERATOR) {
+    // If the child is NULL, we return a wildcard iterator. All will be virtual hits
+    ret = IT_V2(NewWildcardIterator)(q, weight);
+    if (it) {
+      it->Free(it);
+    }
+  } else if (IsWildcardIterator(it)) {
+    // All will be real hits
+    ret = it;
+  }
+  return ret;
+}
+
 // Create a new OPTIONAL iterator - Non-Optimized version.
 QueryIterator *IT_V2(NewOptionalIterator)(QueryIterator *it, QueryEvalCtx *q, double weight) {
-  RS_ASSERT(it != NULL);
   RS_ASSERT(q && q->sctx && q->sctx->spec && q->docTable);
+  QueryIterator *ret = OptionalIteratorReducer(it, q, weight);
+  if (ret != NULL) {
+    return ret;
+  }
   OptionalIterator *oi = rm_calloc(1, sizeof(*oi));
   bool optimized = q->sctx->spec->rule && q->sctx->spec->rule->index_all;
   if (optimized) {
-    oi->wcii = IT_V2(NewWildcardIterator_Optimized)(q->sctx);
+    oi->wcii = IT_V2(NewWildcardIterator_Optimized)(q->sctx, weight);
   }
   oi->child = it;
   oi->virt = NewVirtualResult(weight, RS_FIELDMASK_ALL);
   oi->maxDocId = q->docTable->maxDocId;
   oi->virt->freq = 1;
   oi->weight = weight;
 
-  QueryIterator *ret = &oi->base;
+  ret = &oi->base;
   ret->type = OPTIONAL_ITERATOR;
   ret->atEOF = false;
   ret->lastDocId = 0;