Skip to content

Commit

Permalink
blend equality and existence optimizations into one
Browse files Browse the repository at this point in the history
  • Loading branch information
igorkramaric committed May 9, 2024
1 parent 6c62af3 commit 6507cf3
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 19 deletions.
32 changes: 20 additions & 12 deletions daffodil/hstore_predicate.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -111,26 +111,34 @@ cdef class HStoreQueryDelegate(BaseDaffodilDelegate):

def optimize_equality_and(self, children, sql_expr):
to_optimize = [child_exp for child_exp in children if child_exp.daff_test == "="]

keys = [child_exp.daff_key for child_exp in to_optimize]
unique_keys = set(keys)

if len(unique_keys) < 2 or len(keys) != len(unique_keys):
return sql_expr

keys_and_values = ", ".join(
f'"{child_exp.daff_key}"=>"{child_exp.daff_val}"'
for child_exp in to_optimize
)
optimization_expr = f"{self.field} @> '{keys_and_values}'"
sql_optimized = f"{self.field} @> '{keys_and_values}'"

sql_expr = f" AND ".join(
f"({child_exp})"
for child_exp in children if child_exp.daff_test != "="
remaining_children = [child_exp for child_exp in children if child_exp.daff_test != "="]
if not len(remaining_children):
# every single expression is an equality test, nothing more to do
return sql_optimized

sql_remaining = f" AND ".join(
f"({child_exp})" for child_exp in remaining_children
)
if sql_expr:
return f"{optimization_expr} AND ({sql_expr})"
else:
return optimization_expr

# Apply existence optimizer if applicable
if not breaks_existence_optimizer(remaining_children):
sql_remaining = self.optimize_existence(remaining_children, sql_remaining, "?&", "AND")

return f"{sql_optimized} AND ({sql_remaining})"



def mk_all(self, children):
if not children or not any(children):
Expand All @@ -141,12 +149,12 @@ cdef class HStoreQueryDelegate(BaseDaffodilDelegate):

sql_expr = " AND ".join(f"({child_exp})" for child_exp in children if child_exp)

if not breaks_existence_optimizer(children):
return self.optimize_existence(children, sql_expr, "?&", "AND")

if not breaks_equality_optimizer(children):
return self.optimize_equality_and(children, sql_expr)

if not breaks_existence_optimizer(children):
return self.optimize_existence(children, sql_expr, "?&", "AND")

return sql_expr

def mk_not_any(self, children):
Expand Down
43 changes: 36 additions & 7 deletions test/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ def test_and(self):
}
""")

def test_optimization_string_equality(self):
def test_optimization_equality_within_and(self):
# optimized, equality alone
self.assert_filter_has_n_results(3, """
{
Expand All @@ -323,7 +323,16 @@ def test_optimization_string_equality(self):
}
""")

def test_optimization_string_equality_integers_skipped(self):
# integers only
self.assert_filter_has_n_results(3, """
{
updated = 1714724220
zip_code = 10019
}
""")


def test_optimization_equality_within_and_skipped(self):
# just one unique key
self.assert_filter_has_n_results(4, """
{
Expand All @@ -332,7 +341,7 @@ def test_optimization_string_equality_integers_skipped(self):
}
""")

# again just one unique key which is an optimization candidate
# again just one unique key which is the optimization candidate
self.assert_filter_has_n_results(4, """
{
updated = "1714724220"
Expand All @@ -349,12 +358,32 @@ def test_optimization_string_equality_integers_skipped(self):
}
""")

# integers only so "existence optimization" kicks in first
# ...and we never get to "equality optimization"
def test_optimization_equality_and_existence_within_and(self):
# equality + existence optimization
self.assert_filter_has_n_results(2, """
{
updated = "1714724220"
zip_code = "10019"
sat_writing_avg_score > 361
}
""")

# equality-only optimization: the existence optimization is
# skipped because `?=` is a deal-breaker for it
self.assert_filter_has_n_results(3, """
{
updated = 1714724220
zip_code = 10019
updated = "1714724220"
zip_code = "10019"
non_existing ?= false
}
""")

# equality optimization is skipped since there is only one `=` expression,
# but the existence optimization still takes place
self.assert_filter_has_n_results(2, """
{
zip_code = "10019"
sat_writing_avg_score > 361
}
""")

Expand Down

0 comments on commit 6507cf3

Please sign in to comment.