Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: allow full_sorting_merge in funnels #26610

Merged
merged 11 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions posthog/hogql/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ class HogQLQuerySettings(BaseModel):
optimize_aggregation_in_order: Optional[bool] = None
date_time_output_format: Optional[str] = None
date_time_input_format: Optional[str] = None
join_algorithm: Optional[str] = None


# Settings applied on top of all HogQL queries.
Expand Down
2 changes: 2 additions & 0 deletions posthog/hogql_queries/insights/funnels/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
)
from posthog.types import EntityNode, ExclusionEntityNode

JOIN_ALGOS = "direct,parallel_hash,hash,full_sorting_merge"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for memory issues why don't we go for auto? performance concerns?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when i tested it, it took longer and timed out on metabase.



class FunnelBase(ABC):
context: FunnelQueryContext
Expand Down
6 changes: 5 additions & 1 deletion posthog/hogql_queries/insights/funnels/funnel_trends_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from posthog.hogql.constants import HogQLQuerySettings
from posthog.hogql.parser import parse_select, parse_expr
from posthog.hogql_queries.insights.funnels import FunnelTrends
from posthog.hogql_queries.insights.funnels.base import JOIN_ALGOS
from posthog.hogql_queries.insights.utils.utils import get_start_of_interval_hogql_str
from posthog.schema import BreakdownType, BreakdownAttributionType
from posthog.utils import DATERANGE_MAP, relative_date_parse
Expand Down Expand Up @@ -195,7 +196,9 @@ def get_query(self) -> ast.SelectQuery:
""",
{"fill_query": fill_query, "inner_select": inner_select},
)
return cast(ast.SelectQuery, s)
s = cast(ast.SelectQuery, s)
s.settings = HogQLQuerySettings(join_algorithm=JOIN_ALGOS)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

might be safer to first check emptiness and then override s.settings, cause if in future parse_select starts setting query settings as well, those will be lost

return s

def _matching_events(self):
if (
Expand Down Expand Up @@ -254,4 +257,5 @@ def actor_query(
select_from=select_from,
order_by=order_by,
where=where,
settings=HogQLQuerySettings(join_algorithm=JOIN_ALGOS),
)
18 changes: 11 additions & 7 deletions posthog/hogql_queries/insights/funnels/funnel_udf.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from typing import cast, Optional

from posthog.hogql import ast
from posthog.hogql.constants import DEFAULT_RETURNED_ROWS
from posthog.hogql.constants import DEFAULT_RETURNED_ROWS, HogQLQuerySettings
from posthog.hogql.parser import parse_select, parse_expr
from posthog.hogql_queries.insights.funnels.base import FunnelBase
from posthog.hogql_queries.insights.funnels.base import FunnelBase, JOIN_ALGOS
from posthog.schema import BreakdownType, BreakdownAttributionType
from posthog.utils import DATERANGE_MAP

Expand Down Expand Up @@ -169,8 +169,10 @@ def get_query(self) -> ast.SelectQuery:
)

# Weird: unless you reference row_number in this outer block, it doesn't work correctly
s = parse_select(
f"""
s = cast(
ast.SelectQuery,
parse_select(
f"""
SELECT
{step_results2},
{mean_conversion_times},
Expand All @@ -182,10 +184,11 @@ def get_query(self) -> ast.SelectQuery:
GROUP BY final_prop
LIMIT {self.get_breakdown_limit() + 1 if use_breakdown_limit else DEFAULT_RETURNED_ROWS}
""",
{"s": s},
{"s": s},
),
)

return cast(ast.SelectQuery, s)
s.settings = HogQLQuerySettings(join_algorithm=JOIN_ALGOS)
return s

def _get_funnel_person_step_condition(self) -> ast.Expr:
actorsQuery, breakdownType = (
Expand Down Expand Up @@ -294,4 +297,5 @@ def actor_query(
select_from=select_from,
order_by=order_by,
where=where,
settings=HogQLQuerySettings(join_algorithm=JOIN_ALGOS),
)
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
GROUP BY aggregation_target
HAVING ifNull(greaterOrEquals(step_reached, 0), 0))
WHERE ifNull(greaterOrEquals(step_reached, 0), 0)
ORDER BY aggregation_target ASC) AS funnel_actors ON equals(if(not(empty(event__override.distinct_id)), event__override.person_id, event.person_id), funnel_actors.actor_id)
ORDER BY aggregation_target ASC SETTINGS join_algorithm='direct,parallel_hash,hash,full_sorting_merge') AS funnel_actors ON equals(if(not(empty(event__override.distinct_id)), event__override.person_id, event.person_id), funnel_actors.actor_id)
WHERE and(equals(event.team_id, 99999), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 99999), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed']), equals(event.event, 'insight loaded'), ifNull(equals(funnel_actors.steps, 2), 0))
GROUP BY actor_id
ORDER BY actor_id ASC) AS source
Expand Down Expand Up @@ -99,7 +99,7 @@
GROUP BY aggregation_target
HAVING ifNull(greaterOrEquals(step_reached, 0), 0))
WHERE ifNull(greaterOrEquals(step_reached, 0), 0)
ORDER BY aggregation_target ASC) AS funnel_actors ON equals(if(not(empty(event__override.distinct_id)), event__override.person_id, event.person_id), funnel_actors.actor_id)
ORDER BY aggregation_target ASC SETTINGS join_algorithm='direct,parallel_hash,hash,full_sorting_merge') AS funnel_actors ON equals(if(not(empty(event__override.distinct_id)), event__override.person_id, event.person_id), funnel_actors.actor_id)
WHERE and(equals(event.team_id, 99999), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 99999), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed']), equals(event.event, 'insight loaded'), ifNull(equals(funnel_actors.steps, 2), 0))
GROUP BY actor_id
ORDER BY actor_id ASC) AS source)))
Expand Down Expand Up @@ -192,7 +192,7 @@
GROUP BY aggregation_target
HAVING ifNull(greaterOrEquals(step_reached, 0), 0))
WHERE ifNull(greaterOrEquals(step_reached, 0), 0)
ORDER BY aggregation_target ASC) AS funnel_actors ON equals(if(not(empty(event__override.distinct_id)), event__override.person_id, event.person_id), funnel_actors.actor_id)
ORDER BY aggregation_target ASC SETTINGS join_algorithm='direct,parallel_hash,hash,full_sorting_merge') AS funnel_actors ON equals(if(not(empty(event__override.distinct_id)), event__override.person_id, event.person_id), funnel_actors.actor_id)
WHERE and(equals(event.team_id, 99999), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 99999), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed', 'insight updated']), equals(event.event, 'insight loaded'), ifNull(notEquals(funnel_actors.steps, 3), 1))
GROUP BY actor_id
ORDER BY actor_id ASC) AS source
Expand Down Expand Up @@ -231,7 +231,7 @@
GROUP BY aggregation_target
HAVING ifNull(greaterOrEquals(step_reached, 0), 0))
WHERE ifNull(greaterOrEquals(step_reached, 0), 0)
ORDER BY aggregation_target ASC) AS funnel_actors ON equals(if(not(empty(event__override.distinct_id)), event__override.person_id, event.person_id), funnel_actors.actor_id)
ORDER BY aggregation_target ASC SETTINGS join_algorithm='direct,parallel_hash,hash,full_sorting_merge') AS funnel_actors ON equals(if(not(empty(event__override.distinct_id)), event__override.person_id, event.person_id), funnel_actors.actor_id)
WHERE and(equals(event.team_id, 99999), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 99999), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed', 'insight updated']), equals(event.event, 'insight loaded'), ifNull(notEquals(funnel_actors.steps, 3), 1))
GROUP BY actor_id
ORDER BY actor_id ASC) AS source)))
Expand Down Expand Up @@ -325,7 +325,7 @@
GROUP BY aggregation_target
HAVING ifNull(greaterOrEquals(step_reached, 0), 0))
WHERE ifNull(greaterOrEquals(step_reached, 0), 0)
ORDER BY aggregation_target ASC) AS funnel_actors
ORDER BY aggregation_target ASC SETTINGS join_algorithm='direct,parallel_hash,hash,full_sorting_merge') AS funnel_actors
WHERE ifNull(equals(funnel_actors.steps, 2), 0)
GROUP BY funnel_actors.actor_id
ORDER BY funnel_actors.actor_id ASC) AS source
Expand Down Expand Up @@ -366,7 +366,7 @@
GROUP BY aggregation_target
HAVING ifNull(greaterOrEquals(step_reached, 0), 0))
WHERE ifNull(greaterOrEquals(step_reached, 0), 0)
ORDER BY aggregation_target ASC) AS funnel_actors
ORDER BY aggregation_target ASC SETTINGS join_algorithm='direct,parallel_hash,hash,full_sorting_merge') AS funnel_actors
WHERE ifNull(equals(funnel_actors.steps, 2), 0)
GROUP BY funnel_actors.actor_id
ORDER BY funnel_actors.actor_id ASC) AS source)))
Expand Down
Loading
Loading