Skip to content

Commit be93beb

Browse files
BenWu and sean-rose authored
[DENG-6243] Use event_monitoring_live in aggregates query (#8167)
* [DENG-6243] Use event_monitoring_live in aggregates query
* Correct `execution_delta` in `sql_generators/glean_usage/templates/event_monitoring_aggregates_v1.metadata.yaml`.

---------

Co-authored-by: Sean Rose <[email protected]>
1 parent c00ee51 commit be93beb

File tree

3 files changed

+125
-82
lines changed

3 files changed

+125
-82
lines changed

sql_generators/glean_usage/event_monitoring_live.py

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -115,12 +115,14 @@ def generate_per_app_id(
115115
"generate", "glean_usage", "events_monitoring", "events_tables", fallback={}
116116
)
117117

118-
deprecated = all([
119-
app_dataset.get("deprecated", False) is True
120-
for app in cached_app_info.values()
121-
for app_dataset in app
122-
if dataset == app_dataset["bq_dataset_family"]
123-
])
118+
deprecated = all(
119+
[
120+
app_dataset.get("deprecated", False) is True
121+
for app in cached_app_info.values()
122+
for app_dataset in app
123+
if dataset == app_dataset["bq_dataset_family"]
124+
]
125+
)
124126

125127
# Skip any not-allowed or deprecated app
126128
if (
@@ -258,6 +260,14 @@ def generate_across_apps(
258260
"generate", "glean_usage", "events_monitoring", "skip_apps", fallback=[]
259261
)
260262

263+
manual_refresh_apps = ConfigLoader.get(
264+
"generate",
265+
"glean_usage",
266+
"events_monitoring",
267+
"manual_refresh",
268+
fallback=[],
269+
)
270+
261271
for app in apps:
262272
for app_dataset in app:
263273
if (
@@ -312,6 +322,7 @@ def generate_across_apps(
312322
apps=apps,
313323
prod_datasets=self._get_prod_datasets_with_event(),
314324
event_tables_per_dataset=event_tables_per_dataset,
325+
manual_refresh_apps=manual_refresh_apps,
315326
)
316327
render_kwargs.update(self.custom_render_kwargs)
317328

sql_generators/glean_usage/templates/event_monitoring_aggregates_v1.metadata.yaml

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,14 @@ scheduling:
1313
arguments: [
1414
"--billing-project", "moz-fx-data-backfill-3"
1515
]
16+
{% if manual_refresh_apps -%}
17+
depends_on:
18+
{%- for app in manual_refresh_apps %}
19+
- dag_name: bqetl_materialized_view_refresh
20+
task_id: {{ app }}_derived__event_monitoring_live__v1
21+
execution_delta: -22h45m # wait for the hourly run at 1:45
22+
{%- endfor -%}
23+
{%- endif %}
1624
bigquery:
1725
time_partitioning:
1826
type: day

sql_generators/glean_usage/templates/event_monitoring_aggregates_v1.query.sql

Lines changed: 100 additions & 76 deletions
Original file line number | Diff line number | Diff line change
@@ -5,44 +5,106 @@ WITH
55
{% for dataset in app -%}
66
{% if dataset['bq_dataset_family'] not in ["telemetry"]
77
and dataset['bq_dataset_family'] in event_tables_per_dataset %}
8-
{% for events_table in event_tables_per_dataset[dataset['bq_dataset_family']] -%}
9-
base_{{ dataset['bq_dataset_family'] }}_{{ events_table }} AS (
8+
{% if dataset['bq_dataset_family'] in manual_refresh_apps -%}
9+
{{ dataset['bq_dataset_family'] }}_aggregated AS (
10+
-- Use event_monitoring_live_v1 for apps that use manual refreshes
1011
SELECT
11-
@submission_date AS submission_date,
12-
TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS window_start,
13-
TIMESTAMP_ADD(TIMESTAMP_TRUNC(submission_timestamp, HOUR), INTERVAL 1 HOUR) AS window_end,
14-
event.category AS event_category,
15-
event.name AS event_name,
16-
event_extra.key AS event_extra_key,
17-
normalized_country_code AS country,
18-
client_info.app_channel AS channel,
19-
client_info.app_display_version AS version,
20-
-- experiments[ARRAY_LENGTH(experiments)] will be set to '*'
21-
COALESCE(ping_info.experiments[SAFE_OFFSET(experiment_index)].key, '*') AS experiment,
22-
COALESCE(ping_info.experiments[SAFE_OFFSET(experiment_index)].value.branch, '*') AS experiment_branch,
23-
COUNT(*) AS total_events,
12+
DATE(submission_date) AS submission_date,
13+
window_start,
14+
window_end,
15+
event_category,
16+
event_name,
17+
event_extra_key,
18+
country,
19+
normalized_app_name,
20+
channel,
21+
version,
22+
experiment,
23+
experiment_branch,
24+
total_events,
2425
FROM
25-
`{{ project_id }}.{{ dataset['bq_dataset_family'] }}_stable.{{ events_table }}`
26-
CROSS JOIN
27-
UNNEST(events) AS event
28-
CROSS JOIN
29-
-- Iterator for accessing experiments.
30-
-- Add one more for aggregating events across all experiments
31-
UNNEST(GENERATE_ARRAY(0, ARRAY_LENGTH(ping_info.experiments))) AS experiment_index
32-
LEFT JOIN
33-
-- Add * extra to every event to get total event count
34-
UNNEST(event.extra || [STRUCT<key STRING, value STRING>('*', NULL)]) AS event_extra
26+
`{{ project_id }}.{{ dataset['bq_dataset_family'] }}_derived.event_monitoring_live_v1`
3527
WHERE
36-
DATE(submission_timestamp) = @submission_date
37-
{% if dataset['app_name'] == "firefox_desktop" and events_table == "events_v1" %}
38-
-- See https://mozilla-hub.atlassian.net/browse/DENG-9732
39-
AND (
40-
event.category = "uptake.remotecontent.result"
41-
AND event.name IN ("uptake_remotesettings", "uptake_normandy")
42-
AND mozfun.norm.extract_version(client_info.app_display_version, 'major') >= 143
43-
AND sample_id != 0
44-
) IS NOT TRUE
45-
{% endif %}
28+
DATE(submission_date) = @submission_date
29+
)
30+
{% else %}
31+
{% for events_table in event_tables_per_dataset[dataset['bq_dataset_family']] -%}
32+
base_{{ dataset['bq_dataset_family'] }}_{{ events_table }} AS (
33+
SELECT
34+
DATE(@submission_date) AS submission_date,
35+
TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS window_start,
36+
TIMESTAMP_ADD(TIMESTAMP_TRUNC(submission_timestamp, HOUR), INTERVAL 1 HOUR) AS window_end,
37+
event.category AS event_category,
38+
event.name AS event_name,
39+
event_extra.key AS event_extra_key,
40+
normalized_country_code AS country,
41+
client_info.app_channel AS channel,
42+
client_info.app_display_version AS version,
43+
-- experiments[ARRAY_LENGTH(experiments)] will be set to '*'
44+
COALESCE(ping_info.experiments[SAFE_OFFSET(experiment_index)].key, '*') AS experiment,
45+
COALESCE(ping_info.experiments[SAFE_OFFSET(experiment_index)].value.branch, '*') AS experiment_branch,
46+
COUNT(*) AS total_events,
47+
FROM
48+
`{{ project_id }}.{{ dataset['bq_dataset_family'] }}_stable.{{ events_table }}`
49+
CROSS JOIN
50+
UNNEST(events) AS event
51+
CROSS JOIN
52+
-- Iterator for accessing experiments.
53+
-- Add one more for aggregating events across all experiments
54+
UNNEST(GENERATE_ARRAY(0, ARRAY_LENGTH(ping_info.experiments))) AS experiment_index
55+
LEFT JOIN
56+
-- Add * extra to every event to get total event count
57+
UNNEST(event.extra || [STRUCT<key STRING, value STRING>('*', NULL)]) AS event_extra
58+
WHERE
59+
DATE(submission_timestamp) = @submission_date
60+
{% if dataset['app_name'] == "firefox_desktop" and events_table == "events_v1" %}
61+
-- See https://mozilla-hub.atlassian.net/browse/DENG-9732
62+
AND (
63+
event.category = "uptake.remotecontent.result"
64+
AND event.name IN ("uptake_remotesettings", "uptake_normandy")
65+
AND mozfun.norm.extract_version(client_info.app_display_version, 'major') >= 143
66+
AND sample_id != 0
67+
) IS NOT TRUE
68+
{% endif %}
69+
GROUP BY
70+
submission_date,
71+
window_start,
72+
window_end,
73+
event_category,
74+
event_name,
75+
event_extra_key,
76+
country,
77+
channel,
78+
version,
79+
experiment,
80+
experiment_branch
81+
),
82+
{% endfor %}
83+
{{ dataset['bq_dataset_family'] }}_aggregated AS (
84+
SELECT
85+
submission_date,
86+
window_start,
87+
window_end,
88+
event_category,
89+
event_name,
90+
event_extra_key,
91+
country,
92+
"{{ dataset['canonical_app_name'] }}" AS normalized_app_name,
93+
channel,
94+
version,
95+
experiment,
96+
experiment_branch,
97+
SUM(total_events) AS total_events,
98+
FROM
99+
(
100+
{% for events_table in event_tables_per_dataset[dataset['bq_dataset_family']] -%}
101+
SELECT
102+
*
103+
FROM
104+
base_{{ dataset['bq_dataset_family'] }}_{{ events_table }}
105+
{{ "UNION ALL" if not loop.last }}
106+
{% endfor -%}
107+
)
46108
GROUP BY
47109
submission_date,
48110
window_start,
@@ -51,51 +113,13 @@ WITH
51113
event_name,
52114
event_extra_key,
53115
country,
116+
normalized_app_name,
54117
channel,
55118
version,
56119
experiment,
57120
experiment_branch
58-
),
59-
{% endfor %}
60-
{{ dataset['bq_dataset_family'] }}_aggregated AS (
61-
SELECT
62-
submission_date,
63-
window_start,
64-
window_end,
65-
event_category,
66-
event_name,
67-
event_extra_key,
68-
country,
69-
"{{ dataset['canonical_app_name'] }}" AS normalized_app_name,
70-
channel,
71-
version,
72-
experiment,
73-
experiment_branch,
74-
SUM(total_events) AS total_events,
75-
FROM
76-
(
77-
{% for events_table in event_tables_per_dataset[dataset['bq_dataset_family']] -%}
78-
SELECT
79-
*
80-
FROM
81-
base_{{ dataset['bq_dataset_family'] }}_{{ events_table }}
82-
{{ "UNION ALL" if not loop.last }}
83-
{% endfor -%}
84-
)
85-
GROUP BY
86-
submission_date,
87-
window_start,
88-
window_end,
89-
event_category,
90-
event_name,
91-
event_extra_key,
92-
country,
93-
normalized_app_name,
94-
channel,
95-
version,
96-
experiment,
97-
experiment_branch
98-
)
121+
)
122+
{% endif %}
99123
{% if not outer_loop.last -%}
100124
,
101125
{% endif -%}

0 commit comments

Comments
 (0)