diff --git a/package.json b/package.json
index 1e21c963..a207c5c2 100644
--- a/package.json
+++ b/package.json
@@ -106,7 +106,8 @@
       "src/queries/rum-targets.sql",
       "src/queries/dash/auth-all-domains.sql",
       "src/queries/dash/domain-list.sql",
-      "src/queries/dash/update-domain-info.sql"
+      "src/queries/dash/update-domain-info.sql",
+      "src/queries/eds/rum-pageviews-cwv-count-threshold.sql"
     ]
   },
   "nodemonConfig": {
diff --git a/src/queries/eds/rum-pageviews-cwv-count-threshold.sql b/src/queries/eds/rum-pageviews-cwv-count-threshold.sql
new file mode 100644
index 00000000..d59b458f
--- /dev/null
+++ b/src/queries/eds/rum-pageviews-cwv-count-threshold.sql
@@ -0,0 +1,123 @@
+--- description: Get pageviews for a given URL or domain having a Core Web Vital events count above a given threshold
+--- Access-Control-Allow-Origin: *
+--- interval: 30
+--- offset: 0
+--- startdate: 2022-02-01
+--- enddate: 2022-05-28
+--- timezone: UTC
+--- device: all
+--- url: -
+--- cwv_type: lcp
+--- cwv_count_threshold: 100
+--- avg_daily_pageviews_factor: 1000
+--- domainkey: secret
+
+-- Events returned by helix_rum.EVENTS_V3 for the requested url, device and
+-- time window; only the columns used further down are kept.
+WITH current_data AS (
+  SELECT
+    id,
+    url,
+    weight,
+    lcp,
+    cls,
+    fid,
+    inp
+  FROM
+    helix_rum.EVENTS_V3(
+      @url, -- domain or URL
+      CAST(@offset AS INT64), -- not used, offset in days from today
+      CAST(@interval AS INT64), -- interval in days to consider
+      @startdate, -- not used, start date
+      @enddate, -- not used, end date
+      @timezone, -- timezone
+      @device, -- device class
+      @domainkey
+    )
+),
+
+-- One row per id: its url, its weight and the Core Web Vital selected by
+-- @cwv_type (NULL when @cwv_type is not lcp/cls/fid/inp or no value exists).
+current_rum_by_id AS (
+  SELECT
+    id,
+    ANY_VALUE(url) AS url,
+    MAX(
+      CASE
+        WHEN @cwv_type = "lcp" THEN lcp
+        WHEN @cwv_type = "cls" THEN cls
+        WHEN @cwv_type = "fid" THEN fid
+        WHEN @cwv_type = "inp" THEN inp
+      END
+    ) AS core_web_vital,
+    MAX(weight) AS weight
+  FROM current_data
+  WHERE
+    url LIKE CONCAT("https://", @url, "%")
+  GROUP BY id
+),
+
+-- Number of ids per url.
+current_events_by_url AS (
+  SELECT
+    url,
+    COUNT(id) AS events
+  FROM current_rum_by_id
+  GROUP BY url
+),
+
+-- Per-url weight and 75th percentile of the selected Core Web Vital.
+-- The result column is named avg_*, but the value is a percentile.
+-- NOTE(review): the INT64 cast rounds fractional percentiles to whole
+-- numbers, which looks lossy for cls; confirm that this is intended.
+current_rum_by_url_and_weight AS (
+  SELECT
+    url,
+    MAX(weight) AS weight,
+    CAST(APPROX_QUANTILES(core_web_vital, 100)[OFFSET(75)] AS INT64)
+      AS avg_core_web_vital
+  FROM current_rum_by_id
+  GROUP BY url
+),
+
+-- Number of ids per url that carry a value for the selected Core Web Vital.
+current_cwv_count_by_url AS (
+  SELECT
+    url,
+    @cwv_type AS cwv_type,
+    COUNT(*) AS cwv_count
+  FROM current_rum_by_id
+  WHERE core_web_vital IS NOT NULL
+  GROUP BY url
+),
+
+-- Urls whose Core Web Vital count and estimated pageviews (events * weight)
+-- both exceed their thresholds. The filter on events and weight discards
+-- unmatched rows anyway, so inner joins are used.
+url_above_cwv_count_threshold AS (
+  SELECT
+    cc.url,
+    cc.cwv_type,
+    cc.cwv_count,
+    cru.avg_core_web_vital AS avg_cwv,
+    (ce.events * cru.weight) AS pageviews
+  FROM current_cwv_count_by_url AS cc
+  INNER JOIN current_events_by_url AS ce
+    ON cc.url = ce.url
+  INNER JOIN current_rum_by_url_and_weight AS cru
+    ON cc.url = cru.url
+  WHERE
+    cc.cwv_count > CAST(@cwv_count_threshold AS INT64)
+    -- pageview floor: interval (days) times the per-day factor; each
+    -- parameter is cast on its own before multiplying
+    AND CAST((ce.events * cru.weight) AS INT64)
+    > CAST(@interval AS INT64) * CAST(@avg_daily_pageviews_factor AS INT64)
+)
+
+SELECT
+  url,
+  cwv_type,
+  cwv_count,
+  pageviews,
+  avg_cwv
+FROM url_above_cwv_count_threshold;