Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[EDS] pageviews min cwv count threshold #1013

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@
"src/queries/rum-targets.sql",
"src/queries/dash/auth-all-domains.sql",
"src/queries/dash/domain-list.sql",
"src/queries/dash/update-domain-info.sql"
"src/queries/dash/update-domain-info.sql",
"src/queries/eds/rum-pageviews-cwv-count-threshold.sql"
]
},
"nodemonConfig": {
Expand Down
101 changes: 101 additions & 0 deletions src/queries/eds/rum-pageviews-cwv-count-threshold.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
--- description: Get pageviews for a given URL or domain having a Core Web Vital events count above a given threshold
--- Access-Control-Allow-Origin: *
--- interval: 30
--- offset: 0
--- startdate: 2022-02-01
--- enddate: 2022-05-28
--- timezone: UTC
--- device: all
--- url: -
--- cwv_type: lcp
--- cwv_count_threshold: 100
--- avg_daily_pageviews_factor: 1000
--- domainkey: secret

-- Lists URLs whose number of checkpoints carrying the selected Core Web Vital
-- (@cwv_type: lcp, cls, fid or inp) exceeds @cwv_count_threshold and whose
-- estimated pageviews exceed @interval * @avg_daily_pageviews_factor.
--
-- Output columns: url, cwv_type, cwv_count, pageviews, avg_cwv.

-- Raw RUM events for the requested domain/URL, window and device class.
WITH current_data AS (
  SELECT *
  FROM
    helix_rum.EVENTS_V3(
      @url, -- domain or URL
      CAST(@offset AS INT64), -- not used, offset in days from today
      CAST(@interval AS INT64), -- interval in days to consider
      @startdate, -- not used, start date
      @enddate, -- not used, end date
      @timezone, -- timezone
      @device, -- device class
      @domainkey
    )
),

-- One row per RUM id, carrying the selected Core Web Vital (if any).
current_rum_by_id AS (
  SELECT
    id,
    ANY_VALUE(url) AS url,
    -- An unsupported @cwv_type matches no branch, so the value stays NULL
    -- and the final result is empty.
    MAX(
      CASE
        WHEN @cwv_type = "lcp" THEN lcp
        WHEN @cwv_type = "cls" THEN cls
        WHEN @cwv_type = "fid" THEN fid
        WHEN @cwv_type = "inp" THEN inp
      END
    ) AS core_web_vital,
    MAX(weight) AS weight
  FROM current_data
  WHERE
    url LIKE CONCAT("https://", @url, "%")
  GROUP BY id
),

-- Number of sampled ids per URL.
current_events_by_url AS (
  SELECT
    url,
    COUNT(id) AS events
  FROM current_rum_by_id
  GROUP BY url
),

-- Sampling weight and 75th percentile of the selected metric per URL.
-- The column keeps its historical "avg" name although it is a p75.
-- NOTE(review): the INT64 cast looks lossy for fractional metrics such as
-- cls -- confirm whether that is intended.
current_rum_by_url_and_weight AS (
  SELECT
    url,
    MAX(weight) AS weight,
    CAST(APPROX_QUANTILES(core_web_vital, 100)[OFFSET(75)] AS INT64)
      AS avg_core_web_vital
  FROM current_rum_by_id
  GROUP BY url
),

-- Number of ids per URL that actually reported the selected metric.
cwv_count_by_url AS (
  SELECT
    url,
    @cwv_type AS cwv_type,
    COUNT(*) AS cwv_count
  FROM current_rum_by_id
  WHERE core_web_vital IS NOT NULL
  GROUP BY url
),

-- URLs passing both the metric-count and the pageview thresholds.
url_above_cwv_count_threshold AS (
  SELECT
    cc.url,
    cc.cwv_type,
    cc.cwv_count,
    cru.avg_core_web_vital AS avg_cwv,
    (ce.events * cru.weight) AS pageviews
  FROM cwv_count_by_url AS cc
  -- Inner joins: the pageview filter below already discards any row
  -- without a match in these per-URL aggregates.
  INNER JOIN current_events_by_url AS ce
    ON cc.url = ce.url
  INNER JOIN current_rum_by_url_and_weight AS cru
    ON cc.url = cru.url
  WHERE
    cc.cwv_count > CAST(@cwv_count_threshold AS INT64)
    -- Cast each parameter before multiplying; the other parameters in this
    -- query are cast the same way before numeric use.
    AND CAST((ce.events * cru.weight) AS INT64)
    > CAST(@interval AS INT64) * CAST(@avg_daily_pageviews_factor AS INT64)
)

SELECT
  url,
  cwv_type,
  cwv_count,
  pageviews,
  avg_cwv
FROM url_above_cwv_count_threshold;
Loading