From b04f75f714bf185e6c85f7dcbc0fb83c6b271890 Mon Sep 17 00:00:00 2001 From: Marquise Rosier Date: Wed, 3 Jan 2024 08:44:35 -0800 Subject: [PATCH 01/13] migrate to v3 --- src/queries/dash/checkpoint-by-url.sql | 51 ++++++++++++++++++ src/queries/dash/conversions.sql | 35 ++++++++++++ src/queries/dash/daily-rum.sql | 52 ++++++++++++++++++ src/queries/dash/enters.sql | 39 ++++++++++++++ src/queries/dash/pageviews.sql | 16 ++++++ src/queries/dash/rfqs.sql | 35 ++++++++++++ src/queries/dash/rum-sources-aggregated.sql | 60 +++++++++++++++++++++ src/queries/dash/searches.sql | 35 ++++++++++++ 8 files changed, 323 insertions(+) create mode 100644 src/queries/dash/checkpoint-by-url.sql create mode 100644 src/queries/dash/conversions.sql create mode 100644 src/queries/dash/daily-rum.sql create mode 100644 src/queries/dash/enters.sql create mode 100644 src/queries/dash/pageviews.sql create mode 100644 src/queries/dash/rfqs.sql create mode 100644 src/queries/dash/rum-sources-aggregated.sql create mode 100644 src/queries/dash/searches.sql diff --git a/src/queries/dash/checkpoint-by-url.sql b/src/queries/dash/checkpoint-by-url.sql new file mode 100644 index 00000000..f1a2b8d5 --- /dev/null +++ b/src/queries/dash/checkpoint-by-url.sql @@ -0,0 +1,51 @@ +--- description: Get URL Specific Daily Conversion Data From RUM for a given domain +--- Authorization: none +--- Access-Control-Allow-Origin: * +--- limit: 30 +--- interval: 30 +--- offset: 0 +--- startdate: 2023-02-01 +--- enddate: 2023-05-28 +--- timezone: UTC +--- exactmatch: true +--- url: - +--- device: all +--- domainkey: secret +--- ckpt: search +with sidekick_events AS ( +SELECT + FORMAT_DATE("%Y-%m-%d", DATE_TRUNC(time, DAY)) AS day, + id, + checkpoint, + hostname, + url, + pageviews, + source, + target + FROM helix_rum.CHECKPOINTS_V4( @url, @offset, @interval, '-', '-', 'UTC', 'all', @domainkey ) +WHERE + checkpoint LIKE @ckpt + +) +SELECT url, + checkpoint, + target, + sum(pageviews) AS invocations, +FROM 
sidekick_events +WHERE +( + ( + true = true + AND ( + url = concat('https://', REGEXP_REPLACE(@url, 'https://', '')) + or + url = concat('https://www.', REGEXP_REPLACE(@url, 'https://', '')) + or + url = concat('https://www.', REGEXP_REPLACE(@url, 'www.', '')) + or + url = concat('https://', REGEXP_REPLACE(@url, 'https://www.', '')) + ) + ) OR true = false ) +GROUP BY checkpoint, + url, target +ORDER BY invocations desc; \ No newline at end of file diff --git a/src/queries/dash/conversions.sql b/src/queries/dash/conversions.sql new file mode 100644 index 00000000..b4814d15 --- /dev/null +++ b/src/queries/dash/conversions.sql @@ -0,0 +1,35 @@ +--- description: Get URL Request for Quotes Data From RUM for a given domain +--- Authorization: none +--- Access-Control-Allow-Origin: * +--- limit: 30 +--- interval: 30 +--- offset: 0 +--- timezone: UTC +--- exactmatch: true +--- url: - +--- device: all +--- domainkey: secret +with conversions AS ( +SELECT +* + FROM helix_rum.CHECKPOINTS_V4( @url, @offset, @interval, '-', '-', 'UTC', 'all', @domainkey ) +WHERE + checkpoint LIKE "%convert%" AND + ( + ( + @exactmatch = true + AND ( + url = concat('https://', REGEXP_REPLACE(@url, 'https://', '')) + or + url = concat('https://www.', REGEXP_REPLACE(@url, 'https://', '')) + or + url = concat('https://www.', REGEXP_REPLACE(@url, 'www.', '')) + or + url = concat('https://', REGEXP_REPLACE(@url, 'https://www.', '')) + ) + ) OR @exactmatch = false ) +), +unique_targets as ( + select (case when not @exactmatch then hostname end) as hostname,(case when @exactmatch then url end) as url, lower(target) as target, sum(pageviews) traffic from conversions group by (case when not @exactmatch then hostname end), lower(target), (case when @exactmatch then url end) +) +select hostname, url, target, sum(traffic) as traffic from unique_targets group by hostname, url, target order by traffic desc \ No newline at end of file diff --git a/src/queries/dash/daily-rum.sql b/src/queries/dash/daily-rum.sql 
new file mode 100644 index 00000000..7c5ce84d --- /dev/null +++ b/src/queries/dash/daily-rum.sql @@ -0,0 +1,52 @@ +--- description: Get Daily Helix RUM data for a given domain +--- Authorization: none +--- Access-Control-Allow-Origin: * +--- limit: 30 +--- interval: 30 +--- offset: 0 +--- startdate: 2023-02-01 +--- enddate: 2023-05-28 +--- timezone: UTC +--- timeunit: day +--- exactmatch: false +--- url: - +--- device: all +--- domainkey: secret +WITH +daily_rum AS ( + SELECT + REGEXP_REPLACE(url, '\\?.+', '') as url, + avg(lcp) avglcp, + avg(fid) avgfid, + avg(inp) as avginp, + avg(cls) avgcls, + IF(@timeunit = 'day', FORMAT_TIMESTAMP("%Y-%m-%d", time), + IF(@timeunit = 'hour', FORMAT_TIMESTAMP("%Y-%m-%d-%T", time), + FORMAT_TIMESTAMP("%Y-%m-%d", time))) AS date + FROM + helix_rum.EVENTS_V3( + REGEXP_REPLACE(@url, 'https://', ''), # domain or URL + CAST(@offset AS INT64), # not used, offset in days from today + CAST(@interval AS INT64), # interval in days to consider + @startdate, # not used, start date + @enddate, # not used, end date + @timezone, # timezone + @device, # device class + @domainkey + ) + group by url, date + order by date asc +) +select * from daily_rum +where avglcp is not null + and avgfid is not null + and avgcls is not null + and avginp is not null + and ( + @exactmatch = true and url = concat('https://', REGEXP_REPLACE(@url, 'https://', '')) + or + @exactmatch = false + ) + and not starts_with(url, 'http://localhost') + and not starts_with(url, 'https://localhost') + order by url, date diff --git a/src/queries/dash/enters.sql b/src/queries/dash/enters.sql new file mode 100644 index 00000000..5541c9cf --- /dev/null +++ b/src/queries/dash/enters.sql @@ -0,0 +1,39 @@ +--- description: Get URL Specific Referrals Data From RUM for a given domain +--- Authorization: none +--- Access-Control-Allow-Origin: * +--- limit: 30 +--- interval: 30 +--- offset: 0 +--- timezone: UTC +--- exactmatch: true +--- url: - +--- device: all +--- domainkey: secret 
+with enters AS ( +SELECT +* + FROM helix_rum.CHECKPOINTS_V4( @url, @offset, @interval, '-', '-', 'UTC', 'all', @domainkey ) +WHERE + checkpoint LIKE "enter" AND + ( + ( + @exactmatch = true + AND ( + url = concat('https://', REGEXP_REPLACE(@url, 'https://', '')) + or + url = concat('https://www.', REGEXP_REPLACE(@url, 'https://', '')) + or + url = concat('https://www.', REGEXP_REPLACE(@url, 'www.', '')) + or + url = concat('https://', REGEXP_REPLACE(@url, 'https://www.', '')) + ) + ) OR @exactmatch = false ) +), + +unique_sources as ( + select (case when not @exactmatch then hostname end) as hostname,(case when @exactmatch then url end) as url, split(net.reg_domain(source), '.')[offset(0)] as source, sum(pageviews) traffic from enters group by (case when not @exactmatch then hostname end), split(net.reg_domain(source), '.')[offset(0)], (case when @exactmatch then url end) +), +total_traffic as ( + select sum(traffic) as total from unique_sources +) +select *, (traffic/total)*100 as percentage from unique_sources join total_traffic on true where traffic >= (@threshold * total_traffic.total) order by traffic desc \ No newline at end of file diff --git a/src/queries/dash/pageviews.sql b/src/queries/dash/pageviews.sql new file mode 100644 index 00000000..2c3434f8 --- /dev/null +++ b/src/queries/dash/pageviews.sql @@ -0,0 +1,16 @@ +--- description: Get Helix RUM data for a given domain or owner/repo combination +--- Authorization: none +--- Access-Control-Allow-Origin: * +--- limit: 10 +--- interval: 30 +--- offset: 0 +--- startdate: 2022-02-01 +--- enddate: 2022-05-28 +--- timezone: UTC +--- url: - +--- device: all +--- domainkey: secret +with pageviews_by_id as ( + SELECT hostname, id, max(weight) as pageviews FROM `helix-225321.helix_rum.EVENTS_V4`(net.host(@url), @offset, @interval, '-', '-', 'UTC', 'all', @domainkey) group by id, hostname +) +select hostname, sum(pageviews) as pageviews from pageviews_by_id group by hostname order by pageviews desc \ No newline at 
end of file diff --git a/src/queries/dash/rfqs.sql b/src/queries/dash/rfqs.sql new file mode 100644 index 00000000..1958a1be --- /dev/null +++ b/src/queries/dash/rfqs.sql @@ -0,0 +1,35 @@ +--- description: Get URL Request for Quotes Data From RUM for a given domain +--- Authorization: none +--- Access-Control-Allow-Origin: * +--- limit: 30 +--- interval: 30 +--- offset: 0 +--- timezone: UTC +--- exactmatch: true +--- url: - +--- device: all +--- domainkey: secret +with rfqs AS ( +SELECT +* + FROM helix_rum.CHECKPOINTS_V4( @url, @offset, @interval, '-', '-', 'UTC', 'all', @domainkey ) +WHERE + checkpoint LIKE "%rfq%" AND + ( + ( + @exactmatch = true + AND ( + url = concat('https://', REGEXP_REPLACE(@url, 'https://', '')) + or + url = concat('https://www.', REGEXP_REPLACE(@url, 'https://', '')) + or + url = concat('https://www.', REGEXP_REPLACE(@url, 'www.', '')) + or + url = concat('https://', REGEXP_REPLACE(@url, 'https://www.', '')) + ) + ) OR @exactmatch = false ) +), +unique_targets as ( + select (case when not @exactmatch then hostname end) as hostname,(case when @exactmatch then url end) as url, lower(target) as target, sum(pageviews) traffic from rfqs group by (case when not @exactmatch then hostname end), lower(target), (case when @exactmatch then url end) +) +select hostname, url, target, sum(traffic) as traffic from unique_targets group by hostname, url, target order by traffic desc \ No newline at end of file diff --git a/src/queries/dash/rum-sources-aggregated.sql b/src/queries/dash/rum-sources-aggregated.sql new file mode 100644 index 00000000..019758e3 --- /dev/null +++ b/src/queries/dash/rum-sources-aggregated.sql @@ -0,0 +1,60 @@ +--- description: Get popularity data for RUM source attribute values, filtered by checkpoint +--- Authorization: none +--- Access-Control-Allow-Origin: * +--- limit: 30 +--- interval: 30 +--- offset: 0 +--- startdate: 2022-01-01 +--- enddate: 2022-01-31 +--- timezone: UTC +--- url: - +--- checkpoint: - +--- source: - +--- 
domainkey: secret + +WITH +current_data AS ( + SELECT * + FROM + helix_rum.CHECKPOINTS_V3( + @url, + CAST(@offset AS INT64), + CAST(@interval AS INT64), + @startdate, + @enddate, + @timezone, + 'all', + @domainkey + ) +), + +sources AS ( + SELECT + id, + source, + checkpoint, + MAX(url) AS url, + MAX(pageviews) AS views, + SUM(pageviews) AS actions + FROM current_data + WHERE + source IS NOT NULL AND ( + CAST( + @checkpoint AS STRING + ) = '-' OR CAST(@checkpoint AS STRING) = checkpoint + ) AND (source = @source OR @source = '-') + GROUP BY source, id, checkpoint +) + +SELECT + source, + COUNT(id) AS ids, + COUNT(DISTINCT url) AS pages, + APPROX_TOP_COUNT(url, 1)[OFFSET(0)].value AS topurl, + SUM(views) AS views, + SUM(actions) AS actions, + SUM(actions) / SUM(views) AS actions_per_view +FROM sources +GROUP BY source +ORDER BY views DESC +LIMIT @limit diff --git a/src/queries/dash/searches.sql b/src/queries/dash/searches.sql new file mode 100644 index 00000000..c354f166 --- /dev/null +++ b/src/queries/dash/searches.sql @@ -0,0 +1,35 @@ +--- description: Get URL Specific Searches Data From RUM for a given domain +--- Authorization: none +--- Access-Control-Allow-Origin: * +--- limit: 30 +--- interval: 30 +--- offset: 0 +--- timezone: UTC +--- exactmatch: true +--- url: - +--- device: all +--- domainkey: secret +with searches AS ( +SELECT +* + FROM helix_rum.CHECKPOINTS_V4( @url, @offset, @interval, '-', '-', 'UTC', 'all', @domainkey ) +WHERE + checkpoint LIKE "%search%" AND + ( + ( + @exactmatch = true + AND ( + url = concat('https://', REGEXP_REPLACE(@url, 'https://', '')) + or + url = concat('https://www.', REGEXP_REPLACE(@url, 'https://', '')) + or + url = concat('https://www.', REGEXP_REPLACE(@url, 'www.', '')) + or + url = concat('https://', REGEXP_REPLACE(@url, 'https://www.', '')) + ) + ) OR @exactmatch = false ) +), +unique_targets as ( + select (case when not @exactmatch then hostname end) as hostname,(case when @exactmatch then url end) as url, 
lower(target) as target, sum(pageviews) traffic from searches group by (case when not @exactmatch then hostname end), lower(target), (case when @exactmatch then url end) +) +select hostname, url, target, sum(traffic) as traffic from unique_targets group by hostname, url, target order by traffic desc \ No newline at end of file From 233ff33cd1ac7f4c1770bd036ddac2fccf316810 Mon Sep 17 00:00:00 2001 From: Marquise Rosier Date: Wed, 3 Jan 2024 08:48:09 -0800 Subject: [PATCH 02/13] add to package.json --- package.json | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/package.json b/package.json index f8d943b3..685c583f 100644 --- a/package.json +++ b/package.json @@ -106,7 +106,15 @@ "src/queries/rum-targets.sql", "src/queries/dash/auth-all-domains.sql", "src/queries/dash/domain-list.sql", - "src/queries/dash/update-domain-info.sql" + "src/queries/dash/update-domain-info.sql", + "src/queries/dash/checkpoint-by-url.sql", + "src/queries/dash/conversions.sql", + "src/queries/dash/daily-rum.sql", + "src/queries/dash/enters.sql", + "src/queries/dash/pageviews.sql", + "src/queries/dash/rfqs.sql", + "src/queries/dash/rum-sources-aggregated.sql", + "src/queries/dash/searches.sql" ] }, "nodemonConfig": { From 9ff3592542e57252d4fcf47fa0fac2592c5c2e14 Mon Sep 17 00:00:00 2001 From: Marquise Rosier Date: Wed, 3 Jan 2024 09:09:18 -0800 Subject: [PATCH 03/13] add github-commits --- package.json | 4 ++- src/queries/dash/github-commits.sql | 40 +++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 src/queries/dash/github-commits.sql diff --git a/package.json b/package.json index 685c583f..c52f42f1 100644 --- a/package.json +++ b/package.json @@ -114,7 +114,9 @@ "src/queries/dash/pageviews.sql", "src/queries/dash/rfqs.sql", "src/queries/dash/rum-sources-aggregated.sql", - "src/queries/dash/searches.sql" + "src/queries/dash/searches.sql", + "src/queries/dash/github-commits.sql" + ] }, "nodemonConfig": { diff --git 
a/src/queries/dash/github-commits.sql b/src/queries/dash/github-commits.sql new file mode 100644 index 00000000..b16e37cd --- /dev/null +++ b/src/queries/dash/github-commits.sql @@ -0,0 +1,40 @@ +--- description: Get Daily Commits For a Site or Repo +--- Authorization: none +--- Access-Control-Allow-Origin: * +--- limit: 30 +--- interval: 30 +--- offset: 0 +--- startdate: 2023-02-01 +--- enddate: 2023-05-28 +--- timezone: UTC +--- timeunit: day +--- exactmatch: false +--- url: - +--- device: all +--- domainkey: secret +with current_data as ( +SELECT + * +FROM + `helix-225321.helix_external_data.DAILY_COMMITS`( + @url, + @offset, + @interval, + @startdate, + @enddate, + @domainkey + ) +) +select * from current_data where +not user = 'GitHub' +and not user = 'GitHub Action' +and not user = 'GitHub Enterprise' +and not user = 'CircleCi Build' +and not user = 'Helix Bot' +and not user = 'adobe-alloy-bot' +and not user = 'github-actions' +and not user = 'github-actions[bot]' +and not user = 'helix-bot[bot]' +and not user = 'renovate[bot]' +and not user = 'semantic-release-bot' +order by owner_repo, commit_date asc \ No newline at end of file From 0efa566ea709d9e26dd734b2fb9b489b99ee87d1 Mon Sep 17 00:00:00 2001 From: Marquise Rosier Date: Wed, 3 Jan 2024 20:52:40 -0800 Subject: [PATCH 04/13] rum-dashboard supports exact match --- src/queries/dash/rum-dashboard-exact.sql | 559 +++++++++++++++++++++++ 1 file changed, 559 insertions(+) create mode 100644 src/queries/dash/rum-dashboard-exact.sql diff --git a/src/queries/dash/rum-dashboard-exact.sql b/src/queries/dash/rum-dashboard-exact.sql new file mode 100644 index 00000000..ca9e5763 --- /dev/null +++ b/src/queries/dash/rum-dashboard-exact.sql @@ -0,0 +1,559 @@ +--- description: Get Helix RUM data for a given domain or owner/repo combination +--- Authorization: none +--- Access-Control-Allow-Origin: * +--- limit: 10 +--- interval: 30 +--- offset: 0 +--- startdate: 2022-02-01 +--- enddate: 2022-05-28 +--- timezone: UTC 
+--- url: - +--- owner: - +--- repo: - +--- device: all +--- rising: false +--- domainkey: secret +--- exactmatch: false + +CREATE TEMP FUNCTION FILTERCLASS(user_agent STRING, device STRING) +RETURNS BOOLEAN +AS ( + device = "all" + OR ( + device = "desktop" + AND user_agent NOT LIKE "%Mobile%" + AND user_agent LIKE "Mozilla%" + ) + OR (device = "mobile" AND user_agent LIKE "%Mobile%") + OR (device = "bot" AND user_agent NOT LIKE "Mozilla%") +); + + +WITH +current_data AS ( + SELECT * FROM + helix_rum.EVENTS_V4( + @url, # domain or URL + CAST(@offset AS INT64), # not used, offset in days from today + CAST(@interval AS INT64), # interval in days to consider + @startdate, # not used, start date + @enddate, # not used, end date + @timezone, # timezone + @device, # device class + @domainkey + ) +), + +previous_data AS ( + SELECT * FROM + helix_rum.EVENTS_V4( + @url, # domain or URL + # offset in days from today + CAST(@interval AS INT64) + CAST(@offset AS INT64), + CAST(@interval AS INT64), # interval in days to consider + FORMAT_DATE("%F", DATE_SUB(@startdate, INTERVAL ABS(DATE_DIFF(DATE(@enddate, @timezone), DATE(@startdate, @timezone), DAY)) DAY)), # not used, start date + @startdate, # not used, end date + @timezone, # timezone + @device, # device class + @domainkey + ) +), + +current_rum_by_id AS ( + SELECT + id, + IF(MAX(lcp) IS NULL, NULL, IF(MAX(lcp) <= 2500, TRUE, FALSE)) AS lcpgood, + IF(MAX(fid) IS NULL, NULL, IF(MAX(fid) <= 100, TRUE, FALSE)) AS fidgood, + IF(MAX(inp) IS NULL, NULL, IF(MAX(inp) <= 200, TRUE, FALSE)) AS inpgood, + IF(MAX(cls) IS NULL, NULL, IF(MAX(cls) <= 0.1, TRUE, FALSE)) AS clsgood, + IF(MAX(lcp) IS NULL, NULL, IF(MAX(lcp) >= 4000, TRUE, FALSE)) AS lcpbad, + IF(MAX(fid) IS NULL, NULL, IF(MAX(fid) >= 300, TRUE, FALSE)) AS fidbad, + IF(MAX(inp) IS NULL, NULL, IF(MAX(inp) >= 500, TRUE, FALSE)) AS inpbad, + IF(MAX(cls) IS NULL, NULL, IF(MAX(cls) >= 0.25, TRUE, FALSE)) AS clsbad, + MAX(host) AS host, + MAX(user_agent) AS user_agent, + IF( +
@url = "-" AND @repo = "-" AND @owner = "-", + REGEXP_EXTRACT(MAX(url), "https://([^/]+)/", 1), + MAX(url) + ) AS url, + MAX(lcp) AS lcp, + MAX(fid) AS fid, + MAX(inp) AS inp, + MAX(cls) AS cls, + MAX(referer) AS referer, + MAX(weight) AS weight + FROM current_data + WHERE + url LIKE CONCAT( + "https://", @url, "%" + ) OR url LIKE CONCAT( + "https://%", @repo, "--", @owner, ".hlx%/" + ) OR (@url = "-" AND @repo = "-" AND @owner = "-") + OR CONCAT("www.", @url) LIKE CONCAT("%", regexp_replace(url, 'https://', '')) # append www prefix - new in V4 + OR CONCAT("www.", @url) LIKE CONCAT("%", regexp_replace(url, 'https://', ''), "/%") # append www prefix - new in V4 + OR CONCAT("www.", @url) LIKE CONCAT( regexp_replace(url, 'https://', '')) # append www prefix - new in V4 + OR CONCAT("www.", @url) LIKE CONCAT( regexp_replace(url, 'https://', ''), "/%") # append www prefix - new in V4 + OR @url LIKE CONCAT("%.", url) + OR @url LIKE CONCAT("%.", url, "/%") + OR @url LIKE CONCAT(url) + OR @url LIKE CONCAT(url, "/%") + OR STARTS_WITH(regexp_replace(url, 'https://', ''), concat("www.", @url)) + OR STARTS_WITH(regexp_replace(url, 'https://', ''), @url) + OR STARTS_WITH(regexp_replace(url, 'https://', ''), concat("www.", regexp_replace(@url, "https://", ''))) + GROUP BY id +), + +previous_rum_by_id AS ( + SELECT + id, + IF(MAX(lcp) IS NULL, NULL, IF(MAX(lcp) <= 2500, TRUE, FALSE)) AS lcpgood, + IF(MAX(fid) IS NULL, NULL, IF(MAX(fid) <= 100, TRUE, FALSE)) AS fidgood, + IF(MAX(inp) IS NULL, NULL, IF(MAX(inp) <= 200, TRUE, FALSE)) AS inpgood, + IF(MAX(cls) IS NULL, NULL, IF(MAX(cls) <= 0.1, TRUE, FALSE)) AS clsgood, + IF(MAX(lcp) IS NULL, NULL, IF(MAX(lcp) >= 4000, TRUE, FALSE)) AS lcpbad, + IF(MAX(fid) IS NULL, NULL, IF(MAX(fid) >= 300, TRUE, FALSE)) AS fidbad, + IF(MAX(inp) IS NULL, NULL, IF(MAX(inp) >= 500, TRUE, FALSE)) AS inpbad, + IF(MAX(cls) IS NULL, NULL, IF(MAX(cls) >= 0.25, TRUE, FALSE)) AS clsbad, + MAX(host) AS host, + MAX(user_agent) AS user_agent, + IF( + @url = "-" 
AND @repo = "-" AND @owner = "-", + REGEXP_EXTRACT(MAX(url), "https://([^/]+)/", 1), + MAX(url) + ) AS url, + MAX(lcp) AS lcp, + MAX(fid) AS fid, + MAX(inp) AS inp, + MAX(cls) AS cls, + MAX(referer) AS referer, + MAX(weight) AS weight + FROM previous_data + WHERE + url LIKE CONCAT( + "https://", @url, "%" + ) OR url LIKE CONCAT( + "https://%", @repo, "--", @owner, ".hlx%/" + ) OR (@url = "-" AND @repo = "-" AND @owner = "-") + OR CONCAT("www.", @url) LIKE CONCAT("%", regexp_replace(url, 'https://', '')) # append www prefix - new in V4 + OR CONCAT("www.", @url) LIKE CONCAT("%", regexp_replace(url, 'https://', ''), "/%") # append www prefix - new in V4 + OR CONCAT("www.", @url) LIKE CONCAT( regexp_replace(url, 'https://', '')) # append www prefix - new in V4 + OR CONCAT("www.", @url) LIKE CONCAT( regexp_replace(url, 'https://', ''), "/%") # append www prefix - new in V4 + OR @url LIKE CONCAT("%.", url) + OR @url LIKE CONCAT("%.", url, "/%") + OR @url LIKE CONCAT(url) + OR @url LIKE CONCAT(url, "/%") + OR STARTS_WITH(regexp_replace(url, 'https://', ''), concat("www.", @url)) + OR STARTS_WITH(regexp_replace(url, 'https://', ''), @url) + OR STARTS_WITH(regexp_replace(url, 'https://', ''), concat("www.", regexp_replace(@url, "https://", ''))) + GROUP BY id +), + +current_rum_by_url_and_weight AS ( + SELECT + weight, + url, + CAST( + 100 * IF( + COUNTIF(lcpgood IS NOT NULL) != 0, + COUNTIF(lcpgood = TRUE) / COUNTIF(lcpgood IS NOT NULL), + NULL + ) AS INT64 + ) AS lcpgood, + CAST( + 100 * IF( + COUNTIF(fidgood IS NOT NULL) != 0, + COUNTIF(fidgood = TRUE) / COUNTIF(fidgood IS NOT NULL), + NULL + ) AS INT64 + ) AS fidgood, + CAST( + 100 * IF( + COUNTIF(inpgood IS NOT NULL) != 0, + COUNTIF(inpgood = TRUE) / COUNTIF(inpgood IS NOT NULL), + NULL + ) AS INT64 + ) AS inpgood, + CAST( + 100 * IF( + COUNTIF(clsgood IS NOT NULL) != 0, + COUNTIF(clsgood = TRUE) / COUNTIF(clsgood IS NOT NULL), + NULL + ) AS INT64 + ) AS clsgood, + CAST( + 100 * IF( + COUNTIF(lcpbad IS NOT NULL) != 0, 
+ COUNTIF(lcpbad = TRUE) / COUNTIF(lcpbad IS NOT NULL), + NULL + ) AS INT64 + ) AS lcpbad, + CAST( + 100 * IF( + COUNTIF(fidbad IS NOT NULL) != 0, + COUNTIF(fidbad = TRUE) / COUNTIF(fidbad IS NOT NULL), + NULL + ) AS INT64 + ) AS fidbad, + CAST( + 100 * IF( + COUNTIF(inpbad IS NOT NULL) != 0, + COUNTIF(inpbad = TRUE) / COUNTIF(inpbad IS NOT NULL), + NULL + ) AS INT64 + ) AS inpbad, + CAST( + 100 * IF( + COUNTIF(clsbad IS NOT NULL) != 0, + COUNTIF(clsbad = TRUE) / COUNTIF(clsbad IS NOT NULL), + NULL + ) AS INT64 + ) AS clsbad, + CAST(APPROX_QUANTILES(lcp, 100)[OFFSET(75)] AS INT64) AS avglcp, + CAST(APPROX_QUANTILES(fid, 100)[OFFSET(75)] AS INT64) AS avgfid, + CAST(APPROX_QUANTILES(inp, 100)[OFFSET(75)] AS INT64) AS avginp, + ROUND(APPROX_QUANTILES(cls, 100)[OFFSET(75)], 3) AS avgcls, + COUNT(id) AS events + FROM current_rum_by_id + GROUP BY url, weight + ORDER BY events DESC +), + +previous_rum_by_url_and_weight AS ( + SELECT + weight, + url, + CAST( + 100 * IF( + COUNTIF(lcpgood IS NOT NULL) != 0, + COUNTIF(lcpgood = TRUE) / COUNTIF(lcpgood IS NOT NULL), + NULL + ) AS INT64 + ) AS lcpgood, + CAST( + 100 * IF( + COUNTIF(fidgood IS NOT NULL) != 0, + COUNTIF(fidgood = TRUE) / COUNTIF(fidgood IS NOT NULL), + NULL + ) AS INT64 + ) AS fidgood, + CAST( + 100 * IF( + COUNTIF(inpgood IS NOT NULL) != 0, + COUNTIF(inpgood = TRUE) / COUNTIF(inpgood IS NOT NULL), + NULL + ) AS INT64 + ) AS inpgood, + CAST( + 100 * IF( + COUNTIF(clsgood IS NOT NULL) != 0, + COUNTIF(clsgood = TRUE) / COUNTIF(clsgood IS NOT NULL), + NULL + ) AS INT64 + ) AS clsgood, + CAST( + 100 * IF( + COUNTIF(lcpbad IS NOT NULL) != 0, + COUNTIF(lcpbad = TRUE) / COUNTIF(lcpbad IS NOT NULL), + NULL + ) AS INT64 + ) AS lcpbad, + CAST( + 100 * IF( + COUNTIF(fidbad IS NOT NULL) != 0, + COUNTIF(fidbad = TRUE) / COUNTIF(fidbad IS NOT NULL), + NULL + ) AS INT64 + ) AS fidbad, + CAST( + 100 * IF( + COUNTIF(inpbad IS NOT NULL) != 0, + COUNTIF(inpbad = TRUE) / COUNTIF(inpbad IS NOT NULL), + NULL + ) AS INT64 + ) AS 
inpbad, + CAST( + 100 * IF( + COUNTIF(clsbad IS NOT NULL) != 0, + COUNTIF(clsbad = TRUE) / COUNTIF(clsbad IS NOT NULL), + NULL + ) AS INT64 + ) AS clsbad, + CAST(APPROX_QUANTILES(lcp, 100)[OFFSET(75)] AS INT64) AS avglcp, + CAST(APPROX_QUANTILES(fid, 100)[OFFSET(75)] AS INT64) AS avgfid, + CAST(APPROX_QUANTILES(inp, 100)[OFFSET(75)] AS INT64) AS avginp, + ROUND(APPROX_QUANTILES(cls, 100)[OFFSET(75)], 3) AS avgcls, + COUNT(id) AS events + FROM previous_rum_by_id + GROUP BY url, weight + ORDER BY events DESC +), + +current_rum_by_url AS ( + SELECT + url, + SUM(lcpgood * weight) / SUM(weight) AS lcpgood, + SUM(fidgood * weight) / SUM(weight) AS fidgood, + SUM(inpgood * weight) / SUM(weight) AS inpgood, + SUM(clsgood * weight) / SUM(weight) AS clsgood, + SUM(lcpbad * weight) / SUM(weight) AS lcpbad, + SUM(fidbad * weight) / SUM(weight) AS fidbad, + SUM(inpbad * weight) / SUM(weight) AS inpbad, + SUM(clsbad * weight) / SUM(weight) AS clsbad, + SUM(avglcp * weight) / SUM(weight) AS avglcp, + SUM(avgfid * weight) / SUM(weight) AS avgfid, + SUM(avginp * weight) / SUM(weight) AS avginp, + ROUND(SUM(avgcls * weight) / SUM(weight), 3) AS avgcls, + SUM(events * weight) AS pageviews + + FROM current_rum_by_url_and_weight + GROUP BY url + ORDER BY pageviews DESC +), + +previous_rum_by_url AS ( + SELECT + url, + SUM(lcpgood * weight) / SUM(weight) AS lcpgood, + SUM(fidgood * weight) / SUM(weight) AS fidgood, + SUM(inpgood * weight) / SUM(weight) AS inpgood, + SUM(clsgood * weight) / SUM(weight) AS clsgood, + SUM(lcpbad * weight) / SUM(weight) AS lcpbad, + SUM(fidbad * weight) / SUM(weight) AS fidbad, + SUM(inpbad * weight) / SUM(weight) AS inpbad, + SUM(clsbad * weight) / SUM(weight) AS clsbad, + SUM(avglcp * weight) / SUM(weight) AS avglcp, + SUM(avgfid * weight) / SUM(weight) AS avgfid, + SUM(avginp * weight) / SUM(weight) AS avginp, + ROUND(SUM(avgcls * weight) / SUM(weight), 3) AS avgcls, + SUM(events * weight) AS pageviews + + FROM previous_rum_by_url_and_weight + GROUP BY 
url + ORDER BY pageviews DESC +), + +current_event_count AS ( + SELECT SUM(events) AS allevents FROM ( + SELECT + id, + MAX(weight) AS events + FROM current_data + GROUP BY id + ) +), + +previous_event_count AS ( + SELECT SUM(events) AS allevents FROM ( + SELECT + id, + MAX(weight) AS events + FROM previous_data + GROUP BY id + ) +), + +current_truncated_rum_by_url AS ( + SELECT + CAST(SUM(ranked.lcpgood * pageviews) / SUM(pageviews) AS INT64) AS lcpgood, + CAST(SUM(ranked.fidgood * pageviews) / SUM(pageviews) AS INT64) AS fidgood, + CAST(SUM(ranked.inpgood * pageviews) / SUM(pageviews) AS INT64) AS inpgood, + CAST(SUM(ranked.clsgood * pageviews) / SUM(pageviews) AS INT64) AS clsgood, + CAST(SUM(ranked.lcpbad * pageviews) / SUM(pageviews) AS INT64) AS lcpbad, + CAST(SUM(ranked.fidbad * pageviews) / SUM(pageviews) AS INT64) AS fidbad, + CAST(SUM(ranked.inpbad * pageviews) / SUM(pageviews) AS INT64) AS inpbad, + CAST(SUM(ranked.clsbad * pageviews) / SUM(pageviews) AS INT64) AS clsbad, + CAST(SUM(ranked.avglcp * pageviews) / SUM(pageviews) AS INT64) AS avglcp, + CAST(SUM(ranked.avgfid * pageviews) / SUM(pageviews) AS INT64) AS avgfid, + CAST(SUM(ranked.avginp * pageviews) / SUM(pageviews) AS INT64) AS avginp, + ROUND(SUM(ranked.avgcls * pageviews) / SUM(pageviews), 3) AS avgcls, + SUM(ranked.pageviews) AS pageviews, + 100 * SUM(pageviews) / MAX(current_event_count.allevents) AS rumshare, + IF(ranked.rank > @limit AND NOT @rising, "Other", ranked.url) AS url + FROM + (SELECT + pageviews, + lcpgood, + fidgood, + inpgood, + clsgood, + lcpbad, + fidbad, + inpbad, + clsbad, + avglcp, + avgfid, + avginp, + avgcls, + url, + ROW_NUMBER() OVER (ORDER BY pageviews DESC) AS rank + FROM current_rum_by_url) AS ranked, + current_event_count + GROUP BY url +), + +previous_truncated_rum_by_url AS ( + SELECT + CAST(SUM(ranked.lcpgood * pageviews) / SUM(pageviews) AS INT64) AS lcpgood, + CAST(SUM(ranked.fidgood * pageviews) / SUM(pageviews) AS INT64) AS fidgood, + 
CAST(SUM(ranked.inpgood * pageviews) / SUM(pageviews) AS INT64) AS inpgood, + CAST(SUM(ranked.clsgood * pageviews) / SUM(pageviews) AS INT64) AS clsgood, + CAST(SUM(ranked.lcpbad * pageviews) / SUM(pageviews) AS INT64) AS lcpbad, + CAST(SUM(ranked.fidbad * pageviews) / SUM(pageviews) AS INT64) AS fidbad, + CAST(SUM(ranked.inpbad * pageviews) / SUM(pageviews) AS INT64) AS inpbad, + CAST(SUM(ranked.clsbad * pageviews) / SUM(pageviews) AS INT64) AS clsbad, + CAST(SUM(ranked.avglcp * pageviews) / SUM(pageviews) AS INT64) AS avglcp, + CAST(SUM(ranked.avgfid * pageviews) / SUM(pageviews) AS INT64) AS avgfid, + CAST(SUM(ranked.avginp * pageviews) / SUM(pageviews) AS INT64) AS avginp, + ROUND(SUM(ranked.avgcls * pageviews) / SUM(pageviews), 3) AS avgcls, + SUM(ranked.pageviews) AS pageviews, + 100 * SUM(pageviews) / MAX(previous_event_count.allevents) AS rumshare, + IF(ranked.rank > @limit AND NOT @rising, "Other", ranked.url) AS url + FROM + (SELECT + *, + ROW_NUMBER() OVER (ORDER BY pageviews DESC) AS rank + FROM previous_rum_by_url) AS ranked, + previous_event_count + GROUP BY url +) + +SELECT + url, + pageviews, + pageviews_1, + pageviews_diff, + lcpgood, + fidgood, + inpgood, + clsgood, + lcpbad, + fidbad, + inpbad, + clsbad, + avglcp, + avgfid, + avginp, + avgcls, + rumshare, + lcpgood_1, + fidgood_1, + inpgood_1, + clsgood_1, + lcpbad_1, + fidbad_1, + inpbad_1, + clsbad_1, + avglcp_1, + avgfid_1, + avginp_1, + avgcls_1, + rumshare_1, + url_1 +FROM ( + SELECT + current_truncated_rum_by_url.pageviews AS pageviews, + previous_truncated_rum_by_url.pageviews AS pageviews_1, + current_truncated_rum_by_url.lcpgood AS lcpgood, + current_truncated_rum_by_url.fidgood AS fidgood, + current_truncated_rum_by_url.inpgood AS inpgood, + current_truncated_rum_by_url.clsgood AS clsgood, + current_truncated_rum_by_url.lcpbad AS lcpbad, + current_truncated_rum_by_url.fidbad AS fidbad, + current_truncated_rum_by_url.inpbad AS inpbad, + current_truncated_rum_by_url.clsbad AS clsbad, + 
current_truncated_rum_by_url.avglcp AS avglcp, + current_truncated_rum_by_url.avgfid AS avgfid, + current_truncated_rum_by_url.avginp AS avginp, + current_truncated_rum_by_url.avgcls AS avgcls, + current_truncated_rum_by_url.rumshare AS rumshare, + previous_truncated_rum_by_url.lcpgood AS lcpgood_1, + previous_truncated_rum_by_url.fidgood AS fidgood_1, + previous_truncated_rum_by_url.inpgood AS inpgood_1, + previous_truncated_rum_by_url.clsgood AS clsgood_1, + previous_truncated_rum_by_url.lcpbad AS lcpbad_1, + previous_truncated_rum_by_url.fidbad AS fidbad_1, + previous_truncated_rum_by_url.inpbad AS inpbad_1, + previous_truncated_rum_by_url.clsbad AS clsbad_1, + previous_truncated_rum_by_url.avglcp AS avglcp_1, + previous_truncated_rum_by_url.avgfid AS avgfid_1, + previous_truncated_rum_by_url.avginp AS avginp_1, + previous_truncated_rum_by_url.avgcls AS avgcls_1, + previous_truncated_rum_by_url.rumshare AS rumshare_1, + previous_truncated_rum_by_url.url AS url_1, + ROW_NUMBER() OVER ( + ORDER BY + IF( + @rising, + COALESCE( + current_truncated_rum_by_url.pageviews, 0 + ) - COALESCE(previous_truncated_rum_by_url.pageviews, 0), + 0 + ) DESC, + current_truncated_rum_by_url.pageviews DESC + ) AS rank, + COALESCE( + current_truncated_rum_by_url.url, previous_truncated_rum_by_url.url + ) AS url, + COALESCE( + current_truncated_rum_by_url.pageviews, 0 + ) - COALESCE(previous_truncated_rum_by_url.pageviews, 0) AS pageviews_diff + FROM + current_truncated_rum_by_url FULL OUTER JOIN previous_truncated_rum_by_url + ON current_truncated_rum_by_url.url = previous_truncated_rum_by_url.url + ORDER BY + IF(current_truncated_rum_by_url.url = "Other", 1, 0), + IF( + @rising, + COALESCE( + current_truncated_rum_by_url.pageviews, 0 + ) - COALESCE(previous_truncated_rum_by_url.pageviews, 0), + 0 + ) DESC, + current_truncated_rum_by_url.pageviews DESC, + previous_truncated_rum_by_url.pageviews DESC +) WHERE +( + ( + @exactmatch = true + AND ( + url = concat('https://', 
REGEXP_REPLACE(@url, 'https://', '')) + or + url = concat('https://www.', REGEXP_REPLACE(@url, 'https://', '')) + or + url = concat('https://www.', REGEXP_REPLACE(@url, 'www.', '')) + or + url = concat('https://', REGEXP_REPLACE(@url, 'https://www.', '')) + ) + ) OR @exactmatch = false AND (rank <= @limit OR @rising)) +--- avgcls: 75th percentile value of the Cumulative Layout Shift metric in the current period +--- avgcls_1: 75th percentile value of the CLS metric in the previous period +--- avgfid: 75th percentile value of the First Input Delay metric in milliseconds in the current period +--- avgfid_1: 75th percentile value of FID in the previous period +--- avginp: 75th percentile value of the Interaction to Next Paint metric in milliseconds in the current period +--- avginp_1: 75th percentile of INP in the previous period +--- avglcp: 75th percentile of the Largest Contentful Paint metric in milliseconds in the current period +--- avglcp_1: 75th percentile of LCP in the previous period +--- clsbad: percentage of all page views where Cumulative Layout Shift is in the “needs improvement” range in the current period +--- clsbad_1: percentage of all page views with bad CLS in the previous period +--- clsgood: percentage of all page views where the CLS metric is in the “good” range in the current period +--- clsgood_1: percentage of pageviews with good CLS in the previous period +--- fidbad: percentage of pageviews with bad FID in the current period +--- fidbad_1: percentage of pageviews with bad FID in the previous period +--- fidgood: percentage of pageviews with good FID in the current period +--- fidgood_1: percentage of pageviews with good FID in the previous period +--- inpbad: percentage of pageviews with bad INP in the current period +--- inpbad_1: percentage of pageviews with bad INP in the previous period +--- inpgood: percentage of pageviews with good INP in the current period +--- inpgood_1: percentage of pageviews with good INP in the previous 
period +--- lcpbad: percentage of pageviews with bad LCP in the current period +--- lcpbad_1: percentage of pageviews with bad LCP in the previous period +--- lcpgood: percentage of pageviews with good LCP in the current period +--- lcpgood_1: percentage of pageviews with good LCP in the current period +--- pageviews: estimated number of pageviews in the current period +--- pageviews_1: estimated number of pageviews in the previous period +--- pageviews_diff: difference in pageviews between the current and previous period. If the parameter rising is true, then pages will be ranked according to this value +--- rumshare: percentage of all traffic for the given domain that is going to this url in the current period +--- rumshare_1: percentage of of all traffic in the previous domain that is going to this url in the previous period +--- url: the URL of the page that is getting traffic +--- url_1: the URL of the page that is getting traffic in the previous period (these last two values are always the same) \ No newline at end of file From 8d1d258b817fd99cd6352cbec5904d010807ef9e Mon Sep 17 00:00:00 2001 From: Marquise Rosier Date: Wed, 3 Jan 2024 21:09:11 -0800 Subject: [PATCH 05/13] fix grouping --- src/queries/dash/rum-sources-aggregated.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/queries/dash/rum-sources-aggregated.sql b/src/queries/dash/rum-sources-aggregated.sql index 019758e3..f24b5aa8 100644 --- a/src/queries/dash/rum-sources-aggregated.sql +++ b/src/queries/dash/rum-sources-aggregated.sql @@ -55,6 +55,6 @@ SELECT SUM(actions) AS actions, SUM(actions) / SUM(views) AS actions_per_view FROM sources -GROUP BY source +GROUP BY source, url ORDER BY views DESC LIMIT @limit From 91ec5f751986713cd741e13b5ce82c298d22ff67 Mon Sep 17 00:00:00 2001 From: Marquise Rosier Date: Wed, 3 Jan 2024 21:21:26 -0800 Subject: [PATCH 06/13] coalesce views and topurls --- src/queries/dash/rum-sources-aggregated.sql | 15 ++++++++++++--- 1 file changed, 12 
insertions(+), 3 deletions(-) diff --git a/src/queries/dash/rum-sources-aggregated.sql b/src/queries/dash/rum-sources-aggregated.sql index f24b5aa8..d2f4229a 100644 --- a/src/queries/dash/rum-sources-aggregated.sql +++ b/src/queries/dash/rum-sources-aggregated.sql @@ -44,8 +44,8 @@ sources AS ( ) = '-' OR CAST(@checkpoint AS STRING) = checkpoint ) AND (source = @source OR @source = '-') GROUP BY source, id, checkpoint -) - +), +with filtered as ( SELECT source, COUNT(id) AS ids, @@ -57,4 +57,13 @@ SELECT FROM sources GROUP BY source, url ORDER BY views DESC -LIMIT @limit +) + +SELECT + source, + topurl, + SUM(views) AS views, +FROM filtered +GROUP BY source, topurl +ORDER BY views DESC +LIMIT @limit \ No newline at end of file From 0bcec7f6c82eae8f43018c126e9428415486c0b8 Mon Sep 17 00:00:00 2001 From: Marquise Rosier Date: Wed, 3 Jan 2024 21:33:01 -0800 Subject: [PATCH 07/13] tiny syntax error --- src/queries/dash/rum-sources-aggregated.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/queries/dash/rum-sources-aggregated.sql b/src/queries/dash/rum-sources-aggregated.sql index d2f4229a..9dea46d0 100644 --- a/src/queries/dash/rum-sources-aggregated.sql +++ b/src/queries/dash/rum-sources-aggregated.sql @@ -45,7 +45,7 @@ sources AS ( ) AND (source = @source OR @source = '-') GROUP BY source, id, checkpoint ), -with filtered as ( +filtered AS ( SELECT source, COUNT(id) AS ids, From fd90f992399a39fbba101974f1e5df533665f92f Mon Sep 17 00:00:00 2001 From: Marquise Rosier Date: Thu, 4 Jan 2024 14:08:28 -0800 Subject: [PATCH 08/13] rum-dashboard-exact-match --- package.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package.json b/package.json index c52f42f1..1c67b65a 100644 --- a/package.json +++ b/package.json @@ -115,7 +115,8 @@ "src/queries/dash/rfqs.sql", "src/queries/dash/rum-sources-aggregated.sql", "src/queries/dash/searches.sql", - "src/queries/dash/github-commits.sql" + "src/queries/dash/github-commits.sql", + 
"src/queries/dash/rum-dashboard-exact.sql" ] }, From 7c08b0ef7ec2ccc2a1b1e74562506015e7534c4a Mon Sep 17 00:00:00 2001 From: Marquise Rosier Date: Thu, 4 Jan 2024 18:28:05 -0800 Subject: [PATCH 09/13] update rum-pageviews to use EVENTS_V4 --- src/queries/dash/checkpoint-by-url.sql | 51 --- src/queries/dash/conversions.sql | 35 -- src/queries/dash/daily-rum.sql | 52 --- src/queries/dash/enters.sql | 39 -- src/queries/dash/rfqs.sql | 35 -- src/queries/dash/rum-dashboard-exact.sql | 559 ----------------------- src/queries/dash/searches.sql | 35 -- src/queries/rum-pageviews.sql | 2 +- 8 files changed, 1 insertion(+), 807 deletions(-) delete mode 100644 src/queries/dash/checkpoint-by-url.sql delete mode 100644 src/queries/dash/conversions.sql delete mode 100644 src/queries/dash/daily-rum.sql delete mode 100644 src/queries/dash/enters.sql delete mode 100644 src/queries/dash/rfqs.sql delete mode 100644 src/queries/dash/rum-dashboard-exact.sql delete mode 100644 src/queries/dash/searches.sql diff --git a/src/queries/dash/checkpoint-by-url.sql b/src/queries/dash/checkpoint-by-url.sql deleted file mode 100644 index f1a2b8d5..00000000 --- a/src/queries/dash/checkpoint-by-url.sql +++ /dev/null @@ -1,51 +0,0 @@ ---- description: Get URL Specific Daily Conversion Data From RUM for a given domain ---- Authorization: none ---- Access-Control-Allow-Origin: * ---- limit: 30 ---- interval: 30 ---- offset: 0 ---- startdate: 2023-02-01 ---- enddate: 2023-05-28 ---- timezone: UTC ---- exactmatch: true ---- url: - ---- device: all ---- domainkey: secret ---- ckpt: search -with sidekick_events AS ( -SELECT - FORMAT_DATE("%Y-%m-%d", DATE_TRUNC(time, DAY)) AS day, - id, - checkpoint, - hostname, - url, - pageviews, - source, - target - FROM helix_rum.CHECKPOINTS_V4( @url, @offset, @interval, '-', '-', 'UTC', 'all', @domainkey ) -WHERE - checkpoint LIKE @ckpt - -) -SELECT url, - checkpoint, - target, - sum(pageviews) AS invocations, -FROM sidekick_events -WHERE -( - ( - true = true - AND ( - 
url = concat('https://', REGEXP_REPLACE(@url, 'https://', '')) - or - url = concat('https://www.', REGEXP_REPLACE(@url, 'https://', '')) - or - url = concat('https://www.', REGEXP_REPLACE(@url, 'www.', '')) - or - url = concat('https://', REGEXP_REPLACE(@url, 'https://www.', '')) - ) - ) OR true = false ) -GROUP BY checkpoint, - url, target -ORDER BY invocations desc; \ No newline at end of file diff --git a/src/queries/dash/conversions.sql b/src/queries/dash/conversions.sql deleted file mode 100644 index b4814d15..00000000 --- a/src/queries/dash/conversions.sql +++ /dev/null @@ -1,35 +0,0 @@ ---- description: Get URL Request for Quotes Data From RUM for a given domain ---- Authorization: none ---- Access-Control-Allow-Origin: * ---- limit: 30 ---- interval: 30 ---- offset: 0 ---- timezone: UTC ---- exactmatch: true ---- url: - ---- device: all ---- domainkey: secret -with conversions AS ( -SELECT -* - FROM helix_rum.CHECKPOINTS_V4( @url, @offset, @interval, '-', '-', 'UTC', 'all', @domainkey ) -WHERE - checkpoint LIKE "%convert%" AND - ( - ( - @exactmatch = true - AND ( - url = concat('https://', REGEXP_REPLACE(@url, 'https://', '')) - or - url = concat('https://www.', REGEXP_REPLACE(@url, 'https://', '')) - or - url = concat('https://www.', REGEXP_REPLACE(@url, 'www.', '')) - or - url = concat('https://', REGEXP_REPLACE(@url, 'https://www.', '')) - ) - ) OR @exactmatch = false ) -), -unique_targets as ( - select (case when not @exactmatch then hostname end) as hostname,(case when @exactmatch then url end) as url, lower(target) as target, sum(pageviews) traffic from conversions group by (case when not @exactmatch then hostname end), lower(target), (case when @exactmatch then url end) -) -select hostname, url, target, sum(traffic) as traffic from unique_targets group by hostname, url, target order by traffic desc \ No newline at end of file diff --git a/src/queries/dash/daily-rum.sql b/src/queries/dash/daily-rum.sql deleted file mode 100644 index 7c5ce84d..00000000 
--- a/src/queries/dash/daily-rum.sql +++ /dev/null @@ -1,52 +0,0 @@ ---- description: Get Daily Helix RUM data for a given domain ---- Authorization: none ---- Access-Control-Allow-Origin: * ---- limit: 30 ---- interval: 30 ---- offset: 0 ---- startdate: 2023-02-01 ---- enddate: 2023-05-28 ---- timezone: UTC ---- timeunit: day ---- exactmatch: false ---- url: - ---- device: all ---- domainkey: secret -WITH -daily_rum AS ( - SELECT - REGEXP_REPLACE(url, '\\?.+', '') as url, - avg(lcp) avglcp, - avg(fid) avgfid, - avg(inp) as avginp, - avg(cls) avgcls, - IF(@timeunit = 'day', FORMAT_TIMESTAMP("%Y-%m-%d", time), - IF(@timeunit = 'hour', FORMAT_TIMESTAMP("%Y-%m-%d-%T", time), - FORMAT_TIMESTAMP("%Y-%m-%d", time))) AS date - FROM - helix_rum.EVENTS_V3( - REGEXP_REPLACE(@url, 'https://', ''), # domain or URL - CAST(@offset AS INT64), # not used, offset in days from today - CAST(@interval AS INT64), # interval in days to consider - @startdate, # not used, start date - @enddate, # not used, end date - @timezone, # timezone - @device, # device class - @domainkey - ) - group by url, date - order by date asc -) -select * from daily_rum -where avglcp is not null - and avgfid is not null - and avgcls is not null - and avginp is not null - and ( - @exactmatch = true and url = concat('https://', REGEXP_REPLACE(@url, 'https://', '')) - or - @exactmatch = false - ) - and not starts_with(url, 'http://localhost') - and not starts_with(url, 'https://localhost') - order by url, date diff --git a/src/queries/dash/enters.sql b/src/queries/dash/enters.sql deleted file mode 100644 index 5541c9cf..00000000 --- a/src/queries/dash/enters.sql +++ /dev/null @@ -1,39 +0,0 @@ ---- description: Get URL Specific Referrals Data From RUM for a given domain ---- Authorization: none ---- Access-Control-Allow-Origin: * ---- limit: 30 ---- interval: 30 ---- offset: 0 ---- timezone: UTC ---- exactmatch: true ---- url: - ---- device: all ---- domainkey: secret -with enters AS ( -SELECT -* - FROM 
helix_rum.CHECKPOINTS_V4( @url, @offset, @interval, '-', '-', 'UTC', 'all', @domainkey ) -WHERE - checkpoint LIKE "enter" AND - ( - ( - @exactmatch = true - AND ( - url = concat('https://', REGEXP_REPLACE(@url, 'https://', '')) - or - url = concat('https://www.', REGEXP_REPLACE(@url, 'https://', '')) - or - url = concat('https://www.', REGEXP_REPLACE(@url, 'www.', '')) - or - url = concat('https://', REGEXP_REPLACE(@url, 'https://www.', '')) - ) - ) OR @exactmatch = false ) -), - -unique_sources as ( - select (case when not @exactmatch then hostname end) as hostname,(case when @exactmatch then url end) as url, split(net.reg_domain(source), '.')[offset(0)] as source, sum(pageviews) traffic from enters group by (case when not @exactmatch then hostname end), split(net.reg_domain(source), '.')[offset(0)], (case when @exactmatch then url end) -), -total_traffic as ( - select sum(traffic) as total from unique_sources -) -select *, (traffic/total)*100 as percentage from unique_sources join total_traffic on true where traffic >= (@threshold * total_traffic.total) order by traffic desc \ No newline at end of file diff --git a/src/queries/dash/rfqs.sql b/src/queries/dash/rfqs.sql deleted file mode 100644 index 1958a1be..00000000 --- a/src/queries/dash/rfqs.sql +++ /dev/null @@ -1,35 +0,0 @@ ---- description: Get URL Request for Quotes Data From RUM for a given domain ---- Authorization: none ---- Access-Control-Allow-Origin: * ---- limit: 30 ---- interval: 30 ---- offset: 0 ---- timezone: UTC ---- exactmatch: true ---- url: - ---- device: all ---- domainkey: secret -with rfqs AS ( -SELECT -* - FROM helix_rum.CHECKPOINTS_V4( @url, @offset, @interval, '-', '-', 'UTC', 'all', @domainkey ) -WHERE - checkpoint LIKE "%rfq%" AND - ( - ( - @exactmatch = true - AND ( - url = concat('https://', REGEXP_REPLACE(@url, 'https://', '')) - or - url = concat('https://www.', REGEXP_REPLACE(@url, 'https://', '')) - or - url = concat('https://www.', REGEXP_REPLACE(@url, 'www.', '')) - or - url 
= concat('https://', REGEXP_REPLACE(@url, 'https://www.', '')) - ) - ) OR @exactmatch = false ) -), -unique_targets as ( - select (case when not @exactmatch then hostname end) as hostname,(case when @exactmatch then url end) as url, lower(target) as target, sum(pageviews) traffic from rfqs group by (case when not @exactmatch then hostname end), lower(target), (case when @exactmatch then url end) -) -select hostname, url, target, sum(traffic) as traffic from unique_targets group by hostname, url, target order by traffic desc \ No newline at end of file diff --git a/src/queries/dash/rum-dashboard-exact.sql b/src/queries/dash/rum-dashboard-exact.sql deleted file mode 100644 index ca9e5763..00000000 --- a/src/queries/dash/rum-dashboard-exact.sql +++ /dev/null @@ -1,559 +0,0 @@ ---- description: Get Helix RUM data for a given domain or owner/repo combination ---- Authorization: none ---- Access-Control-Allow-Origin: * ---- limit: 10 ---- interval: 30 ---- offset: 0 ---- startdate: 2022-02-01 ---- enddate: 2022-05-28 ---- timezone: UTC ---- url: - ---- owner: - ---- repo: - ---- device: all ---- rising: false ---- domainkey: secret ---- exactmatch: false - -CREATE TEMP FUNCTION FILTERCLASS(user_agent STRING, device STRING) -RETURNS BOOLEAN -AS ( - device = "all" - OR ( - device = "desktop" - AND user_agent NOT LIKE "%Mobile%" - AND user_agent LIKE "Mozilla%" - ) - OR (device = "mobile" AND user_agent LIKE "%Mobile%") - OR (device = "bot" AND user_agent NOT LIKE "Mozilla%") -); - - -WITH -current_data AS ( - SELECT * FROM - helix_rum.EVENTS_V4( - @url, # domain or URL - CAST(@offset AS INT64), # not used, offset in days from today - CAST(@interval AS INT64), # interval in days to consider - @startdate, # not used, start date - @enddate, # not used, end date - @timezone, # timezone - @device, # device class - @domainkey - ) -), - -previous_data AS ( - SELECT * FROM - helix_rum.EVENTS_V4( - @url, # domain or URL - # offset in days from today - CAST(@interval AS INT64) + 
CAST(@offset AS INT64), - CAST(@interval AS INT64), # interval in days to consider - FORMAT_DATE("%F", DATE_SUB(@startdate, INTERVAL ABS(DATE_DIFF(DATE(@enddate, @timezone), DATE(@startdate, @timezone), DAY)) DAY)), # not used, start date - @startdate, # not used, end date - @timezone, # timezone - @device, # device class - @domainkey - ) -), - -current_rum_by_id AS ( - SELECT - id, - IF(MAX(lcp) IS NULL, NULL, IF(MAX(lcp) <= 2500, TRUE, FALSE)) AS lcpgood, - IF(MAX(fid) IS NULL, NULL, IF(MAX(fid) <= 100, TRUE, FALSE)) AS fidgood, - IF(MAX(inp) IS NULL, NULL, IF(MAX(inp) <= 200, TRUE, FALSE)) AS inpgood, - IF(MAX(cls) IS NULL, NULL, IF(MAX(cls) <= 0.1, TRUE, FALSE)) AS clsgood, - IF(MAX(lcp) IS NULL, NULL, IF(MAX(lcp) >= 4000, TRUE, FALSE)) AS lcpbad, - IF(MAX(fid) IS NULL, NULL, IF(MAX(fid) >= 300, TRUE, FALSE)) AS fidbad, - IF(MAX(inp) IS NULL, NULL, IF(MAX(fid) >= 500, TRUE, FALSE)) AS inpbad, - IF(MAX(cls) IS NULL, NULL, IF(MAX(cls) >= 0.25, TRUE, FALSE)) AS clsbad, - MAX(host) AS host, - MAX(user_agent) AS user_agent, - IF( - @url = "-" AND @repo = "-" AND @owner = "-", - REGEXP_EXTRACT(MAX(url), "https://([^/]+)/", 1), - MAX(url) - ) AS url, - MAX(lcp) AS lcp, - MAX(fid) AS fid, - MAX(inp) AS inp, - MAX(cls) AS cls, - MAX(referer) AS referer, - MAX(weight) AS weight - FROM current_data - WHERE - url LIKE CONCAT( - "https://", @url, "%" - ) OR url LIKE CONCAT( - "https://%", @repo, "--", @owner, ".hlx%/" - ) OR (@url = "-" AND @repo = "-" AND @owner = "-") - OR CONCAT("www.", @url) LIKE CONCAT("%", regexp_replace(url, 'https://', '')) # append www prefix - new in V4 - OR CONCAT("www.", @url) LIKE CONCAT("%", regexp_replace(url, 'https://', ''), "/%") # append www prefix - new in V4 - OR CONCAT("www.", @url) LIKE CONCAT( regexp_replace(url, 'https://', '')) # append www prefix - new in V4 - OR CONCAT("www.", @url) LIKE CONCAT( regexp_replace(url, 'https://', ''), "/%") # append www prefix - new in V4 - OR @url LIKE CONCAT("%.", url) - OR @url LIKE CONCAT("%.", 
url, "/%") - OR @url LIKE CONCAT(url) - OR @url LIKE CONCAT(url, "/%") - OR STARTS_WITH(regexp_replace(url, 'https://', ''), concat("www.", @url)) - OR STARTS_WITH(regexp_replace(url, 'https://', ''), @url) - OR STARTS_WITH(regexp_replace(url, 'https://', ''), concat("www.", regexp_replace(@url, "https://", ''))) - GROUP BY id -), - -previous_rum_by_id AS ( - SELECT - id, - IF(MAX(lcp) IS NULL, NULL, IF(MAX(lcp) <= 2500, TRUE, FALSE)) AS lcpgood, - IF(MAX(fid) IS NULL, NULL, IF(MAX(fid) <= 100, TRUE, FALSE)) AS fidgood, - IF(MAX(inp) IS NULL, NULL, IF(MAX(inp) <= 200, TRUE, FALSE)) AS inpgood, - IF(MAX(cls) IS NULL, NULL, IF(MAX(cls) <= 0.1, TRUE, FALSE)) AS clsgood, - IF(MAX(lcp) IS NULL, NULL, IF(MAX(lcp) >= 4000, TRUE, FALSE)) AS lcpbad, - IF(MAX(fid) IS NULL, NULL, IF(MAX(fid) >= 300, TRUE, FALSE)) AS fidbad, - IF(MAX(inp) IS NULL, NULL, IF(MAX(inp) >= 500, TRUE, FALSE)) AS inpbad, - IF(MAX(cls) IS NULL, NULL, IF(MAX(cls) >= 0.25, TRUE, FALSE)) AS clsbad, - MAX(host) AS host, - MAX(user_agent) AS user_agent, - IF( - @url = "-" AND @repo = "-" AND @owner = "-", - REGEXP_EXTRACT(MAX(url), "https://([^/]+)/", 1), - MAX(url) - ) AS url, - MAX(lcp) AS lcp, - MAX(fid) AS fid, - MAX(inp) AS inp, - MAX(cls) AS cls, - MAX(referer) AS referer, - MAX(weight) AS weight - FROM previous_data - WHERE - url LIKE CONCAT( - "https://", @url, "%" - ) OR url LIKE CONCAT( - "https://%", @repo, "--", @owner, ".hlx%/" - ) OR (@url = "-" AND @repo = "-" AND @owner = "-") - OR CONCAT("www.", @url) LIKE CONCAT("%", regexp_replace(url, 'https://', '')) # append www prefix - new in V4 - OR CONCAT("www.", @url) LIKE CONCAT("%", regexp_replace(url, 'https://', ''), "/%") # append www prefix - new in V4 - OR CONCAT("www.", @url) LIKE CONCAT( regexp_replace(url, 'https://', '')) # append www prefix - new in V4 - OR CONCAT("www.", @url) LIKE CONCAT( regexp_replace(url, 'https://', ''), "/%") # append www prefix - new in V4 - OR @url LIKE CONCAT("%.", url) - OR @url LIKE CONCAT("%.", url, "/%") 
- OR @url LIKE CONCAT(url) - OR @url LIKE CONCAT(url, "/%") - OR STARTS_WITH(regexp_replace(url, 'https://', ''), concat("www.", @url)) - OR STARTS_WITH(regexp_replace(url, 'https://', ''), @url) - OR STARTS_WITH(regexp_replace(url, 'https://', ''), concat("www.", regexp_replace(@url, "https://", ''))) - GROUP BY id -), - -current_rum_by_url_and_weight AS ( - SELECT - weight, - url, - CAST( - 100 * IF( - COUNTIF(lcpgood IS NOT NULL) != 0, - COUNTIF(lcpgood = TRUE) / COUNTIF(lcpgood IS NOT NULL), - NULL - ) AS INT64 - ) AS lcpgood, - CAST( - 100 * IF( - COUNTIF(fidgood IS NOT NULL) != 0, - COUNTIF(fidgood = TRUE) / COUNTIF(fidgood IS NOT NULL), - NULL - ) AS INT64 - ) AS fidgood, - CAST( - 100 * IF( - COUNTIF(inpgood IS NOT NULL) != 0, - COUNTIF(inpgood = TRUE) / COUNTIF(inpgood IS NOT NULL), - NULL - ) AS INT64 - ) AS inpgood, - CAST( - 100 * IF( - COUNTIF(clsgood IS NOT NULL) != 0, - COUNTIF(clsgood = TRUE) / COUNTIF(clsgood IS NOT NULL), - NULL - ) AS INT64 - ) AS clsgood, - CAST( - 100 * IF( - COUNTIF(lcpbad IS NOT NULL) != 0, - COUNTIF(lcpbad = TRUE) / COUNTIF(lcpbad IS NOT NULL), - NULL - ) AS INT64 - ) AS lcpbad, - CAST( - 100 * IF( - COUNTIF(fidbad IS NOT NULL) != 0, - COUNTIF(fidbad = TRUE) / COUNTIF(fidbad IS NOT NULL), - NULL - ) AS INT64 - ) AS fidbad, - CAST( - 100 * IF( - COUNTIF(inpbad IS NOT NULL) != 0, - COUNTIF(inpbad = TRUE) / COUNTIF(inpbad IS NOT NULL), - NULL - ) AS INT64 - ) AS inpbad, - CAST( - 100 * IF( - COUNTIF(clsbad IS NOT NULL) != 0, - COUNTIF(clsbad = TRUE) / COUNTIF(clsbad IS NOT NULL), - NULL - ) AS INT64 - ) AS clsbad, - CAST(APPROX_QUANTILES(lcp, 100)[OFFSET(75)] AS INT64) AS avglcp, - CAST(APPROX_QUANTILES(fid, 100)[OFFSET(75)] AS INT64) AS avgfid, - CAST(APPROX_QUANTILES(inp, 100)[OFFSET(75)] AS INT64) AS avginp, - ROUND(APPROX_QUANTILES(cls, 100)[OFFSET(75)], 3) AS avgcls, - COUNT(id) AS events - FROM current_rum_by_id - GROUP BY url, weight - ORDER BY events DESC -), - -previous_rum_by_url_and_weight AS ( - SELECT - weight, - 
url, - CAST( - 100 * IF( - COUNTIF(lcpgood IS NOT NULL) != 0, - COUNTIF(lcpgood = TRUE) / COUNTIF(lcpgood IS NOT NULL), - NULL - ) AS INT64 - ) AS lcpgood, - CAST( - 100 * IF( - COUNTIF(fidgood IS NOT NULL) != 0, - COUNTIF(fidgood = TRUE) / COUNTIF(fidgood IS NOT NULL), - NULL - ) AS INT64 - ) AS fidgood, - CAST( - 100 * IF( - COUNTIF(inpgood IS NOT NULL) != 0, - COUNTIF(inpgood = TRUE) / COUNTIF(inpgood IS NOT NULL), - NULL - ) AS INT64 - ) AS inpgood, - CAST( - 100 * IF( - COUNTIF(clsgood IS NOT NULL) != 0, - COUNTIF(clsgood = TRUE) / COUNTIF(clsgood IS NOT NULL), - NULL - ) AS INT64 - ) AS clsgood, - CAST( - 100 * IF( - COUNTIF(lcpbad IS NOT NULL) != 0, - COUNTIF(lcpbad = TRUE) / COUNTIF(lcpbad IS NOT NULL), - NULL - ) AS INT64 - ) AS lcpbad, - CAST( - 100 * IF( - COUNTIF(fidbad IS NOT NULL) != 0, - COUNTIF(fidbad = TRUE) / COUNTIF(fidbad IS NOT NULL), - NULL - ) AS INT64 - ) AS fidbad, - CAST( - 100 * IF( - COUNTIF(inpbad IS NOT NULL) != 0, - COUNTIF(inpbad = TRUE) / COUNTIF(inpbad IS NOT NULL), - NULL - ) AS INT64 - ) AS inpbad, - CAST( - 100 * IF( - COUNTIF(clsbad IS NOT NULL) != 0, - COUNTIF(clsbad = TRUE) / COUNTIF(clsbad IS NOT NULL), - NULL - ) AS INT64 - ) AS clsbad, - CAST(APPROX_QUANTILES(lcp, 100)[OFFSET(75)] AS INT64) AS avglcp, - CAST(APPROX_QUANTILES(fid, 100)[OFFSET(75)] AS INT64) AS avgfid, - CAST(APPROX_QUANTILES(inp, 100)[OFFSET(75)] AS INT64) AS avginp, - ROUND(APPROX_QUANTILES(cls, 100)[OFFSET(75)], 3) AS avgcls, - COUNT(id) AS events - FROM previous_rum_by_id - GROUP BY url, weight - ORDER BY events DESC -), - -current_rum_by_url AS ( - SELECT - url, - SUM(lcpgood * weight) / SUM(weight) AS lcpgood, - SUM(fidgood * weight) / SUM(weight) AS fidgood, - SUM(inpgood * weight) / SUM(weight) AS inpgood, - SUM(clsgood * weight) / SUM(weight) AS clsgood, - SUM(lcpbad * weight) / SUM(weight) AS lcpbad, - SUM(fidbad * weight) / SUM(weight) AS fidbad, - SUM(inpbad * weight) / SUM(weight) AS inpbad, - SUM(clsbad * weight) / SUM(weight) AS clsbad, - 
SUM(avglcp * weight) / SUM(weight) AS avglcp, - SUM(avgfid * weight) / SUM(weight) AS avgfid, - SUM(avginp * weight) / SUM(weight) AS avginp, - ROUND(SUM(avgcls * weight) / SUM(weight), 3) AS avgcls, - SUM(events * weight) AS pageviews - - FROM current_rum_by_url_and_weight - GROUP BY url - ORDER BY pageviews DESC -), - -previous_rum_by_url AS ( - SELECT - url, - SUM(lcpgood * weight) / SUM(weight) AS lcpgood, - SUM(fidgood * weight) / SUM(weight) AS fidgood, - SUM(inpgood * weight) / SUM(weight) AS inpgood, - SUM(clsgood * weight) / SUM(weight) AS clsgood, - SUM(lcpbad * weight) / SUM(weight) AS lcpbad, - SUM(fidbad * weight) / SUM(weight) AS fidbad, - SUM(inpbad * weight) / SUM(weight) AS inpbad, - SUM(clsbad * weight) / SUM(weight) AS clsbad, - SUM(avglcp * weight) / SUM(weight) AS avglcp, - SUM(avgfid * weight) / SUM(weight) AS avgfid, - SUM(avginp * weight) / SUM(weight) AS avginp, - ROUND(SUM(avgcls * weight) / SUM(weight), 3) AS avgcls, - SUM(events * weight) AS pageviews - - FROM previous_rum_by_url_and_weight - GROUP BY url - ORDER BY pageviews DESC -), - -current_event_count AS ( - SELECT SUM(events) AS allevents FROM ( - SELECT - id, - MAX(weight) AS events - FROM current_data - GROUP BY id - ) -), - -previous_event_count AS ( - SELECT SUM(events) AS allevents FROM ( - SELECT - id, - MAX(weight) AS events - FROM previous_data - GROUP BY id - ) -), - -current_truncated_rum_by_url AS ( - SELECT - CAST(SUM(ranked.lcpgood * pageviews) / SUM(pageviews) AS INT64) AS lcpgood, - CAST(SUM(ranked.fidgood * pageviews) / SUM(pageviews) AS INT64) AS fidgood, - CAST(SUM(ranked.inpgood * pageviews) / SUM(pageviews) AS INT64) AS inpgood, - CAST(SUM(ranked.clsgood * pageviews) / SUM(pageviews) AS INT64) AS clsgood, - CAST(SUM(ranked.lcpbad * pageviews) / SUM(pageviews) AS INT64) AS lcpbad, - CAST(SUM(ranked.fidbad * pageviews) / SUM(pageviews) AS INT64) AS fidbad, - CAST(SUM(ranked.inpbad * pageviews) / SUM(pageviews) AS INT64) AS inpbad, - CAST(SUM(ranked.clsbad * 
pageviews) / SUM(pageviews) AS INT64) AS clsbad, - CAST(SUM(ranked.avglcp * pageviews) / SUM(pageviews) AS INT64) AS avglcp, - CAST(SUM(ranked.avgfid * pageviews) / SUM(pageviews) AS INT64) AS avgfid, - CAST(SUM(ranked.avginp * pageviews) / SUM(pageviews) AS INT64) AS avginp, - ROUND(SUM(ranked.avgcls * pageviews) / SUM(pageviews), 3) AS avgcls, - SUM(ranked.pageviews) AS pageviews, - 100 * SUM(pageviews) / MAX(current_event_count.allevents) AS rumshare, - IF(ranked.rank > @limit AND NOT @rising, "Other", ranked.url) AS url - FROM - (SELECT - pageviews, - lcpgood, - fidgood, - inpgood, - clsgood, - lcpbad, - fidbad, - inpbad, - clsbad, - avglcp, - avgfid, - avginp, - avgcls, - url, - ROW_NUMBER() OVER (ORDER BY pageviews DESC) AS rank - FROM current_rum_by_url) AS ranked, - current_event_count - GROUP BY url -), - -previous_truncated_rum_by_url AS ( - SELECT - CAST(SUM(ranked.lcpgood * pageviews) / SUM(pageviews) AS INT64) AS lcpgood, - CAST(SUM(ranked.fidgood * pageviews) / SUM(pageviews) AS INT64) AS fidgood, - CAST(SUM(ranked.inpgood * pageviews) / SUM(pageviews) AS INT64) AS inpgood, - CAST(SUM(ranked.clsgood * pageviews) / SUM(pageviews) AS INT64) AS clsgood, - CAST(SUM(ranked.lcpbad * pageviews) / SUM(pageviews) AS INT64) AS lcpbad, - CAST(SUM(ranked.fidbad * pageviews) / SUM(pageviews) AS INT64) AS fidbad, - CAST(SUM(ranked.inpbad * pageviews) / SUM(pageviews) AS INT64) AS inpbad, - CAST(SUM(ranked.clsbad * pageviews) / SUM(pageviews) AS INT64) AS clsbad, - CAST(SUM(ranked.avglcp * pageviews) / SUM(pageviews) AS INT64) AS avglcp, - CAST(SUM(ranked.avgfid * pageviews) / SUM(pageviews) AS INT64) AS avgfid, - CAST(SUM(ranked.avginp * pageviews) / SUM(pageviews) AS INT64) AS avginp, - ROUND(SUM(ranked.avgcls * pageviews) / SUM(pageviews), 3) AS avgcls, - SUM(ranked.pageviews) AS pageviews, - 100 * SUM(pageviews) / MAX(previous_event_count.allevents) AS rumshare, - IF(ranked.rank > @limit AND NOT @rising, "Other", ranked.url) AS url - FROM - (SELECT - *, - 
ROW_NUMBER() OVER (ORDER BY pageviews DESC) AS rank - FROM previous_rum_by_url) AS ranked, - previous_event_count - GROUP BY url -) - -SELECT - url, - pageviews, - pageviews_1, - pageviews_diff, - lcpgood, - fidgood, - inpgood, - clsgood, - lcpbad, - fidbad, - inpbad, - clsbad, - avglcp, - avgfid, - avginp, - avgcls, - rumshare, - lcpgood_1, - fidgood_1, - inpgood_1, - clsgood_1, - lcpbad_1, - fidbad_1, - inpbad_1, - clsbad_1, - avglcp_1, - avgfid_1, - avginp_1, - avgcls_1, - rumshare_1, - url_1 -FROM ( - SELECT - current_truncated_rum_by_url.pageviews AS pageviews, - previous_truncated_rum_by_url.pageviews AS pageviews_1, - current_truncated_rum_by_url.lcpgood AS lcpgood, - current_truncated_rum_by_url.fidgood AS fidgood, - current_truncated_rum_by_url.inpgood AS inpgood, - current_truncated_rum_by_url.clsgood AS clsgood, - current_truncated_rum_by_url.lcpbad AS lcpbad, - current_truncated_rum_by_url.fidbad AS fidbad, - current_truncated_rum_by_url.inpbad AS inpbad, - current_truncated_rum_by_url.clsbad AS clsbad, - current_truncated_rum_by_url.avglcp AS avglcp, - current_truncated_rum_by_url.avgfid AS avgfid, - current_truncated_rum_by_url.avginp AS avginp, - current_truncated_rum_by_url.avgcls AS avgcls, - current_truncated_rum_by_url.rumshare AS rumshare, - previous_truncated_rum_by_url.lcpgood AS lcpgood_1, - previous_truncated_rum_by_url.fidgood AS fidgood_1, - previous_truncated_rum_by_url.inpgood AS inpgood_1, - previous_truncated_rum_by_url.clsgood AS clsgood_1, - previous_truncated_rum_by_url.lcpbad AS lcpbad_1, - previous_truncated_rum_by_url.fidbad AS fidbad_1, - previous_truncated_rum_by_url.inpbad AS inpbad_1, - previous_truncated_rum_by_url.clsbad AS clsbad_1, - previous_truncated_rum_by_url.avglcp AS avglcp_1, - previous_truncated_rum_by_url.avgfid AS avgfid_1, - previous_truncated_rum_by_url.avginp AS avginp_1, - previous_truncated_rum_by_url.avgcls AS avgcls_1, - previous_truncated_rum_by_url.rumshare AS rumshare_1, - 
previous_truncated_rum_by_url.url AS url_1, - ROW_NUMBER() OVER ( - ORDER BY - IF( - @rising, - COALESCE( - current_truncated_rum_by_url.pageviews, 0 - ) - COALESCE(previous_truncated_rum_by_url.pageviews, 0), - 0 - ) DESC, - current_truncated_rum_by_url.pageviews DESC - ) AS rank, - COALESCE( - current_truncated_rum_by_url.url, previous_truncated_rum_by_url.url - ) AS url, - COALESCE( - current_truncated_rum_by_url.pageviews, 0 - ) - COALESCE(previous_truncated_rum_by_url.pageviews, 0) AS pageviews_diff - FROM - current_truncated_rum_by_url FULL OUTER JOIN previous_truncated_rum_by_url - ON current_truncated_rum_by_url.url = previous_truncated_rum_by_url.url - ORDER BY - IF(current_truncated_rum_by_url.url = "Other", 1, 0), - IF( - @rising, - COALESCE( - current_truncated_rum_by_url.pageviews, 0 - ) - COALESCE(previous_truncated_rum_by_url.pageviews, 0), - 0 - ) DESC, - current_truncated_rum_by_url.pageviews DESC, - previous_truncated_rum_by_url.pageviews DESC -) WHERE -( - ( - @exactmatch = true - AND ( - url = concat('https://', REGEXP_REPLACE(@url, 'https://', '')) - or - url = concat('https://www.', REGEXP_REPLACE(@url, 'https://', '')) - or - url = concat('https://www.', REGEXP_REPLACE(@url, 'www.', '')) - or - url = concat('https://', REGEXP_REPLACE(@url, 'https://www.', '')) - ) - ) OR @exactmatch = false AND (rank <= @limit OR @rising)) ---- avgcls: 75th percentile value of the Cumulative Layout Shift metric in the current period ---- avgcls_1: 75th percentile value of the CLS metric in the previous period ---- avgfid: 75th percentile value of the First Input Delay metric in milliseconds in the current period ---- avgfid_1: 75th percentile value of FID in the previous period ---- avginp: 75th percentile value of the Interaction to Next Paint metric in milliseconds in the current period ---- avginp_1: 75th percentile of INP in the previous period ---- avglcp: 75th percentile of the Largest Contentful Paint metric in milliseconds in the current period ---- 
avglcp_1: 75th percentile of LCP in the previous period ---- clsbad: percentage of all page views where Cumulative Layout Shift is in the “needs improvement” range in the current period ---- clsbad_1: percentage of of all page views with bad CLS in the previous period ---- clsgood: percentage of all page views where the CLS metric is in the “good” range in the current period ---- clsgood_1: percentage of pageviews with good CLS the the previous period ---- fidbad: percentage of pageviews with bad FID in the current period ---- fidbad_1: percentage of pageviews with bad FID in the previous period ---- fidgood: percentage of pageviews with good FID in the current period ---- fidgood_1: percentage of pageviews with good FID in the previous period ---- inpbad: percentage of pageviews with bad INP in the current period ---- inpbad_1: percentage of pageviews with bad INP in the previous period ---- inpgood: percentage of pageviews with good INP in the current period ---- inpgood_1: percentage of pageviews with bad INP in the previous period ---- lcpbad: percentage of pageviews with bad LCP in the current period ---- lcpbad_1: percentage of pageviews with bad LCP in the previous period ---- lcpgood: percentage of pageviews with good LCP in the current period ---- lcpgood_1: percentage of pageviews with good LCP in the current period ---- pageviews: estimated number of pageviews in the current period ---- pageviews_1: estimated number of pageviews in the previous period ---- pageviews_diff: difference in pageviews between the current and previous period. 
If the parameter rising is true, then pages will be ranked according to this value ---- rumshare: percentage of all traffic for the given domain that is going to this url in the current period ---- rumshare_1: percentage of of all traffic in the previous domain that is going to this url in the previous period ---- url: the URL of the page that is getting traffic ---- url_1: the URL of the page that is getting traffic in the previous period (these last two values are always the same) \ No newline at end of file diff --git a/src/queries/dash/searches.sql b/src/queries/dash/searches.sql deleted file mode 100644 index c354f166..00000000 --- a/src/queries/dash/searches.sql +++ /dev/null @@ -1,35 +0,0 @@ ---- description: Get URL Specific Searches Data From RUM for a given domain ---- Authorization: none ---- Access-Control-Allow-Origin: * ---- limit: 30 ---- interval: 30 ---- offset: 0 ---- timezone: UTC ---- exactmatch: true ---- url: - ---- device: all ---- domainkey: secret -with searches AS ( -SELECT -* - FROM helix_rum.CHECKPOINTS_V4( @url, @offset, @interval, '-', '-', 'UTC', 'all', @domainkey ) -WHERE - checkpoint LIKE "%search%" AND - ( - ( - @exactmatch = true - AND ( - url = concat('https://', REGEXP_REPLACE(@url, 'https://', '')) - or - url = concat('https://www.', REGEXP_REPLACE(@url, 'https://', '')) - or - url = concat('https://www.', REGEXP_REPLACE(@url, 'www.', '')) - or - url = concat('https://', REGEXP_REPLACE(@url, 'https://www.', '')) - ) - ) OR @exactmatch = false ) -), -unique_targets as ( - select (case when not @exactmatch then hostname end) as hostname,(case when @exactmatch then url end) as url, lower(target) as target, sum(pageviews) traffic from searches group by (case when not @exactmatch then hostname end), lower(target), (case when @exactmatch then url end) -) -select hostname, url, target, sum(traffic) as traffic from unique_targets group by hostname, url, target order by traffic desc \ No newline at end of file diff --git 
a/src/queries/rum-pageviews.sql b/src/queries/rum-pageviews.sql index 97ca369c..ae5103af 100644 --- a/src/queries/rum-pageviews.sql +++ b/src/queries/rum-pageviews.sql @@ -37,7 +37,7 @@ BEGIN WHEN 365 THEN TIMESTAMP_TRUNC(time, YEAR) ELSE TIMESTAMP_TRUNC(time, DAY) END AS date - FROM helix_rum.PAGEVIEWS_V3( + FROM helix_rum.PAGEVIEWS_V4( inurl, # url (inoffset * ingranularity) - 1, # offset inlimit * ingranularity, # days to fetch From 902b16fb46b6225ba3166d7f28d59cc71a1be36f Mon Sep 17 00:00:00 2001 From: Marquise Rosier Date: Thu, 4 Jan 2024 18:33:46 -0800 Subject: [PATCH 10/13] remove unused from package.json --- package.json | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/package.json b/package.json index 1c67b65a..e39b293e 100644 --- a/package.json +++ b/package.json @@ -107,16 +107,9 @@ "src/queries/dash/auth-all-domains.sql", "src/queries/dash/domain-list.sql", "src/queries/dash/update-domain-info.sql", - "src/queries/dash/checkpoint-by-url.sql", - "src/queries/dash/conversions.sql", - "src/queries/dash/daily-rum.sql", - "src/queries/dash/enters.sql", "src/queries/dash/pageviews.sql", - "src/queries/dash/rfqs.sql", "src/queries/dash/rum-sources-aggregated.sql", - "src/queries/dash/searches.sql", - "src/queries/dash/github-commits.sql", - "src/queries/dash/rum-dashboard-exact.sql" + "src/queries/dash/github-commits.sql" ] }, From 003a28b3ebae3ef0869d035f09a37371d5895cc5 Mon Sep 17 00:00:00 2001 From: Marquise Rosier Date: Sat, 6 Jan 2024 20:44:36 -0800 Subject: [PATCH 11/13] feat: update dashboard queries --- src/queries/dash/github-commits.sql | 52 ++++++++++----------- src/queries/dash/pageviews.sql | 20 ++++++-- src/queries/dash/rum-sources-aggregated.sql | 29 ++++++------ 3 files changed, 58 insertions(+), 43 deletions(-) diff --git a/src/queries/dash/github-commits.sql b/src/queries/dash/github-commits.sql index b16e37cd..eb5dda1f 100644 --- a/src/queries/dash/github-commits.sql +++ b/src/queries/dash/github-commits.sql @@ 
-12,29 +12,29 @@ --- url: - --- device: all --- domainkey: secret -with current_data as ( -SELECT - * -FROM - `helix-225321.helix_external_data.DAILY_COMMITS`( - @url, - @offset, - @interval, - @startdate, - @enddate, - @domainkey - ) -) -select * from current_data where -not user = 'GitHub' -and not user = 'GitHub Action' -and not user = 'GitHub Enterprise' -and not user = 'CircleCi Build' -and not user = 'Helix Bot' -and not user = 'adobe-alloy-bot' -and not user = 'github-actions' -and not user = 'github-actions[bot]' -and not user = 'helix-bot[bot]' -and not user = 'renovate[bot]' -and not user = 'semantic-release-bot' -order by owner_repo, commit_date asc \ No newline at end of file +WITH current_data AS ( + SELECT * + FROM + `helix-225321.helix_external_data.DAILY_COMMITS`( + @url, + @offset, + @interval, + @startdate, + @enddate, + @domainkey + ) +) + +SELECT * FROM current_data WHERE + NOT user = 'GitHub' + AND NOT user = 'GitHub Action' + AND NOT user = 'GitHub Enterprise' + AND NOT user = 'CircleCi Build' + AND NOT user = 'Helix Bot' + AND NOT user = 'adobe-alloy-bot' + AND NOT user = 'github-actions' + AND NOT user = 'github-actions[bot]' + AND NOT user = 'helix-bot[bot]' + AND NOT user = 'renovate[bot]' + AND NOT user = 'semantic-release-bot' +ORDER BY owner_repo ASC, commit_date ASC diff --git a/src/queries/dash/pageviews.sql b/src/queries/dash/pageviews.sql index 2c3434f8..35f6e596 100644 --- a/src/queries/dash/pageviews.sql +++ b/src/queries/dash/pageviews.sql @@ -10,7 +10,21 @@ --- url: - --- device: all --- domainkey: secret -with pageviews_by_id as ( - SELECT hostname, id, max(weight) as pageviews FROM `helix-225321.helix_rum.EVENTS_V4`(net.host(@url), @offset, @interval, '-', '-', 'UTC', 'all', @domainkey) group by id, hostname +WITH pageviews_by_id AS ( + SELECT + hostname, + id, + MAX(weight) AS pageviews + FROM + `helix-225321.helix_rum.EVENTS_V4`( + net.host(@url), @offset, @interval, '-', '-', 'UTC', 'all', @domainkey + ) + GROUP BY id,
hostname ) -select hostname, sum(pageviews) as pageviews from pageviews_by_id group by hostname order by pageviews desc \ No newline at end of file + +SELECT + hostname, + SUM(pageviews) AS pageviews +FROM pageviews_by_id +GROUP BY hostname +ORDER BY pageviews DESC diff --git a/src/queries/dash/rum-sources-aggregated.sql b/src/queries/dash/rum-sources-aggregated.sql index 9dea46d0..665f47c5 100644 --- a/src/queries/dash/rum-sources-aggregated.sql +++ b/src/queries/dash/rum-sources-aggregated.sql @@ -44,26 +44,27 @@ sources AS ( ) = '-' OR CAST(@checkpoint AS STRING) = checkpoint ) AND (source = @source OR @source = '-') GROUP BY source, id, checkpoint -), +), + filtered AS ( -SELECT - source, - COUNT(id) AS ids, - COUNT(DISTINCT url) AS pages, - APPROX_TOP_COUNT(url, 1)[OFFSET(0)].value AS topurl, - SUM(views) AS views, - SUM(actions) AS actions, - SUM(actions) / SUM(views) AS actions_per_view -FROM sources -GROUP BY source, url -ORDER BY views DESC + SELECT + source, + COUNT(id) AS ids, + COUNT(DISTINCT url) AS pages, + APPROX_TOP_COUNT(url, 1)[OFFSET(0)].value AS topurl, + SUM(views) AS views, + SUM(actions) AS actions, + SUM(actions) / SUM(views) AS actions_per_view + FROM sources + GROUP BY source, url + ORDER BY views DESC ) SELECT source, topurl, - SUM(views) AS views, + SUM(views) AS views FROM filtered GROUP BY source, topurl ORDER BY views DESC -LIMIT @limit \ No newline at end of file +LIMIT @limit From 9facfccb512e6f53f61d0da34c86c50f79bd0cbb Mon Sep 17 00:00:00 2001 From: Marquise Rosier Date: Sun, 7 Jan 2024 13:45:59 -0800 Subject: [PATCH 12/13] no need for rum-sources-aggregate --- src/queries/dash/rum-sources-aggregated.sql | 70 --------------------- 1 file changed, 70 deletions(-) delete mode 100644 src/queries/dash/rum-sources-aggregated.sql diff --git a/src/queries/dash/rum-sources-aggregated.sql b/src/queries/dash/rum-sources-aggregated.sql deleted file mode 100644 index 665f47c5..00000000 --- a/src/queries/dash/rum-sources-aggregated.sql +++ 
/dev/null @@ -1,70 +0,0 @@ ---- description: Get popularity data for RUM source attribute values, filtered by checkpoint ---- Authorization: none ---- Access-Control-Allow-Origin: * ---- limit: 30 ---- interval: 30 ---- offset: 0 ---- startdate: 2022-01-01 ---- enddate: 2022-01-31 ---- timezone: UTC ---- url: - ---- checkpoint: - ---- source: - ---- domainkey: secret - -WITH -current_data AS ( - SELECT * - FROM - helix_rum.CHECKPOINTS_V3( - @url, - CAST(@offset AS INT64), - CAST(@interval AS INT64), - @startdate, - @enddate, - @timezone, - 'all', - @domainkey - ) -), - -sources AS ( - SELECT - id, - source, - checkpoint, - MAX(url) AS url, - MAX(pageviews) AS views, - SUM(pageviews) AS actions - FROM current_data - WHERE - source IS NOT NULL AND ( - CAST( - @checkpoint AS STRING - ) = '-' OR CAST(@checkpoint AS STRING) = checkpoint - ) AND (source = @source OR @source = '-') - GROUP BY source, id, checkpoint -), - -filtered AS ( - SELECT - source, - COUNT(id) AS ids, - COUNT(DISTINCT url) AS pages, - APPROX_TOP_COUNT(url, 1)[OFFSET(0)].value AS topurl, - SUM(views) AS views, - SUM(actions) AS actions, - SUM(actions) / SUM(views) AS actions_per_view - FROM sources - GROUP BY source, url - ORDER BY views DESC -) - -SELECT - source, - topurl, - SUM(views) AS views -FROM filtered -GROUP BY source, topurl -ORDER BY views DESC -LIMIT @limit From c39dc765245aac34002cbc6d5f36b7dd7a5ade06 Mon Sep 17 00:00:00 2001 From: Marquise Rosier Date: Sun, 7 Jan 2024 13:46:54 -0800 Subject: [PATCH 13/13] update static list --- package.json | 1 - 1 file changed, 1 deletion(-) diff --git a/package.json b/package.json index e39b293e..f266cac9 100644 --- a/package.json +++ b/package.json @@ -108,7 +108,6 @@ "src/queries/dash/domain-list.sql", "src/queries/dash/update-domain-info.sql", "src/queries/dash/pageviews.sql", - "src/queries/dash/rum-sources-aggregated.sql", "src/queries/dash/github-commits.sql" ]