Skip to content

Commit

Permalink
Fixes to CrUX pipeline (#36)
Browse files: browse the repository at this point in the history
* skip null technologies

* ignore null technologies

* sql review

* updated data fixed
  • Loading branch information
max-ostapenko authored Dec 10, 2024
1 parent 2e81f77 commit aea4218
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 23 deletions.
39 changes: 19 additions & 20 deletions definitions/output/core_web_vitals/technologies.js
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ crux AS (
WHEN 10000 THEN 'Top 10k'
WHEN 1000 THEN 'Top 1k'
END AS rank,
CONCAT(origin, '/') AS root_page_url,
CONCAT(origin, '/') AS root_page,
IF(device = 'desktop', 'desktop', 'mobile') AS client,
# CWV
Expand Down Expand Up @@ -94,9 +94,9 @@ crux AS (
technologies AS (
SELECT
technology.technology AS app,
technology.technology,
client,
page AS url
page
FROM ${ctx.ref('crawl', 'pages')},
UNNEST(technologies) AS technology
WHERE
Expand All @@ -106,9 +106,9 @@ technologies AS (
technology.technology != ''
UNION ALL
SELECT
'ALL' AS app,
'ALL' AS technology,
client,
page AS url
page
FROM ${ctx.ref('crawl', 'pages')}
WHERE
date = '${pastMonth}'
Expand All @@ -117,18 +117,18 @@ UNION ALL
categories AS (
SELECT
technology.technology AS app,
technology.technology,
ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
FROM ${ctx.ref('crawl', 'pages')},
UNNEST(technologies) AS technology,
UNNEST(technology.categories) AS category
WHERE
date = '${pastMonth}'
${constants.devRankFilter}
GROUP BY app
GROUP BY technology
UNION ALL
SELECT
'ALL' AS app,
'ALL' AS technology,
ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
FROM ${ctx.ref('crawl', 'pages')},
UNNEST(technologies) AS technology,
Expand All @@ -142,8 +142,8 @@ UNION ALL
summary_stats AS (
SELECT
client,
page AS url,
root_page AS root_page_url,
page,
root_page AS root_page,
SAFE.INT64(summary.bytesTotal) AS bytesTotal,
SAFE.INT64(summary.bytesJS) AS bytesJS,
SAFE.INT64(summary.bytesImg) AS bytesImg,
Expand All @@ -161,8 +161,8 @@ summary_stats AS (
lab_data AS (
SELECT
client,
root_page_url,
app,
root_page,
technology,
ANY_VALUE(category) AS category,
AVG(bytesTotal) AS bytesTotal,
AVG(bytesJS) AS bytesJS,
Expand All @@ -174,21 +174,21 @@ lab_data AS (
AVG(seo) AS seo
FROM summary_stats
JOIN technologies
USING (client, url)
USING (client, page)
JOIN categories
USING (app)
USING (technology)
GROUP BY
client,
root_page_url,
app
root_page,
technology
)
SELECT
DATE('${pastMonth}') AS date,
geo,
rank,
ANY_VALUE(category) AS category,
app,
technology AS app,
client,
COUNT(0) AS origins,
Expand Down Expand Up @@ -226,9 +226,8 @@ SELECT
SAFE_CAST(APPROX_QUANTILES(bytesImg, 1000)[OFFSET(500)] AS INT64) AS median_bytes_image
FROM lab_data
JOIN crux
USING
(client, root_page_url)
INNER JOIN crux
USING (client, root_page)
GROUP BY
app,
geo,
Expand Down
2 changes: 1 addition & 1 deletion definitions/output/reports/cwv_tech_categories.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ technologies AS (
SELECT
category,
categories.origins,
ARRAY_AGG(technology ORDER BY technologies.origins DESC) AS technologies
ARRAY_AGG(technology IGNORE NULLS ORDER BY technologies.origins DESC) AS technologies
FROM categories
JOIN technologies
USING (category)
Expand Down
2 changes: 1 addition & 1 deletion infra/dataform-trigger/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ DECLARE previousMonth_YYYYMM STRING DEFAULT SUBSTR(previousMonth, 1, 6);
WITH crux AS (
SELECT
LOGICAL_AND(total_rows > 0) AS rows_available,
LOGICAL_AND(TIMESTAMP_DIFF(CURRENT_TIMESTAMP(), last_modified_time, HOUR) < 7) AS recent_last_modified
LOGICAL_OR(TIMESTAMP_DIFF(CURRENT_TIMESTAMP(), last_modified_time, HOUR) < 8) AS recent_last_modified
FROM chrome-ux-report.materialized.INFORMATION_SCHEMA.PARTITIONS
WHERE table_name IN ('device_summary', 'country_summary')
AND partition_id IN (previousMonth, previousMonth_YYYYMM)
Expand Down
2 changes: 1 addition & 1 deletion infra/tf/function_dataform_trigger.tf
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ resource "google_cloud_scheduler_job" "bq-poller-crux-ready" {
paused = false
project = local.project
region = local.region
schedule = "0 */7 8-14 * *"
schedule = "0 */8 8-14 * *"
time_zone = "Etc/UTC"
http_target {
body = base64encode(local.crux_ready_scheduler_body)
Expand Down

0 comments on commit aea4218

Please sign in to comment.