-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into excess-aphid
- Loading branch information
Showing
1 changed file
with
88 additions
and
71 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,127 +1,144 @@ | ||
// Month that currentMonth is compared against.
// NOTE(review): presumably the crawl date one month before constants.currentMonth —
// confirm against constants.fnPastMonth.
const pastMonth = constants.fnPastMonth(constants.currentMonth)

/*
 * wappalyzer.tech_detections
 *
 * Month-over-month summary, per technology, of how many origins adopted or
 * dropped the technology between pastMonth and constants.currentMonth.
 *
 * The table is incremental and partitioned by date: each run removes any rows
 * already stored for constants.currentMonth and appends a fresh snapshot, so
 * re-running the same month does not duplicate data.
 *
 * Output columns (one row per technology):
 *   date                               - constants.currentMonth as DATE
 *   technology                         - technology name
 *   total_origins_deprecated_existing  - negative count: origin in both months, technology dropped
 *   total_origins_deprecated_gone      - negative count: origin missing from the current month
 *   total_origins_persisted            - origins that had the technology last month and kept it
 *   total_origins_adopted_existing     - origin in both months, technology newly detected
 *   total_origins_adopted_new          - origin only in the current month
 *   sample_origins_deprecated_existing - up to 5 example origins, comma separated
 *   sample_origins_adopted_existing    - up to 5 example origins, comma separated
 */
publish('tech_detections', {
  description: 'Used in dashboard: https://lookerstudio.google.com/u/7/reporting/1jh_ScPlCIbSYTf2r2Y6EftqmX9SQy4Gn/page/p_an38lbzywc/edit',
  schema: 'wappalyzer',
  type: 'incremental',
  protected: true,
  bigquery: {
    partitionBy: 'date'
  },
  tags: ['crawl_complete']
}).preOps(ctx => ctx.when(ctx.incremental(), `
-- Drop the current month's rows so the insert below replaces them.
-- Only emitted for incremental runs, when the target table already exists.
DELETE FROM ${ctx.self()}
WHERE date = '${constants.currentMonth}';
`)).query(ctx => `
WITH source AS (
  -- One row per (date, origin, technology) for the two months being compared.
  -- constants.devRankFilter is appended verbatim to the WHERE clause.
  SELECT DISTINCT
    date,
    root_page AS origin,
    tech.technology
  FROM ${ctx.ref('crawl', 'pages')},
    UNNEST(technologies) AS tech
  WHERE date IN ('${pastMonth}', '${constants.currentMonth}') ${constants.devRankFilter}
),

-- Technology in the previous month
tech_before AS (
  SELECT
    origin,
    technology
  FROM source
  WHERE date = '${pastMonth}'
),

-- Technology in the current month
tech_current AS (
  SELECT
    origin,
    technology
  FROM source
  WHERE date = '${constants.currentMonth}'
),

-- Summary of technology per origin in the previous month
tech_before_summary AS (
  SELECT
    technology,
    COUNT(DISTINCT origin) AS total_origins_before
  FROM tech_before
  GROUP BY technology
),

-- Origins that persisted across both months and adopted the technology in the current month
tech_adopted_existing_origins AS (
  SELECT
    persisted_origins.technology,
    COUNT(DISTINCT persisted_origins.origin) AS total_origins_adopted_existing,
    STRING_AGG(DISTINCT persisted_origins.origin LIMIT 5) AS sample_origins_adopted_existing
  FROM (
    -- Current-month detections restricted to origins that also existed last month
    SELECT DISTINCT
      tech_current.technology,
      tech_current.origin
    FROM tech_before
    INNER JOIN tech_current
      USING (origin)
  ) AS persisted_origins
  LEFT JOIN tech_before AS tb
    ON persisted_origins.origin = tb.origin AND persisted_origins.technology = tb.technology
  WHERE tb.origin IS NULL -- Technology was not detected last month
  GROUP BY persisted_origins.technology
),

-- Origins that arrived in CrUX in the current month, and their detected technologies
-- (no sample origins are collected for this group)
tech_adopted_new_origins AS (
  SELECT
    tech_current.technology,
    COUNT(DISTINCT tech_current.origin) AS total_origins_adopted_new
  FROM tech_current
  LEFT JOIN tech_before
    USING (origin)
  WHERE tech_before.origin IS NULL -- Origin was not present last month
  GROUP BY tech_current.technology
),

-- Origins that persisted across both months and dropped the technology in the current month
tech_deprecated_existing_origins AS (
  SELECT
    persisted_origins.technology,
    COUNT(DISTINCT persisted_origins.origin) AS total_origins_deprecated_existing,
    STRING_AGG(DISTINCT persisted_origins.origin LIMIT 5) AS sample_origins_deprecated_existing
  FROM (
    -- Previous-month detections restricted to origins that still exist this month
    SELECT DISTINCT
      tech_before.technology,
      tech_before.origin
    FROM tech_before
    INNER JOIN tech_current
      USING (origin)
  ) AS persisted_origins
  LEFT JOIN tech_current AS tc
    ON persisted_origins.origin = tc.origin AND persisted_origins.technology = tc.technology
  WHERE tc.origin IS NULL -- Technology is not detected in the current month
  GROUP BY persisted_origins.technology
),

-- Origins that were dropped from CrUX in the current dataset, so the technology is no longer detected
-- (no sample origins are collected for this group)
tech_deprecated_gone_origins AS (
  SELECT
    tech_before.technology,
    COUNT(DISTINCT tech_before.origin) AS total_origins_deprecated_gone
  FROM tech_before
  LEFT JOIN tech_current
    USING (origin)
  WHERE tech_current.origin IS NULL -- Origin no longer exists in the current dataset
  GROUP BY tech_before.technology
),

-- Every technology that must appear in the output, exactly once.
-- Joining all metric CTEs onto this key set (instead of chaining FULL OUTER JOINs
-- on the previous-month summary) keeps one row per technology even when a
-- technology is missing from the previous month.
all_technologies AS (
  SELECT technology FROM tech_before_summary
  UNION DISTINCT
  SELECT technology FROM tech_adopted_existing_origins
  UNION DISTINCT
  SELECT technology FROM tech_adopted_new_origins
  UNION DISTINCT
  SELECT name AS technology FROM wappalyzer.apps
)

-- Aggregation of technology adoption/deprecation metrics
SELECT
  DATE('${constants.currentMonth}') AS date,
  all_technologies.technology,

  -- Origins summary (deprecations are reported as negative numbers)
  0 - COALESCE(deprecated_existing.total_origins_deprecated_existing, 0) AS total_origins_deprecated_existing,
  0 - COALESCE(deprecated_gone.total_origins_deprecated_gone, 0) AS total_origins_deprecated_gone,
  COALESCE(before_summary.total_origins_before, 0)
    - COALESCE(deprecated_existing.total_origins_deprecated_existing, 0)
    - COALESCE(deprecated_gone.total_origins_deprecated_gone, 0) AS total_origins_persisted,
  COALESCE(adopted_existing.total_origins_adopted_existing, 0) AS total_origins_adopted_existing,
  COALESCE(adopted_new.total_origins_adopted_new, 0) AS total_origins_adopted_new,

  -- Sample origins
  COALESCE(deprecated_existing.sample_origins_deprecated_existing, "") AS sample_origins_deprecated_existing,
  COALESCE(adopted_existing.sample_origins_adopted_existing, "") AS sample_origins_adopted_existing
FROM all_technologies
LEFT JOIN tech_before_summary AS before_summary
  USING (technology)
LEFT JOIN tech_adopted_existing_origins AS adopted_existing
  USING (technology)
LEFT JOIN tech_adopted_new_origins AS adopted_new
  USING (technology)
LEFT JOIN tech_deprecated_existing_origins AS deprecated_existing
  USING (technology)
LEFT JOIN tech_deprecated_gone_origins AS deprecated_gone
  USING (technology)
ORDER BY total_origins_persisted DESC
`)