Skip to content

Commit

Permalink
backfill_pages
Browse files Browse the repository at this point in the history
  • Loading branch information
max-ostapenko authored and GCP Dataform committed Sep 29, 2024
1 parent e3cf47b commit 8832ffe
Show file tree
Hide file tree
Showing 5 changed files with 323 additions and 8 deletions.
122 changes: 122 additions & 0 deletions definitions/output/all/backfill_pages.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
// Ordered list of { date, client } pairs to backfill. The operations registered
// from this list are chained in this exact order, so the push order matters.
const iterations = []
const clients = constants.clients

// Walk backwards one month at a time via constants.fn_past_month().
// With identical start and end bounds the loop body runs for a single month;
// the trailing comments look like the bounds of the full backfill range
// (NOTE(review): presumably 2022-06-01 down to 2016-01-01 -- confirm).
for (
  let date = "2016-01-01"; // 2022-06-01
  date >= "2016-01-01"; // 2016-01-01
  date = constants.fn_past_month(date)
) {
  // First-of-month iteration for every client.
  clients.forEach((client) => {
    iterations.push({
      date: date,
      client: client,
    })
  })

  // Months up to and including 2018-12-01 also get a second iteration dated
  // the 15th of the same month.
  if (date <= "2018-12-01") {
    // A date-only ISO string is parsed as UTC midnight, and toISOString()
    // reads the value back in UTC, so the day must be set in UTC as well.
    // Using the local-time setDate() here shifts the result to the 16th of the
    // previous month when the runtime has a negative UTC offset.
    // `const` keeps midMonth scoped to this block instead of leaking an
    // implicit global.
    const midMonth = new Date(date)
    midMonth.setUTCDate(15)
    const midMonthDate = midMonth.toISOString().substring(0, 10)

    clients.forEach((client) => {
      iterations.push({
        date: midMonthDate,
        client: client,
      })
    })
  }
}

// Register one operation per (date, client) iteration.
//
// Each operation runs three statements:
//   1. DELETE the rows for this date/client from `all_dev.pages_stable`.
//   2. CREATE a temporary JS function GET_OTHER_CUSTOM_METRICS that builds an
//      object from the listed payload keys (leading character of each key
//      stripped, each value passed through JSON.parse). Any exception inside
//      the function makes it return null for the whole object.
//   3. INSERT into `all_dev.pages_stable` rows selected from
//      `pages.<fn_date_underscored(date)>_<client>`, LEFT JOINed to the
//      chrome-ux-report experimental.global table to pick up `rank`. The CrUX
//      rows are filtered to the yyyymm derived from
//      constants.fn_past_month(iteration.date).
//
// NOTE(review): the target table is hard-coded to `all_dev.pages_stable`; the
// ctx.resolve("all", "pages") reference only appears inside a SQL comment --
// confirm this is intentional before running against production.
iterations.forEach((iteration, i) => {
// The operation name embeds date and client; the dependency below rebuilds the
// same name for the previous iteration.
operate(`backfill_pages ${iteration.date} ${iteration.client}`).tags([
"backfill_pages"
]).dependencies([
// Chain every operation to the one registered for the previous iteration.
// The first iteration depends on "" -- NOTE(review): presumably resolved by an
// operation named "" declared elsewhere in the project; confirm.
i===0 ? "" : `backfill_pages ${iterations[i-1].date} ${iterations[i-1].client}`
]).queries(ctx => `
DELETE FROM \`all_dev.pages_stable\`
WHERE date = '${iteration.date}' AND client = '${iteration.client}';
CREATE TEMPORARY FUNCTION GET_OTHER_CUSTOM_METRICS(
jsonObject JSON,
keys ARRAY<STRING>
) RETURNS JSON
LANGUAGE js AS """
try {
let other_metrics = {};
keys.forEach(function(key) {
other_metrics[key.substr(1)] = JSON.parse(jsonObject[key]);
});
return other_metrics;
} catch (e) {
return null;
}
""";
INSERT INTO \`all_dev.pages_stable\` --${ctx.resolve("all", "pages")}
SELECT
DATE('${iteration.date}') AS date,
'${iteration.client}' AS client,
pages.url AS page,
TRUE AS is_root_page,
pages.url AS root_page,
crux.rank AS rank,
JSON_VALUE(payload, "$.testID") AS wptid,
SAFE.PARSE_JSON(payload, wide_number_mode => 'round') AS payload,
NULL AS summary,
STRUCT<
a11y JSON,
cms JSON,
css_variables JSON,
cookies JSON,
ecommerce JSON,
element_count JSON,
javascript JSON,
markup JSON,
media JSON,
origin_trials JSON,
performance JSON,
privacy JSON,
responsive_images JSON,
robots_txt JSON,
security JSON,
structured_data JSON,
third_parties JSON,
well_known JSON,
wpt_bodies JSON,
other JSON
>(
SAFE.PARSE_JSON(JSON_VALUE(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._a11y"), wide_number_mode => 'round'),
SAFE.PARSE_JSON(JSON_VALUE(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._cms"), wide_number_mode => 'round'),
SAFE.PARSE_JSON(JSON_VALUE(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._css-variables"), wide_number_mode => 'round'),
SAFE.PARSE_JSON(JSON_VALUE(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._cookies"), wide_number_mode => 'round'),
SAFE.PARSE_JSON(JSON_VALUE(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._ecommerce"), wide_number_mode => 'round'),
SAFE.PARSE_JSON(JSON_VALUE(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._element_count"), wide_number_mode => 'round'),
SAFE.PARSE_JSON(JSON_VALUE(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._javascript"), wide_number_mode => 'round'),
SAFE.PARSE_JSON(JSON_VALUE(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._markup"), wide_number_mode => 'round'),
SAFE.PARSE_JSON(JSON_VALUE(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._media"), wide_number_mode => 'round'),
SAFE.PARSE_JSON(JSON_VALUE(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._origin-trials"), wide_number_mode => 'round'),
SAFE.PARSE_JSON(JSON_VALUE(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._performance"), wide_number_mode => 'round'),
SAFE.PARSE_JSON(JSON_VALUE(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._privacy"), wide_number_mode => 'round'),
SAFE.PARSE_JSON(JSON_VALUE(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._responsive_images"), wide_number_mode => 'round'),
SAFE.PARSE_JSON(JSON_VALUE(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._robots_txt"), wide_number_mode => 'round'),
SAFE.PARSE_JSON(JSON_VALUE(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._security"), wide_number_mode => 'round'),
SAFE.PARSE_JSON(JSON_VALUE(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._structured-data"), wide_number_mode => 'round'),
SAFE.PARSE_JSON(JSON_VALUE(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._third-parties"), wide_number_mode => 'round'),
SAFE.PARSE_JSON(JSON_VALUE(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._well-known"), wide_number_mode => 'round'),
SAFE.PARSE_JSON(JSON_VALUE(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._wpt_bodies"), wide_number_mode => 'round'),
GET_OTHER_CUSTOM_METRICS(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), ["_Colordepth", "_Dpi", "_Images", "_Resolution", "_almanac", "_avg_dom_depth", "_css", "_doctype", "_document_height", "_document_width", "_event-names", "_fugu-apis", "_has_shadow_root", "_img-loading-attr", "_initiators", "_inline_style_bytes", "_lib-detector-version", "_localstorage_size", "_meta_viewport", "_num_iframes", "_num_scripts", "_num_scripts_async", "_num_scripts_sync", "_pwa", "_quirks_mode", "_sass", "_sessionstorage_size", "_usertiming"])
) AS custom_metrics,
NULL AS lighthouse,
NULL AS features,
NULL AS technologies,
JSON_QUERY(SAFE.PARSE_JSON(payload, wide_number_mode => 'round'), "$._metadata") AS metadata
FROM pages.${constants.fn_date_underscored(iteration.date)}_${iteration.client} AS pages ${constants.dev_TABLESAMPLE}
LEFT JOIN (
SELECT DISTINCT
CONCAT(origin, '/') AS page,
experimental.popularity.rank AS rank
FROM ${ctx.resolve("chrome-ux-report", "experimental", "global")}
WHERE yyyymm = ${constants.fn_past_month(iteration.date).substring(0, 7).replace('-', '')}
) AS crux
ON pages.url = crux.page;
`)
})
6 changes: 3 additions & 3 deletions definitions/output/all/backfill_requests.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ for (
operate("")

iterations.forEach((iteration, i) => {
operate(`requests_backfill ${iteration.date} ${iteration.client}`).tags([
"requests_backfill"
operate(`backfill_requests ${iteration.date} ${iteration.client}`).tags([
"backfill_requests"
]).dependencies([
i===0 ? "" : `requests_backfill ${iterations[i-1].date} ${iterations[i-1].client}`
i===0 ? "" : `backfill_requests ${iterations[i-1].date} ${iterations[i-1].client}`
]).queries(ctx => `
DELETE FROM ${ctx.resolve("all", "requests")}
WHERE date = '${iteration.date}' AND client = '${iteration.client}';
Expand Down
133 changes: 133 additions & 0 deletions definitions/output/all/backfill_summary_pages.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
// Ordered list of { date, client } pairs to backfill. The operations registered
// from this list are chained in this exact order, so the push order matters.
const iterations = []
const clients = constants.clients

// Walk backwards one month at a time via constants.fn_past_month().
// With identical start and end bounds the loop body runs for a single month;
// the trailing comment looks like the lower bound of the full backfill range
// (NOTE(review): presumably 2011-06-01 -- confirm).
for (
  let date = "2015-12-01";
  date >= "2015-12-01"; // 2011-06-01
  date = constants.fn_past_month(date)
) {
  // First-of-month iteration for every client.
  clients.forEach((client) => {
    iterations.push({
      date: date,
      client: client,
    })
  })

  // Every month in this range also gets a second iteration dated the 15th.
  // A date-only ISO string is parsed as UTC midnight, and toISOString() reads
  // the value back in UTC, so the day must be set in UTC as well. Using the
  // local-time setDate() here shifts the result to the 16th of the previous
  // month when the runtime has a negative UTC offset.
  // `const` keeps midMonth scoped to the loop body instead of leaking an
  // implicit global.
  const midMonth = new Date(date)
  midMonth.setUTCDate(15)
  const midMonthDate = midMonth.toISOString().substring(0, 10)

  clients.forEach((client) => {
    iterations.push({
      date: midMonthDate,
      client: client,
    })
  })
}

// Register one operation per (date, client) iteration.
//
// Each operation runs two statements:
//   1. DELETE the rows for this date/client from `all_dev.pages_stable`.
//   2. INSERT into `all_dev.pages_stable` rows selected from
//      `summary_pages.<fn_date_underscored(date)>_<client>`. The listed
//      summary columns are packed into a single JSON `payload` via
//      TO_JSON(STRUCT(...)); `rank` is bucketed (<=1000 -> 1000,
//      <=5000 -> 5000, otherwise NULL); summary, custom_metrics, lighthouse,
//      features, technologies and metadata are written as NULL.
//
// NOTE(review): the target table is hard-coded to `all_dev.pages_stable`; the
// ctx.resolve("all", "pages") reference only appears inside a SQL comment --
// confirm this is intentional before running against production.
iterations.forEach((iteration, i) => {
// The operation name embeds date and client; the dependency below rebuilds the
// same name for the previous iteration.
// NOTE(review): the tag is "pages_backfill" while the operation name uses the
// "backfill_summary_pages" prefix -- confirm the tag is the one intended.
operate(`backfill_summary_pages ${iteration.date} ${iteration.client}`).tags([
"pages_backfill"
]).dependencies([
// Chain every operation to the one registered for the previous iteration.
// The first iteration depends on "" -- NOTE(review): presumably resolved by an
// operation named "" declared elsewhere in the project; confirm.
i===0 ? "" : `backfill_summary_pages ${iterations[i-1].date} ${iterations[i-1].client}`
]).queries(ctx => `
DELETE FROM \`all_dev.pages_stable\`
WHERE date = '${iteration.date}' AND client = '${iteration.client}';
INSERT INTO \`all_dev.pages_stable\` --${ctx.resolve("all", "pages")}
SELECT
DATE('${iteration.date}') AS date,
'${iteration.client}' AS client,
pages.url AS page,
TRUE AS is_root_page,
pages.url AS root_page,
CASE
WHEN rank<=1000 THEN 1000
WHEN rank<=5000 THEN 5000
ELSE NULL
END AS rank,
wptid,
TO_JSON( STRUCT(
pageid,
createDate,
archive,
label,
crawlid,
wptid,
wptrun,
url,
urlShort,
urlhash,
cdn,
startedDateTime,
TTFB,
renderStart,
onContentLoaded,
onLoad,
fullyLoaded,
visualComplete,
PageSpeed,
SpeedIndex,
rank,
reqTotal,
reqHtml,
reqJS,
reqCSS,
reqImg,
reqGif,
reqJpg,
reqPng,
reqFont,
reqFlash,
reqJson,
reqOther,
bytesTotal,
bytesHtml,
bytesJS,
bytesCSS,
bytesImg,
bytesGif,
bytesJpg,
bytesPng,
bytesFont,
bytesFlash,
bytesJson,
bytesOther,
bytesHtmlDoc,
numDomains,
maxDomainReqs,
numRedirects,
numErrors,
numGlibs,
numHttps,
numCompressed,
numDomElements,
maxageNull,
maxage0,
maxage1,
maxage30,
maxage365,
maxageMore,
gzipTotal,
gzipSavings,
_connections,
_adult_site,
avg_dom_depth,
document_height,
document_width,
localstorage_size,
sessionstorage_size,
num_iframes,
num_scripts,
doctype,
meta_viewport
)) AS payload,
NULL AS summary,
NULL AS custom_metrics,
NULL AS lighthouse,
NULL AS features,
NULL AS technologies,
NULL AS metadata
FROM summary_pages.${constants.fn_date_underscored(iteration.date)}_${iteration.client} AS pages ${constants.dev_TABLESAMPLE};
`)
})
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ const iterations = [],
clients = constants.clients;

for (
let date = "2016-01-01"; // 2022-06-01
date >= "2016-01-01"; // 2016-01-01
let date = "2015-12-01";
date >= "2015-12-01"; // 2011-06-01
date = constants.fn_past_month(date)
) {
clients.forEach((client) => {
Expand Down Expand Up @@ -31,8 +31,10 @@ iterations.forEach((iteration, i) => {
add_dimensions = false;
}

operate(`requests_backfill_summary ${iteration.date}_${iteration.client}`).tags([
operate(`backfill_summary_requests ${iteration.date} ${iteration.client}`).tags([
"requests_backfill"
]).dependencies([
i===0 ? "" : `backfill_summary_requests ${iterations[i-1].date} ${iterations[i-1].client}`
]).queries(ctx => `
DELETE FROM ${ctx.resolve("all", "requests")}
WHERE date = '${iteration.date}' AND client = '${iteration.client}';
Expand Down Expand Up @@ -148,7 +150,66 @@ SELECT
requests.firstHTML AS is_main_document,
get_type(requests.mimeType, get_ext_from_url(requests.url)) AS type,
IF(requests.firstReq, 1, NULL) AS index,
NULL AS payload,
TO_JSON( STRUCT(
requests.requestid,
requests.pageid,
requests.startedDateTime,
requests.time,
requests.method,
requests.url,
requests.urlShort,
requests.redirectUrl,
requests.firstReq,
requests.firstHtml,
requests.reqHttpVersion,
requests.reqHeadersSize,
requests.reqBodySize,
requests.reqCookieLen,
requests.reqOtherHeaders,
requests.status,
requests.respHttpVersion,
requests.respHeadersSize,
requests.respBodySize,
requests.respSize,
requests.respCookieLen,
requests.expAge,
requests.mimeType,
requests.respOtherHeaders,
requests.req_accept,
requests.req_accept_charset,
requests.req_accept_encoding,
requests.req_accept_language,
requests.req_connection,
requests.req_host,
requests.req_if_modified_since,
requests.req_if_none_match,
requests.req_referer,
requests.req_user_agent,
requests.resp_accept_ranges,
requests.resp_age,
requests.resp_cache_control,
requests.resp_connection,
requests.resp_content_encoding,
requests.resp_content_language,
requests.resp_content_length,
requests.resp_content_location,
requests.resp_content_type,
requests.resp_date,
requests.resp_etag,
requests.resp_expires,
requests.resp_keep_alive,
requests.resp_last_modified,
requests.resp_location,
requests.resp_pragma,
requests.resp_server,
requests.resp_transfer_encoding,
requests.resp_vary,
requests.resp_via,
requests.resp_x_powered_by,
requests._cdn_provider,
requests._gzip_save,
requests.crawlid
)) AS payload,
TO_JSON( STRUCT(
requests.time AS time,
requests.method AS method,
Expand Down
1 change: 0 additions & 1 deletion definitions/sources/declares.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ HAVING COUNT(1) = 0
`);
}


declare({
database: "chrome-ux-report",
schema: "experimental",
Expand Down

0 comments on commit 8832ffe

Please sign in to comment.