Skip to content

Commit

Permalink
requests processed in 4 steps
Browse files: browse the repository at this point in the history
  • Loading branch information
max-ostapenko committed Dec 17, 2024
1 parent 85fd06d commit df280a1
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 38 deletions.
56 changes: 21 additions & 35 deletions definitions/output/crawl/requests.js
Original file line number | Diff line number | Diff line change
Expand Up @@ -38,42 +38,28 @@ publish('requests', {
},
tags: ['crawl_complete']
}).preOps(ctx => `
CREATE TEMP FUNCTION pruneHeaders(
jsonObject JSON
) RETURNS JSON
LANGUAGE js AS '''
try {
for (const [key, value] of Object.entries(jsonObject)) {
if(key.startsWith('req_') || key.startsWith('resp_')) {
delete jsonObject[key]
}
}
return jsonObject
} catch (e) {
return jsonObject
}
''';
FOR client_value IN (SELECT * FROM UNNEST(['desktop', 'mobile']) AS client) DO
FOR is_root_page_value IN (SELECT * FROM UNNEST([TRUE, FALSE]) AS is_root_page) DO
DELETE FROM ${ctx.self()}
WHERE date = '${constants.currentMonth}' AND
client = 'desktop';
`).query(ctx => `
SELECT
*
FROM ${ctx.ref('crawl_staging', 'requests')}
WHERE date = '${constants.currentMonth}' AND
client = 'desktop'
${constants.devRankFilter}
`).postOps(ctx => `
DELETE FROM ${ctx.self()}
WHERE date = '${constants.currentMonth}' AND
client = 'mobile';
-- Delete old entries
DELETE FROM ${ctx.self()}
WHERE date = '${constants.currentMonth}'
AND client = client_value.client
AND is_root_page = is_root_page_value.is_root_page;
INSERT INTO ${ctx.self()}
SELECT
*
-- Insert new entries
INSERT INTO ${ctx.self()}
SELECT *
FROM ${ctx.ref('crawl_staging', 'requests')}
WHERE date = '${constants.currentMonth}' AND
client = client_value.client AND
is_root_page = is_root_page_value.is_root_page ${constants.devRankFilter};
END FOR;
END FOR;
`).query(ctx => `
SELECT *
FROM ${ctx.ref('crawl_staging', 'requests')}
WHERE date = '${constants.currentMonth}' AND
client = 'mobile'
${constants.devRankFilter}
WHERE date IS NULL ${constants.devRankFilter}
LIMIT 0
`)
5 changes: 2 additions & 3 deletions definitions/output/reports/cwv_tech_categories.js
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,16 +15,15 @@ WITH pages AS (
date = '${pastMonth}' AND
client = 'mobile'
${constants.devRankFilter}
),categories AS (
), categories AS (
SELECT
category,
COUNT(DISTINCT root_page) AS origins
FROM pages,
UNNEST(technologies) AS t,
UNNEST(t.categories) AS category
GROUP BY category
),
technologies AS (
), technologies AS (
SELECT
category,
technology,
Expand Down

0 comments on commit df280a1

Please sign in to comment.