Skip to content

Commit

Permalink
simplify tags
Browse files Browse the repository at this point in the history
  • Loading branch information
max-ostapenko committed Dec 9, 2024
1 parent ade5867 commit f2b56f0
Show file tree
Hide file tree
Showing 15 changed files with 58 additions and 58 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Tag: `crawl_complete`

### Core Web Vitals Technology Report

Tag: `cwv_tech_report`
Tag: `crux_ready`

- httparchive.core_web_vitals.technologies

Expand All @@ -26,7 +26,7 @@ Consumers:

### Blink Features Report

Tag: `blink_features_report`
Tag: `crawl_complete`

- httparchive.blink_features.features
- httparchive.blink_features.usage
Expand All @@ -39,11 +39,11 @@ Consumers:

1. [crawl-complete](https://console.cloud.google.com/cloudpubsub/subscription/detail/dataformTrigger?authuser=7&project=httparchive) PubSub subscription

Tags: ["crawl_complete", "blink_features_report", "crawl_results_legacy"]
Tags: ["crawl_complete"]

2. [bq-poller-cwv-tech-report](https://console.cloud.google.com/cloudscheduler/jobs/edit/us-east4/bq-poller-cwv-tech-report?authuser=7&project=httparchive) Scheduler

Tags: ["cwv_tech_report"]
Tags: ["crux_ready"]

### Triggering workflows

Expand Down
2 changes: 1 addition & 1 deletion definitions/output/blink_features/features.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ publish('features', {
partitionBy: 'yyyymmdd',
clusterBy: ['client', 'rank']
},
tags: ['blink_features_report']
tags: ['crawl_complete']
}).preOps(ctx => `
DELETE FROM ${ctx.self()}
WHERE yyyymmdd = DATE '${constants.currentMonth}';
Expand Down
2 changes: 1 addition & 1 deletion definitions/output/blink_features/usage.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ publish('usage', {
schema: 'blink_features',
type: 'incremental',
protected: true,
tags: ['blink_features_report']
tags: ['crawl_complete']
}).preOps(ctx => `
DELETE FROM ${ctx.self()}
WHERE yyyymmdd = REPLACE('${constants.currentMonth}', '-', '');
Expand Down
2 changes: 1 addition & 1 deletion definitions/output/core_web_vitals/technologies.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ publish('technologies', {
clusterBy: ['geo', 'app', 'rank', 'client'],
requirePartitionFilter: true
},
tags: ['cwv_tech_report'],
tags: ['crux_ready'],
dependOnDependencyAssertions: true
}).preOps(ctx => `
DELETE FROM ${ctx.self()}
Expand Down
2 changes: 1 addition & 1 deletion definitions/output/reports/cwv_tech_adoption.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ publish('cwv_tech_adoption', {
partitionBy: 'date',
clusterBy: ['rank', 'geo']
},
tags: ['cwv_tech_report']
tags: ['crux_ready']
}).preOps(ctx => `
CREATE TEMPORARY FUNCTION GET_ADOPTION(
records ARRAY<STRUCT<
Expand Down
2 changes: 1 addition & 1 deletion definitions/output/reports/cwv_tech_categories.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ const pastMonth = constants.fnPastMonth(constants.currentMonth)
publish('cwv_tech_categories', {
schema: 'reports',
type: 'table',
tags: ['cwv_tech_report']
tags: ['crux_ready']
}).query(ctx => `
/* {"dataform_trigger": "report_cwv_tech_complete", "name": "categories", "type": "dict"} */
WITH pages AS (
Expand Down
2 changes: 1 addition & 1 deletion definitions/output/reports/cwv_tech_core_web_vitals.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ publish('cwv_tech_core_web_vitals', {
partitionBy: 'date',
clusterBy: ['rank', 'geo']
},
tags: ['cwv_tech_report']
tags: ['crux_ready']
}).preOps(ctx => `
CREATE TEMPORARY FUNCTION GET_VITALS(
records ARRAY<STRUCT<
Expand Down
2 changes: 1 addition & 1 deletion definitions/output/reports/cwv_tech_lighthouse.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ publish('cwv_tech_lighthouse', {
partitionBy: 'date',
clusterBy: ['rank', 'geo']
},
tags: ['cwv_tech_report']
tags: ['crux_ready']
}).preOps(ctx => `
CREATE TEMPORARY FUNCTION GET_LIGHTHOUSE(
records ARRAY<STRUCT<
Expand Down
2 changes: 1 addition & 1 deletion definitions/output/reports/cwv_tech_page_weight.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ publish('cwv_tech_page_weight', {
partitionBy: 'date',
clusterBy: ['rank', 'geo']
},
tags: ['cwv_tech_report']
tags: ['crux_ready']
}).preOps(ctx => `
CREATE TEMPORARY FUNCTION GET_PAGE_WEIGHT(
records ARRAY<STRUCT<
Expand Down
2 changes: 1 addition & 1 deletion definitions/output/reports/cwv_tech_technologies.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ const pastMonth = constants.fnPastMonth(constants.currentMonth)
publish('cwv_tech_technologies', {
schema: 'reports',
type: 'table',
tags: ['cwv_tech_report']
tags: ['crux_ready']
}).query(ctx => `
/* {"dataform_trigger": "report_cwv_tech_complete", "name": "technologies", "type": "dict"} */
SELECT
Expand Down
4 changes: 2 additions & 2 deletions definitions/output/reports/reports_dynamic.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ if (iterations.length === 1) {
protected: true,
bigquery: sql.type === 'histogram' ? { partitionBy: 'date', clusterBy: ['client'] } : {},
schema: 'reports',
tags: ['crawl_complete', 'crawl_reports']
tags: ['crawl_complete']
}).preOps(ctx => `
--DELETE FROM ${ctx.self()}
--WHERE date = '${params.date}';
Expand All @@ -33,7 +33,7 @@ sql.query(ctx, params))
metrics.forEach(metric => {
metric.SQL.forEach(sql => {
operate(metric.id + '_' + sql.type + '_' + params.date, {
tags: ['crawl_complete', 'crawl_reports']
tags: ['crawl_complete']
}).queries(ctx => `
DELETE FROM reports.${metric.id}_${sql.type}
WHERE date = '${params.date}';
Expand Down
4 changes: 2 additions & 2 deletions docs/infrastructure.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ graph LR;
dataformTrigger_subscription --> dataformTrigger
subgraph Cloud_Scheduler
bq_poller_cwv_tech_report[CWV Report Poller Job]
bq_poller_cwv_tech_report --> dataformTrigger
bq_poller_crux_ready[CrUX Readiness Poller Job]
bq_poller_crux_ready --> dataformTrigger
end
subgraph Dataform
Expand Down
66 changes: 34 additions & 32 deletions infra/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,23 @@ Request body example with trigger name:
```json
{
"message": {
"name": "cwv_tech_report"
"name": "crux_ready"
}
}
```

Trigger for local development:

```bash
curl -X POST http://localhost:8080/ \
-H "Content-Type: application/json" \
-d '{
"message": {
"name": "crux_ready"
}
}'
```

## Cloud Function for report data exports

[exportReport](https://console.cloud.google.com/functions/details/us-central1/bqExport?env=gen2&authuser=7&project=httparchive) Cloud Run Function
Expand Down Expand Up @@ -54,37 +66,7 @@ This function exports reports data to GCS or Firestore.
}
```

## Monitoring

The issues within the pipeline are being tracked using the following alerts:

1. the event trigger processing fails - [Dataform Trigger Function Error](https://console.cloud.google.com/monitoring/alerting/policies/570799173843203905?authuser=7&project=httparchive)
2. a job in the workflow fails - [Dataform Workflow Invocation Failed](https://console.cloud.google.com/monitoring/alerting/policies/16526940745374967367?authuser=7&project=httparchive)
3. the export function fails - [Dataform Export Function Error](https://console.cloud.google.com/monitoring/alerting/policies/570799173843203905?authuser=7&project=httparchive)

Error notifications are sent to the [#10x-infra](https://httparchive.slack.com/archives/C030V4WAVL3) Slack channel.

## Local development

To test the function locally run from the function directory:

```bash
npm run start
```

Then, in a separate terminal, run the command with the test payload:

```bash
curl -X POST http://localhost:8080/ \
-H "Content-Type: application/json" \
-d '{
"message": {
"name": "cwv_tech_report"
}
}'
```

or
Trigger for local development:

```bash
curl -X POST http://localhost:8080/ \
Expand Down Expand Up @@ -132,6 +114,26 @@ curl -X POST http://localhost:8080/ \
}'
```

## Monitoring

The issues within the pipeline are being tracked using the following alerts:

1. the event trigger processing fails - [Dataform Trigger Function Error](https://console.cloud.google.com/monitoring/alerting/policies/570799173843203905?authuser=7&project=httparchive)
2. a job in the workflow fails - [Dataform Workflow Invocation Failed](https://console.cloud.google.com/monitoring/alerting/policies/16526940745374967367?authuser=7&project=httparchive)
3. the export function fails - [Dataform Export Function Error](https://console.cloud.google.com/monitoring/alerting/policies/570799173843203905?authuser=7&project=httparchive)

Error notifications are sent to the [#10x-infra](https://httparchive.slack.com/archives/C030V4WAVL3) Slack channel.

## Local development

To test the function locally, run the following from the function directory:

```bash
npm run start
```

Then, in a separate terminal, run the `curl` command with the test trigger payload (see the "Trigger for local development" examples above).

## Deployment

From project root directory run:
Expand Down
10 changes: 4 additions & 6 deletions infra/dataform-trigger/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ const { BigQuery } = require('@google-cloud/bigquery')
const { getCompilationResults, runWorkflow } = require('./dataform')

const TRIGGERS = {
cwv_tech_report: {
crux_ready: {
type: 'poller',
query: `
DECLARE previousMonth STRING DEFAULT FORMAT_DATE('%Y%m%d', DATE_SUB(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH));
Expand Down Expand Up @@ -31,7 +31,7 @@ FROM crux, report;
action: 'runDataformRepo',
actionArgs: {
repoName: 'crawl-data',
tags: ['cwv_tech_report']
tags: ['crux_ready']
}
},
crawl_complete: {
Expand All @@ -40,9 +40,7 @@ FROM crux, report;
actionArgs: {
repoName: 'crawl-data',
tags: [
'crawl_complete',
'crawl_reports',
'blink_features_report'
'crawl_complete'
]
}
}
Expand Down Expand Up @@ -163,7 +161,7 @@ async function runDataformRepo (args) {
* Example request payload:
* {
* "message": {
* "name": "cwv_tech_report"
* "name": "crux_ready"
* }
* }
*/
Expand Down
6 changes: 3 additions & 3 deletions infra/tf/function_dataform_trigger.tf
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,10 @@ resource "google_pubsub_subscription" "dataform_crawl_complete" {

# Cloud Scheduler Job to trigger CWV Tech Report Dataform workflow
locals {
cwv_tech_report_scheduler_body = <<EOF
crux_ready_scheduler_body = <<EOF
{
"message": {
"name": "cwv_tech_report"
"name": "crux_ready"
}
}
EOF
Expand All @@ -108,7 +108,7 @@ resource "google_cloud_scheduler_job" "bq-poller-cwv-tech-report" {
schedule = "0 */7 8-14 * *"
time_zone = "Etc/UTC"
http_target {
body = base64encode(local.cwv_tech_report_scheduler_body)
body = base64encode(local.crux_ready_scheduler_body)
headers = {
Content-Type = "application/json"
}
Expand Down

0 comments on commit f2b56f0

Please sign in to comment.