Skip to content

Commit

Permalink
fix: Postgres query metrics (#335)
Browse files Browse the repository at this point in the history
  • Loading branch information
chris13524 authored May 20, 2024
1 parent 835e3e5 commit 4d687a7
Show file tree
Hide file tree
Showing 24 changed files with 320 additions and 43 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/auto_deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ jobs:
- uses: actions/checkout@v3
with:
fetch-depth: 0
submodules: recursive
token: ${{ secrets.RELEASE_PAT }}

- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: recursive
token: ${{ secrets.RELEASE_PAT }}

- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1
Expand Down Expand Up @@ -156,6 +159,7 @@ jobs:
TF_VAR_jwt_secret: ${{ secrets.PROD_JWT_SECRET }}
TF_VAR_image_version: ${{ inputs.image_tag }}
TF_VAR_relay_public_key: ${{ secrets.RELAY_PUBLIC_KEY }}
TF_VAR_notification_channels: NNOynGwVz
with:
environment: "prod"

Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ jobs:
# Checkout code
- name: "Git checkout"
uses: actions/checkout@v2
with:
submodules: recursive
token: ${{ secrets.RELEASE_PAT }}

# Install sccache
- name: "Install sccache"
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/ci_terraform.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: recursive
token: ${{ secrets.RELEASE_PAT }}

- name: Setup Terraform
uses: hashicorp/setup-terraform@v2
Expand Down Expand Up @@ -88,6 +91,9 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: recursive
token: ${{ secrets.RELEASE_PAT }}

- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ jobs:
- uses: actions/checkout@v3
with:
fetch-depth: 0
submodules: recursive
token: ${{ secrets.RELEASE_PAT }}

- name: Install lld and llvm
run: sudo apt-get install -y lld llvm
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "terraform/monitoring/grafonnet-lib"]
path = terraform/monitoring/grafonnet-lib
url = [email protected]:WalletConnect/grafonnet-lib.git
1 change: 1 addition & 0 deletions src/handlers/register_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ pub async fn handler(
token: body.token,
always_raw,
},
state.metrics.as_ref(),
)
.await?;

Expand Down
35 changes: 34 additions & 1 deletion src/metrics/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
use wc::metrics::{otel::metrics::Counter, ServiceMetrics};
use {
std::time::Instant,
wc::metrics::{
otel::{
metrics::{Counter, Histogram},
KeyValue,
},
ServiceMetrics,
},
};

#[derive(Clone)]
pub struct Metrics {
Expand All @@ -16,6 +25,9 @@ pub struct Metrics {

pub tenant_suspensions: Counter<u64>,
pub client_suspensions: Counter<u64>,

postgres_queries: Counter<u64>,
postgres_query_latency: Histogram<u64>,
}

impl Default for Metrics {
Expand Down Expand Up @@ -84,6 +96,16 @@ impl Metrics {
.with_description("The number of clients that have been suspended")
.init();

let postgres_queries: Counter<u64> = meter
.u64_counter("postgres_queries")
.with_description("The number of Postgres queries executed")
.init();

let postgres_query_latency: Histogram<u64> = meter
.u64_histogram("postgres_query_latency")
.with_description("The latency Postgres queries")
.init();

Metrics {
registered_clients: clients_counter,
received_notifications: received_notification_counter,
Expand All @@ -96,6 +118,17 @@ impl Metrics {
tenant_fcm_v1_updates: tenant_fcm_v1_updates_counter,
tenant_suspensions: tenant_suspensions_counter,
client_suspensions: client_suspensions_counter,
postgres_queries,
postgres_query_latency,
}
}

/// Records one executed Postgres query in the query counter and the latency histogram.
///
/// * `query_name` - static label attached to both data points as the `name` attribute.
/// * `start` - instant captured by the caller before issuing the query; the recorded
///   latency is the time elapsed since `start`, in whole milliseconds.
pub fn postgres_query(&self, query_name: &'static str, start: Instant) {
    // `Duration::as_millis` returns a `u128`. Saturate at `u64::MAX` instead of
    // narrowing with `as`, which would silently wrap on overflow.
    let elapsed_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX);

    // Both instruments share the same attribute set so they can be joined by query name.
    let attributes = [KeyValue::new("name", query_name)];
    self.postgres_queries.add(1, &attributes);
    self.postgres_query_latency.record(elapsed_ms, &attributes);
}
}
37 changes: 34 additions & 3 deletions src/stores/client.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
use {
crate::{
metrics::Metrics,
providers::ProviderKind,
stores::{self, StoreError::NotFound},
},
async_trait::async_trait,
sqlx::Executor,
std::time::Instant,
tracing::{debug, instrument},
};

Expand All @@ -19,15 +21,27 @@ pub struct Client {

/// Asynchronous storage operations for [`Client`] records, addressed by tenant id
/// and client id.
#[async_trait]
pub trait ClientStore {
async fn create_client(&self, tenant_id: &str, id: &str, client: Client) -> stores::Result<()>;
/// Stores `client` under `id` for the given `tenant_id`.
///
/// `metrics` is optional; when `Some`, the `sqlx::PgPool` implementation reports
/// each SQL statement it runs through `Metrics::postgres_query`. Passing `None`
/// skips that reporting.
async fn create_client(
&self,
tenant_id: &str,
id: &str,
client: Client,
metrics: Option<&Metrics>,
) -> stores::Result<()>;
/// Returns the [`Client`] identified by `id` for the given `tenant_id`.
async fn get_client(&self, tenant_id: &str, id: &str) -> stores::Result<Client>;
/// Deletes the client identified by `id` for the given `tenant_id`.
async fn delete_client(&self, tenant_id: &str, id: &str) -> stores::Result<()>;
}

#[async_trait]
impl ClientStore for sqlx::PgPool {
#[instrument(skip(self, client))]
async fn create_client(&self, tenant_id: &str, id: &str, client: Client) -> stores::Result<()> {
#[instrument(skip(self, client, metrics))]
async fn create_client(
&self,
tenant_id: &str,
id: &str,
client: Client,
metrics: Option<&Metrics>,
) -> stores::Result<()> {
debug!(
"ClientStore::create_client tenant_id={tenant_id} id={id} token={} with locking",
client.token
Expand All @@ -37,6 +51,7 @@ impl ClientStore for sqlx::PgPool {

// Statement for locking based on the client id to prevent an issue #230
// and locking based on the token to prevent an issue #292
let start = Instant::now();
sqlx::query(
"SELECT
pg_advisory_xact_lock(abs(hashtext($1::text))),
Expand All @@ -46,13 +61,21 @@ impl ClientStore for sqlx::PgPool {
.bind(client.token.clone())
.execute(&mut transaction)
.await?;
if let Some(metrics) = metrics {
metrics.postgres_query("create_client_pg_advisory_xact_lock", start);
}

let start = Instant::now();
sqlx::query("DELETE FROM public.clients WHERE id = $1 OR device_token = $2")
.bind(id)
.bind(client.token.clone())
.execute(&mut transaction)
.await?;
if let Some(metrics) = metrics {
metrics.postgres_query("create_client_delete", start);
}

let start = Instant::now();
let mut insert_query = sqlx::QueryBuilder::new(
"INSERT INTO public.clients (id, tenant_id, push_type, device_token, always_raw)",
);
Expand All @@ -73,7 +96,15 @@ impl ClientStore for sqlx::PgPool {
},
);
insert_query.build().execute(&mut transaction).await?;
if let Some(metrics) = metrics {
metrics.postgres_query("create_client_insert", start);
}

let start = Instant::now();
transaction.commit().await?;
if let Some(metrics) = metrics {
metrics.postgres_query("create_client_commit", start);
}

Ok(())
}
Expand Down
56 changes: 39 additions & 17 deletions terraform/.terraform.lock.hcl

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions terraform/backend.tf
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ terraform {
}
grafana = {
source = "grafana/grafana"
version = "~> 1.28"
version = ">= 2.1"
}
random = {
source = "hashicorp/random"
Expand All @@ -31,4 +31,4 @@ terraform {
version = "5.7.0"
}
}
}
}
1 change: 1 addition & 0 deletions terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ module "monitoring" {
prometheus_workspace_id = aws_prometheus_workspace.prometheus.id
load_balancer_arn = module.ecs.load_balancer_arn
environment = local.environment
notification_channels = var.notification_channels
}

data "aws_ecr_repository" "repository" {
Expand Down
9 changes: 0 additions & 9 deletions terraform/monitoring/backend.tf

This file was deleted.

55 changes: 55 additions & 0 deletions terraform/monitoring/dashboard.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Grafana dashboard definition. Evaluating this file yields a single dashboard
// object built with grafonnet-lib and the panel definitions under `panels/`.
//
// External variables read via std.extVar: prometheus_uid, cloudwatch_uid,
// environment, notifications (a JSON-encoded string), dashboard_title,
// dashboard_uid.
local grafana = import 'grafonnet-lib/grafana.libsonnet';
local panels = import 'panels/panels.libsonnet';

// Short aliases for the grafonnet-lib members used below.
local dashboard = grafana.dashboard;
local row = grafana.row;
local annotation = grafana.annotation;
local layout = grafana.layout;

// Datasource references handed to every panel; the UIDs come from external variables.
local ds = {
prometheus: {
type: 'prometheus',
uid: std.extVar('prometheus_uid'),
},
cloudwatch: {
type: 'cloudwatch',
uid: std.extVar('cloudwatch_uid'),
},
};
// Shared values handed to every panel alongside `ds`.
local vars = {
namespace: 'Push',
environment: std.extVar('environment'),
// Passed in as a JSON string and decoded here.
notifications: std.parseJson(std.extVar('notifications')),
};

////////////////////////////////////////////////////////////////////////////////

// Panel height passed to the layout helper; `pos` supplies the gridPos values used below.
local height = 8;
local pos = grafana.layout.pos(height);

////////////////////////////////////////////////////////////////////////////////

// The dashboard itself: title and uid are injected from external variables.
dashboard.new(
title = std.extVar('dashboard_title'),
uid = std.extVar('dashboard_uid'),
editable = true,
graphTooltip = dashboard.graphTooltips.sharedCrosshair,
timezone = dashboard.timezones.utc,
)
// Annotation with a 'dashboard'-type target, no tag filter, limited to 100 entries.
.addAnnotation(
annotation.new(
target = {
limit: 100,
matchAny: false,
tags: [],
type: 'dashboard',
},
)
)

// Panels are listed in order and positioned by layout.generate_grid.
// NOTE(review): `pos._6` presumably selects a fixed-width grid slot; confirm against grafonnet-lib's layout helper.
.addPanels(layout.generate_grid([
//////////////////////////////////////////////////////////////////////////////
row.new('Application'),
panels.app.postgres_query_rate(ds, vars) { gridPos: pos._6 },
panels.app.postgres_query_latency(ds, vars) { gridPos: pos._6 },
]))
1 change: 1 addition & 0 deletions terraform/monitoring/grafonnet-lib
Submodule grafonnet-lib added at aa25c4
Loading

0 comments on commit 4d687a7

Please sign in to comment.