From 25e8b86d1792b0bf71a054f09266e41b360407d0 Mon Sep 17 00:00:00 2001 From: Yun Kim Date: Fri, 14 Jul 2023 12:09:30 -0400 Subject: [PATCH] Add monitors --- .../assets/dashboards/overview_dashboard.json | 8 +++--- .../recommended_monitors/error_rate.json | 25 +++++++++++++++++++ .../request_duration.json | 24 ++++++++++++++++++ langchain/manifest.json | 4 +++ 4 files changed, 57 insertions(+), 4 deletions(-) create mode 100644 langchain/assets/recommended_monitors/error_rate.json create mode 100644 langchain/assets/recommended_monitors/request_duration.json diff --git a/langchain/assets/dashboards/overview_dashboard.json b/langchain/assets/dashboards/overview_dashboard.json index a3a4471ceb53e..611df8b298527 100644 --- a/langchain/assets/dashboards/overview_dashboard.json +++ b/langchain/assets/dashboards/overview_dashboard.json @@ -754,7 +754,6 @@ "title": "Avg Completion Tokens per Request", "title_size": "16", "title_align": "left", - "time": {}, "type": "query_value", "requests": [ { @@ -921,25 +920,26 @@ "value", "sum" ], + "time": {}, "type": "timeseries", "requests": [ { "formulas": [ { "alias": "Error Rate", - "formula": "(query1 / query2) * 100" + "formula": "(query1 / query2)" } ], "queries": [ { "name": "query1", "data_source": "metrics", - "query": "sum:langchain.request.error{$env,$service,$version,$provider,$model,$api_key}.as_count()" + "query": "sum:trace.langchain.request.errors{$env,$service,$version,$provider,$model,$api_key}.as_count()" }, { "data_source": "metrics", "name": "query2", - "query": "count:langchain.request.duration{$env,$service,$version,$provider,$model,$api_key}.as_count()" + "query": "sum:trace.langchain.request.hits{$env,$service,$version,$provider,$model,$api_key}.as_count()" } ], "response_format": "timeseries", diff --git a/langchain/assets/recommended_monitors/error_rate.json b/langchain/assets/recommended_monitors/error_rate.json new file mode 100644 index 0000000000000..ab10f117b84ac --- /dev/null +++ 
b/langchain/assets/recommended_monitors/error_rate.json @@ -0,0 +1,25 @@ +{ + "name": "[LangChain] LangChain service has a high error rate", + "type": "query alert", + "query": "sum(last_5m):(sum:trace.langchain.request.errors{*}.as_count() / sum:trace.langchain.request.hits{*}.as_count()) > 0.1", + "message": "{{#is_alert}}\n\nALERT: The error rate of your LangChain requests is higher than normal. The error rate is currently over {{value}} errors per request.\n{{/is_alert}}", + "tags": [ + "integration:langchain" + ], + "options": { + "thresholds": { + "critical": 0.1, + "warning": 0.05 + }, + "notify_audit": false, + "require_full_window": false, + "notify_no_data": false, + "renotify_interval": 0, + "include_tags": false + }, + "priority": null, + "restricted_roles": null, + "recommended_monitor_metadata": { + "description": "Notify your team when requests made with LangChain have increased error rates" + } +} \ No newline at end of file diff --git a/langchain/assets/recommended_monitors/request_duration.json b/langchain/assets/recommended_monitors/request_duration.json new file mode 100644 index 0000000000000..a0e3d0fe13197 --- /dev/null +++ b/langchain/assets/recommended_monitors/request_duration.json @@ -0,0 +1,24 @@ +{ + "name": "[LangChain] Request Duration Spike", + "type": "query alert", + "query": "avg(last_5m):avg:trace.langchain.request.duration{*} > 10", + "message": "{{#is_alert}}\n\nALERT: The duration of your LangChain requests is higher than normal. The average LangChain request duration is currently over {{value}} seconds. 
\n\n{{/is_alert}}", + "tags": [ + "integration:langchain" + ], + "options": { + "thresholds": { + "critical": 10 + }, + "notify_audit": false, + "require_full_window": false, + "notify_no_data": false, + "renotify_interval": 0, + "include_tags": false + }, + "priority": null, + "restricted_roles": null, + "recommended_monitor_metadata": { + "description": "Notify your team when requests made with LangChain have increased latency" + } +} \ No newline at end of file diff --git a/langchain/manifest.json b/langchain/manifest.json index 848046499caaf..d97216ac86ef1 100644 --- a/langchain/manifest.json +++ b/langchain/manifest.json @@ -38,6 +38,10 @@ }, "dashboards": { "LangChain Overview Dashboard": "assets/dashboards/overview_dashboard.json" + }, + "monitors": { + "Request Latency": "assets/recommended_monitors/request_duration.json", + "Error Rate": "assets/recommended_monitors/error_rate.json" } }, "author": {