From 0d9173fccf7a2c670612695aa0d0aadc2fea7fbb Mon Sep 17 00:00:00 2001
From: Yun Kim
Date: Fri, 14 Jul 2023 12:09:30 -0400
Subject: [PATCH] Add monitors

Add two recommended monitors for the LangChain integration and register
them under "monitors" in langchain/manifest.json:

- error_rate.json alerts when trace.langchain.request.errors divided by
  trace.langchain.request.hits exceeds 0.1 (warning at 0.05) over the
  last 5 minutes.
- request_duration.json alerts when the 5-minute average of the
  trace.langchain.request latency distribution exceeds 10 (seconds,
  per the alert message). This replaces a placeholder query on
  system.load.1, which measured host load rather than request duration.

In the overview dashboard, the "Error Rate" timeseries now uses the same
trace.* error/hit metrics and plots the raw ratio instead of a
percentage. An empty "time" override is removed from the
"Avg Completion Tokens per Request" widget and added to the error-rate
widget.
---
Review notes (text between "---" and the first "diff --git" is ignored
by git am):
- Line breaks and leading whitespace in the hunks below were
  reconstructed. If the context indentation does not match the tree,
  apply with `git apply --ignore-whitespace`.
- Both new monitor files now end with a trailing newline, so the blob
  ids on their "index" lines predate these edits.

 .../assets/dashboards/overview_dashboard.json |  8 ++++----
 .../recommended_monitors/error_rate.json      | 20 +++++++++++++++++++
 .../request_duration.json                     | 19 ++++++++++++++++++
 langchain/manifest.json                       |  4 ++++
 4 files changed, 47 insertions(+), 4 deletions(-)
 create mode 100644 langchain/assets/recommended_monitors/error_rate.json
 create mode 100644 langchain/assets/recommended_monitors/request_duration.json

diff --git a/langchain/assets/dashboards/overview_dashboard.json b/langchain/assets/dashboards/overview_dashboard.json
index a3a4471ceb53e..611df8b298527 100644
--- a/langchain/assets/dashboards/overview_dashboard.json
+++ b/langchain/assets/dashboards/overview_dashboard.json
@@ -754,7 +754,6 @@
                     "title": "Avg Completion Tokens per Request",
                     "title_size": "16",
                     "title_align": "left",
-                    "time": {},
                     "type": "query_value",
                     "requests": [
                         {
@@ -921,25 +920,26 @@
                         "value",
                         "sum"
                     ],
+                    "time": {},
                     "type": "timeseries",
                     "requests": [
                         {
                             "formulas": [
                                 {
                                     "alias": "Error Rate",
-                                    "formula": "(query1 / query2) * 100"
+                                    "formula": "(query1 / query2)"
                                 }
                             ],
                             "queries": [
                                 {
                                     "name": "query1",
                                     "data_source": "metrics",
-                                    "query": "sum:langchain.request.error{$env,$service,$version,$provider,$model,$api_key}.as_count()"
+                                    "query": "sum:trace.langchain.request.errors{$env,$service,$version,$provider,$model,$api_key}.as_count()"
                                 },
                                 {
                                     "data_source": "metrics",
                                     "name": "query2",
-                                    "query": "count:langchain.request.duration{$env,$service,$version,$provider,$model,$api_key}.as_count()"
+                                    "query": "sum:trace.langchain.request.hits{$env,$service,$version,$provider,$model,$api_key}.as_count()"
                                 }
                             ],
                             "response_format": "timeseries",
diff --git a/langchain/assets/recommended_monitors/error_rate.json b/langchain/assets/recommended_monitors/error_rate.json
new file mode 100644
index 0000000000000..469e77833cf3b
--- /dev/null
+++ b/langchain/assets/recommended_monitors/error_rate.json
@@ -0,0 +1,20 @@
+{
+  "name": "[LangChain] LangChain service has a high error rate",
+  "type": "query alert",
+  "query": "sum(last_5m):(sum:trace.langchain.request.errors{*}.as_count() / sum:trace.langchain.request.hits{*}.as_count()) > 0.1",
+  "message": "{{#is_alert}}\n\nALERT: The error rate of your LangChain requests is higher than normal. The error rate is currently over {{value}} errors per request.\n{{/is_alert}}",
+  "tags": [
+    "integration:langchain"
+  ],
+  "options": {
+    "thresholds": {
+      "critical": 0.1,
+      "warning": 0.05
+    },
+    "notify_audit": false,
+    "require_full_window": false,
+    "notify_no_data": false,
+    "renotify_interval": 0,
+    "include_tags": false
+  }
+}
diff --git a/langchain/assets/recommended_monitors/request_duration.json b/langchain/assets/recommended_monitors/request_duration.json
new file mode 100644
index 0000000000000..4e3561f7036d4
--- /dev/null
+++ b/langchain/assets/recommended_monitors/request_duration.json
@@ -0,0 +1,19 @@
+{
+  "name": "[LangChain] Request Duration Spike",
+  "type": "query alert",
+  "query": "avg(last_5m):avg:trace.langchain.request{*} > 10",
+  "message": "{{#is_alert}}\n\nALERT: The duration of your LangChain requests is higher compared to normal. The average LangChain request duration is currently over {{value}} seconds. \n\n{{/is_alert}}",
+  "tags": [
+    "integration:langchain"
+  ],
+  "options": {
+    "thresholds": {
+      "critical": 10
+    },
+    "notify_audit": false,
+    "require_full_window": false,
+    "notify_no_data": false,
+    "renotify_interval": 0,
+    "include_tags": false
+  }
+}
diff --git a/langchain/manifest.json b/langchain/manifest.json
index 848046499caaf..d97216ac86ef1 100644
--- a/langchain/manifest.json
+++ b/langchain/manifest.json
@@ -38,6 +38,10 @@
     },
     "dashboards": {
       "LangChain Overview Dashboard": "assets/dashboards/overview_dashboard.json"
+    },
+    "monitors": {
+      "Request Latency": "assets/recommended_monitors/request_duration.json",
+      "Error Rate": "assets/recommended_monitors/error_rate.json"
     }
   },
   "author": {