Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated Datadog agent configuration and added sample dashboard #39

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions monitoring/datadog-agent/README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# Datadog Openmetrics Configuration Example
# Datadog Openmetrics Configuration and Dashboard Example
Datadog provides an agent that will scrape metrics across various sources including an Openmetrics endpoint like the one provided by Rockset. You can configure this agent to scrape metrics from your Rockset metrics endpoint using the Openmetrics integration.
You can also import a sample dashboard that visualizes these metrics to get you started quickly.

## Dependencies
1. Datadog Agent version 6.6.0 and above
2. Enable Rockset metrics endpoint
3. Access to a Datadog account

## Instructions
Copy the [example](./openmetrics.d/conf.yaml) from this repository to your agent's host. On the agent host, this configuration file is generally located at:
Expand All @@ -13,6 +15,10 @@ Copy the [example](./openmetrics.d/conf.yaml) from this repository to your agent

The syntax is very specific, so please note any differences between your file and the example provided. For example, the password must be an empty string in order to be encoded properly.

You can import the sample dashboard into your Datadog account by following the instructions here: https://docs.datadoghq.com/dashboards/#copy-import-or-export-dashboard-json

![image](https://github.com/lukalovosevic/community/assets/62242783/850dd6a2-f9d8-444a-9faa-7ecea01b6268)

## References
- https://rockset.com/docs/monitoring-and-alerting/
- https://docs.datadoghq.com/integrations/openmetrics/
- https://docs.datadoghq.com/integrations/openmetrics/
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"title":"Rockset Metrics Dashboard","description":"","widgets":[{"id":1246456925018978,"definition":{"type":"image","url":"https://rockset-demo-ecommerce.s3.us-west-2.amazonaws.com/rockset.svg","sizing":"scale-down","has_background":false,"has_border":false,"vertical_align":"center","horizontal_align":"center"}},{"id":7186493897479482,"definition":{"title":"Collection Details","background_color":"purple","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":6058642202006128,"definition":{"title":"Number of Collections per Workspace","title_size":"16","title_align":"left","type":"query_table","requests":[{"response_format":"scalar","queries":[{"name":"query1","data_source":"metrics","query":"sum:rockset.rockset_collections{*} by {workspace_name}","aggregator":"avg"}],"formulas":[{"cell_display_mode":"number","alias":"# of coll.","formula":"query1","limit":{"count":500,"order":"desc"},"number_format":{"unit":{"type":"canonical_unit","unit_name":"item"}}}]}],"has_search_bar":"auto"}},{"id":11,"definition":{"title":"Number of Documents per Collection","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_collection_documents{$scope} by {collection_name}"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"area"}]}},{"id":8,"definition":{"title":"Collection Size in Bytes","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1","number_format":{"unit":{"type":"canonical_unit","unit_name":"byte"}}}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_collection_size_bytes{$scope} by 
{collection_name}"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"area"}]}},{"id":8709473059158392,"definition":{"title":"Collection Size in Bytes","type":"query_table","requests":[{"response_format":"scalar","queries":[{"data_source":"metrics","name":"query1","query":"max:rockset.rockset_collection_size_bytes{*} by {collection_name}","aggregator":"avg"}],"formulas":[{"cell_display_mode":"bar","alias":"Coll. size","formula":"query1","limit":{"count":500,"order":"desc"},"number_format":{"unit":{"type":"canonical_unit","unit_name":"byte"}}}]}],"has_search_bar":"auto"}}]}},{"id":4564612793581622,"definition":{"title":"Virtual Instance Details","background_color":"purple","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":4,"definition":{"title":"VI CPU Utilization","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"percent"}},"formula":"query1"},{"number_format":{"unit":{"type":"canonical_unit","unit_name":"percent"}},"formula":"100 * query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"avg:rockset.rockset_leaf_cpu_utilization_percentage{$scope} by {virtual_instance_id}"}],"response_format":"timeseries","style":{"palette":"purple","order_reverse":false,"line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","min":"0","max":"100"}}},{"id":2168985221699616,"definition":{"title":"VI Memory Utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"percent"}},"formula":"query1"},{"number_format":{"unit":{"type":"canonical_unit","unit_name":"percent"}},"formula":"100 
* query1"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:rockset.rockset_leaf_memory_utilization_percentage{*} by {virtual_instance_id}"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"min":"0","max":"100"}}}]}},{"id":5479815899399802,"definition":{"title":"Ingestion Details","background_color":"purple","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":10,"definition":{"title":"Total Ingested Bytes per Collection","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1","number_format":{"unit":{"type":"canonical_unit","unit_name":"byte"}}}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_collection_total_ingest_bytes.count{$scope} by {collection_name}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"area"}]}},{"id":22,"definition":{"title":"Parse Errors per Collection","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_collection_parse_errors.count{$scope} by {collection_name}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"line"}]}},{"id":18,"definition":{"title":"Ingest Transformation Errors per 
Collection","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_collection_ingest_transformation_errors.count{$scope}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"line"}]}},{"id":29,"definition":{"title":"Data Discovery Latency per Collection (ms)","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"millisecond"}},"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"avg:rockset.rockset_collection_data_discovery_latency.bucket{$scope} by {collection_name}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}},{"id":14,"definition":{"title":"Data Processing Latency per Collection (ms)","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"millisecond"}},"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"avg:rockset.rockset_collection_data_process_latency.bucket{$scope} by {collection_name}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}}]}},{"id":8525032580543298,"definition":{"title":"Query Metrics","background_color":"purple","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":1,"definition":{"title":"Requested 
Queries","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_queries.count{virtual_instance_id:*} by {virtual_instance_id}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}},{"id":17,"definition":{"title":"Query Queue","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1","number_format":{"unit":{"type":"canonical_unit","unit_name":"item"}}}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_query_queue_size{$scope} by {virtual_instance_id}"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}},{"id":6,"definition":{"title":"Query Errors","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_query_errors.count{$scope} by {virtual_instance_id}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}},{"id":28,"definition":{"title":"Query Admission Control (seconds)","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1","number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}}}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_query_admission_latency_seconds.sum{$scope} by 
{virtual_instance_id}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}},{"id":0,"definition":{"title":"Query Lambda Latency (ms)","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1","number_format":{"unit":{"type":"custom_unit_label","label":"ms"}}}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_query_lambda_latency_seconds.bucket{*} by {query_lambda}.as_rate()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}],"yaxis":{"include_zero":false}}},{"id":12,"definition":{"title":"Query Lambda Errors","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_query_lambda_errors.count{$scope} by {query_lambda}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}},{"id":27,"definition":{"title":"Query Lambda Admission Control (seconds)","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1","number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}}}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_query_lambda_admission_latency_seconds.sum{$scope} by {query_lambda}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}}]}}],"template_variables":[{"name":"scope","available_values":[],"default":"*"}],"layout_type":"ordered","notify_list":[],"reflow_type":"auto"}
28 changes: 25 additions & 3 deletions monitoring/datadog-agent/openmetrics.d/conf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,32 @@ init_config:
service: rockset

instances:
- openmetrics_endpoint: https://api.rs2.usw2.rockset.com/v1/orgs/self/metrics
- openmetrics_endpoint: https://api.usw2a1.rockset.com/v1/orgs/self/metrics
### Make sure the API endpoint matches your region's endpoint URL
namespace: rockset
extra_headers:
authorization: Apikey <api_key>
authorization: Apikey <your_api_key_here>
max_returned_metrics: 50000
metrics:
- .+
- rockset_leaf_cpu_utilization_percentage
- rockset_leaf_memory_utilization_percentage
- rockset_agg_cpu_utilization_percentage
- rockset_agg_memory_utilization_percentage
- rockset_collections
- rockset_collection_size_bytes
- rockset_collection_documents
- rockset_collection_total_ingest_bytes
- rockset_collection_parse_errors
- rockset_collection_data_discovery_latency
- rockset_collection_data_process_latency
- rockset_data_discovery_latency
- rockset_data_process_latency
- rockset_queries
- rockset_query_latency_seconds
- rockset_query_admission_latency_seconds
- rockset_query_queue_size
- rockset_query_errors
- rockset_query_lambda_queries
- rockset_query_lambda_latency_seconds
- rockset_query_lambda_admission_latency_seconds
- rockset_query_lambda_errors
Loading