Skip to content

Commit

Permalink
Use prometheus-client to parse metrics endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
hammerhead committed Mar 5, 2024
1 parent 3f529ca commit c7a0dae
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 26 deletions.
2 changes: 1 addition & 1 deletion topic/autoscaling/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Run a dedicated cluster in [CrateDB Cloud](https://console.cratedb.cloud/).
The script uses a couple of Python libraries. Make sure you have installed those:

```shell
pip install requests
pip install requests prometheus-client
```

```shell
Expand Down
44 changes: 19 additions & 25 deletions topic/autoscaling/autoscale.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
import time
import os
import argparse
from functools import reduce
from datetime import datetime
from prometheus_client.parser import text_string_to_metric_families
import requests
from requests.auth import HTTPBasicAuth

Expand Down Expand Up @@ -42,31 +44,25 @@


def num_shards():
"""Function to calculate the average number of shards per node for a given cluster"""
headers = {"accept": "plain/text"}
"Calculate the average number of shards per node for a given cluster"
api_url = f"https://console.cratedb.cloud/api/v2/organizations/{args.organization_id}/metrics/prometheus/"
response = requests.get(api_url, auth=auth_data, headers=headers, timeout=60)
if response.status_code == 200:
lines = response.text.split("\n")
total_shard_count = 0
shard_count_instances = 0
for line in lines:
response = requests.get(api_url, auth=auth_data, timeout=60)

samples = []
for family in text_string_to_metric_families(response.text):
for sample in family.samples:
if (
"crate_node" in line
and "shard_stats" in line
and args.cluster_id in line
and 'property="total"' in line
sample.name == "crate_node"
and sample.labels["name"] == "shard_stats"
and args.cluster_id in sample.labels["pod_name"]
and sample.labels["property"] == "total"
):
shard_count = float(line.split()[1])
total_shard_count += shard_count
shard_count_instances += 1
if shard_count_instances == 0:
return None # Return None if no shard counts were found
return (
total_shard_count / shard_count_instances
) # Calculate and return the average
logging.info(f"Failed to retrieve metrics. Status code: {response.status_code}")
return None
samples.append(sample.value)

if len(samples) == 0:
return None

return reduce(lambda a, b: a + b, samples) / len(samples)


def get_cluster_status():
Expand Down Expand Up @@ -142,9 +138,7 @@ def scale_cluster(num, cluster_status, max_num_shard):
number_shards = num_shards() # Calculate average shard count
if number_shards is not None:
logging.info(f"Current avg number of shards: {number_shards}")
scale_cluster(
number_shards, status, MAX_NUM_SHARDS
) # Refactored scaling logic into this function
scale_cluster(number_shards, status, MAX_NUM_SHARDS)
else:
logging.error("Failed to retrieve shard metrics.")
except Exception as e:
Expand Down

0 comments on commit c7a0dae

Please sign in to comment.