From b69ab7f9758d5153b7e9bc32375f0f3579b76cd6 Mon Sep 17 00:00:00 2001 From: Gary Date: Sat, 14 Dec 2024 21:40:14 +0000 Subject: [PATCH] rip out stats bloat --- exo/main.py | 5 ----- exo/stats/__init__.py | 0 exo/stats/docker-compose-stats.yml | 27 --------------------------- exo/stats/metrics.py | 29 ----------------------------- exo/stats/prometheus.yml | 7 ------- 5 files changed, 68 deletions(-) delete mode 100644 exo/stats/__init__.py delete mode 100644 exo/stats/docker-compose-stats.yml delete mode 100644 exo/stats/metrics.py delete mode 100644 exo/stats/prometheus.yml diff --git a/exo/main.py b/exo/main.py index 3c02872b3..05d3b7d01 100644 --- a/exo/main.py +++ b/exo/main.py @@ -52,7 +52,6 @@ parser.add_argument("--listen-port", type=int, default=5678, help="Listening port for discovery") parser.add_argument("--download-quick-check", action="store_true", help="Quick check local path for model shards download") parser.add_argument("--max-parallel-downloads", type=int, default=4, help="Max parallel downloads for model shards download") -parser.add_argument("--prometheus-client-port", type=int, default=None, help="Prometheus client port") parser.add_argument("--broadcast-port", type=int, default=5678, help="Broadcast port for discovery") parser.add_argument("--discovery-module", type=str, choices=["udp", "tailscale", "manual"], default="udp", help="Discovery module to use") parser.add_argument("--discovery-timeout", type=int, default=30, help="Discovery timeout in seconds") @@ -170,10 +169,6 @@ def preemptively_start_download(request_id: str, opaque_status: str): node.on_opaque_status.register("start_download").on_next(preemptively_start_download) -if args.prometheus_client_port: - from exo.stats.metrics import start_metrics_server - start_metrics_server(node, args.prometheus_client_port) - last_broadcast_time = 0 diff --git a/exo/stats/__init__.py b/exo/stats/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/exo/stats/docker-compose-stats.yml b/exo/stats/docker-compose-stats.yml deleted file mode 100644 index a37fda439..000000000 --- a/exo/stats/docker-compose-stats.yml +++ /dev/null @@ -1,27 +0,0 @@ -version: '3.8' - -services: - prometheus: - image: prom/prometheus:latest - container_name: prometheus - volumes: - - ./prometheus.yml:/etc/prometheus/prometheus.yml - command: - - '--config.file=/etc/prometheus/prometheus.yml' - ports: - - "9090:9090" - networks: - - monitoring - - grafana: - image: grafana/grafana:latest - container_name: grafana - ports: - - "3000:3000" - networks: - - monitoring - depends_on: - - prometheus - -networks: - monitoring: diff --git a/exo/stats/metrics.py b/exo/stats/metrics.py deleted file mode 100644 index f29533ff7..000000000 --- a/exo/stats/metrics.py +++ /dev/null @@ -1,29 +0,0 @@ -from exo.orchestration import Node -from prometheus_client import start_http_server, Counter, Histogram -import json - -# Create metrics to track time spent and requests made. -PROCESS_PROMPT_COUNTER = Counter("process_prompt_total", "Total number of prompts processed", ["node_id"]) -PROCESS_TENSOR_COUNTER = Counter("process_tensor_total", "Total number of tensors processed", ["node_id"]) -PROCESS_TENSOR_TIME = Histogram("process_tensor_seconds", "Time spent processing tensor", ["node_id"]) - - -def start_metrics_server(node: Node, port: int): - start_http_server(port) - - def _on_opaque_status(request_id, opaque_status: str): - status_data = json.loads(opaque_status) - _type = status_data.get("type", "") - node_id = status_data.get("node_id", "") - if _type != "node_status": - return - status = status_data.get("status", "") - - if status == "end_process_prompt": - PROCESS_PROMPT_COUNTER.labels(node_id=node_id).inc() - elif status == "end_process_tensor": - elapsed_time_ns = status_data.get("elapsed_time_ns", 0) - PROCESS_TENSOR_COUNTER.labels(node_id=node_id).inc() - PROCESS_TENSOR_TIME.labels(node_id=node_id).observe(elapsed_time_ns/1e9) # Convert ns to seconds - - node.on_opaque_status.register("stats").on_next(_on_opaque_status) diff --git a/exo/stats/prometheus.yml b/exo/stats/prometheus.yml deleted file mode 100644 index 1530f45d0..000000000 --- a/exo/stats/prometheus.yml +++ /dev/null @@ -1,7 +0,0 @@ -global: - scrape_interval: 15s - -scrape_configs: - - job_name: 'exo-node' - static_configs: - - targets: ['host.docker.internal:8005']