diff --git a/.gitignore b/.gitignore index 2b9cef78..f8be6f3f 100644 --- a/.gitignore +++ b/.gitignore @@ -129,7 +129,7 @@ docs/_build /services/monitoring/pgsql_query_exporter_config.yaml /services/monitoring/docker-compose.yml /services/monitoring/smokeping_prober_config.yaml - +services/monitoring/tempo_config.yaml # Simcore: Contains location of repo.config file on the machine and of the whole config directory .config.location diff --git a/Makefile b/Makefile index 13fd07bc..2e80bf48 100644 --- a/Makefile +++ b/Makefile @@ -71,7 +71,6 @@ down-maintenance: ## Stop the maintenance mode fi \ ,) - # Misc: info & clean .PHONY: info info-vars info-local info: ## Displays some important info diff --git a/services/minio/template.env b/services/minio/template.env index d9a99f67..d45d4f9b 100644 --- a/services/minio/template.env +++ b/services/minio/template.env @@ -2,3 +2,7 @@ MINIO_ACCESS_KEY=${S3_ACCESS_KEY} MINIO_SECRET_KEY=${S3_SECRET_KEY} STORAGE_DOMAIN=${STORAGE_DOMAIN} +TEMPO_S3_BUCKET=${TEMPO_S3_BUCKET} +MONITORING_DOMAIN=${MONITORING_DOMAIN} +PUBLIC_NETWORK=${PUBLIC_NETWORK} +MONITORED_NETWORK=${MONITORED_NETWORK} diff --git a/services/monitoring/Makefile b/services/monitoring/Makefile index 1827363a..3c2cff64 100644 --- a/services/monitoring/Makefile +++ b/services/monitoring/Makefile @@ -49,28 +49,28 @@ up-master: .init .env config.monitoring config.prometheus.ceph.simcore ${TEMP_C @docker stack deploy --with-registry-auth --prune --compose-file ${TEMP_COMPOSE}-master ${STACK_NAME} $(MAKE) grafana-import -${TEMP_COMPOSE}: docker-compose.yml config.monitoring .env pgsql_query_exporter_config.yaml smokeping_prober_config.yaml +${TEMP_COMPOSE}: docker-compose.yml config.monitoring .env pgsql_query_exporter_config.yaml smokeping_prober_config.yaml tempo_config.yaml @${REPO_BASE_DIR}/scripts/docker-stack-config.bash -e .env $< > $@ -${TEMP_COMPOSE}-letsencrypt-http: docker-compose.yml docker-compose.letsencrypt.http.yml config.monitoring .env pgsql_query_exporter_config.yaml smokeping_prober_config.yaml +${TEMP_COMPOSE}-letsencrypt-http: docker-compose.yml docker-compose.letsencrypt.http.yml config.monitoring .env pgsql_query_exporter_config.yaml smokeping_prober_config.yaml tempo_config.yaml @${REPO_BASE_DIR}/scripts/docker-stack-config.bash -e .env $< docker-compose.letsencrypt.http.yml > $@ -${TEMP_COMPOSE}-letsencrypt-dns: docker-compose.yml docker-compose.letsencrypt.dns.yml config.monitoring .env pgsql_query_exporter_config.yaml smokeping_prober_config.yaml +${TEMP_COMPOSE}-letsencrypt-dns: docker-compose.yml docker-compose.letsencrypt.dns.yml config.monitoring .env pgsql_query_exporter_config.yaml smokeping_prober_config.yaml tempo_config.yaml @${REPO_BASE_DIR}/scripts/docker-stack-config.bash -e .env $< docker-compose.letsencrypt.dns.yml > $@ -${TEMP_COMPOSE}-dalco: docker-compose.yml docker-compose.dalco.yml config.monitoring .env pgsql_query_exporter_config.yaml smokeping_prober_config.yaml +${TEMP_COMPOSE}-dalco: docker-compose.yml docker-compose.dalco.yml config.monitoring .env pgsql_query_exporter_config.yaml smokeping_prober_config.yaml tempo_config.yaml @${REPO_BASE_DIR}/scripts/docker-stack-config.bash -e .env $< docker-compose.dalco.yml > $@ -${TEMP_COMPOSE}-public: docker-compose.yml docker-compose.public.yml config.monitoring .env pgsql_query_exporter_config.yaml smokeping_prober_config.yaml +${TEMP_COMPOSE}-public: docker-compose.yml docker-compose.public.yml config.monitoring .env pgsql_query_exporter_config.yaml smokeping_prober_config.yaml tempo_config.yaml @${REPO_BASE_DIR}/scripts/docker-stack-config.bash -e .env $< docker-compose.public.yml > $@ -${TEMP_COMPOSE}-aws: docker-compose.yml docker-compose.aws.yml config.monitoring .env pgsql_query_exporter_config.yaml smokeping_prober_config.yaml +${TEMP_COMPOSE}-aws: docker-compose.yml docker-compose.aws.yml config.monitoring .env pgsql_query_exporter_config.yaml smokeping_prober_config.yaml tempo_config.yaml @${REPO_BASE_DIR}/scripts/docker-stack-config.bash -e .env $< docker-compose.aws.yml > $@ -${TEMP_COMPOSE}-master: docker-compose.yml docker-compose.master.yml config.monitoring .env pgsql_query_exporter_config.yaml smokeping_prober_config.yaml +${TEMP_COMPOSE}-master: docker-compose.yml docker-compose.master.yml config.monitoring .env pgsql_query_exporter_config.yaml smokeping_prober_config.yaml tempo_config.yaml @${REPO_BASE_DIR}/scripts/docker-stack-config.bash -e .env $< docker-compose.master.yml > $@ -${TEMP_COMPOSE}-local: docker-compose.yml docker-compose.letsencrypt.dns.yml config.monitoring .env pgsql_query_exporter_config.yaml smokeping_prober_config.yaml +${TEMP_COMPOSE}-local: docker-compose.yml docker-compose.letsencrypt.dns.yml config.monitoring .env pgsql_query_exporter_config.yaml smokeping_prober_config.yaml tempo_config.yaml @${REPO_BASE_DIR}/scripts/docker-stack-config.bash -e .env $< docker-compose.letsencrypt.dns.yml > $@ docker-compose.yml: docker-compose.yml.j2 .env .venv pgsql_query_exporter_config.yaml @@ -153,6 +153,9 @@ pgsql_query_exporter_config.yaml: pgsql_query_exporter_config.yaml.j2 ${REPO_CON smokeping_prober_config.yaml: smokeping_prober_config.yaml.j2 ${REPO_CONFIG_LOCATION} .env .venv $(call jinja, $<, $@); +tempo_config.yaml: tempo_config.yaml.j2 ${REPO_CONFIG_LOCATION} .env .venv + $(call jinja, $<, $@); + .PHONY: grafana/assets grafana/assets: ${REPO_CONFIG_LOCATION} @if [ ! -d "$(shell dirname ${REPO_CONFIG_LOCATION})/assets/grafana" ]; then \ diff --git a/services/monitoring/docker-compose.yml.j2 b/services/monitoring/docker-compose.yml.j2 index 680f0f09..33b8adda 100644 --- a/services/monitoring/docker-compose.yml.j2 +++ b/services/monitoring/docker-compose.yml.j2 @@ -17,6 +17,8 @@ networks: configs: alertmanager_config: file: ./alertmanager/config.yml + tempo_config: + file: ./tempo_config.yaml node_exporter_entrypoint: file: ./node-exporter/docker-entrypoint.sh prometheus_config: @@ -396,3 +398,75 @@ services: reservations: memory: 32M cpus: "0.1"{% endfor %} + tempo1: + image: grafana/tempo:2.6.1 + command: "-target=scalable-single-binary -config.file=/etc/tempo.yaml" + configs: + - source: tempo_config + target: /etc/tempo.yaml + networks: + - monitored + deploy: + labels: + - traefik.enable=true + - traefik.docker.network=${PUBLIC_NETWORK} + - traefik.http.services.tempo.loadbalancer.server.port=9095 + - traefik.http.routers.tempo.rule=Host(`${MONITORING_DOMAIN}`) && PathPrefix(`/tempo`) + - traefik.http.routers.tempo.priority=10 + - traefik.http.routers.tempo.entrypoints=https + - traefik.http.routers.tempo.tls=true + - traefik.http.middlewares.tempo_replace_regex.replacepathregex.regex=^/tempo/?(.*)$$ + - traefik.http.middlewares.tempo_replace_regex.replacepathregex.replacement=/$${1} + - traefik.http.routers.tempo.middlewares=ops_whitelist_ips@swarm, ops_gzip@swarm, tempo_replace_regex + resources: + limits: + memory: 2000M + cpus: "2.0" + tempo2: + image: grafana/tempo:2.6.1 + command: "-target=scalable-single-binary -config.file=/etc/tempo.yaml" + configs: + - source: tempo_config + target: /etc/tempo.yaml + networks: + - monitored + deploy: + labels: + - traefik.enable=true + - traefik.docker.network=${PUBLIC_NETWORK} + - traefik.http.services.tempo.loadbalancer.server.port=9095 + - traefik.http.routers.tempo.rule=Host(`${MONITORING_DOMAIN}`) && PathPrefix(`/tempo`) + - traefik.http.routers.tempo.priority=10 + - traefik.http.routers.tempo.entrypoints=https + - traefik.http.routers.tempo.tls=true + - traefik.http.middlewares.tempo_replace_regex.replacepathregex.regex=^/tempo/?(.*)$$ + - traefik.http.middlewares.tempo_replace_regex.replacepathregex.replacement=/$${1} + - traefik.http.routers.tempo.middlewares=ops_whitelist_ips@swarm, ops_gzip@swarm, tempo_replace_regex + resources: + limits: + memory: 2000M + cpus: "2.0" + tempo3: + image: grafana/tempo:2.6.1 + command: "-target=scalable-single-binary -config.file=/etc/tempo.yaml" + configs: + - source: tempo_config + target: /etc/tempo.yaml + networks: + - monitored + deploy: + labels: + - traefik.enable=true + - traefik.docker.network=${PUBLIC_NETWORK} + - traefik.http.services.tempo.loadbalancer.server.port=9095 + - traefik.http.routers.tempo.rule=Host(`${MONITORING_DOMAIN}`) && PathPrefix(`/tempo`) + - traefik.http.routers.tempo.priority=10 + - traefik.http.routers.tempo.entrypoints=https + - traefik.http.routers.tempo.tls=true + - traefik.http.middlewares.tempo_replace_regex.replacepathregex.regex=^/tempo/?(.*)$$ + - traefik.http.middlewares.tempo_replace_regex.replacepathregex.replacement=/$${1} + - traefik.http.routers.tempo.middlewares=ops_whitelist_ips@swarm, ops_gzip@swarm, tempo_replace_regex + resources: + limits: + memory: 2000M + cpus: "2.0" diff --git a/services/monitoring/template.env b/services/monitoring/template.env index 2aa5237c..32bde67f 100644 --- a/services/monitoring/template.env +++ b/services/monitoring/template.env @@ -22,3 +22,8 @@ MONITORING_PROMETHEUS_PGSQL_GID_MONITORED=${MONITORING_PROMETHEUS_PGSQL_GID_MONI MONITORING_PROMETHEUS_SMOKEPING_TARGETS=${MONITORING_PROMETHEUS_SMOKEPING_TARGETS} PUBLIC_NETWORK=${PUBLIC_NETWORK} MONITORED_NETWORK=${MONITORED_NETWORK} +TEMPO_S3_BUCKET=${TEMPO_S3_BUCKET} +STORAGE_DOMAIN=${STORAGE_DOMAIN} +S3_REGION=${S3_REGION} +S3_ACCESS_KEY=${S3_ACCESS_KEY} +S3_SECRET_KEY=${S3_SECRET_KEY} diff --git a/services/monitoring/tempo_config.yaml.j2 b/services/monitoring/tempo_config.yaml.j2 new file mode 100644 index 00000000..3b8cc7c1 --- /dev/null +++ b/services/monitoring/tempo_config.yaml.j2 @@ -0,0 +1,60 @@ +server: + http_listen_port: 3200 + +distributor: + receivers: # this configuration will listen on all ports and protocols that tempo is capable of. + otlp: + protocols: + http: + grpc: + +#ingester: +# max_block_duration: 5m # cut the headblock when this much time passes. this should probably be left alone normally + +compactor: + compaction: + block_retention: 96h # overall Tempo trace retention. + +memberlist: + abort_if_cluster_join_fails: false + bind_port: 7946 + join_members: + - tempo1:7946 + - tempo2:7946 + - tempo3:7946 + +metrics_generator: + registry: + external_labels: + source: tempo + cluster: {{ MACHINE_FQDN }} + storage: + path: /var/tempo/generator/wal + remote_write: + - url: http://prometheuscatchall:9090/api/v1/write + +storage: + trace: + backend: s3 # backend configuration to use + wal: + path: /var/tempo/wal # where to store the wal locally + s3: + bucket: {{ TEMPO_S3_BUCKET }} # how to store data in s3 + endpoint: {{STORAGE_DOMAIN}} + region: {{S3_REGION}} + access_key: {{S3_ACCESS_KEY}} + secret_key: {{S3_SECRET_KEY}} + insecure: false + tls_insecure_skip_verify: true + # For using AWS, select the appropriate regional endpoint and region + # endpoint: s3.dualstack.us-west-2.amazonaws.com + # region: us-west-2 + +#querier: +# frontend_worker: +# frontend_address: {{MONITORING_DOMAIN}}/tempo:9095 + +overrides: + defaults: + metrics_generator: + processors: ['service-graphs', 'span-metrics'] diff --git a/services/registry/Makefile b/services/registry/Makefile index f00ab1b3..07531240 100644 --- a/services/registry/Makefile +++ b/services/registry/Makefile @@ -17,6 +17,8 @@ define create-s3-bucket source .env; \ echo Creating bucket "$${S3_BUCKET}";\ ${REPO_BASE_DIR}/scripts/create-s3-bucket.bash "$${S3_BUCKET}" && \ + echo Creating bucket "$${TEMPO_S3_BUCKET}";\ + ${REPO_BASE_DIR}/scripts/create-s3-bucket.bash "$${TEMPO_S3_BUCKET}" && \ set +o allexport; \ # bucket is available in S3 endef diff --git a/services/traefik/.gitignore b/services/traefik/.gitignore index 117a2de6..99c8c967 100644 --- a/services/traefik/.gitignore +++ b/services/traefik/.gitignore @@ -1,3 +1,4 @@ docker-compose.letsencrypt.dns.yml docker-compose.yml traefik_dynamic_config.yml +tempo_config.yaml