Skip to content

Commit

Permalink
Merge branch 'main' into fix/monitoringNetworksTracing
Browse files Browse the repository at this point in the history
  • Loading branch information
mrnicegyu11 authored Sep 30, 2024
2 parents 298da31 + fde6c79 commit c877112
Show file tree
Hide file tree
Showing 19 changed files with 61 additions and 69 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,14 @@ env:
value: {{ requiredEnv "RABBIT_SECURE" }}
- name: RABBIT_USER
value: {{ requiredEnv "RABBIT_USER" }}
- name: REDIS_USER
value: {{ requiredEnv "REDIS_USER" }}
- name: REDIS_HOST
value: {{ requiredEnv "REDIS_EXTERNAL_HOST" }}
- name: REDIS_PORT
value: {{ requiredEnv "REDIS_EXTERNAL_PORT" }}
- name: REDIS_SECURE
value: {{ requiredEnv "REDIS_SECURE" }}
- name: REDIS_PASSWORD
value: {{ requiredEnv "REDIS_PASSWORD" }}
sensitive: true
Expand Down
14 changes: 2 additions & 12 deletions services/jaeger/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,14 @@ up-letsencrypt-dns: .init .env ${TEMP_COMPOSE}-letsencrypt-dns
@docker stack deploy --with-registry-auth --prune --compose-file ${TEMP_COMPOSE}-letsencrypt-dns ${STACK_NAME}

.PHONY: up-dalco ## Deploys jaeger stack for Dalco Cluster
up-dalco: .init .env ${TEMP_COMPOSE}-dalco
@docker stack deploy --with-registry-auth --prune --compose-file ${TEMP_COMPOSE}-dalco ${STACK_NAME}
up-dalco: up

.PHONY: up-aws ## Deploys jaeger stack for aws
up-aws: .init .env ${TEMP_COMPOSE}-aws ## Deploys jaeger stack in aws
@docker stack deploy --with-registry-auth --prune --compose-file ${TEMP_COMPOSE}-aws ${STACK_NAME}

.PHONY: up-master ## Deploys jaeger stack for master Cluster
up-master: .init .env ${TEMP_COMPOSE}-master
@docker stack deploy --with-registry-auth --prune --compose-file ${TEMP_COMPOSE}-master ${STACK_NAME}
up-master: up

.PHONY: up-public ## Deploys jaeger stack for public access deploy
up-public: up-dalco
Expand All @@ -55,14 +53,6 @@ ${TEMP_COMPOSE}-letsencrypt-http: docker-compose.yml docker-compose.letsencrypt.
${TEMP_COMPOSE}-letsencrypt-dns: docker-compose.yml docker-compose.letsencrypt.dns.yml
@${REPO_BASE_DIR}/scripts/docker-stack-config.bash $< docker-compose.letsencrypt.dns.yml > $@

.PHONY: ${TEMP_COMPOSE}-dalco
${TEMP_COMPOSE}-dalco: docker-compose.yml docker-compose.dalco.yml
@${REPO_BASE_DIR}/scripts/docker-stack-config.bash $< docker-compose.dalco.yml > $@

.PHONY: ${TEMP_COMPOSE}-master
${TEMP_COMPOSE}-master: docker-compose.yml docker-compose.master.yml
@${REPO_BASE_DIR}/scripts/docker-stack-config.bash $< docker-compose.master.yml > $@

.PHONY: ${TEMP_COMPOSE}-aws
${TEMP_COMPOSE}-aws: docker-compose.yml docker-compose.aws.yml
@${REPO_BASE_DIR}/scripts/docker-stack-config.bash $< docker-compose.aws.yml > $@
4 changes: 0 additions & 4 deletions services/jaeger/docker-compose.aws.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,3 @@ services:
jaeger:
dns: # Add this always for AWS, otherwise we get "No such image: " for docker services
8.8.8.8
deploy:
placement:
constraints:
- node.labels.jaeger==true
7 changes: 0 additions & 7 deletions services/jaeger/docker-compose.dalco.yml

This file was deleted.

7 changes: 0 additions & 7 deletions services/jaeger/docker-compose.master.yml

This file was deleted.

13 changes: 10 additions & 3 deletions services/jaeger/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ services:
environment:
MEMORY_MAX_TRACES: 10000
deploy:
placement:
constraints:
- node.labels.ops==true
labels:
- traefik.enable=true
- traefik.docker.network=${PUBLIC_NETWORK}
Expand All @@ -39,15 +42,19 @@ services:
hostname: "{{.Node.Hostname}}-{{.Task.Slot}}"
command:
- "--config=/etc/otel/config.yaml"
deploy:
placement:
constraints:
- node.labels.ops==true
ports:
- "4318:4318" # OTLP HTTP receiver
networks:
- public
- monitored
environment:
TRACING_OTEL_COLLECTOR_BATCH_SIZE: ${TRACING_OTEL_COLLECTOR_BATCH_SIZE}
TRACING_OTEL_COLLECTOR_SAMPLING_PERCENTAGE: ${TRACING_OTEL_COLLECTOR_SAMPLING_PERCENTAGE}
TRACING_OTEL_COLLECTOR_EXPORTER_ENDPOINT: ${TRACING_OTEL_COLLECTOR_EXPORTER_ENDPOINT}
TRACING_OPENTELEMETRY_COLLECTOR_BATCH_SIZE: ${TRACING_OPENTELEMETRY_COLLECTOR_BATCH_SIZE}
TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE: ${TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE}
TRACING_OPENTELEMETRY_COLLECTOR_EXPORTER_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_EXPORTER_ENDPOINT}
networks:
public:
external: true
Expand Down
6 changes: 3 additions & 3 deletions services/jaeger/opentelemetry-collector-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ receivers:
endpoint: 0.0.0.0:4318 # Default endpoint for OTLP over HTTP
exporters:
otlphttp:
endpoint: ${TRACING_OTEL_COLLECTOR_EXPORTER_ENDPOINT} # Adjust to your Jaeger endpoint
endpoint: ${TRACING_OPENTELEMETRY_COLLECTOR_EXPORTER_ENDPOINT} # Adjust to your Jaeger endpoint
debug:
verbosity: detailed
service:
Expand All @@ -21,6 +21,6 @@ service:
processors:
batch:
timeout: 5s
send_batch_size: ${TRACING_OTEL_COLLECTOR_BATCH_SIZE}
send_batch_size: ${TRACING_OPENTELEMETRY_COLLECTOR_BATCH_SIZE}
probabilistic_sampler:
sampling_percentage: ${TRACING_OTEL_COLLECTOR_SAMPLING_PERCENTAGE}
sampling_percentage: ${TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE}
6 changes: 3 additions & 3 deletions services/jaeger/template.env
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MONITORING_DOMAIN=${MONITORING_DOMAIN}
PUBLIC_NETWORK=${PUBLIC_NETWORK}
MONITORED_NETWORK=${MONITORED_NETWORK}
TRACING_OTEL_COLLECTOR_BATCH_SIZE=${TRACING_OTEL_COLLECTOR_BATCH_SIZE}
TRACING_OTEL_COLLECTOR_SAMPLING_PERCENTAGE=${TRACING_OTEL_COLLECTOR_SAMPLING_PERCENTAGE}
TRACING_OTEL_COLLECTOR_EXPORTER_ENDPOINT=${TRACING_OTEL_COLLECTOR_EXPORTER_ENDPOINT}
TRACING_OPENTELEMETRY_COLLECTOR_BATCH_SIZE=${TRACING_OPENTELEMETRY_COLLECTOR_BATCH_SIZE}
TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE=${TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE}
TRACING_OPENTELEMETRY_COLLECTOR_EXPORTER_ENDPOINT=${TRACING_OPENTELEMETRY_COLLECTOR_EXPORTER_ENDPOINT}
4 changes: 2 additions & 2 deletions services/monitoring/prometheus/prometheus-base.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# global config
# DOLLAR SIGNS NEED TO BE ESCAPED (see https://stackoverflow.com/a/61259844/10198629)
global:
scrape_interval: 20s # By default, scrape targets every 15 seconds.
evaluation_interval: 20s # By default, scrape targets every 15 seconds.
scrape_interval: 15s # By default, scrape targets every 15 seconds.
evaluation_interval: 15s # By default, evaluate rules every 15 seconds.
# scrape_timeout global default would be (10s).

# Attach these labels to any time series or alerts when communicating with
Expand Down
2 changes: 1 addition & 1 deletion services/monitoring/prometheus/prometheus-simcore.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
scrape_configs:
# SIMCORE -------------------------------------------------------------------
- job_name: "simcore"
scrape_interval: 73s
scrape_interval: 15s
relabel_configs:
- source_labels: [__meta_dns_name]
separator: ;
Expand Down
4 changes: 2 additions & 2 deletions services/redis-commander/config.json.j2
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"label": "{{ label }}",
"host": "{{ host }}",
"port": "{{ port }}",
{%- if username %}
{%- if username.lower() not in ["", "null", "none"] %}
"username": "{{ username }}",
{%- endif %}
"password": "{{ password }}",
Expand All @@ -16,7 +16,7 @@
{#- Special-case `if` for DALCO. Remove once DALCO is no longer a special case. #}
{%- if REDIS_STAGING_DATABASES %}
{%- for label in REDIS_STAGING_DATABASES.strip(',').split(',') %}
{{ redis_connection(label, REDIS_HOST, REDIS_PORT, REDIS_USER, REDIS_STAGING_PASSWORD, loop.index0, REDIS_SECURE) | indent(4) }},
{{ redis_connection(label, REDIS_STAGING_HOST, REDIS_STAGING_PORT, REDIS_STAGING_USER, REDIS_STAGING_PASSWORD, loop.index0, REDIS_STAGING_SECURE) | indent(4) }},
{%- endfor %}
{%- endif %}
{%- if REDIS_DATABASES %}
Expand Down
4 changes: 4 additions & 0 deletions services/redis-commander/template.env
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,9 @@ REDIS_SECURE=${REDIS_SECURE}
REDIS_DATABASES=${REDIS_DATABASES}
REDIS_STAGING_PASSWORD=${REDIS_STAGING_PASSWORD}
REDIS_STAGING_DATABASES=${REDIS_STAGING_DATABASES}
REDIS_STAGING_HOST=${REDIS_STAGING_HOST}
REDIS_STAGING_PORT=${REDIS_STAGING_PORT}
REDIS_STAGING_USER=${REDIS_STAGING_USER}
REDIS_STAGING_SECURE=${REDIS_STAGING_SECURE}
PUBLIC_NETWORK=${PUBLIC_NETWORK}
MONITORED_NETWORK=${MONITORED_NETWORK}
23 changes: 10 additions & 13 deletions services/simcore/docker-compose.deploy.aws.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@ services:
agent:
volumes:
- /docker/volumes/:/docker/volumes/


dask-sidecar:
deploy:
placement:
constraints:
- node.role == worker

efs-guardian:
volumes:
- efs_volume:/data/efs
Expand All @@ -19,6 +22,7 @@ services:
replicas: 3
static-webserver:
hostname: "{{.Node.Hostname}}-{{.Service.Name}}"

postgres:
deploy:
replicas: 0
Expand All @@ -38,9 +42,15 @@ services:
- "--metrics.prometheus.entryPoint=metrics"
- "--entryPoints.http.address=:80"
- "--entryPoints.http.forwardedHeaders.insecure"
- "--entryPoints.http.transport.respondingTimeouts.idleTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.http.transport.respondingTimeouts.writeTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.http.transport.respondingTimeouts.readTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.postgres.address=:5432"
- "--entryPoints.simcore_api.address=:10081"
- "--entryPoints.simcore_api.forwardedHeaders.insecure"
- "--entryPoints.simcore_api.transport.respondingTimeouts.idleTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.simcore_api.transport.respondingTimeouts.writeTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.simcore_api.transport.respondingTimeouts.readTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.traefik_monitor.address=:8080"
- "--entryPoints.traefik_monitor.forwardedHeaders.insecure"
- "--providers.swarm.endpoint=unix:///var/run/docker.sock"
Expand All @@ -65,19 +75,6 @@ services:
deploy:
replicas: 3

redis:
deploy:
labels:
- traefik.enable=true
- traefik.docker.network=${PUBLIC_NETWORK}
- traefik.tcp.services.${SWARM_STACK_NAME}_redis.loadBalancer.server.port=6379
- traefik.tcp.routers.redis.service=${SWARM_STACK_NAME}_redis
- traefik.tcp.routers.redis.entrypoints=redis
- traefik.tcp.routers.redis.tls=false
- traefik.tcp.routers.redis.rule=ClientIP(`10.0.0.0/8`) || ClientIP(`172.16.0.0/12`) || ClientIP(`192.168.0.0/16`)
networks:
- public

volumes:
efs_volume:
driver_opts:
Expand Down
6 changes: 6 additions & 0 deletions services/simcore/docker-compose.deploy.dalco.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,15 @@ services:
- "--metrics.prometheus.entryPoint=metrics"
- "--entryPoints.http.address=:80"
- "--entryPoints.http.forwardedHeaders.insecure"
- "--entryPoints.http.transport.respondingTimeouts.idleTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.http.transport.respondingTimeouts.writeTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.http.transport.respondingTimeouts.readTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.postgres.address=:5432"
- "--entryPoints.simcore_api.address=:10081"
- "--entryPoints.simcore_api.forwardedHeaders.insecure"
- "--entryPoints.simcore_api.transport.respondingTimeouts.idleTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.simcore_api.transport.respondingTimeouts.writeTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.simcore_api.transport.respondingTimeouts.readTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.traefik_monitor.address=:8080"
- "--entryPoints.traefik_monitor.forwardedHeaders.insecure"
- "--providers.swarm.endpoint=unix:///var/run/docker.sock"
Expand Down
9 changes: 6 additions & 3 deletions services/simcore/docker-compose.deploy.local.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,6 @@ services:
rabbit:
deploy:
replicas: 1
datcore-adapter:
deploy:
replicas: 0
redis:
deploy:
replicas: 1
Expand All @@ -89,9 +86,15 @@ services:
- "--metrics.prometheus.entryPoint=metrics"
- "--entryPoints.http.address=:80"
- "--entryPoints.http.forwardedHeaders.insecure"
- "--entryPoints.http.transport.respondingTimeouts.idleTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.http.transport.respondingTimeouts.writeTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.http.transport.respondingTimeouts.readTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.postgres.address=:5432"
- "--entryPoints.simcore_api.address=:10081"
- "--entryPoints.simcore_api.forwardedHeaders.insecure"
- "--entryPoints.simcore_api.transport.respondingTimeouts.idleTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.simcore_api.transport.respondingTimeouts.writeTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.simcore_api.transport.respondingTimeouts.readTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.traefik_monitor.address=:8080"
- "--entryPoints.traefik_monitor.forwardedHeaders.insecure"
- "--providers.swarm.endpoint=unix:///var/run/docker.sock"
Expand Down
6 changes: 6 additions & 0 deletions services/simcore/docker-compose.deploy.master.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,15 @@ services:
- "--metrics.prometheus.entryPoint=metrics"
- "--entryPoints.http.address=:80"
- "--entryPoints.http.forwardedHeaders.insecure"
- "--entryPoints.http.transport.respondingTimeouts.idleTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.http.transport.respondingTimeouts.writeTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.http.transport.respondingTimeouts.readTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.postgres.address=:5432"
- "--entryPoints.simcore_api.address=:10081"
- "--entryPoints.simcore_api.forwardedHeaders.insecure"
- "--entryPoints.simcore_api.transport.respondingTimeouts.idleTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.simcore_api.transport.respondingTimeouts.writeTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.simcore_api.transport.respondingTimeouts.readTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805
- "--entryPoints.traefik_monitor.address=:8080"
- "--entryPoints.traefik_monitor.forwardedHeaders.insecure"
- "--providers.swarm.endpoint=unix:///var/run/docker.sock"
Expand Down
5 changes: 1 addition & 4 deletions services/simcore/docker-compose.deploy.public.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@ services:
placement:
constraints:
- node.labels.postgres==true
datcore-adapter:
deploy:
replicas: 0

resource-usage-tracker:
deploy:
replicas: 3
Expand All @@ -23,7 +21,6 @@ services:
- prometheus-job=traefik_simcore_production
# oSparc postgres
- traefik.tcp.routers.${SWARM_STACK_NAME}_postgresRoute.entrypoints=postgres
- traefik.tcp.routers.${SWARM_STACK_NAME}_postgresRoute.entrypoints=${PREFIX_STACK_NAME}_postgres
- traefik.tcp.routers.${SWARM_STACK_NAME}_postgresRoute.tls=false
- traefik.tcp.routers.${SWARM_STACK_NAME}_postgresRoute.service=${SWARM_STACK_NAME}_postgresRoute
- traefik.tcp.services.${SWARM_STACK_NAME}_postgresRoute.loadbalancer.server.port=5432
Expand Down
1 change: 1 addition & 0 deletions services/simcore/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,7 @@ services:
networks:
- monitored
deploy:
replicas: ${SIMCORE_DATCORE_ADAPTER_REPLICAS}
update_config:
parallelism: 2
order: start-first
Expand Down
5 changes: 0 additions & 5 deletions services/traefik/docker-compose.aws.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,6 @@ services:
- "--entryPoints.https.forwardedHeaders.insecure"
- "--providers.file.directory=/etc/traefik/"
- "--providers.file.watch=true"
- '--entryPoints.redis.address=:6379'
ports:
- target: 6379
published: 31379
mode: host
environment:
- AWS_ACCESS_KEY_ID=${ROUTE53_DNS_CHALLANGE_ACCESS_KEY}
- AWS_SECRET_ACCESS_KEY=${ROUTE53_DNS_CHALLANGE_SECRET_KEY}
Expand Down

0 comments on commit c877112

Please sign in to comment.