Skip to content

Commit

Permalink
Merge pull request #66 from ai-cfia/65-custom-dockerfile-for-instrumentation-stack
Browse files Browse the repository at this point in the history

Issue #65: push new dockerfiles
  • Loading branch information
SonOfLope authored Nov 1, 2024
2 parents 7d3a5e6 + da3ee07 commit 5b03300
Show file tree
Hide file tree
Showing 10 changed files with 279 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/custom-dockerfile-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ on:
- otel-auto
- webtop-fedora-kde
- webtop-ubuntu-kde
- alloy
- tempo
- loki
- prometheus
tag:
required: true
description: Version to tag the image
Expand Down
6 changes: 6 additions & 0 deletions dockerfiles/alloy/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Grafana Alloy image with the pipeline config and endpoint credentials baked in.
# NOTE(review): "latest" is unpinned -- consider pinning a version tag for
# reproducible builds.
FROM grafana/alloy:latest

COPY config.alloy /etc/alloy/config.alloy
COPY endpoints.json /etc/alloy/endpoints.json

# The base image's entrypoint is the alloy binary; these arguments expose the
# Alloy UI/API on 0.0.0.0:12345 and allow public-preview stability components.
CMD ["run", "--server.http.listen-addr=0.0.0.0:12345", "--stability.level=public-preview", "/etc/alloy/config.alloy"]
87 changes: 87 additions & 0 deletions dockerfiles/alloy/config.alloy
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Load endpoint credentials and options.
// NOTE(review): no component below references local.file.endpoints.content --
// verify this file is still needed, or wire it into the exporters.
local.file "endpoints" {
  filename = "/etc/alloy/endpoints.json"
}

// Scrape Prometheus-format metrics from the observability stack itself.
prometheus.scrape "infrastructure" {
  targets = [
    {"__address__" = "prometheus:9090", group = "infrastructure", service = "prometheus"},
    {"__address__" = "tempo:3200", group = "infrastructure", service = "tempo"},
    {"__address__" = "loki:3100", group = "infrastructure", service = "loki"},
    {"__address__" = "grafana:3000", group = "infrastructure", service = "grafana"},
  ]
  scrape_interval = "15s"
  forward_to      = [prometheus.remote_write.default.receiver]
}

// OTLP receiver for application telemetry (gRPC and HTTP on default ports).
// FIX: all signals now flow through the memory limiter and batch processor
// before reaching the exporters; previously the receiver forwarded straight
// to the exporters, leaving both processors defined but unused.
otelcol.receiver.otlp "default" {
  grpc { }
  http { }

  output {
    metrics = [otelcol.processor.memory_limiter.default.input]
    logs    = [otelcol.processor.memory_limiter.default.input]
    traces  = [otelcol.processor.memory_limiter.default.input]
  }
}

// Apply back-pressure before the collector pipeline exhausts memory.
otelcol.processor.memory_limiter "default" {
  check_interval = "1s"
  limit          = "1GiB"
  output {
    metrics = [otelcol.processor.batch.default.input]
    logs    = [otelcol.processor.batch.default.input]
    traces  = [otelcol.processor.batch.default.input]
  }
}

// Batch telemetry to reduce the number of outgoing export requests.
otelcol.processor.batch "default" {
  output {
    metrics = [otelcol.exporter.prometheus.default.input]
    logs    = [otelcol.exporter.loki.default.input]
    traces  = [otelcol.exporter.otlp.tempo.input]
  }
}

// Alloy's own process logging.
logging {
  level  = "info"
  format = "logfmt"
}

// Convert OTLP logs into Loki log entries.
otelcol.exporter.loki "default" {
  forward_to = [loki.write.default.receiver]
}

// Push logs to the local Loki instance.
loki.write "default" {
  endpoint {
    url = "http://loki:3100/loki/api/v1/push"
  }
}

// Export traces to Tempo over OTLP/gRPC.
otelcol.exporter.otlp "tempo" {
  client {
    endpoint = "http://tempo:4317"
    tls {
      insecure = true
    }
  }
}

// Convert OTLP metrics into Prometheus remote-write samples.
otelcol.exporter.prometheus "default" {
  forward_to = [prometheus.remote_write.default.receiver]
}

// Remote-write all metrics to the local Prometheus instance
// (requires Prometheus to run with --web.enable-remote-write-receiver).
prometheus.remote_write "default" {
  endpoint {
    url = "http://prometheus:9090/api/v1/write"
  }
}
24 changes: 24 additions & 0 deletions dockerfiles/alloy/endpoints.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"metrics": {
"url": "http://prometheus:9009/api/v1/push",
"basicAuth": {
"username": "",
"password": ""
}
},
"logs": {
"url": "http://loki:3100/loki/api/v1/push",
"basicAuth": {
"username": "",
"password": ""
}
},
"traces": {
"url": "http://tempo:4317",
"basicAuthToken": "",
"tls": {
"insecure": true,
"insecureSkipVerify": true
}
}
}
5 changes: 5 additions & 0 deletions dockerfiles/loki/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Grafana Loki image with a custom single-binary configuration baked in.
# NOTE(review): "latest" is unpinned -- consider pinning a version tag for
# reproducible builds.
FROM grafana/loki:latest

COPY loki.yaml /etc/loki/loki.yaml

# The base image's entrypoint is the loki binary; enable the pattern ingester
# and point it at the bundled config file.
CMD ["--pattern-ingester.enabled=true", "-config.file=/etc/loki/loki.yaml"]
36 changes: 36 additions & 0 deletions dockerfiles/loki/loki.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Loki single-binary configuration with filesystem storage.
# Suited to local/demo deployments, not production.

# Disable multi-tenancy: no X-Scope-OrgID header is required and all data
# shares a single tenant.
auth_enabled: false

server:
  http_listen_port: 3100

common:
  # Base directory for all Loki state.
  path_prefix: /data/loki
  # Single instance -- no replication.
  replication_factor: 1
  ring:
    kvstore:
      # In-memory ring; only valid when running exactly one instance.
      store: inmemory

ingester:
  wal:
    # NOTE(review): with the WAL disabled, log data not yet flushed to storage
    # is lost if the container restarts -- confirm this is acceptable.
    enabled: false

schema_config:
  configs:
    - from: "2023-01-05"
      index:
        period: 24h
        prefix: index_
      object_store: filesystem
      schema: v13
      store: tsdb

storage_config:
  filesystem:
    directory: /data/loki/chunks
  tsdb_shipper:
    active_index_directory: /data/loki/tsdb-index
    cache_location: /data/loki/tsdb-cache

limits_config:
  # Reject samples older than 7 days (168h).
  reject_old_samples: true
  reject_old_samples_max_age: 168h
5 changes: 5 additions & 0 deletions dockerfiles/prometheus/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Prometheus image with a custom scrape configuration baked in.
# NOTE(review): "latest" is unpinned -- consider pinning a version tag for
# reproducible builds.
FROM prom/prometheus:latest

# The repo ships the config as .yaml; the image convention expects .yml.
COPY prometheus.yaml /etc/prometheus/prometheus.yml

# Overriding CMD replaces ALL of the base image's default flags, so restore
# --storage.tsdb.path explicitly to keep TSDB data at the root of the
# declared /prometheus volume (matching the image default) instead of a
# relative "data/" directory.
# --web.enable-remote-write-receiver accepts remote-write pushes on
# /api/v1/write (used by the Alloy pipeline).
CMD ["--config.file=/etc/prometheus/prometheus.yml", \
     "--storage.tsdb.path=/prometheus", \
     "--web.enable-remote-write-receiver"]
13 changes: 13 additions & 0 deletions dockerfiles/prometheus/prometheus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Prometheus scrape configuration baked into the custom image.

global:
  scrape_interval: 15s     # how often to scrape each target
  evaluation_interval: 15s # how often to evaluate rules

scrape_configs:
  # Prometheus scraping its own metrics endpoint.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  # Application metrics.
  # NOTE(review): assumes a "backend" service exposing /metrics on port 5000
  # -- confirm against the compose/deployment definition.
  - job_name: 'backend'
    metrics_path: '/metrics'
    static_configs:
      - targets: ['backend:5000']
5 changes: 5 additions & 0 deletions dockerfiles/tempo/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Grafana Tempo image with a custom configuration baked in.
# NOTE(review): "latest" is unpinned -- consider pinning a version tag for
# reproducible builds.
FROM grafana/tempo:latest

COPY tempo.yaml /etc/tempo.yaml

# The base image's entrypoint is the tempo binary; point it at the bundled
# config file.
CMD ["-config.file=/etc/tempo.yaml"]
94 changes: 94 additions & 0 deletions dockerfiles/tempo/tempo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# For more information on this configuration, see the complete reference guide at
# https://grafana.com/docs/tempo/latest/configuration/

# Enables result streaming from Tempo (to Grafana) via HTTP.
stream_over_http_enabled: true

# Configure the server block.
server:
  # Listen for all incoming requests on port 3200.
  http_listen_port: 3200

# The distributor receives incoming trace span data for the system.
distributor:
  receivers:             # This configuration will listen on all ports and protocols that tempo is capable of.
    jaeger:              # The receivers all come from the OpenTelemetry collector. More configuration information can
      protocols:         # be found there: https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver
        thrift_http:     #
        grpc:            # For a production deployment you should only enable the receivers you need!
        thrift_binary:   #
        thrift_compact:
    otlp:
      protocols:
        http:
        grpc:            # This example repository only utilises the OTLP gRPC receiver on port 4317.
    zipkin:              # Receive trace data in any supported Zipkin format.

# The ingester receives data from the distributor and processes it into indices and blocks.
ingester:
  trace_idle_period: 10s       # The length of time after a trace has not received spans to consider it complete and flush it.
  max_block_bytes: 1_000_000   # Cut the head block when it hits this size or
  max_block_duration: 5m       # this much time passes.

# The compactor block configures the compactor responsible for compacting TSDB blocks.
compactor:
  compaction:
    compaction_window: 1h              # Blocks in this time window will be compacted together.
    max_block_bytes: 100_000_000       # Maximum size of a compacted block.
    block_retention: 1h                # How long to keep blocks. Default is 14 days; this demo system is short-lived.
    compacted_block_retention: 10m     # How long to keep compacted blocks stored elsewhere.

# Configuration block to determine where to store TSDB blocks.
storage:
  trace:
    backend: local                         # Use the local filesystem for block storage. Not recommended for production systems.
    block:
      bloom_filter_false_positive: .05     # Bloom filter false positive rate. Lower values create larger filters but fewer false positives.
    # Write Ahead Log (WAL) configuration.
    wal:
      path: /tmp/tempo/wal                 # Directory to store the WAL locally.
    # Local configuration for filesystem storage.
    local:
      path: /tmp/tempo/blocks              # Directory to store the TSDB blocks.
    # Pool used for finding trace IDs.
    pool:
      max_workers: 100                     # Worker pool determines the number of parallel requests to the object store backend.
      queue_depth: 10000                   # Maximum depth for the querier queue jobs. A job is required for each block searched.

# Configures the metrics generator component of Tempo.
metrics_generator:
  # Specifies which processors to use.
  processor:
    # Span metrics create metrics based on span type, duration, name and service.
    span_metrics:
      # Configure extra dimensions to add as metric labels.
      dimensions:
        - http.method
        - http.target
        - http.status_code
        - service.version
    # Service graph metrics create node and edge metrics for determining service interactions.
    service_graphs:
      # Configure extra dimensions to add as metric labels.
      dimensions:
        - http.method
        - http.target
        - http.status_code
        - service.version
    # Configure the local blocks processor.
    local_blocks:
      # Ensure that metrics blocks are flushed to storage so TraceQL metrics
      # queries can run against historical data.
      flush_to_storage: true
  # The registry configuration determines how to process metrics.
  registry:
    collection_interval: 5s
    external_labels:
      source: tempo
  storage:
    path: /tmp/tempo/generator/wal
  traces_storage:
    path: /tmp/tempo/generator/traces

# Global override configuration.
overrides:
  metrics_generator_processors: ['service-graphs', 'span-metrics','local-blocks'] # The types of metrics generation to enable for each tenant.

0 comments on commit 5b03300

Please sign in to comment.