Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update with KubeDB configuration #1

Merged
merged 4 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,20 @@ ARG ARCH="amd64"
ARG OS="linux"
FROM quay.io/prometheus/golang-builder AS builder

# Get sql_exporter
ADD . /go/src/github.com/burningalchemist/sql_exporter
WORKDIR /go/src/github.com/burningalchemist/sql_exporter
# Get sql_exporter fork
ADD . /go/src/github.com/kubedb/mssqlserver_exporter
WORKDIR /go/src/github.com/kubedb/mssqlserver_exporter

# Do makefile
RUN make

# Make image and copy build sql_exporter
FROM quay.io/prometheus/busybox-${OS}-${ARCH}:latest
LABEL maintainer="The Prometheus Authors <[email protected]>"
COPY --from=builder /go/src/github.com/burningalchemist/sql_exporter/sql_exporter /bin/sql_exporter
COPY --from=builder /go/src/github.com/kubedb/mssqlserver_exporter/sql_exporter /bin/sql_exporter

# Add kubedb configuration files to /etc/sql-exporter/
ADD kubedb /etc/sql-exporter/

EXPOSE 9399
USER nobody
Expand Down
207 changes: 207 additions & 0 deletions kubedb/mssql_standard.collector.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
# Standard metrics collector for Microsoft SQL Server.
#
# The SQL Server login used by the exporter must hold the following
# permissions (replace <login> with that login's name):
#
#   GRANT VIEW ANY DEFINITION TO <login>
#   GRANT VIEW SERVER STATE TO <login>
#
collector_name: 'mssql_standard'

# Per-collector counterpart of global.min_interval: it only applies to the
# queries defined by this collector.
#min_interval: 0s

metrics:
# Server clock expressed as Unix time (seconds since 1970-01-01, from GETUTCDATE()).
# DATEDIFF_BIG returns bigint; plain DATEDIFF returns int and fails with an
# overflow error once the difference exceeds 2^31-1 seconds (January 2038).
# NOTE: DATEDIFF_BIG requires SQL Server 2016 or later.
- metric_name: mssql_local_time_seconds
  type: gauge
  help: 'Local time in seconds since epoch (Unix time).'
  values: [unix_time]
  query: |
    SELECT DATEDIFF_BIG(second, '19700101', GETUTCDATE()) AS unix_time

# One sample per database name: the number of rows in sys.sysprocesses
# grouped by DB_NAME(dbid), exposed under the `db` label.
- metric_name: mssql_connections
  help: "Number of active connections."
  type: gauge
  key_labels: [db]
  values:
  - count
  query: |
    SELECT DB_NAME(sp.dbid) AS db, COUNT(sp.spid) AS count
    FROM sys.sysprocesses sp
    GROUP BY DB_NAME(sp.dbid)

#
# Collected from sys.dm_os_performance_counters
#
# Reads the '_Total' instance of the 'Number of Deadlocks/sec' performance counter.
- metric_name: mssql_deadlocks
  help: "Number of lock requests that resulted in a deadlock."
  type: counter
  values:
  - cntr_value
  query: |
    SELECT cntr_value
    FROM sys.dm_os_performance_counters WITH (NOLOCK)
    WHERE counter_name = 'Number of Deadlocks/sec' AND instance_name = '_Total'

# Reads the 'User Errors' instance of the 'Errors/sec' performance counter.
- metric_name: mssql_user_errors
  help: "Number of user errors."
  type: counter
  values:
  - cntr_value
  query: |
    SELECT cntr_value
    FROM sys.dm_os_performance_counters WITH (NOLOCK)
    WHERE counter_name = 'Errors/sec' AND instance_name = 'User Errors'

# Reads the 'Kill Connection Errors' instance of the 'Errors/sec' performance counter.
- metric_name: mssql_kill_connection_errors
  help: "Number of severe errors that caused SQL Server to kill the connection."
  type: counter
  values:
  - cntr_value
  query: |
    SELECT cntr_value
    FROM sys.dm_os_performance_counters WITH (NOLOCK)
    WHERE counter_name = 'Errors/sec' AND instance_name = 'Kill Connection Errors'

# 'Page life expectancy' is published both by the Buffer Manager object
# (instance-wide) and by every Buffer Node object (one per NUMA node).
# TOP(1) without a filter returned whichever row came first, so the query is
# pinned to the Buffer Manager row to make the value deterministic.
# LIKE is used because object_name carries an instance-specific prefix.
- metric_name: mssql_page_life_expectancy_seconds
  type: gauge
  help: 'The number of seconds a page will stay in the buffer pool without references.'
  values: [cntr_value]
  query: |
    SELECT top(1) cntr_value
    FROM sys.dm_os_performance_counters WITH (NOLOCK)
    WHERE counter_name = 'Page life expectancy' AND object_name LIKE '%Buffer Manager%'

# Reads the 'Batch Requests/sec' performance counter.
- metric_name: mssql_batch_requests
  help: "Number of command batches received."
  type: counter
  values:
  - cntr_value
  query: |
    SELECT cntr_value
    FROM sys.dm_os_performance_counters WITH (NOLOCK)
    WHERE counter_name = 'Batch Requests/sec'

# One sample per 'Log Growths' counter instance, excluding the '_Total'
# aggregate; the right-trimmed instance name becomes the `db` label.
- metric_name: mssql_log_growths
  help: "Number of times the transaction log has been expanded, per database."
  type: counter
  key_labels: [db]
  values:
  - cntr_value
  query: |
    SELECT rtrim(instance_name) AS db, cntr_value
    FROM sys.dm_os_performance_counters WITH (NOLOCK)
    WHERE counter_name = 'Log Growths' AND instance_name <> '_Total'

# 'Buffer cache hit ratio' is a fraction-type counter: its raw cntr_value is
# only the numerator and must be divided by the matching
# 'Buffer cache hit ratio base' row to obtain the ratio (0.0 - 1.0).
# Rows with a zero base are skipped (and NULLIF guards the division) so the
# query never raises a divide-by-zero error; no sample is emitted in that case.
- metric_name: mssql_buffer_cache_hit_ratio
  type: gauge
  help: 'Ratio of requests that hit the buffer cache'
  values: [cntr_value]
  query: |
    SELECT CAST(r.cntr_value AS float) / NULLIF(b.cntr_value, 0) AS cntr_value
    FROM sys.dm_os_performance_counters r WITH (NOLOCK)
    INNER JOIN sys.dm_os_performance_counters b WITH (NOLOCK)
    ON b.object_name = r.object_name
    WHERE r.counter_name = 'Buffer cache hit ratio'
    AND b.counter_name = 'Buffer cache hit ratio base'
    AND b.cntr_value <> 0

# Despite its name, the 'Checkpoint pages/sec' row in
# sys.dm_os_performance_counters holds a cumulative count (same counter type as
# 'Batch Requests/sec'), so it is exported as a counter; derive the per-second
# value on the Prometheus side, e.g. with rate().
- metric_name: mssql_checkpoint_pages_sec
  type: counter
  help: 'Cumulative number of pages flushed to disk by checkpoint operations.'
  values: [cntr_value]
  query: |
    SELECT cntr_value
    FROM sys.dm_os_performance_counters
    WHERE [counter_name] = 'Checkpoint pages/sec'

#
# Collected from sys.dm_io_virtual_file_stats
#
# Built from the shared query `mssql_io_stall`: the `read` and `write` columns
# become separate samples distinguished by the `operation` label.
- metric_name: mssql_io_stall_seconds
  help: "Stall time in seconds per database and I/O operation."
  type: counter
  key_labels: [db]
  value_label: 'operation'
  values: [read, write]
  query_ref: mssql_io_stall
# Built from the shared query `mssql_io_stall`, using its `io_stall` column.
- metric_name: mssql_io_stall_total_seconds
  help: "Total stall time in seconds per database."
  type: counter
  key_labels: [db]
  values: [io_stall]
  query_ref: mssql_io_stall

#
# Collected from sys.dm_os_process_memory
#
# Takes the `resident_memory_bytes` column of the shared query `mssql_process_memory`.
- metric_name: mssql_resident_memory_bytes
  help: "SQL Server resident memory size (AKA working set)."
  type: gauge
  values:
  - resident_memory_bytes
  query_ref: mssql_process_memory

# Takes the `virtual_memory_bytes` column of the shared query `mssql_process_memory`.
- metric_name: mssql_virtual_memory_bytes
  help: "SQL Server committed virtual memory size."
  type: gauge
  values:
  - virtual_memory_bytes
  query_ref: mssql_process_memory

# Takes the `memory_utilization_percentage` column of the shared query `mssql_process_memory`.
- metric_name: mssql_memory_utilization_percentage
  help: "The percentage of committed memory that is in the working set."
  type: gauge
  values:
  - memory_utilization_percentage
  query_ref: mssql_process_memory

# Takes the `page_fault_count` column of the shared query `mssql_process_memory`.
- metric_name: mssql_page_fault_count
  help: "The number of page faults that were incurred by the SQL Server process."
  type: counter
  values:
  - page_fault_count
  query_ref: mssql_process_memory

#
# Collected from sys.dm_os_sys_memory
#
# Two samples distinguished by the `state` label: `used` (total minus
# available physical memory) and `available`, both converted from KB to bytes.
- metric_name: mssql_os_memory
  help: "OS physical memory, used and available."
  type: gauge
  value_label: state
  values:
  - used
  - available
  query: |
    SELECT
    (total_physical_memory_kb - available_physical_memory_kb) * 1024 AS used,
    available_physical_memory_kb * 1024 AS available
    FROM sys.dm_os_sys_memory

# Two samples distinguished by the `state` label: `used` (total minus
# available page file) and `available`, both converted from KB to bytes.
- metric_name: mssql_os_page_file
  help: "OS page file, used and available."
  type: gauge
  value_label: state
  values:
  - used
  - available
  query: |
    SELECT
    (total_page_file_kb - available_page_file_kb) * 1024 AS used,
    available_page_file_kb * 1024 AS available
    FROM sys.dm_os_sys_memory

queries:
# Populates `mssql_io_stall_seconds` and `mssql_io_stall_total_seconds`.
#
# Sums the per-file stall counters of sys.dm_io_virtual_file_stats per database
# and converts milliseconds to seconds.
# DB_NAME() is used as-is: casting it to `varchar` without a length truncates
# the name to 30 characters, which would cut long database names and could make
# two databases share the same `db` label.
- query_name: mssql_io_stall
  query: |
    SELECT
    DB_NAME(a.database_id) AS [db],
    sum(io_stall_read_ms) / 1000.0 AS [read],
    sum(io_stall_write_ms) / 1000.0 AS [write],
    sum(io_stall) / 1000.0 AS io_stall
    FROM
    sys.dm_io_virtual_file_stats(null, null) a
    INNER JOIN sys.master_files b ON a.database_id = b.database_id AND a.file_id = b.file_id
    GROUP BY a.database_id

# Shared query behind `mssql_resident_memory_bytes`, `mssql_virtual_memory_bytes`,
# `mssql_memory_utilization_percentage` and `mssql_page_fault_count`.
# The two *_kb columns are multiplied by 1024 to report bytes.
- query_name: 'mssql_process_memory'
  query: |
    SELECT
    physical_memory_in_use_kb * 1024 AS resident_memory_bytes,
    virtual_address_space_committed_kb * 1024 AS virtual_memory_bytes,
    memory_utilization_percentage,
    page_fault_count
    FROM sys.dm_os_process_memory

46 changes: 46 additions & 0 deletions kubedb/sql_exporter.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Exporter-wide settings and defaults.
global:
  # Connection limits.
  max_connections: 3
  max_idle_connections: 3
  # Scrape timing.
  min_interval: '0s'
  scrape_timeout: '10s'
  scrape_timeout_offset: '500ms'

# The target to monitor and the collectors to execute on it.
target:
  name: mssql_database
  # Placeholder DSN: replace the user, password and host with real values
  # before deploying. The previous value had its "password@host" part mangled
  # into "[email protected]", which is not a valid connection string.
  data_source_name: 'sqlserver://prom_user:prom_password@dbserver1.example.com:1433/master'
  # Example for KubeDB
  # data_source_name: 'sqlserver://username:password@<pod_name>.<gov_svc_name>.<namespace>.svc.cluster.local:1433/master'
  # Glob selecting every collector whose name starts with "mssql_".
  collectors: [mssql_*]
  enable_ping: true

# Inline collector and metric definitions.
collectors:
  - collector_name: mssql_sample
    metrics:
      # NOTE(review): `mssql_log_growths` is also defined by the mssql_standard
      # collector file, and the target's `mssql_*` pattern matches both
      # collectors — confirm that exposing the same metric name twice is intended.
      - metric_name: mssql_log_growths
        help: "Total number of times the transaction log has been expanded since last restart, per database."
        type: counter
        key_labels: [db]
        static_labels: {env: dev, region: europe}
        values:
          - counter
        query: |
          SELECT rtrim(instance_name) AS db, cntr_value AS counter
          FROM sys.dm_os_performance_counters
          WHERE counter_name = 'Log Growths' AND instance_name <> '_Total'

      # Constant-valued metric whose `hostname` label carries @@SERVERNAME.
      - metric_name: mssql_hostname
        help: "Database server hostname"
        type: gauge
        key_labels: [hostname]
        static_value: 1
        query: |
          SELECT @@SERVERNAME AS hostname

# Glob pattern(s) naming additional collector definition files to load.
collector_files: ['*.collector.yml']
Loading