Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ Small Python script for sending Varnish statistics to Graphite.
## Usage

```
usage: varnish-graphite [-h] [-H HOST] [-p PORT] [-n NAME] [-P PREFIX]
[-i INTERVAL] [-b BUFFER_SIZE] [-B MAX_BUFFER_SIZE]
usage: varnish-graphite [-h] [-H HOST] [-p PORT] [-n NAME] [-f FORMAT]
[-P PREFIX] [-i INTERVAL] [-b BUFFER_SIZE]
[-B MAX_BUFFER_SIZE]

Collect and stream Varnish statistics to Graphite.

Expand All @@ -16,7 +17,11 @@ optional arguments:
-p PORT, --port PORT The graphite server port (default: 2003)
-n NAME, --name NAME Specifies the name of the varnishd instance to get
logs from. If -n is not specified, the host name is
used. (default: albatross-3.local)
used. (default: $HOSTNAME)
-f FORMAT, --format FORMAT
Specifies the format of the data to collect from
varnishstat. If -f is not specified, json is used.
(default: json)
-P PREFIX, --prefix PREFIX
The prefix for metric names (default: varnish)
-i INTERVAL, --interval INTERVAL
Expand Down
334 changes: 205 additions & 129 deletions varnish-graphite
Original file line number Diff line number Diff line change
Expand Up @@ -5,146 +5,222 @@
# Collect statistics from Varnish, format them, and send them to Graphite.

import argparse
import json
try:
import json
except:
pass
import logging
import socket
from subprocess import check_output
import subprocess
from time import time, sleep
try:
from lxml import objectify
except:
pass


class GraphiteClient:

def __init__(self, host='127.0.0.1', port=2003, prefix='varnish', buffer_size=1428, max_buffer_size=33554432):
self.prefix = prefix
self.host = host
self.port = port
self.sock = None

self.sendbuf = ''
self.buffer_size = buffer_size
self.max_buffer_size = max_buffer_size

self.connect()

def connect(self):
if self.sock == None:
try:
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
except socket.error:
def __init__(
self, host='127.0.0.1', port=2003, prefix='varnish',
buffer_size=1428, max_buffer_size=33554432
):
self.prefix = prefix
self.host = host
self.port = port
self.sock = None
logging.warning("Connection failed. Re-trying on the next interval")
try:
self.sock.connect((self.host, int(self.port)))
except socket.error:
self.sock.close()

self.sendbuf = ''
self.buffer_size = buffer_size
self.max_buffer_size = max_buffer_size

self.connect()

def connect(self):
if self.sock is None:
try:
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
except socket.error:
self.sock = None
logging.warning(
"Connection failed. Re-trying on the next interval"
)
try:
self.sock.connect((self.host, int(self.port)))
except socket.error:
self.sock.close()
self.sock = None
logging.warning(
"Connection failed. Re-trying on the next interval"
)
if self.sock is not None:
logging.info("Connected to {0}:{1}".format(self.host, self.port))

def send_metrics(self, metrics):
if self.sock is None:
self.connect()
for stat in metrics:
if (self.sock is not None and
len(self.sendbuf) +
len("{0}.{1}".format(self.prefix, stat)) >
self.buffer_size):
logging.info(
"Sending {0} bytes to {1}".format(
len(self.sendbuf),
"{0}:{1}".format(self.host, self.port)
)
)
try:
self.sock.send(self.sendbuf)
self.sendbuf = ''
except socket.error:
logging.warning(
"Lost connection to {0}:{1}".format(
self.host, self.port
)
)
self.reconnect()
if (len(self.sendbuf) +
len("{0}.{1}".format(self.prefix, stat)) <
int(self.max_buffer_size)):
self.sendbuf += "{0}.{1}\n".format(self.prefix, stat)

def disconnect(self):
if self.sock is not None:
self.sock.close()
self.sock = None
logging.warning("Connection failed. Re-trying on the next interval")
if self.sock != None:
logging.info("Connected to {}:{}".format(self.host, self.port))

def send_metrics(self, metrics):
if self.sock == None:
self.connect()
for stat in metrics:
if self.sock != None and len(self.sendbuf) + len("{}.{}".format(self.prefix, stat)) > self.buffer_size:
logging.info("Sending {} bytes to {}".format(len(self.sendbuf), "{}:{}".format(self.host, self.port)))
try:
self.sock.send(self.sendbuf)
self.sendbuf = ''
except socket.error:
logging.warning("Lost connection to {}:{}".format(self.host, self.port))
self.reconnect()
if len(self.sendbuf) + len("{}.{}".format(self.prefix, stat)) < int(self.max_buffer_size):
self.sendbuf += "{}.{}\n".format(self.prefix, stat)

def disconnect(self):
if self.sock != None:
self.sock.close()
self.sock = None
logging.warning("Disconnected from {}:{}".format(self.host, self.port))

def reconnect(self):
self.disconnect()
self.connect()

def collect_metrics(name):
varnish_stats = json.loads(check_output(['varnishstat', '-n', name, '-1', '-j']))
timestamp = int(time())

status = []
fmt = lambda x, y: "{} {} {}".format(x, varnish_stats[y]['value'], timestamp)

metrics = [('cache.hit', 'cache_hit'),
('cache.hitpass', 'cache_hitpass'),
('cache.miss', 'cache_miss'),
('backend.conn', 'backend_conn'),
('backend.unhealthy', 'backend_unhealthy'),
('backend.busy', 'backend_busy'),
('backend.fail', 'backend_fail'),
('backend.reuse', 'backend_reuse'),
('backend.toolate', 'backend_toolate'),
('backend.recycle', 'backend_recycle'),
('backend.retry', 'backend_retry'),
('backend.req', 'backend_req'),
('client.conn', 'client_conn'),
('client.drop', 'client_drop'),
('client.req', 'client_req'),
('client.hdrbytes', 's_hdrbytes'),
('client.bodybytes', 's_bodybytes')]

for (name, metric) in metrics:
status.append(fmt(name, metric))

return status
logging.warning(
"Disconnected from {0}:{1}".format(
self.host, self.port
)
)

def reconnect(self):
self.disconnect()
self.connect()


def collect_metrics(name, format):
if format == 'json':
varnish_stats = json.loads(
subprocess.Popen(
['varnishstat', '-n', name, '-1', '-j'],
stdout=subprocess.PIPE,
).communicate()[0]
)
elif format == 'xml':
varnishstat = objectify.fromstring(
subprocess.Popen(
['varnishstat', '-n', name, '-1', '-x'],
stdout=subprocess.PIPE,
).communicate()[0]
)
varnish_stats = {}
for stat in varnishstat.stat:
varnish_stats[str(stat.name)] = {'value': str(stat.value)}
else:
logging.error("unknown format: %s", format)
timestamp = int(time())

status = []
fmt = lambda x, y: "{0} {1} {2}".format(
x, varnish_stats[y]['value'], timestamp
)

metrics = [('cache.hit', 'cache_hit'),
('cache.hitpass', 'cache_hitpass'),
('cache.miss', 'cache_miss'),
('backend.conn', 'backend_conn'),
('backend.unhealthy', 'backend_unhealthy'),
('backend.busy', 'backend_busy'),
('backend.fail', 'backend_fail'),
('backend.reuse', 'backend_reuse'),
('backend.toolate', 'backend_toolate'),
('backend.recycle', 'backend_recycle'),
('backend.retry', 'backend_retry'),
('backend.req', 'backend_req'),
('client.conn', 'client_conn'),
('client.drop', 'client_drop'),
('client.req', 'client_req'),
('client.hdrbytes', 's_hdrbytes'),
('client.bodybytes', 's_bodybytes')]

for (name, metric) in metrics:
status.append(fmt(name, metric))

return status


def main():
parser = argparse.ArgumentParser(
description='Collect and stream Varnish statistics to Graphite.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)

parser.add_argument('-H', '--host',
default='127.0.0.1',
help='The graphite server host')
parser.add_argument('-p', '--port',
default=2003,
help='The graphite server port')
parser.add_argument('-n', '--name',
default=socket.gethostname(),
help="""Specifies the name of the varnishd instance to get logs from. If
-n is not specified, the host name is used.""")
parser.add_argument('-P', '--prefix',
default='varnish',
help='The prefix for metric names')
parser.add_argument('-i', '--interval',
default=10,
help='The collection interval in seconds')
# Ethernet - (IPv6 + TCP) = 1500 - (40 + 32) = 1428
parser.add_argument('-b', '--buffer-size', dest='buffer_size',
default=1428,
help='The number of bytes to send each time')
# Default 32M max buffer size. Discard anything extra if we aren't able to send.
parser.add_argument('-B', '--max-buffer-size', dest='max_buffer_size',
default=33554432,
help='The maximum number of bytes to buffer when reconnecting')

args = parser.parse_args()

logging.basicConfig(level=logging.INFO)

try:
c = GraphiteClient(args.host, args.port, args.prefix, args.buffer_size, args.max_buffer_size)
while True:
interval_start = time()

c.send_metrics(collect_metrics(args.name))

interval_used = time() - interval_start
if interval_used < float(args.interval):
sleep(float(args.interval) - interval_used)
except KeyboardInterrupt:
c.disconnect();
parser = argparse.ArgumentParser(
description='Collect and stream Varnish statistics to Graphite.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
'-H', '--host',
default='127.0.0.1',
help='The graphite server host'
)
parser.add_argument(
'-p', '--port',
default=2003,
help='The graphite server port'
)
parser.add_argument(
'-n', '--name',
default=socket.gethostname(),
help="""Specifies the name of the varnishd instance to get logs from. If
-n is not specified, the host name is used."""
)
parser.add_argument(
'-f', '--format',
default="json",
help="""Specifies the format of the data to collect from varnishstat. If
-f is not specified, json is used."""
)
parser.add_argument(
'-P', '--prefix',
default='varnish',
help='The prefix for metric names'
)
parser.add_argument(
'-i', '--interval',
default=10,
help='The collection interval in seconds'
)
# Ethernet - (IPv6 + TCP) = 1500 - (40 + 32) = 1428
parser.add_argument(
'-b', '--buffer-size', dest='buffer_size',
default=1428,
help='The number of bytes to send each time'
)
# Default 32M max buffer size.
# Discard anything extra if we aren't able to send.
parser.add_argument(
'-B', '--max-buffer-size', dest='max_buffer_size',
default=33554432,
help='The maximum number of bytes to buffer when reconnecting'
)

args = parser.parse_args()

logging.basicConfig(level=logging.INFO)

try:
c = GraphiteClient(
args.host, args.port, args.prefix,
args.buffer_size, args.max_buffer_size,
)
while True:
interval_start = time()

c.send_metrics(collect_metrics(args.name, args.format))

interval_used = time() - interval_start
if interval_used < float(args.interval):
sleep(float(args.interval) - interval_used)
except KeyboardInterrupt:
c.disconnect()

if __name__ == "__main__":
main()

main()