diff --git a/.gitignore b/.gitignore index 53dd873..ecf50f9 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,7 @@ .vagrant/ .idea/ .vscode/ - +.noseids *.pyc ### JetBrains template diff --git a/README.md b/README.md index b669c6e..4eab6d6 100644 --- a/README.md +++ b/README.md @@ -19,12 +19,12 @@ Overview (DCs) # faq -#### how can I filter by more than 1 value? +#### how can I filter by more than 1 value? -Amend the URL and add all the filters together as a comma-separated list, e.g.: +Amend the URL and add all the filters together as a comma-separated list, e.g.: http://localhost:5000/filtered/aaa,bbb,ccc,ddd -#### what do the filters filter by ? +#### what do the filters filter by ? They filter based on the hosts' subscriptions, except in the Events view where they filter on all properties of the check and the host. @@ -85,7 +85,7 @@ Add via pip install or via your package management ``` useradd -r sensu-grid - + ``` ## run as a service @@ -121,10 +121,13 @@ dcs: port: 4567 user: apiuser password: apipassword - + app: refresh: 60 bg_color: #333333 + # This is a python requests layer timeout, as by default, it does not timeout + requests_timeout: 10 + logging_level: info ``` ## run locally / manually diff --git a/gridcheck.py b/gridcheck.py index 2c82762..47b93a3 100644 --- a/gridcheck.py +++ b/gridcheck.py @@ -1,13 +1,14 @@ import requests import re + def check_connection(dc): url = 'http://{0}:{1}/info'.format(dc['url'], dc['port']) try: if 'user' and 'password' in dc: - r = requests.get(url, auth=(dc['user'], dc['password'])) + r = requests.get(url, auth=(dc['user'], dc['password']), timeout=30) else: - r = requests.get(url) + r = requests.get(url, timeout=30) if r: return True else: diff --git a/gridconfig.py b/gridconfig.py index affa89b..175bcf1 100644 --- a/gridconfig.py +++ b/gridconfig.py @@ -1,6 +1,7 @@ import os import yaml + class Config(object): DEBUG = False TESTING = False @@ -25,4 +26,3 @@ class ProdConfig(Config): class TestingConfig(Config): TESTING = True DEBUG = True - diff --git a/griddata.py b/griddata.py index 531e3aa..95d2f65 100644 --- a/griddata.py +++ b/griddata.py @@ -1,96 +1,144 @@ -from gridcheck import * +import logging +import requests +import six +from functools import partial +from multiprocessing.dummy import Pool as ThreadPool -def get_filter_data(dcs): - filter_data = [] - data = None +from gridcheck import check_stash + + +LOGGER = logging.getLogger(__name__) + + +def _filter_data(timeout, dc): + filter_data = list() r = None + data = None + LOGGER.debug("Retrieving filters for datacenter: {0}".format(dc['name'])) + url = 'http://{0}:{1}/clients'.format(dc['url'], dc['port']) + try: + if 'user' and 'password' in dc: + r = requests.get(url, auth=(dc['user'], dc['password']), timeout=timeout) + else: + r = requests.get(url, timeout=timeout) + r.raise_for_status() + except Exception as ex: + LOGGER.error("Got exception while filtering on clients: {0}".format(str(ex))) + pass + finally: + if r: + data = r.json() + r.close() + else: + LOGGER.error("no reponse") - for dc in dcs: - url = 'http://{0}:{1}/clients'.format(dc['url'], dc['port']) - try: - if 'user' and 'password' in dc: - r = requests.get(url, auth=(dc['user'], dc['password'])) - else: - r = requests.get(url) - except Exception: - pass - finally: - if r: - data = r.json() - r.close() + if data: + for i in data: + for s in i['subscriptions']: + if s not in filter_data: + filter_data.append(s) + else: + LOGGER.error("No response data") + LOGGER.debug("Filter Retrieval for datacenter {0} complete".format(dc['name'])) + return filter_data - if data: - for i in data: - for s in i['subscriptions']: - if s not in filter_data: - filter_data.append(s) - if filter_data: - assert type(filter_data) == list - return filter_data +def get_filter_data(dcs, timeout): + aggregated = list() + final_aggregated_filter_data = [] + pool = ThreadPool(len(dcs)) + func = partial(_filter_data, timeout) + try: + aggregated = pool.map(func, dcs) + assert type(aggregated) == list + for filterdata in aggregated: + if filterdata not in final_aggregated_filter_data: + final_aggregated_filter_data.append(filterdata) + + except Exception as e: + LOGGER.error("unable to get filter data, ex: {0}".format(e)) + finally: + pool.close() - return [] + return final_aggregated_filter_data[0] -def get_data(dc): +def get_data(dc, timeout): + LOGGER.debug("Retrieving data for datacenter: {0}".format(dc['name'])) url = 'http://{0}:{1}/results'.format(dc['url'], dc['port']) data = None r = None try: if 'user' and 'password' in dc: - r = requests.get(url, auth=(dc['user'], dc['password'])) + r = requests.get(url, auth=(dc['user'], dc['password']), timeout=timeout) else: - r = requests.get(url) - - except Exception: + r = requests.get(url, timeout=timeout) + r.raise_for_status() + except Exception as ex: + LOGGER.error("Got exception while retrieving data for dc: {0} ex: {1}".format(dc, str(ex))) pass finally: if r: data = r.json() r.close() + else: + LOGGER.error("no reponse") + LOGGER.debug("Data Retrieval for datacenter {0} complete".format(dc['name'])) return data -def get_clients(dc): +def get_clients(dc, timeout): + LOGGER.debug("Retrieving clients for datacenter: {0}".format(dc['name'])) url = 'http://{0}:{1}/clients'.format(dc['url'], dc['port']) data = None r = None try: if 'user' and 'password' in dc: - r = requests.get(url, auth=(dc['user'], dc['password'])) + r = requests.get(url, auth=(dc['user'], dc['password']), timeout=timeout) + r.raise_for_status() data = r.json() else: - r = requests.get(url) + r = requests.get(url, timeout=timeout) data = r.json() - except Exception: + except Exception as ex: + LOGGER.error("Got exception while retrieving clients for dc: {0} ex: {1}".format(dc, str(ex))) pass finally: if r: r.close() + else: + LOGGER.error("no reponse") + LOGGER.debug("Client Retrieval for datacenter {0} complete".format(dc['name'])) return data -def get_stashes(dc): +def get_stashes(dc, timeout): + LOGGER.debug("Retrieving stashes for datacenter: {0}".format(dc['name'])) url = 'http://{0}:{1}/silenced'.format(dc['url'], dc['port']) data = None r = None try: if 'user' and 'password' in dc: - r = requests.get(url, auth=(dc['user'], dc['password'])) + r = requests.get(url, auth=(dc['user'], dc['password']), timeout=timeout) + r.raise_for_status() data = r.json() else: - r = requests.get(url) + r = requests.get(url, timeout=timeout) data = r.json() - except Exception: + except Exception as ex: + LOGGER.error("Got exception while retrieving stashes for dc: {0} ex: {1}".format(dc, str(ex))) pass finally: if r: r.close() + else: + LOGGER.error("no reponse") + LOGGER.debug("Stash Retrieval for datacenter {0} complete".format(dc['name'])) return data @@ -104,7 +152,15 @@ def filter_object(obj, search): if filter_object(value, search): return True else: - return unicode(search) in unicode(obj) + LOGGER.debug("search type {0} // obj type {1}".format(type(search), type(obj))) + try: + return six.u(search) in six.b(obj) + except TypeError as e: + LOGGER.warn("filter_object exception (PY2 vs PY3 unicode/str): {0}".format(e)) + try: + return unicode(search) in unicode(obj) + except Exception as e: + LOGGER.error("filter_object exception: {0}".format(e)) return False @@ -119,7 +175,8 @@ def filter_event(event): return filter_event -def get_events(dc, filters=[]): +def get_events(dc, timeout, filters=[]): + LOGGER.debug("Retrieving events for datacenter: {0}".format(dc['name'])) url = 'http://{0}:{1}/events'.format(dc['url'], dc['port']) data = [] @@ -127,15 +184,20 @@ def get_events(dc, filters=[]): try: if 'user' and 'password' in dc: - r = requests.get(url, auth=(dc['user'], dc['password'])) + r = requests.get(url, auth=(dc['user'], dc['password']), timeout=timeout) + r.raise_for_status() data = r.json() else: - r = requests.get(url) + r = requests.get(url, timeout=timeout) data = r.json() + except Exception as ex: + LOGGER.error("Got exception while retrieving events for dc: {0} ex: {1}".format(dc, str(ex))) + pass finally: if r: r.close() + LOGGER.debug("Events Retrieval for datacenter {0} complete".format(dc['name'])) if len(filters) > 0: return filter(filter_events(filters), data) else: @@ -186,7 +248,8 @@ def agg_data(dc, data, stashes, client_data=None, filters=None): if i['check']['name'] == "keepalive" and i['check']['status'] == 2: if not check_stash(stashes, i['client'], i['check']['name']): - # we cannot currently apply filters as keepalive checks do not have subscribers/subscriptions + # we cannot currently apply filters as keepalive checks do + # not have subscribers/subscriptions down += 1 else: ack += 1 diff --git a/requirements.txt b/requirements.txt index 28a3f55..0dafcce 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ -Flask -PyYAML +Flask==0.12.2 +PyYAML==3.12 argparse -requests -gunicorn \ No newline at end of file +requests==2.18.4 +gunicorn==19.7.1 +six==1.11.0 diff --git a/sensugrid.py b/sensugrid.py index 06e8645..a41d6b5 100644 --- a/sensugrid.py +++ b/sensugrid.py @@ -9,13 +9,25 @@ from flask import abort from reverseproxied import ReverseProxied -from griddata import * -from gridconfig import * +from gridcheck import check_connection +from griddata import ( + agg_data, + get_data, + agg_host_data, + get_stashes, + get_filter_data, + get_clients, + get_events +) +from gridconfig import DevConfig from multiprocessing.dummy import Pool as ThreadPool # https://stackoverflow.com/questions/2846653/how-to-use-threading-in-python import json +import logging +import logging.config + app = Flask(__name__) app.wsgi_app = ReverseProxied(app.wsgi_app) @@ -24,10 +36,66 @@ app.config.from_object(myconfig) dcs = app.config['DCS'] appcfg = app.config['APPCFG'] +timeout = appcfg.get('requests_timeout', 10) +log_level = appcfg.get('logging_level', 'INFO').upper() +logging.config.dictConfig({ + "version": 1, + "disable_existing_loggers": True, + "formatters": { + "simple": { + "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + } + }, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "level": log_level, + "formatter": "simple", + "stream": "ext://sys.stdout" + } + }, + "loggers": { + "requests": { + "level": "WARNING", + "handlers": ["console"], + "propagate": False + }, + "sensugrid": { + "level": log_level, + "handlers": ["console"], + "propagate": False + }, + "gridcheck": { + "level": log_level, + "handlers": ["console"], + "propagate": False + }, + "gridconfig": { + "level": log_level, + "handlers": ["console"], + "propagate": False + }, + "griddata": { + "level": log_level, + "handlers": ["console"], + "propagate": False + }, + }, + "": { + "level": log_level, + "handlers": ["console"] + } +}) +LOGGER = logging.getLogger(__name__) + + +# Python3 doesn't have cmp +def _cmp(x, y): + return (x > y) - (x < y) def get_agg_data(dc): - r = agg_data(dc, get_data(dc), get_stashes(dc)) + r = agg_data(dc, get_data(dc, timeout), get_stashes(dc, timeout)) return r @@ -41,7 +109,7 @@ def root(): print("Exception: ", e) finally: pool.close() - return render_template('data.html', dcs=dcs, data=aggregated, filter_data=get_filter_data(dcs), appcfg=appcfg) + return render_template('data.html', dcs=dcs, data=aggregated, filter_data=get_filter_data(dcs, timeout), appcfg=appcfg) @app.route('/filtered/', methods=['GET']) @@ -49,9 +117,10 @@ def filtered(filters): aggregated = [] for dc in dcs: if check_connection(dc): - aggregated.append(agg_data(dc, get_data(dc), get_stashes(dc), get_clients(dc), filters)) + aggregated.append(agg_data(dc, get_data(dc, timeout), get_stashes( + dc, timeout), get_clients(dc, timeout), filters)) - return render_template('data.html', dcs=dcs, data=aggregated, filter_data=get_filter_data(dcs), appcfg=appcfg) + return render_template('data.html', dcs=dcs, data=aggregated, filter_data=get_filter_data(dcs, timeout), appcfg=appcfg) @app.route('/show/', methods=['GET']) @@ -63,16 +132,17 @@ def showgrid(d, filters=None): if dc['name'] == d: if check_connection(dc): if filters: - clients = get_clients(dc) + clients = get_clients(dc, timeout) else: clients = None - data_detail = agg_host_data(get_data(dc), get_stashes(dc), clients, filters) + data_detail = agg_host_data(get_data(dc, timeout), + get_stashes(dc, timeout), clients, filters) if data_detail: break else: abort(404) - return render_template('detail.html', dc=dc, data=data_detail, filter_data=get_filter_data(dcs), appcfg=appcfg) + return render_template('detail.html', dc=dc, data=data_detail, filter_data=get_filter_data(dcs, timeout), appcfg=appcfg) @app.route('/events/') @@ -87,15 +157,16 @@ def events(d, filters=''): if dc['name'] == d: dc_found = True if check_connection(dc): - results += get_events(dc, filters.split(',')) + results += get_events(dc, timeout, filters.split(',')) break if dc_found is False: abort(404) - results = sorted(results, lambda x, y: cmp(x['check']['status'], y['check']['status']), reverse=True) + results = sorted(results, lambda x, y: _cmp( + x['check']['status'], y['check']['status']), reverse=True) - return render_template('events.html', dc=dc, data=results, filter_data=get_filter_data(dcs), appcfg=appcfg) + return render_template('events.html', dc=dc, data=results, filter_data=get_filter_data(dcs, timeout), appcfg=appcfg) @app.route('/healthcheck', methods=['GET']) @@ -152,6 +223,7 @@ def icon_for_event(event): return 'question-circle' + if __name__ == '__main__': app.run(host='0.0.0.0', diff --git a/vagrant-sensu-config/conf.d/failing-check3.json b/vagrant-sensu-config/conf.d/failing-check3.json new file mode 100644 index 0000000..9f3e8d8 --- /dev/null +++ b/vagrant-sensu-config/conf.d/failing-check3.json @@ -0,0 +1,12 @@ +{ + "checks": { + "testing-fail": { + "command": "echo -n FAILING 2; exit 2;", + "subscribers": [ + "testing" + ], + "standalone": true, + "interval": 60 + } +} +} diff --git a/vagrant-sensu-config/conf.d/failing-check4.json b/vagrant-sensu-config/conf.d/failing-check4.json new file mode 100644 index 0000000..ba9c4c4 --- /dev/null +++ b/vagrant-sensu-config/conf.d/failing-check4.json @@ -0,0 +1,12 @@ +{ + "checks": { + "diskspace": { + "command": "echo -n CRITICAL; exit 2;", + "subscribers": [ + "default" + ], + "standalone": true, + "interval": 60 + } +} +} diff --git a/vagrant-sensu-config/conf.d/warning-check2.json b/vagrant-sensu-config/conf.d/warning-check2.json new file mode 100644 index 0000000..67bc959 --- /dev/null +++ b/vagrant-sensu-config/conf.d/warning-check2.json @@ -0,0 +1,12 @@ +{ + "checks": { + "testing-warning": { + "command": "echo -n WARNING 2; exit 1;", + "subscribers": [ + "testing" + ], + "standalone": true, + "interval": 60 + } +} +} diff --git a/vagrant-sensu-config/conf.d/warning-check3.json b/vagrant-sensu-config/conf.d/warning-check3.json new file mode 100644 index 0000000..42e2001 --- /dev/null +++ b/vagrant-sensu-config/conf.d/warning-check3.json @@ -0,0 +1,12 @@ +{ + "checks": { + "cpu": { + "command": "echo -n 'WARNING cpu > 75%'; exit 1;", + "subscribers": [ + "default" + ], + "standalone": true, + "interval": 60 + } +} +}