From 811c0f9a3819b12f0f25f79164ae5f14b7f1453f Mon Sep 17 00:00:00 2001 From: wulixuan Date: Sat, 3 Mar 2018 15:54:26 +0800 Subject: [PATCH] Initial commit --- .../inspectionProfiles/profiles_settings.xml | 5 + .idea/misc.xml | 4 + .idea/modules.xml | 8 + .idea/monitor_rest.iml | 8 + db.sqlite3 | Bin 0 -> 3072 bytes manage.py | 22 ++ metrics/__init__.py | 0 metrics/ambari_agent_metrics.py | 199 ++++++++++++ metrics/ambari_server_metrics.py | 200 ++++++++++++ metrics/consul_metrics.py | 304 ++++++++++++++++++ metrics/grafana_metrics.py | 218 +++++++++++++ metrics/keycloak_metrics.py | 200 ++++++++++++ metrics/knox_metrics.py | 197 ++++++++++++ metrics/ldap_metrics.py | 199 ++++++++++++ metrics/monitor.py | 65 ++++ metrics/monitor_params.py | 59 ++++ metrics/mysql_metrics.py | 209 ++++++++++++ metrics/nginx_metrics.py | 211 ++++++++++++ metrics/process_info.txt | 50 +++ metrics/process_status_exporter.py | 241 ++++++++++++++ metrics/prometheus_metrics.py | 218 +++++++++++++ metrics/tomcat_metrics.py | 232 +++++++++++++ metrics/utils.py | 93 ++++++ myapp/__init__.py | 0 myapp/admin.py | 3 + myapp/apps.py | 5 + myapp/models.py | 42 +++ myapp/params.py | 10 + myapp/parse.py | 237 ++++++++++++++ myapp/parse.py.bak20180110 | 214 ++++++++++++ myapp/serializers.py | 13 + myapp/test_metrics.py | 204 ++++++++++++ myapp/tests.py | 3 + myapp/urls.py | 12 + myapp/views.py | 62 ++++ rest/__init__.py | 0 rest/settings.py | 129 ++++++++ rest/urls.py | 22 ++ rest/wsgi.py | 16 + 39 files changed, 3914 insertions(+) create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/monitor_rest.iml create mode 100644 db.sqlite3 create mode 100644 manage.py create mode 100644 metrics/__init__.py create mode 100644 metrics/ambari_agent_metrics.py create mode 100644 metrics/ambari_server_metrics.py create mode 100644 metrics/consul_metrics.py create mode 100644 metrics/grafana_metrics.py create mode 100644 metrics/keycloak_metrics.py create mode 100644 metrics/knox_metrics.py create mode 100644 metrics/ldap_metrics.py create mode 100644 metrics/monitor.py create mode 100644 metrics/monitor_params.py create mode 100644 metrics/mysql_metrics.py create mode 100644 metrics/nginx_metrics.py create mode 100644 metrics/process_info.txt create mode 100644 metrics/process_status_exporter.py create mode 100644 metrics/prometheus_metrics.py create mode 100644 metrics/tomcat_metrics.py create mode 100644 metrics/utils.py create mode 100644 myapp/__init__.py create mode 100644 myapp/admin.py create mode 100644 myapp/apps.py create mode 100644 myapp/models.py create mode 100644 myapp/params.py create mode 100644 myapp/parse.py create mode 100644 myapp/parse.py.bak20180110 create mode 100644 myapp/serializers.py create mode 100644 myapp/test_metrics.py create mode 100644 myapp/tests.py create mode 100644 myapp/urls.py create mode 100644 myapp/views.py create mode 100644 rest/__init__.py create mode 100644 rest/settings.py create mode 100644 rest/urls.py create mode 100644 rest/wsgi.py diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..0eefe32 --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..9431dc4 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git 
a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..26e30ed --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/monitor_rest.iml b/.idea/monitor_rest.iml new file mode 100644 index 0000000..d0876a7 --- /dev/null +++ b/.idea/monitor_rest.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/db.sqlite3 b/db.sqlite3 new file mode 100644 index 0000000000000000000000000000000000000000..919f2e5f580e255249506160e532d798db468f9d GIT binary patch literal 3072 zcmeHIy>7xV5VjN4PMuj#R)<7@kXo@ftumm70BIO9A(yzRtNeiDc5fe%N9dFEf&>zv zsuLB1PdZ!YyYGDZ&L=-j1E~ePRI1<_Zb^$!O6~v%Ar#d{-AZG(wg&Ev{g+VE`*yD| z%CgqPzQ)CfVc?_;OstD`r$ZCXpL0>Ka(v$NTCCnhxi~cKhsgJ0A7bx5@ZliVDS08f zh;%;_dfD#x=@;MROy78!Rr5k-ifdVw_0fT~mzN#+88}R)3}mTACKQC@7{XZ)z$6-u zyeNT3KLKwRkB1><8u?+|1;&?41|M84Ub%8^1_O5oLuXU#?_PxFQs9Lt*Fwue?4X<3 g+?(o?_QoVP3>XH^iGk*CHuHZ@9Wk{S2L6wMA8dYWCIA2c literal 0 HcmV?d00001 diff --git a/manage.py b/manage.py new file mode 100644 index 0000000..455d8dc --- /dev/null +++ b/manage.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +import os +import sys + +if __name__ == "__main__": + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "rest.settings") + try: + from django.core.management import execute_from_command_line + except ImportError: + # The above import may fail for some other reason. Ensure that the + # issue is really that Django is missing to avoid masking other + # exceptions on Python 2. + try: + import django + except ImportError: + raise ImportError( + "Couldn't import Django. Are you sure it's installed and " + "available on your PYTHONPATH environment variable? Did you " + "forget to activate a virtual environment?" + ) + raise + execute_from_command_line(sys.argv) diff --git a/metrics/__init__.py b/metrics/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/metrics/ambari_agent_metrics.py b/metrics/ambari_agent_metrics.py new file mode 100644 index 0000000..8617a97 --- /dev/null +++ b/metrics/ambari_agent_metrics.py @@ -0,0 +1,199 @@ +#!/usr/bin/python +#-*- coding:utf-8 -*- +import os, sys +import re + +import requests +import argparse +import logging +import json + +import monitor_params +from time import time +import utils +sys.path.append("..") +from myapp.parse import ParseUtil +from time import time + + +''' + Scrape ambari_agent metrics from Prometheus. +''' + +logging.basicConfig(level=logging.INFO, + format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', + datefmt='%a, %d %b %Y %H:%M:%S') +import sys +logger = logging.getLogger(sys.path[0] + 'nginx_metrics') + +class AmbariAgentMetrics(object): + + def __init__(self): + pass + + + def ip_list(self): + ''' + return tomcat_ip list + ''' + ip_list = [] + try: + list = re.split(r'[,\s]\s*', monitor_params.ambari_agent_ip.strip()) + except: + logging.error("Can't split ambari_agent_ip. Check the ambari_agent_ip in monitor_params.py.") + sys.exit(1) + else: + ip_list = list + return ip_list + + def ambari_agent_process_instance(self): + ''' + @return list of ambari_agent instances. + ''' + process_instance = utils.get_instances(monitor_params.ambari_agent_ip, monitor_params.process_exporter_port) + return process_instance + + def ambari_agent_cluster_state(self): + ''' + @return ambari-agent cluster state and the numbers of healthy nodes. 
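+        The cluster is counted healthy as long as at least one agent reports up.
+        A sketch of the expected return shape (values here are hypothetical):
+
+            >>> AmbariAgentMetrics().ambari_agent_cluster_state()
+            [1.0, 3.0]    # [cluster_state, number_of_healthy_nodes]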
+ ''' + process_instances = self.ambari_agent_process_instance() + state = 0.0 + success_count = 0.0 + + for i in range(len(process_instances)): + ambari_agent_up = self.ambari_agent_node_state(process_instances[i]) + if ambari_agent_up: + success_count +=1 + else: + continue + if success_count >= 1: + state = 1.0 + logging.info("ambari_agent state is %s" % (state)) + return [state, success_count] + + def ambari_agent_node_state(self, process_instance): + ''' + @return a float value 1 or 0, indicating the node state up or down. + ''' + state = {} + url = utils.prometheus_url() + param = { + "query": 'ambari_agent_process_up{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + state.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if state.has_key(process_instance): + return float(state[process_instance]) + else: + logging.error("No instance in the ambari-agent cluster, ambari-agent {0} down.".format(process_instance)) + return 0.0 + + + def ambari_agent_cpu_usage(self, process_instance): + ''' + @return ambari-agent cpu usage. + ''' + cpu_usage = {} + url = utils.prometheus_url() + param = { + "query": 'ambari_agent_cpu_percentage{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + cpu_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if cpu_usage.has_key(process_instance): + return float(cpu_usage[process_instance]) + else: + logging.error("No instance in the ambari-agent cluster, get ambari-agent {0} cpu usage failed.".format(process_instance)) + return None + + def ambari_agent_uptime(self, process_instance): + ''' + @return a float value of create time. + ''' + uptime = {} + url = utils.prometheus_url() + param = { + "query": 'ambari_agent_running_time_seconds_total{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + uptime.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if uptime.has_key(process_instance): + return float(uptime[process_instance]) + else: + logging.error("No instance in the ambari-agent cluster, get ambari-agent {0} uptime failed.".format(process_instance)) + return None + + def ambari_agent_mem_usage(self, process_instance): + ''' + @return ambari-agent memory usage. 
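+        A sketch of the instant query this method sends to Prometheus, built
+        from the query template below (the instance label is filled in from
+        process_instance):
+
+            sum by (instance)(ambari_agent_memory_usage_bytes_total{
+                instance="<ip>:<process_exporter_port>", mode=~"rss|vms|shared"})
+
+        i.e. the rss, vms and shared byte counts are summed per instance.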
+ ''' + mem_usage = {} + url = utils.prometheus_url() + param = { + "query": 'sum by (instance)(ambari_agent_memory_usage_bytes_total{{instance="{0}", mode=~"rss|vms|shared"}})'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + mem_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if mem_usage.has_key(process_instance): + return float(mem_usage[process_instance]) + else: + logging.error("No instance in the ambari-agent cluster, get ambari-agent {0} memory usage failed.".format(process_instance)) + return None + + + def ambari_agent_cluster_list(self): + process_instances = self.ambari_agent_process_instance() + uptime = time() + for i in range(len(process_instances)): + state = self.ambari_agent_node_state(process_instances[i]) + if state: + uptime = self.ambari_agent_uptime(process_instances[i]) + break + else: + continue + + node_info = [] + for i in range(len(process_instances)): + node_info.append(self.ambari_agent_node_detail(process_instances[i])) + + cluster_info = { + "ambari_agent_cluster_state" : self.ambari_agent_cluster_state()[0], + "ambari_agent_total_nodes" : float(len(self.ip_list())), + "ambari_agent_healthy_nodes" : self.ambari_agent_cluster_state()[1], + "ambari_agent_uptime" : time() - uptime, + "ambari_agent_nodes_info": node_info + } + return cluster_info + + def ambari_agent_node_detail(self, process_instance): + if not self.ambari_agent_node_state(process_instance): + node_info = { + "ambari_agent_node_state" : 0.0, + "ambari_agent_uptime" : 0.0, + "ambari_agent_cpu_usage" : 0.0, + "ambari_agent_mem_usage" : 0.0, + "prometheus_url" : None + } + else: + node_info = { + "ambari_agent_node_state" : self.ambari_agent_node_state(process_instance), + "ambari_agent_uptime" : time() - self.ambari_agent_uptime(process_instance), + "ambari_agent_cpu_usage" : self.ambari_agent_cpu_usage(process_instance), + "ambari_agent_mem_usage" : self.ambari_agent_mem_usage(process_instance), + "ambari_agent_url" : 'http://{0}/dashboard/db/ambari-agent-dashboard-for-prometheus?orgId=1&var-instance={1}'.format(utils.grafana_url(), process_instance) + } + return node_info + + +def main(): + ambari_agent = AmbariAgentMetrics() + ambari_agent.ambari_agent_cluster_list() + +if __name__ == '__main__': + main() + diff --git a/metrics/ambari_server_metrics.py b/metrics/ambari_server_metrics.py new file mode 100644 index 0000000..7e8f65a --- /dev/null +++ b/metrics/ambari_server_metrics.py @@ -0,0 +1,200 @@ +#!/usr/bin/python +#-*- coding:utf-8 -*- +import os, sys +import re + +import requests +import argparse +import logging +import json + +import monitor_params +from time import time +import utils +sys.path.append("..") +from myapp.parse import ParseUtil +from time import time + + +''' + Scrape ambari_server metrics from Prometheus. +''' + +logging.basicConfig(level=logging.INFO, + format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', + datefmt='%a, %d %b %Y %H:%M:%S') +import sys +logger = logging.getLogger(sys.path[0] + 'nginx_metrics') + +class AmbariServerMetrics(object): + + def __init__(self): + pass + + + def ip_list(self): + ''' + return tomcat_ip list + ''' + ip_list = [] + try: + list = re.split(r'[,\s]\s*', monitor_params.ambari_server_ip.strip()) + except: + logging.error("Can't split ambari_server_ip. 
Check the ambari_server_ip in monitor_params.py.") + sys.exit(1) + else: + ip_list = list + return ip_list + + def ambari_server_process_instance(self): + ''' + @return list of ambari_server instances. + ''' + process_instance = utils.get_instances(monitor_params.ambari_server_ip, monitor_params.process_exporter_port) + logging.info("Ambari-server PROCESS INSTANCE: {0}".format(process_instance)) + return process_instance + + def ambari_server_cluster_state(self): + ''' + @return ambari-servers state and numbers of healthy nodes. + ''' + process_instances = self.ambari_server_process_instance() + state = 0.0 + success_count = 0.0 + + for i in range(len(process_instances)): + ambari_server_up = self.ambari_server_node_state(process_instances[i]) + if ambari_server_up: + success_count +=1 + else: + continue + if success_count >= 1: + state = 1.0 + logging.info("ambari_server state is %s" % (state)) + return [state, success_count] + + def ambari_server_node_state(self, process_instance): + ''' + @return a float value 1 or 0, indicating the node state up or down. + ''' + state = {} + url = utils.prometheus_url() + param = { + "query": 'ambari_server_process_up{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + state.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if state.has_key(process_instance): + return float(state[process_instance]) + else: + logging.error("No instance in the ambari-server cluster, ambari-server {0} down.".format(process_instance)) + return 0.0 + + + def ambari_server_cpu_usage(self, process_instance): + ''' + @return ambari-server cpu usage. + ''' + cpu_usage = {} + url = utils.prometheus_url() + param = { + "query": 'ambari_server_cpu_percentage{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + cpu_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if cpu_usage.has_key(process_instance): + return float(cpu_usage[process_instance]) + else: + logging.error("No instance in the ambari-server cluster, get ambari-server {0} cpu usage failed.".format(process_instance)) + return None + + def ambari_server_uptime(self, process_instance): + ''' + @return a float value of create time. + ''' + uptime = {} + url = utils.prometheus_url() + param = { + "query": 'ambari_server_running_time_seconds_total{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + uptime.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if uptime.has_key(process_instance): + return float(uptime[process_instance]) + else: + logging.error("No instance in the ambari-server cluster, get ambari-server {0} uptime failed.".format(process_instance)) + return None + + def ambari_server_mem_usage(self, process_instance): + ''' + @return ambari-server memory usage. 
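+        The returned value is in bytes; a caller could convert it for
+        display, e.g. (hypothetical snippet, not part of this module):
+
+            mem_bytes = AmbariServerMetrics().ambari_server_mem_usage(instance)
+            if mem_bytes is not None:
+                print "%.1f MiB" % (mem_bytes / 1024.0 / 1024.0)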
+ ''' + mem_usage = {} + url = utils.prometheus_url() + param = { + "query": 'sum by (instance)(ambari_server_memory_usage_bytes_total{{instance="{0}", mode=~"rss|vms|shared"}})'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + mem_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if mem_usage.has_key(process_instance): + return float(mem_usage[process_instance]) + else: + logging.error("No instance in the ambari-server cluster, get ambari-server {0} memory usage failed.".format(process_instance)) + return None + + def ambari_server_cluster_list(self): + process_instances = self.ambari_server_process_instance() + uptime = time() + for i in range(len(process_instances)): + state = self.ambari_server_node_state(process_instances[i]) + if state: + uptime = self.ambari_server_uptime(process_instances[i]) + break + else: + continue + + node_info = [] + for i in range(len(process_instances)): + node_info.append(self.ambari_server_node_detail(process_instances[i])) + + cluster_info = { + "ambari_server_cluster_state" : self.ambari_server_cluster_state()[0], + "ambari_server_total_nodes" : float(len(self.ip_list())), + "ambari_server_healthy_nodes" : self.ambari_server_cluster_state()[1], + "ambari_server_uptime" : time() - uptime, + "ambari_server_nodes_info": node_info + } + return cluster_info + + def ambari_server_node_detail(self, process_instance): + logging.info("STATE IS : {0}".format(self.ambari_server_node_state(process_instance))) + if not self.ambari_server_node_state(process_instance): + node_info = { + "ambari_server_node_state" : 0.0, + "ambari_server_uptime" : 0.0, + "ambari_server_cpu_usage" : 0.0, + "ambari_server_mem_usage" : 0.0, + "prometheus_url" : None + } + else: + node_info = { + "ambari_server_node_state" : self.ambari_server_node_state(process_instance), + "ambari_server_uptime" : time() - self.ambari_server_uptime(process_instance), + "ambari_server_cpu_usage" : self.ambari_server_cpu_usage(process_instance), + "ambari_server_mem_usage" : self.ambari_server_mem_usage(process_instance), + "ambari_server_url" : 'http://{0}/dashboard/db/ambari-server-dashboard-for-prometheus?orgId=1&var-instance={1}'.format(utils.grafana_url(), process_instance) + } + return node_info + + +def main(): + ambari_server = AmbariServerMetrics() + ambari_server.ambari_server_cluster_list() + +if __name__ == '__main__': + main() + diff --git a/metrics/consul_metrics.py b/metrics/consul_metrics.py new file mode 100644 index 0000000..15e847f --- /dev/null +++ b/metrics/consul_metrics.py @@ -0,0 +1,304 @@ +#!/usr/bin/python +#-*- coding:utf-8 -*- +import os, sys +import re +import time +import requests +import argparse +import logging +import json + +import monitor_params +import utils +# import father directory, append father directory to the sys.path +sys.path.append("..") +from myapp.parse import ParseUtil +from time import time + +''' +Scrape consul metrics from Consul Cluster or consul_exporter. +''' + +logging.basicConfig(level=logging.INFO, + format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', + datefmt='%a, %d %b %Y %H:%M:%S') +import sys +logger = logging.getLogger(sys.path[0] + 'consul_metrics') + + +class ConsulMetrics(object): + + def __init__(self): + pass + + def ip_list(self): + ''' + return consul_ip list + ''' + ip_list = [] + try: + list = re.split(r'[,\s]\s*', monitor_params.consul_ip.strip()) + except: + logging.error("Can't split consul_ip. 
Check the consul_ip in monitor_params.py.")
+            sys.exit(1)
+        else:
+            ip_list = list
+
+        print ip_list
+        return ip_list
+
+
+    def raft_peers(self):
+        '''
+        count how many peers in the cluster via HTTP API.
+        '''
+        ip_list = self.ip_list()
+        consul_port = monitor_params.consul_port
+        list_length = len(ip_list)
+        result = []
+        # guard against all requests failing below
+        res = []
+
+        for i in range(list_length):
+            url = 'http://{0}:{1}/v1/status/peers'.format(ip_list[i], consul_port)
+            logging.info("start GET %s", url)
+            try:
+                response = requests.get(url, timeout=5)
+                response.raise_for_status()
+            except requests.RequestException as e:
+                logging.error("Connection error happens, please check the url. Error message: %s" % (e))
+                continue
+            else:
+                res = response.json()
+                break
+        for i in range(len(res)):
+            result.append(res[i].split(":")[0])
+        logging.info("raft peers are: %s, and the number of peers is: %s" % (result, len(result)))
+        return [result, float(len(result))]
+
+    def catalog_nodes(self):
+        '''
+        count how many nodes in the cluster via HTTP API.
+        '''
+        ip_list = self.ip_list()
+        consul_port = monitor_params.consul_port
+        list_length = len(ip_list)
+        node_list = []
+        # guard against all requests failing below
+        result = None
+
+        for i in range(list_length):
+            url = 'http://{0}:{1}/v1/catalog/nodes'.format(ip_list[i], consul_port)
+            logging.info("start GET %s", url)
+            try:
+                response = requests.get(url, timeout=5)
+                response.raise_for_status()
+            except requests.RequestException as e:
+                logging.error("Connection error happens, please check the url. Error message: %s" % (e))
+                continue
+            else:
+                result = response.json()
+                break
+        if result:
+            for i in range(len(result)):
+                node_list.append(result[i]['Node'])
+        else:
+            logging.error('No node in the catalog, please check.')
+
+        return node_list
+
+    def catalog_services(self):
+        '''
+        count how many services in the cluster via HTTP API.
+        '''
+        node_list = self.catalog_nodes()
+        ip_list = self.ip_list()
+        consul_port = monitor_params.consul_port
+        list_length = len(ip_list)
+        service_list = {}
+        service_count = 0
+
+        for node_name in range(len(node_list)):
+            # reset per node so a failed lookup does not reuse the last result
+            result = None
+            for ip in range(list_length):
+                url = 'http://{0}:{1}/v1/catalog/node/{2}'.format(ip_list[ip], consul_port, node_list[node_name])
+                logging.info("start GET %s", url)
+                try:
+                    response = requests.get(url, timeout=5)
+                    response.raise_for_status()
+                except requests.RequestException as e:
+                    logging.error("Connection error happens, please check the url. Error message: %s" % (e))
+                    continue
+                else:
+                    result = response.json()
+                    break
+            if result:
+                logging.info("{0} services in node {1}.".format(len(result['Services']), node_list[node_name]))
+                service_list.setdefault(node_list[node_name], len(result['Services']))
+                service_count += len(result['Services'])
+            else:
+                service_list.setdefault(node_list[node_name], 0)
+        logging.info("There are/is {0} service(s) in cluster nodes.".format(service_count))
+        return service_count
+
+    def consul_process_instance(self):
+        '''
+        @return list of consul instances.
+        '''
+        instances = utils.get_instances(monitor_params.consul_ip, monitor_params.process_exporter_port)
+        return instances
+
+    def instance_info(self):
+        instance_list = utils.get_instances(monitor_params.consul_ip, monitor_params.consul_exporter_port)
+        return instance_list
+
+
+    def consul_cluster_state(self):
+        '''
+        Even if the leader goes down, as long as more than half of the peers remain, the cluster can elect a new leader.
+        So the cluster can still work well.
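+        Majority math used below (n = number of configured consul IPs):
+        the cluster is reported up when success_count >= int(n/2) + 1.
+        For example, n = 3 gives a majority of 2, so one node may fail;
+        n = 5 gives a majority of 3, so two nodes may fail.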
+        '''
+        success_count = 0.0
+        members_count = len(self.ip_list())
+        process_instances = self.consul_process_instance()
+
+        for i in range(len(process_instances)):
+            consul_up = self.consul_node_state(process_instances[i])
+            if consul_up:
+                success_count += 1
+            else:
+                continue
+        if (success_count >= (int(members_count/2) + 1)):
+            state = 1.0
+        else:
+            state = 0.0
+        logging.info("success count is: %s, and state is %s" % (success_count, state))
+        return [state, success_count]
+
+
+    def consul_node_state(self, process_instance):
+        '''
+        @return a float value 1 or 0, indicating the node state up or down.
+        '''
+        state = {}
+        url = utils.prometheus_url()
+        param = {
+            "query": 'consul_process_up{{instance="{0}"}}'.format(process_instance)
+        }
+        response = ParseUtil.request_metrics(url, param)
+        for i in range(len(response)):
+            state.setdefault(response[i]['metric']['instance'], response[i]['value'][1])
+        if state.has_key(process_instance):
+            print float(state[process_instance])
+            return float(state[process_instance])
+        else:
+            logging.error("No instance in the consul cluster, consul node {0} down.".format(process_instance))
+            return 0.0
+
+
+    def consul_cpu_usage(self, process_instance):
+        '''
+        @return consul cpu usage.
+        '''
+        cpu_usage = {}
+        url = utils.prometheus_url()
+        param = {
+            "query": 'consul_cpu_percentage{{instance="{0}"}}'.format(process_instance)
+        }
+        response = ParseUtil.request_metrics(url, param)
+        for i in range(len(response)):
+            cpu_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1])
+        if cpu_usage.has_key(process_instance):
+            print float(cpu_usage[process_instance])
+            return float(cpu_usage[process_instance])
+        else:
+            logging.error("Error happens. No instance in the consul cluster, please check.")
+            return None
+
+    def consul_uptime(self, process_instance):
+        '''
+        @return a float value of create time.
+        '''
+        uptime = {}
+        url = utils.prometheus_url()
+        param = {
+            "query": 'consul_running_time_seconds_total{{instance="{0}"}}'.format(process_instance)
+        }
+        response = ParseUtil.request_metrics(url, param)
+        for i in range(len(response)):
+            uptime.setdefault(response[i]['metric']['instance'], response[i]['value'][1])
+        if uptime.has_key(process_instance):
+            print float(uptime[process_instance])
+            return float(uptime[process_instance])
+        else:
+            logging.error("Error happens. No instance in the consul cluster, please check.")
+            return None
+
+    def consul_mem_usage(self, process_instance):
+        '''
+        @return consul memory usage.
+        '''
+        mem_usage = {}
+        url = utils.prometheus_url()
+        param = {
+            "query": 'sum by (instance)(consul_memory_usage_bytes_total{{instance="{0}", mode=~"rss|vms|shared"}})'.format(process_instance)
+        }
+        response = ParseUtil.request_metrics(url, param)
+        for i in range(len(response)):
+            mem_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1])
+        if mem_usage.has_key(process_instance):
+            print float(mem_usage[process_instance])
+            return float(mem_usage[process_instance])
+        else:
+            logging.error("Error happens. No instance in the consul cluster, please check.")
+            return None
+
+
+    def consul_cluster_list(self):
+        process_instances = self.consul_process_instance()
+        instances = self.instance_info()
+        uptime = time()
+        for i in range(len(process_instances)):
+            state = self.consul_node_state(process_instances[i])
+            if state:
+                uptime = self.consul_uptime(process_instances[i])
+                break
+            else:
+                continue
+
+        node_info = []
+        for i in range(len(process_instances)):
+            print "consul process_instance={0}".format(process_instances[i])
+            node_info.append(self.consul_node_detail(process_instances[i], process_instances[i]))
+        print "consul node_info={0}".format(node_info)
+
+        cluster_info = {
+            "consul_cluster_state" : self.consul_cluster_state()[0],
+            "consul_total_nodes" : float(len(self.ip_list())),
+            "consul_healthy_nodes" : self.consul_cluster_state()[1],
+            "consul_uptime" : time() - uptime,
+            "consul_nodes_info": node_info
+        }
+        return cluster_info
+
+    def consul_node_detail(self, process_instance, instance):
+        if not self.consul_node_state(process_instance):
+            node_info = {
+                "consul_node_state" : 0.0,
+                "consul_uptime" : 0.0,
+                "consul_cpu_usage" : 0.0,
+                "consul_mem_usage" : 0.0,
+                "consul_url" : None
+            }
+        else:
+            node_info = {
+                "consul_node_state" : self.consul_node_state(process_instance),
+                "consul_uptime" : time() - self.consul_uptime(process_instance),
+                "consul_cpu_usage" : self.consul_cpu_usage(process_instance),
+                "consul_mem_usage" : self.consul_mem_usage(process_instance),
+                "consul_url" : 'http://{0}/dashboard/db/consul-dashboard-for-prometheus?orgId=1&var-instance={1}'.format(utils.grafana_url(), instance)
+            }
+        return node_info
+
+
+def main():
+    consul = ConsulMetrics()
+    consul.consul_cluster_list()
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/metrics/grafana_metrics.py b/metrics/grafana_metrics.py
new file mode 100644
index 0000000..b68c8fe
--- /dev/null
+++ b/metrics/grafana_metrics.py
@@ -0,0 +1,218 @@
+#!/usr/bin/python
+#-*- coding:utf-8 -*-
+import os, sys
+import re
+
+import requests
+import argparse
+import logging
+import json
+
+import monitor_params
+import utils
+sys.path.append('..')
+from myapp.parse import ParseUtil
+from time import time
+
+
+'''
+    Scrape grafana metrics from Prometheus.
+'''
+
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
+                    datefmt='%a, %d %b %Y %H:%M:%S')
+import sys
+logger = logging.getLogger(sys.path[0] + 'grafana_metrics')
+
+class GrafanaMetrics(object):
+
+    def __init__(self):
+        pass
+
+
+    def ip_list(self):
+        '''
+        return grafana_ip list
+        '''
+        ip_list = []
+        try:
+            list = re.split(r'[,\s]\s*', monitor_params.grafana_ip.strip())
+        except:
+            logging.error("Can't split grafana_ip. Check the grafana_ip in monitor_params.py.")
+            sys.exit(1)
+        else:
+            ip_list = list
+
+        print ip_list
+        return ip_list
+
+
+    def instance_info(self):
+        instance_list = utils.get_instances(monitor_params.grafana_ip, monitor_params.grafana_port)
+        return instance_list
+
+    def grafana_process_instance(self):
+        '''
+        @return list of grafana process instances.
+        '''
+        process_instance = utils.get_instances(monitor_params.grafana_ip, monitor_params.process_exporter_port)
+        return process_instance
+
+    def grafana_cluster_state(self):
+        '''
+        @return grafana cluster state, and the number of healthy nodes.
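+        A sketch of how the two-element return value is meant to be
+        consumed (values here are hypothetical):
+
+            >>> state, healthy = GrafanaMetrics().grafana_cluster_state()
+            >>> state, healthy
+            (1.0, 2.0)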
+ ''' + process_instances = self.grafana_process_instance() + state = 0.0 + success_count = 0.0 + + for i in range(len(process_instances)): + grafana_up = self.grafana_node_state(process_instances[i]) + if grafana_up: + success_count +=1 + else: + continue + if success_count >= 1: + state = 1.0 + logging.debug("grafana state is %s" % (state)) + return [state, success_count] + + def grafana_node_state(self, process_instance): + ''' + @return a float value 1 or 0, indicating the node state up or down. + ''' + state = {} + url = utils.prometheus_url() + param = { + "query": 'grafana_server_process_up{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + state.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if state.has_key(process_instance): + print float(state[process_instance]) + return float(state[process_instance]) + else: + logging.error("No instance in the grafana cluster, grafana {0} down.".format(process_instance)) + return 0.0 + + + def grafana_cpu_usage(self, process_instance): + ''' + @return process_instance cpu usage. + ''' + cpu_usage = {} + url = utils.prometheus_url() + param = { + "query": 'grafana_server_cpu_percentage{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + cpu_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if cpu_usage.has_key(process_instance): + print float(cpu_usage[process_instance]) + return float(cpu_usage[process_instance]) + else: + logging.error("No instance in the grafana cluster, get grafana {0} cpu usage failed.".format(process_instance)) + return None + + def grafana_uptime(self, process_instance): + ''' + @return process_instance create time. + ''' + uptime = {} + url = utils.prometheus_url() + param = { + "query": 'grafana_server_running_time_seconds_total{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + uptime.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if uptime.has_key(process_instance): + print float(uptime[process_instance]) + return float(uptime[process_instance]) + else: + logging.error("No instance in the grafana cluster, get grafana {0} uptime failed.".format(process_instance)) + return None + + def grafana_mem_usage(self, process_instance): + ''' + @return process_instance memory usage. 
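+        The parsing below assumes ParseUtil.request_metrics returns the
+        "result" list of a Prometheus instant-query response, whose vector
+        entries look like (hypothetical sample):
+
+            {"metric": {"instance": "172.16.1.47:9108", ...},
+             "value": [1520000000.0, "123456789"]}
+
+        hence response[i]['metric']['instance'] and response[i]['value'][1].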
+ ''' + mem_usage = {} + url = utils.prometheus_url() + param = { + "query": 'sum by (instance)(grafana_server_memory_usage_bytes_total{{instance="{0}", mode=~"rss|vms|shared"}})'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + mem_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if mem_usage.has_key(process_instance): + print float(mem_usage[process_instance]) + return float(mem_usage[process_instance]) + else: + logging.error("No instance in the grafana cluster, get grafana {0} memory usage failed.".format(process_instance)) + return None + + def grafana_outside_instance(self): + ''' + For outside access, the url must use a grafana_outside_ip + ''' + outside_instances = utils.get_instances(monitor_params.grafana_outside_ip, monitor_params.grafana_port) + return outside_instances + + def grafana_cluster_list(self): + process_instances = self.grafana_process_instance() + outside_instances = self.grafana_outside_instance() + instances = self.instance_info() + uptime = time() + for i in range(len(process_instances)): + state = self.grafana_node_state(process_instances[i]) + if state: + uptime = self.grafana_uptime(process_instances[i]) + break + else: + continue + + node_info = [] + for i in range(len(instances)): + node_info.append(self.grafana_node_detail(process_instances[i], outside_instances[i], instances[i])) + + cluster_info = { + "grafana_cluster_state" : self.grafana_cluster_state()[0], + "grafana_total_nodes" : float(len(self.ip_list())), + "grafana_healthy_nodes" : self.grafana_cluster_state()[1], + "grafana_uptime" : time() - uptime, + "grafana_nodes_info": node_info + } + return cluster_info + + def grafana_node_detail(self, process_instance, outside_instance, instance): + if not self.grafana_node_state(process_instance): + node_info = { + "grafana_node_state" : 0.0, + "grafana_uptime" : 0.0, + "grafana_cpu_usage" : 0.0, + "grafana_mem_usage" : 0.0, + "grafana_url" : None + } + else: + node_info = { + "grafana_node_state" : self.grafana_node_state(process_instance), + "grafana_uptime" : time() - self.grafana_uptime(process_instance), + "grafana_cpu_usage" : self.grafana_cpu_usage(process_instance), + "grafana_mem_usage" : self.grafana_mem_usage(process_instance), + "grafana_url" : 'http://{0}/dashboard/db/grafana-dashboard-for-prometheus?orgId=1&var-instance={1}'.format(outside_instance, instance) + } + return node_info + + +def main(): + grafana = GrafanaMetrics() + grafana.grafana_cluster_list() + + +if __name__ == '__main__': + main() + diff --git a/metrics/keycloak_metrics.py b/metrics/keycloak_metrics.py new file mode 100644 index 0000000..9f31890 --- /dev/null +++ b/metrics/keycloak_metrics.py @@ -0,0 +1,200 @@ +#!/usr/bin/python +#-*- coding:utf-8 -*- +import os, sys +import re + +import requests +import argparse +import logging +import json + +import monitor_params +from time import time +import utils +sys.path.append("..") +from myapp.parse import ParseUtil +from time import time + + +''' + Scrape keycloak metrics from Prometheus. 
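+    Typical use, mirroring main() at the bottom of this file:
+
+        metrics = KeycloakMetrics().keycloak_cluster_list()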
+''' + +logging.basicConfig(level=logging.INFO, + format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', + datefmt='%a, %d %b %Y %H:%M:%S') +import sys +logger = logging.getLogger(sys.path[0] + 'nginx_metrics') + +class KeycloakMetrics(object): + + def __init__(self): + pass + + + def ip_list(self): + ''' + return keycloak_ip list + ''' + ip_list = [] + try: + list = re.split(r'[,\s]\s*', monitor_params.keycloak_ip.strip()) + except: + logging.error("Can't split keycloak_ip. Check the keycloak_ip in monitor_params.py.") + sys.exit(1) + else: + ip_list = list + return ip_list + + def keycloak_process_instance(self): + ''' + @return list of keycloak process instances. + ''' + process_instance = utils.get_instances(monitor_params.keycloak_ip, monitor_params.process_exporter_port) + return process_instance + + def keycloak_cluster_state(self): + ''' + @return keycloak cluster state, and numbers of healthy nodes. + ''' + process_instances = self.keycloak_process_instance() + state = 0.0 + success_count = 0.0 + + for i in range(len(process_instances)): + keycloak_up = self.keycloak_node_state(process_instances[i]) + if keycloak_up: + success_count +=1 + else: + continue + if success_count >= 1: + state = 1.0 + logging.debug("keycloak state is %s" % (state)) + return [state, success_count] + + def keycloak_node_state(self, process_instance): + ''' + @return a float value 1 or 0, indicating the node state up or down. + ''' + state = {} + url = utils.prometheus_url() + param = { + "query": 'keycloak_process_up{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + state.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if state.has_key(process_instance): + return float(state[process_instance]) + else: + logging.error("No instance in the keycloak cluster, keycloak {0} down.".format(process_instance)) + return 0.0 + + + def keycloak_cpu_usage(self, process_instance): + ''' + @return process_instance cpu usage. + ''' + cpu_usage = {} + url = utils.prometheus_url() + param = { + "query": 'keycloak_cpu_percentage{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + cpu_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if cpu_usage.has_key(process_instance): + return float(cpu_usage[process_instance]) + else: + logging.error("No instance in the keycloak cluster, get keycloak {0} cpu usage failed.".format(process_instance)) + return None + + def keycloak_uptime(self, process_instance): + ''' + @return process_instance create time. + ''' + uptime = {} + url = utils.prometheus_url() + param = { + "query": 'keycloak_running_time_seconds_total{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + uptime.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if uptime.has_key(process_instance): + return float(uptime[process_instance]) + else: + logging.error("No instance in the keycloak cluster, get keycloak {0} uptime failed.".format(process_instance)) + return None + + def keycloak_mem_usage(self, process_instance): + ''' + @return process_instance memory usage. 
+ ''' + mem_usage = {} + url = utils.prometheus_url() + param = { + "query": 'sum by (instance)(keycloak_memory_usage_bytes_total{{instance="{0}", mode=~"rss|vms|shared"}})'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + mem_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if mem_usage.has_key(process_instance): + return float(mem_usage[process_instance]) + else: + logging.error("No instance in the keycloak cluster, get keycloak {0} memory usage failed.".format(process_instance)) + return None + + def keycloak_cluster_list(self): + process_instances = self.keycloak_process_instance() + uptime = time() + for i in range(len(process_instances)): + state = self.keycloak_node_state(process_instances[i]) + if state: + uptime = self.keycloak_uptime(process_instances[i]) + break + else: + continue + + node_info = [] + for i in range(len(process_instances)): + node_info.append(self.keycloak_node_detail(process_instances[i])) + + cluster_info = { + "keycloak_cluster_state" : self.keycloak_cluster_state()[0], + "keycloak_total_nodes" : float(len(self.ip_list())), + "keycloak_healthy_nodes" : self.keycloak_cluster_state()[1], + "keycloak_uptime" : time() - uptime, + "keycloak_nodes_info": node_info + } + return cluster_info + + def keycloak_node_detail(self, process_instance): + if not self.keycloak_node_state(process_instance): + node_info = { + "keycloak_node_state" : 0.0, + "keycloak_uptime" : 0.0, + "keycloak_cpu_usage" : 0.0, + "keycloak_mem_usage" : 0.0, + "keycloak_url" : None + } + else: + node_info = { + "keycloak_node_state" : self.keycloak_node_state(process_instance), + "keycloak_uptime" : time() - self.keycloak_uptime(process_instance), + "keycloak_cpu_usage" : self.keycloak_cpu_usage(process_instance), + "keycloak_mem_usage" : self.keycloak_mem_usage(process_instance), + "keycloak_url" : 'http://{0}/dashboard/db/keycloak-dashboard-for-prometheus?orgId=1&var-instance={1}'.format(utils.grafana_url(), process_instance) + } + return node_info + + +def main(): + keycloak = KeycloakMetrics() + keycloak.keycloak_cluster_list() + + + +if __name__ == '__main__': + main() + diff --git a/metrics/knox_metrics.py b/metrics/knox_metrics.py new file mode 100644 index 0000000..0a261ab --- /dev/null +++ b/metrics/knox_metrics.py @@ -0,0 +1,197 @@ +#!/usr/bin/python +#-*- coding:utf-8 -*- +import os, sys +import re + +import requests +import argparse +import logging +import json + +import monitor_params +from time import time +import utils +sys.path.append("..") +from myapp.parse import ParseUtil +from time import time + +''' + Scrape knox metrics from Prometheus. +''' + +logging.basicConfig(level=logging.INFO, + format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', + datefmt='%a, %d %b %Y %H:%M:%S') +import sys +logger = logging.getLogger(sys.path[0] + 'nginx_metrics') + +class KnoxMetrics(object): + + def __init__(self): + pass + + + def ip_list(self): + ''' + return tomcat_ip list + ''' + ip_list = [] + try: + list = re.split(r'[,\s]\s*', monitor_params.knox_ip.strip()) + except: + logging.error("Can't split knox_ip. Check the knox_ip in monitor_params.py.") + sys.exit(1) + else: + ip_list = list + return ip_list + + def knox_process_instance(self): + ''' + @return list of knox instances. 
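+        This assumes utils.get_instances joins each configured IP with the
+        given port (see utils.py for the actual behavior), e.g.:
+
+            get_instances("172.16.1.47", "9108")  ->  ["172.16.1.47:9108"]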
+ ''' + process_instance = utils.get_instances(monitor_params.knox_ip, monitor_params.process_exporter_port) + return process_instance + + def knox_cluster_state(self): + ''' + @return knox cluster state and the numbers of healthy nodes. + ''' + process_instances = self.knox_process_instance() + state = 0.0 + success_count = 0.0 + + for i in range(len(process_instances)): + knox_up = self.knox_node_state(process_instances[i]) + if knox_up: + success_count +=1 + else: + continue + if success_count >= 1: + state = 1.0 + logging.info("knox state is %s" % (state)) + return [state, success_count] + + def knox_node_state(self, process_instance): + ''' + @return a float value 1 or 0, indicating the node state up or down. + ''' + state = {} + url = utils.prometheus_url() + param = { + "query": 'knox_process_up{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + state.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if state.has_key(process_instance): + return float(state[process_instance]) + else: + logging.error("No instance in the knox cluster, knox {0} down.".format(process_instance)) + return 0.0 + + + def knox_cpu_usage(self, process_instance): + ''' + @return knox cpu usage. + ''' + cpu_usage = {} + url = utils.prometheus_url() + param = { + "query": 'knox_cpu_percentage{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + cpu_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if cpu_usage.has_key(process_instance): + return float(cpu_usage[process_instance]) + else: + logging.error("No instance in the knox cluster, get knox {0} cpu usage failed.".format(process_instance)) + return None + + def knox_uptime(self, process_instance): + ''' + @return a float value of create time. + ''' + uptime = {} + url = utils.prometheus_url() + param = { + "query": 'knox_running_time_seconds_total{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + uptime.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if uptime.has_key(process_instance): + return float(uptime[process_instance]) + else: + logging.error("No instance in the knox cluster, get knox {0} uptime failed.".format(process_instance)) + return None + + def knox_mem_usage(self, process_instance): + ''' + @return knox memory usage. 
+ ''' + mem_usage = {} + url = utils.prometheus_url() + param = { + "query": 'sum by (instance)(knox_memory_usage_bytes_total{{instance="{0}", mode=~"rss|vms|shared"}})'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + mem_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if mem_usage.has_key(process_instance): + return float(mem_usage[process_instance]) + else: + logging.error("No instance in the knox cluster, get knox {0} memory usage failed.".format(process_instance)) + return None + + + def knox_cluster_list(self): + process_instances = self.knox_process_instance() + uptime = time() + for i in range(len(process_instances)): + state = self.knox_node_state(process_instances[i]) + if state: + uptime = self.knox_uptime(process_instances[i]) + break + else: + continue + + node_info = [] + for i in range(len(process_instances)): + node_info.append(self.knox_node_detail(process_instances[i])) + + cluster_info = { + "knox_cluster_state" : self.knox_cluster_state()[0], + "knox_total_nodes" : float(len(self.ip_list())), + "knox_healthy_nodes" : self.knox_cluster_state()[1], + "knox_uptime" : time() - uptime, + "knox_nodes_info": node_info + } + return cluster_info + + def knox_node_detail(self, process_instance): + if not self.knox_node_state(process_instance): + node_info = { + "knox_node_state" : 0.0, + "knox_uptime" : 0.0, + "knox_cpu_usage" : 0.0, + "knox_mem_usage" : 0.0, + "knox_url" : None + } + else: + node_info = { + "knox_node_state" : self.knox_node_state(process_instance), + "knox_uptime" : time() - self.knox_uptime(process_instance), + "knox_cpu_usage" : self.knox_cpu_usage(process_instance), + "knox_mem_usage" : self.knox_mem_usage(process_instance), + "knox_url" : 'http://{0}/dashboard/db/knox-dashboard-for-prometheus?orgId=1&var-instance={1}'.format(utils.grafana_url(), process_instance) + } + return node_info + +def main(): + knox = KnoxMetrics() + knox.knox_cluster_list() + +if __name__ == '__main__': + main() + diff --git a/metrics/ldap_metrics.py b/metrics/ldap_metrics.py new file mode 100644 index 0000000..c6b4f04 --- /dev/null +++ b/metrics/ldap_metrics.py @@ -0,0 +1,199 @@ +#!/usr/bin/python +#-*- coding:utf-8 -*- +import os, sys +import re + +import requests +import argparse +import logging +import json + +import monitor_params +from time import time +import utils +sys.path.append("..") +from myapp.parse import ParseUtil +from time import time + + +''' + Scrape ldap metrics from Prometheus. +''' + +logging.basicConfig(level=logging.INFO, + format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', + datefmt='%a, %d %b %Y %H:%M:%S') +import sys +logger = logging.getLogger(sys.path[0] + 'nginx_metrics') + +class LdapMetrics(object): + + def __init__(self): + pass + + + def ip_list(self): + ''' + return tomcat_ip list + ''' + ip_list = [] + try: + list = re.split(r'[,\s]\s*', monitor_params.ldap_ip.strip()) + except: + logging.error("Can't split ldap_ip. Check the ldap_ip in monitor_params.py.") + sys.exit(1) + else: + ip_list = list + return ip_list + + def ldap_process_instance(self): + ''' + @return list of ldap instances. + ''' + process_instance = utils.get_instances(monitor_params.ldap_ip, monitor_params.process_exporter_port) + return process_instance + + def ldap_cluster_state(self): + ''' + @return ldap cluster state and the numbers of healthy nodes. 
+ ''' + process_instances = self.ldap_process_instance() + state = 0.0 + success_count = 0.0 + + for i in range(len(process_instances)): + ldap_up = self.ldap_node_state(process_instances[i]) + if ldap_up: + success_count +=1 + else: + continue + if success_count >= 1: + state = 1.0 + logging.info("ldap state is %s" % (state)) + return [state, success_count] + + def ldap_node_state(self, process_instance): + ''' + @return a float value 1 or 0, indicating the node state up or down. + ''' + state = {} + url = utils.prometheus_url() + param = { + "query": 'slapd_process_up{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + state.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if state.has_key(process_instance): + return float(state[process_instance]) + else: + logging.error("No instance in the ldap cluster, ldap {0} down.".format(process_instance)) + return 0.0 + + + def ldap_cpu_usage(self, process_instance): + ''' + @return ldap cpu usage. + ''' + cpu_usage = {} + url = utils.prometheus_url() + param = { + "query": 'slapd_cpu_percentage{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + cpu_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if cpu_usage.has_key(process_instance): + return float(cpu_usage[process_instance]) + else: + logging.error("No instance in the ldap cluster, get ldap {0} cpu usage failed.".format(process_instance)) + return None + + def ldap_uptime(self, process_instance): + ''' + @return a float value of create time. + ''' + uptime = {} + url = utils.prometheus_url() + param = { + "query": 'slapd_running_time_seconds_total{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + uptime.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if uptime.has_key(process_instance): + return float(uptime[process_instance]) + else: + logging.error("No instance in the ldap cluster, get ldap {0} uptime failed.".format(process_instance)) + return None + + def ldap_mem_usage(self, process_instance): + ''' + @return ldap memory usage. 
+ ''' + mem_usage = {} + url = utils.prometheus_url() + param = { + "query": 'sum by (instance)(slapd_memory_usage_bytes_total{{instance="{0}", mode=~"rss|vms|shared"}})'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + mem_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if mem_usage.has_key(process_instance): + return float(mem_usage[process_instance]) + else: + logging.error("No instance in the ldap cluster, get ldap {0} memory usage failed.".format(process_instance)) + return None + + + def ldap_cluster_list(self): + process_instances = self.ldap_process_instance() + uptime = time() + for i in range(len(process_instances)): + state = self.ldap_node_state(process_instances[i]) + if state: + uptime = self.ldap_uptime(process_instances[i]) + break + else: + continue + + node_info = [] + for i in range(len(process_instances)): + node_info.append(self.ldap_node_detail(process_instances[i])) + + cluster_info = { + "ldap_cluster_state" : self.ldap_cluster_state()[0], + "ldap_total_nodes" : float(len(self.ip_list())), + "ldap_healthy_nodes" : self.ldap_cluster_state()[1], + "ldap_uptime" : time() - uptime, + "ldap_nodes_info": node_info + } + return cluster_info + + def ldap_node_detail(self, process_instance): + if not self.ldap_node_state(process_instance): + node_info = { + "ldap_node_state" : 0.0, + "ldap_uptime" : 0.0, + "ldap_cpu_usage" : 0.0, + "ldap_mem_usage" : 0.0, + "ldap_url" : None + } + else: + node_info = { + "ldap_node_state" : self.ldap_node_state(process_instance), + "ldap_uptime" : time() - self.ldap_uptime(process_instance), + "ldap_cpu_usage" : self.ldap_cpu_usage(process_instance), + "ldap_mem_usage" : self.ldap_mem_usage(process_instance), + "ldap_url" : 'http://{0}/dashboard/db/ldap-dashboard-for-prometheus?orgId=1&var-instance={1}'.format(utils.grafana_url(), process_instance) + } + return node_info + + +def main(): + ldap = LdapMetrics() + ldap.ldap_cluster_list() + +if __name__ == '__main__': + main() + diff --git a/metrics/monitor.py b/metrics/monitor.py new file mode 100644 index 0000000..3b05f14 --- /dev/null +++ b/metrics/monitor.py @@ -0,0 +1,65 @@ +# monitor.py +#!/usr/bin/python +#-*- coding:utf-8 -*- + +import os +import sys +import re +import time +import requests +sys.path.append('..') +from myapp.parse import ParseUtil +# from . 
import monitor_params
+
+import utils
+from consul_metrics import ConsulMetrics
+from nginx_metrics import NginxMetrics
+from tomcat_metrics import TomcatMetrics
+from prometheus_metrics import PrometheusMetrics
+from grafana_metrics import GrafanaMetrics
+from mysql_metrics import MysqlMetrics
+from keycloak_metrics import KeycloakMetrics
+from knox_metrics import KnoxMetrics
+from ambari_server_metrics import AmbariServerMetrics
+from ambari_agent_metrics import AmbariAgentMetrics
+from ldap_metrics import LdapMetrics
+
+
+def monitor_metrics():
+
+    consul = ConsulMetrics()
+    nginx = NginxMetrics()
+    tomcat = TomcatMetrics()
+    prometheus = PrometheusMetrics()
+    grafana = GrafanaMetrics()
+    mysql = MysqlMetrics()
+    keycloak = KeycloakMetrics()
+    knox = KnoxMetrics()
+    ambari_server = AmbariServerMetrics()
+    ambari_agent = AmbariAgentMetrics()
+    ldap = LdapMetrics()
+
+    result = []
+    metric_info = {
+        "consul_info": consul.consul_cluster_list(),
+        "nginx_info" : nginx.nginx_cluster_list(),
+        "tomcat_info" : tomcat.tomcat_cluster_list(),
+        "prometheus_info" : prometheus.prometheus_cluster_list(),
+        "grafana_info" : grafana.grafana_cluster_list(),
+        "mysql_info" : mysql.mysql_cluster_list(),
+        "keycloak_info" : keycloak.keycloak_cluster_list(),
+        "knox_info" : knox.knox_cluster_list(),
+        "ambari_server_info" : ambari_server.ambari_server_cluster_list(),
+        "ambari_agent_info" : ambari_agent.ambari_agent_cluster_list(),
+        "ldap_info" : ldap.ldap_cluster_list()
+    }
+    result.append(metric_info)
+
+    return result
+
+
+def main():
+    result = monitor_metrics()
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/metrics/monitor_params.py b/metrics/monitor_params.py
new file mode 100644
index 0000000..056f2a0
--- /dev/null
+++ b/metrics/monitor_params.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python2
+#-*- coding: utf-8 -*-
+
+# consul_ip can be set to several values, separated by ",".
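+# The ip_list() helpers in this package split such a value with
+# re.split(r'[,\s]\s*', value.strip()), so for example
+# "172.16.1.47, 172.16.1.33" -> ['172.16.1.47', '172.16.1.33'].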
+# consul +consul_ip = "172.16.1.47, 172.16.1.33, 172.16.1.19" +consul_port = "8500" +consul_exporter_port = "9107" + +# nginx +# nginx_ip:nginx_stab_port running nginx stab +# nginx_ip:nginx_exporter_port running nginx_exporter +# nginx_ip:nginxlog_exporter_port running nginxlog_exporter +nginx_ip = "172.16.1.47" +nginx_stab_port = "7654" +nginx_exporter_port = "9113" +nginxlog_exporter_port = "4040" +# nginx_instance = "10.110.13.67:9113, 10.110.13.67:9113" + +# tomcat +tomcat_ip = "172.16.1.47, 172.16.1.33" +tomcat_master_port = "9011" +tomcat_tenant_port = "9021" + +# prometheus +prometheus_virtual_ip = "172.16.1.43" +prometheus_ip = "172.16.1.47, 172.16.1.33" +prometheus_outside_ip = "10.10.6.206, 10.10.6.211" +prometheus_port = "9500" + +# grafana version should be v4.6.3 or later +grafana_ip = "172.16.1.47, 172.16.1.33" +grafana_outside_ip = "10.10.6.206, 10.10.6.211" +grafana_port = "3000" +# + +# mysql +mysql_ip = "172.16.1.47, 172.16.1.33" +mysql_exporter_port = "9104" + +# process_status_exporter +process_exporter_ip = "172.16.1.47, 172.16.1.33, 172.16.1.19" +process_exporter_port = "9108" + +# keycloak +keycloak_ip = "172.16.1.47" +keycloak_port = "9110" + +# knox +knox_ip = "172.16.1.47" + +# ambari-server +ambari_server_ip = "172.16.1.47" + +# ambari-agent +ambari_agent_ip = "172.16.1.47, 172.16.1.33, 172.16.1.19" + +# ldap +ldap_ip = "172.16.1.47, 172.16.1.33" \ No newline at end of file diff --git a/metrics/mysql_metrics.py b/metrics/mysql_metrics.py new file mode 100644 index 0000000..084b157 --- /dev/null +++ b/metrics/mysql_metrics.py @@ -0,0 +1,209 @@ +#!/usr/bin/python +#-*- coding:utf-8 -*- +import os, sys +import re + +import requests +import argparse +import logging +import json + +import monitor_params +from time import time +import utils +sys.path.append("..") +from myapp.parse import ParseUtil + +''' + Scrape mysqld metrics from mysqld_exporter. +''' + +logging.basicConfig(level=logging.INFO, + format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', + datefmt='%a, %d %b %Y %H:%M:%S') +import sys +logger = logging.getLogger(sys.path[0] + 'mysql_metrics') + +class MysqlMetrics(object): + + def __init__(self): + pass + + + def ip_list(self): + ''' + return mysql_ip list + ''' + ip_list = [] + try: + list = re.split(r'[,\s]\s*', monitor_params.mysql_ip.strip()) + except: + logging.error("Can't split mysql_ip. Check the mysql_ip in monitor_params.py.") + sys.exit(1) + else: + ip_list = list + return ip_list + + def instance_info(self): + ''' + @return a list of mysqld_exporter instances. + ''' + instance_list = utils.get_instances(monitor_params.mysql_ip, monitor_params.mysql_exporter_port) + return instance_list + + def mysql_process_instance(self): + ''' + @return list of mysql process instances. + ''' + process_instance = utils.get_instances(monitor_params.mysql_ip, monitor_params.process_exporter_port) + return process_instance + + def mysql_cluster_state(self): + ''' + @return mysql cluster state and the numbers of healthy nodes. + ''' + process_instances = self.mysql_process_instance() + state = 0.0 + success_count = 0.0 + + for i in range(len(process_instances)): + mysql_up = self.mysql_node_state(process_instances[i]) + if mysql_up: + success_count +=1 + else: + continue + if success_count == len(process_instances): + state = 1.0 + logging.info("mysql state is %s" % (state)) + return [state, success_count] + + def mysql_node_state(self, process_instance): + ''' + @return a float value 1 or 0, indicating the node state up or down. 
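+        Note: mysql_cluster_state above reports 1.0 only when every mysql
+        node is up (success_count == len(process_instances)), a stricter
+        rule than the ">= 1 healthy node" check used by the other services.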
+        '''
+        state = {}
+        url = utils.prometheus_url()
+        param = {
+            "query": 'mysqld_process_up{{instance="{0}"}}'.format(process_instance)
+        }
+        response = ParseUtil.request_metrics(url, param)
+        for i in range(len(response)):
+            state.setdefault(response[i]['metric']['instance'], response[i]['value'][1])
+        if state.has_key(process_instance):
+            print float(state[process_instance])
+            return float(state[process_instance])
+        else:
+            logging.error("No instance in the mysql cluster, mysql node {0} down.".format(process_instance))
+            return 0.0
+
+    def mysql_cpu_usage(self, process_instance):
+        '''
+        @return the CPU usage percentage of the mysqld process on this node.
+        '''
+        cpu_usage = {}
+        url = utils.prometheus_url()
+        param = {
+            "query": 'mysqld_cpu_percentage{{instance="{0}"}}'.format(process_instance)
+        }
+        response = ParseUtil.request_metrics(url, param)
+        # pprint(response)
+        for i in range(len(response)):
+            cpu_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1])
+        if cpu_usage.has_key(process_instance):
+            print float(cpu_usage[process_instance])
+            return float(cpu_usage[process_instance])
+        else:
+            logging.error("No instance in the mysql cluster, get mysql node {0} cpu usage failed.".format(process_instance))
+            return None
+
+    def mysql_uptime(self, process_instance):
+        '''
+        @return the mysqld process create time, in seconds since the epoch;
+        callers turn it into an uptime with time() - create_time.
+        '''
+        uptime = {}
+        url = utils.prometheus_url()
+        param = {
+            "query": 'mysqld_running_time_seconds_total{{instance="{0}"}}'.format(process_instance)
+        }
+        response = ParseUtil.request_metrics(url, param)
+        for i in range(len(response)):
+            uptime.setdefault(response[i]['metric']['instance'], response[i]['value'][1])
+        if uptime.has_key(process_instance):
+            print float(uptime[process_instance])
+            return float(uptime[process_instance])
+        else:
+            logging.error("No instance in the mysql cluster, get mysql node {0} uptime failed.".format(process_instance))
+            return None
+
+    def mysql_mem_usage(self, process_instance):
+        '''
+        @return the total memory usage of the mysqld process in bytes (rss + vms + shared).
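+        Sketch with hypothetical numbers: the query
+            sum by (instance)(mysqld_memory_usage_bytes_total{instance="<ip>:9108", mode=~"rss|vms|shared"})
+        adds the per-mode samples (e.g. 29552640 + 84586496 + 12046336) and the
+        method returns that sum as a float.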
+ ''' + mem_usage = {} + url = utils.prometheus_url() + param = { + "query": 'sum by (instance)(mysqld_memory_usage_bytes_total{{instance="{0}", mode=~"rss|vms|shared"}})'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + mem_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if mem_usage.has_key(process_instance): + print float(mem_usage[process_instance]) + return float(mem_usage[process_instance]) + else: + logging.error("No instance in the mysql cluster, get mysql node {0} memory usage failed.".format(process_instance)) + return None + + + def mysql_cluster_list(self): + process_instances = self.mysql_process_instance() + instances = self.instance_info() + uptime = time() + for i in range(len(process_instances)): + state = self.mysql_node_state(process_instances[i]) + if state: + uptime = self.mysql_uptime(process_instances[i]) + break + else: + continue + + node_info = [] + for i in range(len(instances)): + node_info.append(self.mysql_node_detail(process_instances[i], instances[i])) + + cluster_info = { + "mysql_cluster_state" : self.mysql_cluster_state()[0], + "mysql_total_nodes" : float(len(process_instances)), + "mysql_healthy_nodes" : self.mysql_cluster_state()[1], + "mysql_uptime" : time() - uptime, + "mysql_nodes_info": node_info + } + return cluster_info + + def mysql_node_detail(self, process_instance, instance): + if not self.mysql_node_state(process_instance): + node_info = { + "mysql_node_state" : 0.0, + "mysql_uptime" : 0.0, + "mysql_cpu_usage" : 0.0, + "mysql_mem_usage" : 0.0, + "mysql_url" : None + } + else: + node_info = { + "mysql_node_state" : self.mysql_node_state(process_instance), + "mysql_uptime" : time() - self.mysql_uptime(process_instance), + "mysql_cpu_usage" : self.mysql_cpu_usage(process_instance), + "mysql_mem_usage" : self.mysql_mem_usage(process_instance), + "mysql_url" : 'http://{0}/dashboard/db/mysql-dashboard-for-prometheus?orgId=1&var-instance={1}'.format(utils.grafana_url(), instance) + } + return node_info + + +def main(): + mysql = MysqlMetrics() + mysql.mysql_cluster_list() + +if __name__ == '__main__': + main() + diff --git a/metrics/nginx_metrics.py b/metrics/nginx_metrics.py new file mode 100644 index 0000000..89bf07b --- /dev/null +++ b/metrics/nginx_metrics.py @@ -0,0 +1,211 @@ +#!/usr/bin/python +#-*- coding:utf-8 -*- +import os, sys +import re +import time +import requests +import argparse +import logging +import json + +import monitor_params +import utils +sys.path.append("..") +from myapp.parse import ParseUtil +from time import time + +''' + Scrape nginx metrics from nginx_exporter. +''' + +logging.basicConfig(level=logging.INFO, + format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', + datefmt='%a, %d %b %Y %H:%M:%S') +import sys +logger = logging.getLogger(sys.path[0] + 'nginx_metrics') + +class NginxMetrics(object): + + def __init__(self): + pass + + def ip_list(self): + ''' + return nginx_ip list + ''' + ip_list = [] + try: + list = re.split(r'[,\s]\s*', monitor_params.nginx_ip.strip()) + except: + logging.error("Can't split nginx_ip. Check the nginx_ip in monitor_params.py.") + sys.exit(1) + else: + ip_list = list + return ip_list + + def nginx_cluster_state(self): + ''' + @return the state of the cluster, including cluster state and healthy nodes in the cluster. 
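+        Note: unlike mysql_cluster_state, which requires every node to be up,
+        the nginx cluster is reported healthy as soon as at least one node is
+        up (success_count >= 1).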
+        '''
+        process_instances = self.nginx_process_instance()
+        state = 0.0
+        success_count = 0.0
+
+        for i in range(len(process_instances)):
+            nginx_up = self.nginx_node_state(process_instances[i])
+            if nginx_up:
+                success_count += 1
+            else:
+                continue
+        if success_count >= 1:
+            state = 1.0
+        logging.info("nginx state is %s" % (state))
+        return [state, success_count]
+
+    def instance_info(self):
+        '''
+        @return a list of nginx_exporter instances (ip:port).
+        '''
+        instance_list = utils.get_instances(monitor_params.nginx_ip, monitor_params.nginx_exporter_port)
+        return instance_list
+
+    def nginx_process_instance(self):
+        '''
+        @return a list of nginx process instances.
+        '''
+        process_instances = utils.get_instances(monitor_params.nginx_ip, monitor_params.process_exporter_port)
+        return process_instances
+
+    def nginx_node_state(self, process_instance):
+        '''
+        @return a float value 1 or 0, indicating the node state up or down.
+        '''
+        state = {}
+        url = utils.prometheus_url()
+        param = {
+            "query": 'nginx_process_up{{instance="{0}"}}'.format(process_instance)
+        }
+        response = ParseUtil.request_metrics(url, param)
+        for i in range(len(response)):
+            state.setdefault(response[i]['metric']['instance'], response[i]['value'][1])
+        if state.has_key(process_instance):
+            print float(state[process_instance])
+            return float(state[process_instance])
+        else:
+            logging.error("No instance in the nginx cluster, nginx node {0} down.".format(process_instance))
+            return 0.0
+
+
+    def nginx_cpu_usage(self, process_instance):
+        '''
+        @return the CPU usage percentage of the nginx process on this node.
+        '''
+        cpu_usage = {}
+        url = utils.prometheus_url()
+        param = {
+            "query": 'nginx_cpu_percentage{{instance="{0}"}}'.format(process_instance)
+        }
+        response = ParseUtil.request_metrics(url, param)
+        # pprint(response)
+        for i in range(len(response)):
+            cpu_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1])
+        if cpu_usage.has_key(process_instance):
+            print float(cpu_usage[process_instance])
+            return float(cpu_usage[process_instance])
+        else:
+            logging.error("No instance in the nginx cluster, get {0} cpu usage error.".format(process_instance))
+            return None
+
+    def nginx_uptime(self, process_instance):
+        '''
+        @return the nginx process create time, in seconds since the epoch.
+        '''
+        uptime = {}
+        url = utils.prometheus_url()
+        param = {
+            "query": 'nginx_running_time_seconds_total{{instance="{0}"}}'.format(process_instance)
+        }
+        response = ParseUtil.request_metrics(url, param)
+        # pprint(response)
+        for i in range(len(response)):
+            uptime.setdefault(response[i]['metric']['instance'], response[i]['value'][1])
+        if uptime.has_key(process_instance):
+            print float(uptime[process_instance])
+            return float(uptime[process_instance])
+        else:
+            logging.error("No instance in the nginx cluster, get {0} uptime error.".format(process_instance))
+            return None
+
+    def nginx_mem_usage(self, process_instance):
+        '''
+        @return the total memory usage of the nginx process in bytes (rss + vms + shared).
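+        The mode label values come from the psutil memory_full_info() fields
+        that process_status_exporter.py exports; rss, vms and shared are summed
+        here, while text, lib, data, dirty, uss, pss and swap are ignored.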
+        '''
+        mem_usage = {}
+        url = utils.prometheus_url()
+        param = {
+            "query": 'sum by (instance)(nginx_memory_usage_bytes_total{{instance="{0}", mode=~"rss|vms|shared"}})'.format(process_instance)
+        }
+        response = ParseUtil.request_metrics(url, param)
+        # pprint(response)
+        for i in range(len(response)):
+            mem_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1])
+        if mem_usage.has_key(process_instance):
+            print float(mem_usage[process_instance])
+            return float(mem_usage[process_instance])
+        else:
+            logging.error("No instance in the nginx cluster, get {0} memory usage error.".format(process_instance))
+            return None
+
+
+    def nginx_cluster_list(self):
+        process_instances = self.nginx_process_instance()
+        instances = self.instance_info()
+        uptime = time()
+        for i in range(len(process_instances)):
+            state = self.nginx_node_state(process_instances[i])
+            if state:
+                uptime = self.nginx_uptime(process_instances[i])
+                break
+            else:
+                continue
+
+        node_info = []
+        for i in range(len(instances)):
+            node_info.append(self.nginx_node_detail(process_instances[i], instances[i]))
+
+        cluster_info = {
+            "nginx_cluster_state" : self.nginx_cluster_state()[0],
+            "nginx_total_nodes" : float(len(self.ip_list())),
+            "nginx_healthy_nodes" : self.nginx_cluster_state()[1],
+            "nginx_uptime" : time() - uptime,
+            "nginx_nodes_info": node_info
+        }
+        # pprint(cluster_info)
+        return cluster_info
+
+    def nginx_node_detail(self, process_instance, instance):
+        '''
+        nginx_exporter and nginxlog_exporter are not deployed yet, so nginx_url
+        is currently built from process_instance only.
+        '''
+        if not self.nginx_node_state(process_instance):
+            node_info = {
+                "nginx_node_state" : 0.0,
+                "nginx_uptime" : 0.0,
+                "nginx_cpu_usage" : 0.0,
+                "nginx_mem_usage" : 0.0,
+                "nginx_url" : None
+            }
+        else:
+            node_info = {
+                "nginx_node_state" : self.nginx_node_state(process_instance),
+                "nginx_uptime" : time() - self.nginx_uptime(process_instance),
+                "nginx_cpu_usage" : self.nginx_cpu_usage(process_instance),
+                "nginx_mem_usage" : self.nginx_mem_usage(process_instance),
+                "nginx_url" : 'http://{0}/dashboard/db/nginx-dashboard-for-prometheus?orgId=1&var-instance={1}'.format(utils.grafana_url(), process_instance)
+            }
+        return node_info
+
+def main():
+    nginx = NginxMetrics()
+    nginx.nginx_cluster_list()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/metrics/process_info.txt b/metrics/process_info.txt
new file mode 100644
index 0000000..ecade72
--- /dev/null
+++ b/metrics/process_info.txt
@@ -0,0 +1,50 @@
+# process_info.txt
+# list the process names to monitor
+# The PID can be found in the /run/ or /var/run/ directory if the program has a pid file.
+# Otherwise, the PID can be found in "/sys/fs/cgroup/systemd/system.slice/SERVICE_NAME.service/cgroup.procs"
+# if the service is managed by systemd.
+
+# /run/ambari-server/ambari-server.pid
+# /sys/fs/cgroup/systemd/system.slice/ambari-server.service/cgroup.procs
+ambari-server
+
+# /run/ambari-agent/ambari-agent.pid
+# /sys/fs/cgroup/systemd/system.slice/ambari-agent.service/cgroup.procs
+ambari-agent
+
+# installed outside the cluster as the ambari SSO gateway
+# /sys/fs/cgroup/systemd/system.slice/knox.service/cgroup.procs
+knox
+
+# /sys/fs/cgroup/systemd/system.slice/keycloak.service/cgroup.procs
+keycloak
+
+# /sys/fs/cgroup/systemd/system.slice/tomcat-master.service/cgroup.procs
+tomcat-master
+
+# /sys/fs/cgroup/systemd/system.slice/tomcat-tenant.service/cgroup.procs
+tomcat-tenant
+
+# /run/nginx.pid
+# /sys/fs/cgroup/systemd/system.slice/nginx.service/cgroup.procs
+# several PIDs are
written in the cgroup.procs, should pick only the master PID. +nginx + +# /run/consul.pid +# /sys/fs/cgroup/systemd/system.slice/consul.service/cgroup.procs +consul + +# /run/mysqld/mysqld.pid +# /sys/fs/cgroup/systemd/system.slice/mysqld.service/cgroup.procs +mysqld + +# /run/openldap/slapd.pid +# /sys/fs/cgroup/systemd/system.slice/slapd.service/cgroup.procs +slapd + +# /sys/fs/cgroup/systemd/system.slice/prometheus.service +prometheus + +# /run/grafana/grafana-server.pid +# /sys/fs/cgroup/systemd/system.slice/grafana-server.service/cgroup.procs +grafana-server \ No newline at end of file diff --git a/metrics/process_status_exporter.py b/metrics/process_status_exporter.py new file mode 100644 index 0000000..a61f169 --- /dev/null +++ b/metrics/process_status_exporter.py @@ -0,0 +1,241 @@ +#!/usr/bin/python +#-*- coding:utf-8 -*- + +import os +import sys +import re +import time +import argparse +import logging +import psutil + +from subprocess import Popen, PIPE +from prometheus_client import start_http_server +from prometheus_client.core import GaugeMetricFamily, REGISTRY + +logging.basicConfig(level=logging.INFO, + format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', + datefmt='%a, %d %b %Y %H:%M:%S') + +logger = logging.getLogger(sys.path[0] + 'process_status_exporter') + +class ProcessStatusCollector(object): + """collect process status""" + def __init__(self): + pass + + def collect(self): + # The metrics we want to export. + process_info = get_process_name() + for process_name in process_info: + print 'process_name = %s ' % (process_name) + pid = get_pid(process_name) + print 'PID = %s ' % (pid) + snake_case = re.sub('[-|\s]', r'_', process_name).lower() + + process_metrics = get_process_metrics(pid, process_name) + if process_metrics: + yield GaugeMetricFamily(snake_case + '_process_up', + snake_case + ' Process Up or Down (1 for up, 0 for down).', + value = process_state(pid)) + yield GaugeMetricFamily(snake_case + '_running_time_seconds_total', + snake_case + ' Total Running time in seconds.', + value = process_metrics['create_time']) + yield GaugeMetricFamily(snake_case + '_cpu_percentage', + snake_case + ' CPU Percentage.', + value = process_metrics['cpu_percent']) + yield GaugeMetricFamily(snake_case + '_opened_fds_number', + snake_case + ' Total Number of Opened fds.', + value = process_metrics['num_fds']) + yield GaugeMetricFamily(snake_case + '_threads_number', + snake_case + ' Total Number of Threads.', + value = process_metrics['num_threads']) + yield GaugeMetricFamily(snake_case + '_opened_files_number', + snake_case + ' Total Number of opened files.', + value = process_metrics['open_files']) + + # OrderedDict([('read_count', 49566766), ('write_count', 29218473), ('read_bytes', 269357056), ('write_bytes', 4311138304), ('read_chars', 3803556911), ('write_chars', 7421457950)]) + io_counter = GaugeMetricFamily(snake_case + '_io_counters', + snake_case + ' IO counters.', + labels = ['type']) + + for key in process_metrics['io_counters'].keys(): + io_counter.add_metric([key], process_metrics['io_counters'][key]) + yield io_counter + + # pcputimes(user=10388.04, system=5887.69, children_user=0.0, children_system=0.0) + cpu_times = GaugeMetricFamily(snake_case + '_cpu_time_seconds_total', + snake_case + ' Total CPU time in seconds.', + labels = ['mode']) + for key in process_metrics['cpu_times'].keys(): + cpu_times.add_metric([key], process_metrics['cpu_times'][key]) + yield cpu_times + + # pfullmem(rss=29552640, vms=84586496, shared=12046336, 
text=11493376, lib=0, data=25661440, dirty=0, uss=30842880, pss=30842880, swap=0)
+            memory_info = GaugeMetricFamily(snake_case + '_memory_usage_bytes_total',
+                snake_case + ' Memory Usage by each mode.',
+                labels = ['mode'])
+            for key in process_metrics['memory_info'].keys():
+                memory_info.add_metric([key], process_metrics['memory_info'][key])
+            yield memory_info
+
+        else:
+            pass
+
+def get_process_name():
+    # read the process names to monitor from process_info.txt
+    try:
+        with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'process_info.txt')) as file:
+            for process_name in file:
+                if process_name.startswith('#') or process_name.strip() == '':
+                    continue
+                else:
+                    yield process_name.rstrip()
+    except IOError:
+        logging.error("Failed to open process_info.txt, please check!")
+        pass
+
+def get_pid(process_name):
+    '''
+    Look up the PID for a given process name. Usually the PID can be found in a .pid file
+    located in the /run/ directory, or in a /run/<service>/ subdirectory. If no pid file exists
+    in the directories mentioned above, and the program is managed by systemd, the PID can still
+    be read from a cgroup.procs file, usually located in the
+    /sys/fs/cgroup/systemd/system.slice/<process_name>.service/ directory.
+
+    @process_name: a process name read from the process_info.txt file.
+    @return: the PID (string type) of the given process_name, or None if no PID file is found.
+    '''
+    if os.path.exists(r'/run/{0}/{0}.pid'.format(process_name)):
+        logging.info("/run/{0}/{0}.pid file exists, read PID.".format(process_name))
+        with open("/run/{0}/{0}.pid".format(process_name), "r") as f:
+            data = f.readline().rstrip()
+        return data
+    elif os.path.exists(r'/run/{0}.pid'.format(process_name)):
+        logging.info("/run/{0}/{0}.pid file does not exist, /run/{0}.pid file exists, read PID.".format(process_name))
+        with open("/run/{0}.pid".format(process_name), "r") as f:
+            data = f.readline().rstrip()
+        return data
+    elif os.path.exists(r'/sys/fs/cgroup/systemd/system.slice/{0}.service/cgroup.procs'.format(process_name)):
+        logging.info("no pid file in the /run/ directory. .../{0}.service/cgroup.procs file exists, read PID.".format(process_name))
+        with open("/sys/fs/cgroup/systemd/system.slice/{0}.service/cgroup.procs".format(process_name), "r") as f:
+            data = f.readline().rstrip()
+        return data
+    elif "knox-server" in process_name and os.path.exists(r'/run/knox/gateway.pid'):
+        logging.info("/run/knox/gateway.pid file exists, read PID.")
+        with open("/run/knox/gateway.pid", "r") as f:
+            data = f.readline().rstrip()
+        return data
+    else:
+        logging.error("Can't find a PID file for {0}, please check why it does not exist.".format(process_name))
+        return None
+
+def get_process_metrics(pid, process_name):
+    '''
+    Scrape per-process metrics such as CPU, memory, uptime and status.
+    All of these could be read from the /proc/<pid>/stat file, but the third-party
+    library "psutil" is used here to collect the data instead.
+    @pid: PID obtained from the get_pid() function.
+    @return: a dict of all the metrics needed.
+    '''
+    process_metrics = {}
+    try:
+        process = psutil.Process(int(pid))
+        process_metrics = {
+            "create_time" : process.create_time(),
+            "io_counters" : process.io_counters()._asdict(),
+            "cpu_times" : process.cpu_times()._asdict(),
+            "cpu_percent" : process.cpu_percent(),
+            "memory_info" : process.memory_full_info()._asdict(),
+            "num_fds" : process.num_fds(),
+            "num_threads" : process.num_threads(),
+            "open_files" : len(process.open_files())
+        }
+    except Exception:
+        logging.error("Failed to read metrics for process {0}, please check!
".format(process_name)) + pass + return process_metrics + + +def process_state(pid): + # Linux + state = 0 + output = Popen(['ps aux | grep -i "' + pid + '" | grep -v grep'], + stdout=PIPE, + shell=True) + result = output.stdout.readlines() + if len(result) >= 1: + state = 1 + elif len(result) == 0: + logging.error('process {0} down, please check!'.format(pid)) + state = 0 + else: + state = 0 + print 'state = %s' % (state) + return float(state) + + +def _parse_stat_file(pid): + """Parse /proc/{pid}/stat file. Return a list of fields where + process name is in position 0. + Using "man proc" as a reference: where "man proc" refers to + position N, always substract 2 (e.g starttime pos 22 in + 'man proc' == pos 20 in the list returned here). + The return value is cached in case oneshot() ctx manager is + in use. + """ + with open("/proc/{0}/stat".format(pid), "rb") as f: + data = f.read() + # Process name is between parentheses. It can contain spaces and + # other parentheses. This is taken into account by looking for + # the first occurrence of "(" and the last occurence of ")". + rpar = data.rfind(b')') + name = data[data.find(b'(') + 1:rpar] + others = data[rpar + 2:].split() + return [name] + others + +def parse_args(): + parser = argparse.ArgumentParser( + description='process status exporter args, including address and port' + ) + parser.add_argument( + '--telemetry-path', + metavar='telemetry_path', + required=False, + help='Path under which to expose metrics. (default "/metrics")', + default='/metrics' + ) + parser.add_argument( + '--address', + metavar='address', + required=False, + help='Running on this address. (default "127.0.0.1")', + default='127.0.0.1' + + ) + parser.add_argument( + '-p', '--port', + metavar='port', + required=False, + type=int, + help='Listen to this port. (default ":9108")', + default=int(os.environ.get('VIRTUAL_PORT', '9108')) + ) + return parser.parse_args() + + +def main(): + try: + args = parse_args() + port = int(args.port) + REGISTRY.register(ProcessStatusCollector()) + start_http_server(port) + print "Polling %s. Serving at port: %s" % (args.address, port) + while True: + time.sleep(5) + + except KeyboardInterrupt: + print "Interrupted" + sys.exit(0) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/metrics/prometheus_metrics.py b/metrics/prometheus_metrics.py new file mode 100644 index 0000000..acbfbcb --- /dev/null +++ b/metrics/prometheus_metrics.py @@ -0,0 +1,218 @@ +#!/usr/bin/python +#-*- coding:utf-8 -*- +import os, sys +import re + +import requests +import argparse +import logging +import json + +import monitor_params +from time import time +import utils +sys.path.append("..") +from myapp.parse import ParseUtil +from time import time + + +''' + Scrape prometheus metrics from Prometheus. +''' + +logging.basicConfig(level=logging.INFO, + format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', + datefmt='%a, %d %b %Y %H:%M:%S') +import sys +logger = logging.getLogger(sys.path[0] + 'nginx_metrics') + +class PrometheusMetrics(object): + + def __init__(self): + pass + + + def ip_list(self): + ''' + return tomcat_ip list + ''' + ip_list = [] + try: + list = re.split(r'[,\s]\s*', monitor_params.prometheus_ip.strip()) + except: + logging.error("Can't split prometheus_ip. Check the prometheus_ip in monitor_params.py.") + sys.exit(1) + else: + ip_list = list + + print ip_list + return ip_list + + def instance_info(self): + ''' + @return prometheus instance. 
+ ''' + instance_list = utils.get_instances(monitor_params.prometheus_ip, monitor_params.prometheus_port) + return instance_list + + def prometheus_process_instance(self): + ''' + @return a list of prometheus process instances. + ''' + process_instance = utils.get_instances(monitor_params.prometheus_ip, monitor_params.process_exporter_port) + return process_instance + + def prometheus_cluster_state(self): + ''' + @return prometheus cluster state and numbers of healthy nodes. + ''' + process_instances = self.prometheus_process_instance() + state = 0.0 + success_count = 0.0 + + for i in range(len(process_instances)): + prometheus_up = self.prometheus_node_state(process_instances[i]) + if prometheus_up: + success_count +=1 + else: + continue + if success_count >= 1: + state = 1.0 + logging.debug("prometheus state is %s" % (state)) + return [state, success_count] + + def prometheus_node_state(self, process_instance): + ''' + @return a float value 1 or 0, indicating the node state up or down. + ''' + state = {} + url = utils.prometheus_url() + param = { + "query": 'prometheus_process_up{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + state.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if state.has_key(process_instance): + print float(state[process_instance]) + return float(state[process_instance]) + else: + logging.error("No instance in the prometheus cluster, prometheus node {0} down.".format(process_instance)) + return 0.0 + + + def prometheus_cpu_usage(self, process_instance): + ''' + @return process_instance cpu usage. + ''' + cpu_usage = {} + url = utils.prometheus_url() + param = { + "query": 'prometheus_cpu_percentage{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + cpu_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if cpu_usage.has_key(process_instance): + print float(cpu_usage[process_instance]) + return float(cpu_usage[process_instance]) + else: + logging.error("No instance in the prometheus cluster, get prometheus node {0} cpu usage failed.".format(process_instance)) + return None + + def prometheus_uptime(self, process_instance): + ''' + @return process_instance create time. + ''' + uptime = {} + url = utils.prometheus_url() + param = { + "query": 'prometheus_running_time_seconds_total{{instance="{0}"}}'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + uptime.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if uptime.has_key(process_instance): + print float(uptime[process_instance]) + return float(uptime[process_instance]) + else: + logging.error("No instance in the prometheus cluster, get prometheus node {0} uptime failed.".format(process_instance)) + return None + + def prometheus_mem_usage(self, process_instance): + ''' + @return process_instance memory usage. 
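+        Note that this queries Prometheus about its own process, summing the
+        rss, vms and shared modes of prometheus_memory_usage_bytes_total as
+        scraped from process_status_exporter.py.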
+ ''' + mem_usage = {} + url = utils.prometheus_url() + param = { + "query": 'sum by (instance)(prometheus_memory_usage_bytes_total{{instance="{0}", mode=~"rss|vms|shared"}})'.format(process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + mem_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if mem_usage.has_key(process_instance): + print float(mem_usage[process_instance]) + return float(mem_usage[process_instance]) + else: + logging.error("No instance in the prometheus cluster, get prometheus node {0} memory usage failed.".format(process_instance)) + return None + + + def prometheus_cluster_list(self): + process_instances = self.prometheus_process_instance() + grafana_instances = self.grafana_instance() + uptime = time() + for i in range(len(process_instances)): + state = self.prometheus_node_state(process_instances[i]) + if state: + uptime = self.prometheus_uptime(process_instances[i]) + break + else: + continue + + node_info = [] + for i in range(len(process_instances)): + node_info.append(self.prometheus_node_detail(process_instances[i], grafana_instances[i])) + + cluster_info = { + "prometheus_cluster_state" : self.prometheus_cluster_state()[0], + "prometheus_total_nodes" : float(len(self.ip_list())), + "prometheus_healthy_nodes" : self.prometheus_cluster_state()[1], + "prometheus_uptime" : time() - uptime, + "prometheus_nodes_info": node_info + } + return cluster_info + + def grafana_instance(self): + instance_list = utils.get_instances(monitor_params.grafana_outside_ip, monitor_params.grafana_port) + return instance_list + + def prometheus_node_detail(self, process_instance, grafana_instance): + if not self.prometheus_node_state(process_instance): + node_info = { + "prometheus_node_state" : 0.0, + "prometheus_uptime" : 0.0, + "prometheus_cpu_usage" : 0.0, + "prometheus_mem_usage" : 0.0, + "prometheus_url" : None + } + else: + node_info = { + "prometheus_node_state" : self.prometheus_node_state(process_instance), + "prometheus_uptime" : time() - self.prometheus_uptime(process_instance), + "prometheus_cpu_usage" : self.prometheus_cpu_usage(process_instance), + "prometheus_mem_usage" : self.prometheus_mem_usage(process_instance), + "prometheus_url" : 'http://{0}/dashboard/db/prometheus-stats?orgId=1'.format(grafana_instance) + } + return node_info + + +def main(): + prometheus = PrometheusMetrics() + prometheus.prometheus_cluster_list() + + +if __name__ == '__main__': + main() + diff --git a/metrics/tomcat_metrics.py b/metrics/tomcat_metrics.py new file mode 100644 index 0000000..c6c82d8 --- /dev/null +++ b/metrics/tomcat_metrics.py @@ -0,0 +1,232 @@ +#!/usr/bin/python +#-*- coding:utf-8 -*- +import os, sys +import re +import time +import requests +import argparse +import logging +import json + +import monitor_params +import utils +sys.path.append("..") +from myapp.parse import ParseUtil +from time import time + + +''' + Scrape tomcat metrics from tomcat_exporter. +''' + +logging.basicConfig(level=logging.INFO, + format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', + datefmt='%a, %d %b %Y %H:%M:%S') +import sys +logger = logging.getLogger(sys.path[0] + 'tomcat_metrics') + +class TomcatMetrics(object): + + def __init__(self): + pass + + + def ip_list(self): + ''' + return tomcat_ip list + ''' + ip_list = [] + try: + list = re.split(r'[,\s]\s*', monitor_params.tomcat_ip.strip()) + except: + logging.error("Can't split tomcat_ip. 
Check the tomcat_ip in monitor_params.py.") + sys.exit(1) + else: + ip_list = list + return ip_list + + def master_instance(self): + ''' + @return tomcat instance, include tomcat_ip:master_port + ''' + master_instance = utils.get_instances(monitor_params.tomcat_ip, monitor_params.tomcat_master_port) + return master_instance + + def tenant_instance(self): + ''' + @return tomcat instance, include tomcat_ip:tenant_port + ''' + tenant_instance = utils.get_instances(monitor_params.tomcat_ip, monitor_params.tomcat_tenant_port) + return tenant_instance + + def tomcat_process_instance(self): + ''' + @return list of tomcat process instances. + ''' + process_instances = utils.get_instances(monitor_params.tomcat_ip, monitor_params.process_exporter_port) + return process_instances + + def tomcat_node_state(self, role, process_instance): + ''' + @return a float value 1 or 0, indicating the node state up or down. + ''' + state = {} + url = utils.prometheus_url() + param = { + "query": 'tomcat_{0}_process_up{{instance="{1}"}}'.format(role, process_instance) + } + response = ParseUtil.request_metrics(url, param) + for i in range(len(response)): + state.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if state.has_key(process_instance): + print float(state[process_instance]) + return float(state[process_instance]) + else: + logging.error("No instance in the tomcat cluster, tomcat {0} node {1} down.".format(role, process_instance)) + return 0.0 + + def tomcat_cluster_state(self): + ''' + @return tomcat cluster state, and the numbers of healthy nodes. + ''' + master_instances = self.master_instance() + tenant_instances = self.tenant_instance() + process_instances = self.tomcat_process_instance() + state = 0.0 + success_count = 0.0 + master_count = 0.0 + tenant_count = 0.0 + + for i in range(len(process_instances)): + master_up = self.tomcat_node_state("master", process_instances[i]) + tenant_up = self.tomcat_node_state("tenant", process_instances[i]) + if master_up: + master_count += 1 + if tenant_up: + tenant_count += 1 + success_count = master_count + tenant_count + if master_count >= 1 and tenant_count >=1: + state = 1.0 + logging.info("tomcat state is %s" % (state)) + return [state, success_count] + + def tomcat_cpu_usage(self, role, process_instance): + ''' + @return a float value 1 or 0, indicating the node state up or down. + ''' + cpu_usage = {} + url = utils.prometheus_url() + param = { + "query": 'tomcat_{0}_cpu_percentage{{instance="{1}"}}'.format(role, process_instance) + } + response = ParseUtil.request_metrics(url, param) + # pprint(response) + for i in range(len(response)): + cpu_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if cpu_usage.has_key(process_instance): + print float(cpu_usage[process_instance]) + return float(cpu_usage[process_instance]) + else: + logging.error("No instance in the tomcat cluster, get tomcat {0} node {1} cpu usage failed.".format(role, process_instance)) + return None + + def tomcat_uptime(self, role, process_instance): + ''' + @return a float value 1 or 0, indicating the node state up or down. 
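+        (Strictly speaking this returns the value of
+        tomcat_{role}_running_time_seconds_total, i.e. the process create time
+        in seconds since the epoch; tomcat_cluster_list derives the uptime as
+        time() - create_time.)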
+ ''' + uptime = {} + url = utils.prometheus_url() + param = { + "query": 'tomcat_{0}_running_time_seconds_total{{instance="{1}"}}'.format(role, process_instance) + } + response = ParseUtil.request_metrics(url, param) + # pprint(response) + for i in range(len(response)): + uptime.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if uptime.has_key(process_instance): + print float(uptime[process_instance]) + return float(uptime[process_instance]) + else: + logging.error("No instance in the tomcat cluster, get tomcat {0} node {1} uptime failed.".format(role, process_instance)) + return None + + def tomcat_mem_usage(self, role, process_instance): + ''' + @return a float value 1 or 0, indicating the node state up or down. + ''' + mem_usage = {} + url = utils.prometheus_url() + param = { + "query": 'sum by (instance)(tomcat_{0}_memory_usage_bytes_total{{instance="{1}", mode=~"rss|vms|shared"}})'.format(role, process_instance) + } + response = ParseUtil.request_metrics(url, param) + # pprint(response) + for i in range(len(response)): + mem_usage.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if mem_usage.has_key(process_instance): + print float(mem_usage[process_instance]) + return float(mem_usage[process_instance]) + else: + logging.error("No instance in the tomcat cluster, get tomcat {0} node {1} memory usage failed.".format(role, process_instance)) + return None + + def tomcat_cluster_list(self): + master_instances = self.master_instance() + tenant_instances = self.tenant_instance() + process_instances = self.tomcat_process_instance() + ip_list = self.ip_list() + uptime = time() + for i in range(len(process_instances)): + master_state = self.tomcat_node_state("master", process_instances[i]) + tenant_state = self.tomcat_node_state("tenant", process_instances[i]) + if master_state: + uptime = self.tomcat_uptime("master", process_instances[i]) + break + elif tenant_state: + uptime = self.tomcat_uptime("tenant", process_instances[i]) + break + else: + continue + + master_info = [] + tenant_info = [] + for i in range(len(master_instances)): + master_info.append(self.tomcat_node_detail("master", process_instances[i], master_instances[i])) + tenant_info.append(self.tomcat_node_detail("tenant", process_instances[i], tenant_instances[i])) + + cluster_info = { + "tomcat_cluster_state" : self.tomcat_cluster_state()[0], + "tomcat_total_nodes" : float(sum([len(master_instances), len(tenant_instances)])), + "tomcat_healthy_nodes" : self.tomcat_cluster_state()[1], + "tomcat_uptime" : time() - uptime, + "tomcat_master_info": master_info, + "tomcat_tenant_info": tenant_info + } + return cluster_info + + def tomcat_node_detail(self, role, process_instance, role_instance): + if not self.tomcat_node_state(role, process_instance): + node_info = { + "tomcat_{0}_state".format(role) : 0.0, + "tomcat_{0}_uptime".format(role) : 0.0, + "tomcat_{0}_cpu_usage".format(role) : 0.0, + "tomcat_{0}_mem_usage".format(role) : 0.0, + "tomcat_{0}_url".format(role) : None + } + else: + node_info = { + "tomcat_{0}_node_state".format(role) : self.tomcat_node_state(role, process_instance), + "tomcat_{0}_uptime".format(role) : time() - self.tomcat_uptime(role, process_instance), + "tomcat_{0}_cpu_usage".format(role) : self.tomcat_cpu_usage(role, process_instance), + "tomcat_{0}_mem_usage".format(role) : self.tomcat_mem_usage(role, process_instance), + "tomcat_{0}_url".format(role) : 
'http://{0}/dashboard/db/tomcat-dashboard-for-prometheus?orgId=1&var-instance={1}'.format(utils.grafana_url(), role_instance) + } + return node_info + +def main(): + tomcat = TomcatMetrics() + tomcat.tomcat_cluster_list() + +if __name__ == '__main__': + main() + diff --git a/metrics/utils.py b/metrics/utils.py new file mode 100644 index 0000000..f21ad57 --- /dev/null +++ b/metrics/utils.py @@ -0,0 +1,93 @@ +#!/usr/bin/python +#-*- coding:utf-8 -*- + +import re, os +import monitor_params +import logging +import requests + +logging.basicConfig(level=logging.INFO, + format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', + datefmt='%a, %d %b %Y %H:%M:%S') +import sys +logger = logging.getLogger(sys.path[0] + 'utils') + +def get_instances(ip, port): + ip_list = [] + instances = [] + if r',' in ip.strip(): + try: + list = re.split(r'[,\s]\s*', ip.strip()) + print list + except: + logging.error("Can't split ip: {0}. Check the ip {0} in monitor_params.py.".format(ip)) + sys.exit(1) + else: + ip_list = list + else: + ip_list.append(ip.strip()) + + for i in range(len(ip_list)): + url = ip_list[i] + ":" + port + instances.append(url) + + print instances + return instances + +def prometheus_url(): + instances = get_instances(monitor_params.prometheus_ip, monitor_params.prometheus_port) + success = 0 + for i in range(len(instances)): + url = 'http://{0}/api/v1/query'.format(instances[i]) + param = { + "query": 'prometheus_build_info{{instance="{0}"}}'.format(instances[i]) + } + logging.info("start GET %s?%s", url, param) + try: + response = requests.get(url, params=param) + response.raise_for_status() + except requests.exceptions.ConnectionError: + logging.error("GET %s?%s failed! Connection Error.", url, param) + continue + except requests.RequestException as e: + logging.error(e) + continue + else: + logging.info("GET /api/v1/query?%s ok! Response code is = %s", param, response.status_code) + success += 1 + return url + if not success: + logging.error("No prometheus url available, please check prometheus.") + sys.exit(1) + + +def grafana_url(): + prom_url = prometheus_url() + instances = get_instances(monitor_params.grafana_ip, monitor_params.grafana_port) + outside_instances = get_instances(monitor_params.grafana_outside_ip, monitor_params.grafana_port) + success = 0 + for i in range(len(instances)): + param = { + "query": 'grafana_info{{instance="{0}"}}'.format(instances[i]) + } + logging.info("start GET %s?%s", prom_url, param) + response = requests.get(prom_url, params=param) + if response.status_code != requests.codes.ok: + logging.error("GET %s?%s failed! The error code is: %s", prom_url, param, response.status_code) + continue + else: + logging.info("GET /api/v1/query?%s ok! Response code is = %s", param, response.status_code) + success += 1 + return outside_instances[i] + + if not success: + logging.error("No grafana url available, please check! ") + sys.exit(1) + + +def main(): + print prometheus_url() + print grafana_url() + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/myapp/__init__.py b/myapp/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/myapp/admin.py b/myapp/admin.py new file mode 100644 index 0000000..8c38f3f --- /dev/null +++ b/myapp/admin.py @@ -0,0 +1,3 @@ +from django.contrib import admin + +# Register your models here. 
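+
+# A minimal sketch (an illustrative assumption, not part of the original
+# commit) of how the Hosts and Instances models defined in myapp/models.py
+# could be registered here:
+#
+#   from .models import Hosts, Instances
+#   admin.site.register(Hosts)
+#   admin.site.register(Instances)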
diff --git a/myapp/apps.py b/myapp/apps.py
new file mode 100644
index 0000000..74d6d13
--- /dev/null
+++ b/myapp/apps.py
@@ -0,0 +1,5 @@
+from django.apps import AppConfig
+
+
+class MyappConfig(AppConfig):
+    name = 'myapp'
diff --git a/myapp/models.py b/myapp/models.py
new file mode 100644
index 0000000..5a60d5e
--- /dev/null
+++ b/myapp/models.py
@@ -0,0 +1,42 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from django.db import models
+from django.contrib.auth.models import User
+import json
+
+
+class Instances(models.Model):
+    '''
+    The Instances model, in a many-to-one relationship with the Hosts model.
+    It stores only the IP and PORT (as a single "instance" string), which the
+    Hosts model references through a foreign key.
+    '''
+    instance = models.CharField(max_length=25)
+
+    class Meta:
+        ordering = ['instance']
+
+    def save(self, *args, **kwargs):
+        super(Instances, self).save(*args, **kwargs)
+
+class Hosts(models.Model):
+    '''
+    The Hosts model, with fields such as href, hosts, cpusage, memusage and
+    diskusage; Prometheus metrics data is fetched according to the url and
+    hosts information.
+    '''
+    href = models.URLField()
+    hosts = models.ForeignKey(Instances, related_name='hosts', on_delete=models.CASCADE)
+
+    cpusage = models.TextField()
+    memusage = models.TextField()
+    diskusage = models.TextField()
+    load = models.TextField()
+
+    class Meta:
+        ordering = ['hosts']
+
+    def save(self, *args, **kwargs):
+        super(Hosts, self).save(*args, **kwargs)
+
+
diff --git a/myapp/params.py b/myapp/params.py
new file mode 100644
index 0000000..8ae5474
--- /dev/null
+++ b/myapp/params.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python2
+#-*- coding: utf-8 -*-
+
+# define the consul cluster ip and port, both as strings.
+# consul_ip can be set to several values, separated by ",".
+consul_ip = "172.16.1.10, 172.16.1.14, 172.16.1.41"
+consul_port = "8500"
+
+rest_api_url = "http://10.10.6.214:19030"
+prometheus_url = "http://10.10.6.214:9500"
diff --git a/myapp/parse.py b/myapp/parse.py
new file mode 100644
index 0000000..41dd5f5
--- /dev/null
+++ b/myapp/parse.py
@@ -0,0 +1,237 @@
+#!/usr/bin/env python2
+#-*- coding: utf-8 -*-
+
+import requests
+import json
+import re
+import logging
+import time
+from pprint import pprint
+# from util import request_util, dic_to_str, cpu_cores
+# from . import params
+import params
+import base64
+
+
+# LOG_FILE = 'D:\\Python\\Python_workspace\\log\\parse_json.log'
+# logging.basicConfig(filename=LOG_FILE, level=logging.DEBUG)
+
+# filename=cfg.get('log', 'log_path'),
+logging.basicConfig(level=logging.INFO,
+                format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
+                datefmt='%a, %d %b %Y %H:%M:%S')
+import sys
+logger = logging.getLogger(sys.path[0] + 'parse')
+
+class ParseUtil(object):
+
+    @staticmethod
+    def request_metrics(url, param):
+        '''
+        Fetch Prometheus metrics data with the requests library, according to
+        the given parameters.
+        :param url: the URL to scrape data from.
+        :param param: query parameters; url + param form the full request URL.
+        :return: a list of JSON results.
+        '''
+        data = []
+
+        logging.info("start GET %s?%s", url, param)
+        response = requests.get(url, params=param)
+        if response.status_code != requests.codes.ok:
+            logging.error("GET %s?%s failed! The error code is: %s", url, param, response.status_code)
+            return []
+        logging.info("GET /api/v1/query?%s ok! 
Response code is = %s", param, response.status_code) + results = response.json() + logger.debug('The results of getting query are: %s', results) + data = results['data']['result'] + + return data + + + +class ScrapeMetrics(object): + def __init__(self, url, timestamp=time.time(), timeout='2m', **kwargs): + self._url = url + '/api/v1/query' + self._timestamp = timestamp + self._timeout = timeout + + def instances_info(self): + ''' + 去 consul 中获取 instance 的 ip 和 端口,写到一个 json 中, + :return: a list of strings, like [ 'ip:port', 'ip:port', ... ] + ''' + + ip_list = re.split(r'[,\s]\s*', params.consul_ip) + consul_port = params.consul_port + instances = [] + + for i in range(len(ip_list)): + url = 'http://{0}:{1}/v1/catalog/node/node_exporter'.format(ip_list[i], consul_port) + logging.info("start GET %s", url) + try: + response = requests.get(url) + response.raise_for_status() + except requests.RequestException as e: + logging.error(e) + continue + else: + result = response.json() + pprint(result) + for value in result['Services'].values(): + instance = value['Address'] + ':' + str(value['Port']) + instances.append(instance) + break + pprint(instances) + return instances + + def cpu_used_ratio(self, instance): + ''' + calculate the percentage of CPU used, by subtracting the idle usage from 100%: + 100 - (avg by (instance) (irate(node_cpu{job="node",mode="idle"}[5m])) * 100) + ''' + cpus_used_info = {} + param = { + "query": '100-(avg by (instance)(irate(node_cpu{{instance="{0}",mode="idle"}}[5m])) * 100)'.format(instance) + } + response = ParseUtil.request_metrics(self._url, param) + for i in range(len(response)): + cpus_used_info.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + pprint(cpus_used_info) + if cpus_used_info.has_key(instance): + return cpus_used_info[instance] + else: + return [] + + def mem_used_ratio(self, instance): + mem_used_info = {} + param = { + "query": '(node_memory_MemTotal{{instance="{0}"}} - node_memory_MemFree{{instance="{0}"}} - node_memory_Cached{{instance="{0}"}}) / (node_memory_MemTotal{{instance="{0}"}}) * 100'.format(instance) + } + response = ParseUtil.request_metrics(self._url, param) + for i in range(len(response)): + mem_used_info.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if mem_used_info.has_key(instance): + return mem_used_info[instance] + else: + return [] + # pprint(response) + + def disk_used_ratio(self, instance): + disk_used_info = {} + param = { + "query": '100 - ((node_filesystem_avail{{instance="{0}",mountpoint="/",fstype!="rootfs"}} * 100) / node_filesystem_size{{instance="{0}",mountpoint="/",fstype!="rootfs"}})'.format(instance) + } + response = ParseUtil.request_metrics(self._url, param) + for i in range(len(response)): + disk_used_info.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + if disk_used_info.has_key(instance): + return disk_used_info[instance] + else: + return [] + + + def host_detail_metrics(self, instance): + host_info = { + "href": "{0}/api/v1/hosts/{1}/".format(params.rest_api_url, instance), + "instance": instance, + "cpu_used_ratio": self.cpu_used_ratio(instance), + "disk_used_ratio": self.disk_used_ratio(instance), + "load": self.host_loads_metrics(instance), + "memory_used_ratio": self.mem_used_ratio(instance) + } + consul_kv_info = self.single_consul_metrics(instance) + single_host_info = dict(host_info, **consul_kv_info) + return single_host_info + + def hosts_list_metrics(self): + hosts_list_info = [] + instance = self.instances_info() + for i in 
range(len(instance)): + hosts_list_info.append(self.host_detail_metrics(instance[i])) + return hosts_list_info + + def host_loads_metrics(self, instance): + host_loads_info = {} + tmp = {} + param1 = { + "query": 'node_load1{{instance="{0}"}}'.format(instance) + } + response1 = ParseUtil.request_metrics(self._url, param1) + if len(response1): + tmp.setdefault("load1",response1[0]['value'][1]) + else: + tmp.setdefault("load1",[]) + + param5 = { + "query": 'node_load5{{instance="{0}"}}'.format(instance) + } + response5 = ParseUtil.request_metrics(self._url, param5) + if len(response5): + tmp.setdefault("load5",response5[0]['value'][1]) + else: + tmp.setdefault("load5",[]) + + param15 = { + "query": 'node_load15{{instance="{0}"}}'.format(instance) + } + response15 = ParseUtil.request_metrics(self._url, param15) + if len(response15): + tmp.setdefault("load15",response15[0]['value'][1]) + else: + tmp.setdefault("load15",[]) + + host_loads_info.setdefault(instance, tmp) + pprint(host_loads_info) + if host_loads_info.has_key(instance): + return host_loads_info[instance] + else: + return [] + + def scrape_consul_metrics(self): + ''' + http://consul_ip:8500/v1/kv/hosts?recurse + :return: + ''' + ip_list = re.split(r'[,\s]\s*', params.consul_ip) + consul_port = params.consul_port + consul_info = [] + + for i in range(len(ip_list)): + url = 'http://{0}:{1}/v1/kv/hosts?recurse'.format(ip_list[i], consul_port) + logging.info("start GET %s", url) + try: + response = requests.get(url) + response.raise_for_status() + except requests.RequestException as e: + logging.error(e) + continue + else: + result = response.json() + for i in range(len(result)): + consul_info.append(json.loads(base64.b64decode(result[i]['Value']).decode())) + break + logging.debug(consul_info) + return consul_info + + def single_consul_metrics(self, instance): + ip = instance.split(":")[0] + logging.debug('ip = %s', ip) + consul_metrics = {} + + consul_kv_metrics = self.scrape_consul_metrics() + for i in range(len(consul_kv_metrics)): + if consul_kv_metrics[i]['ip'] == ip: + consul_metrics = consul_kv_metrics[i] + logging.debug(consul_metrics) + return consul_metrics + + +def main(): + url = params.prometheus_url + + h = ScrapeMetrics(url, timestamp=time.time(), timeout='2m') + h.scrape_consul_metrics() + + +if __name__ == '__main__': + main() diff --git a/myapp/parse.py.bak20180110 b/myapp/parse.py.bak20180110 new file mode 100644 index 0000000..5f26563 --- /dev/null +++ b/myapp/parse.py.bak20180110 @@ -0,0 +1,214 @@ +#!/usr/bin/env python2 +#-*- coding: utf-8 -*- + +import requests +import json +import re +import logging +import time +from pprint import pprint +# from util import request_util, dic_to_str, cpu_cores +from . 
import params +import base64 + + +# LOG_FILE = 'D:\\Python\\Python_workspace\\log\\parse_json.log' +# logging.basicConfig(filename=LOG_FILE, level=logging.DEBUG) + +# filename=cfg.get('log', 'log_path'), +logging.basicConfig(level=logging.INFO, + format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', + datefmt='%a, %d %b %Y %H:%M:%S') +import sys +logger = logging.getLogger(sys.path[0] + 'parse') + +class ParseUtil(object): + + @staticmethod + def request_metrics(url, param): + ''' + 根据传入的参数,用 requests 库获取 Prometheus 的 metrics 数据 + :param url: 从该 url 抓取数据 + :param param: url + param 拼接成完整的 url,获取数据 + :return: 一组 json 数据 + ''' + data = [] + + logging.info("start GET %s?%s", url, param) + response = requests.get(url, params=param) + if response.status_code != requests.codes.ok: + logging.error("GET %s?%s failed! The error code is: %s", url, param, response.status_code) + return [] + logging.info("GET /api/v1/query?%s ok! Response code is = %s", param, response.status_code) + results = response.json() + logger.debug('The results of getting query are: %s', results) + data = results['data']['result'] + + return data + + + +class ScrapeMetrics(object): + def __init__(self, url, timestamp=time.time(), timeout='2m', **kwargs): + self._url = url + '/api/v1/query' + self._timestamp = timestamp + self._timeout = timeout + + def instances_info(self): + ''' + 去 consul 中获取 instance 的 ip 和 端口,写到一个 json 中, + :return: a list of strings, like [ 'ip:port', 'ip:port', ... ] + ''' + + ip_list = re.split(r'[,\s]\s*', params.consul_ip) + consul_port = params.consul_port + instances = [] + + for i in range(len(ip_list)): + url = 'http://{0}:{1}/v1/catalog/node/node_exporter'.format(ip_list[i], consul_port) + logging.info("start GET %s", url) + try: + response = requests.get(url) + response.raise_for_status() + except requests.RequestException as e: + logging.error(e) + continue + else: + result = response.json() + pprint(result) + for value in result['Services'].values(): + instance = value['Address'] + ':' + str(value['Port']) + instances.append(instance) + break + pprint(instances) + return instances + + def cpu_used_ratio(self, instance): + ''' + calculate the percentage of CPU used, by subtracting the idle usage from 100%: + 100 - (avg by (instance) (irate(node_cpu{job="node",mode="idle"}[5m])) * 100) + ''' + cpus_used_info = {} + param = { + "query": '100-(avg by (instance)(irate(node_cpu{{instance="{0}",mode="idle"}}[5m])) * 100)'.format(instance) + } + response = ParseUtil.request_metrics(self._url, param) + for i in range(len(response)): + cpus_used_info.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + pprint(cpus_used_info) + return cpus_used_info + + def mem_used_ratio(self, instance): + mem_used_info = {} + param = { + "query": '(node_memory_MemTotal{{instance="{0}"}} - node_memory_MemFree{{instance="{0}"}} - node_memory_Cached{{instance="{0}"}}) / (node_memory_MemTotal{{instance="{0}"}}) * 100'.format(instance) + } + response = ParseUtil.request_metrics(self._url, param) + for i in range(len(response)): + mem_used_info.setdefault(response[i]['metric']['instance'], response[i]['value'][1]) + return mem_used_info + # pprint(response) + + def disk_used_ratio(self, instance): + disk_used_info = {} + param = { + "query": '100 - ((node_filesystem_avail{{instance="{0}",mountpoint="/",fstype!="rootfs"}} * 100) / node_filesystem_size{{instance="{0}",mountpoint="/",fstype!="rootfs"}})'.format(instance) + } + response = ParseUtil.request_metrics(self._url, param) + 
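        # NOTE (backup copy): unlike the current parse.py, this version indexes the result dicts directly, without the has_key() guards added later.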
for i in range(len(response)):
+            disk_used_info.setdefault(response[i]['metric']['instance'], response[i]['value'][1])
+        return disk_used_info
+
+
+    def host_detail_metrics(self, instance):
+        host_info = {
+            "href": "{0}/api/v1/hosts/{1}/".format(params.rest_api_url, instance),
+            "instance": instance,
+            "cpu_used_ratio": self.cpu_used_ratio(instance)[instance],
+            "disk_used_ratio": self.disk_used_ratio(instance)[instance],
+            "load": self.host_loads_metrics(instance)[instance],
+            "memory_used_ratio": self.mem_used_ratio(instance)[instance]
+        }
+        consul_kv_info = self.single_consul_metrics(instance)
+        single_host_info = dict(host_info, **consul_kv_info)
+        return single_host_info
+
+    def hosts_list_metrics(self):
+        hosts_list_info = []
+        instance = self.instances_info()
+        for i in range(len(instance)):
+            hosts_list_info.append(self.host_detail_metrics(instance[i]))
+        return hosts_list_info
+
+    def host_loads_metrics(self, instance):
+        host_loads_info = {}
+        tmp = {}
+        param1 = {
+            "query": 'node_load1{{instance="{0}"}}'.format(instance)
+        }
+        response1 = ParseUtil.request_metrics(self._url, param1)
+        tmp.setdefault("load1",response1[0]['value'][1])
+
+        param5 = {
+            "query": 'node_load5{{instance="{0}"}}'.format(instance)
+        }
+        response5 = ParseUtil.request_metrics(self._url, param5)
+        tmp.setdefault("load5",response5[0]['value'][1])
+
+        param15 = {
+            "query": 'node_load15{{instance="{0}"}}'.format(instance)
+        }
+        response15 = ParseUtil.request_metrics(self._url, param15)
+        tmp.setdefault("load15",response15[0]['value'][1])
+        host_loads_info.setdefault(instance, tmp)
+        pprint(host_loads_info)
+        return host_loads_info
+
+    def scrape_consul_metrics(self):
+        '''
+        http://consul_ip:8500/v1/kv/hosts?recurse
+        :return:
+        '''
+        ip_list = re.split(r'[,\s]\s*', params.consul_ip)
+        consul_port = params.consul_port
+        consul_info = []
+
+        for i in range(len(ip_list)):
+            url = 'http://{0}:{1}/v1/kv/hosts?recurse'.format(ip_list[i], consul_port)
+            logging.info("start GET %s", url)
+            try:
+                response = requests.get(url)
+                response.raise_for_status()
+            except requests.RequestException as e:
+                logging.error(e)
+                continue
+            else:
+                result = response.json()
+                for i in range(len(result)):
+                    consul_info.append(json.loads(base64.b64decode(result[i]['Value']).decode()))
+                break
+        logging.debug(consul_info)
+        return consul_info
+
+    def single_consul_metrics(self, instance):
+        ip = instance.split(":")[0]
+        logging.debug('ip = %s', ip)
+        consul_metrics = {}
+
+        consul_kv_metrics = self.scrape_consul_metrics()
+        for i in range(len(consul_kv_metrics)):
+            if consul_kv_metrics[i]['ip'] == ip:
+                consul_metrics = consul_kv_metrics[i]
+        logging.debug(consul_metrics)
+        return consul_metrics
+
+
+def main():
+    url = cfg.get('basic', 'prometheus_url')
+
+    h = ScrapeMetrics(url, timestamp=time.time(), timeout='2m')
+    h.scrape_consul_metrics()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/myapp/serializers.py b/myapp/serializers.py
new file mode 100644
index 0000000..0eef354
--- /dev/null
+++ b/myapp/serializers.py
@@ -0,0 +1,13 @@
+from rest_framework import serializers
+from .models import Hosts, Instances
+
+
+class HostsSerializer(serializers.ModelSerializer):
+    class Meta:
+        model = Hosts
+        fields = '__all__'
+
+class InstancesSerializer(serializers.ModelSerializer):
+    class Meta:
+        model = Instances
+        fields = '__all__'
\ No newline at end of file
diff --git a/myapp/test_metrics.py b/myapp/test_metrics.py
new file mode 100644
index 0000000..7f747f4
--- /dev/null
+++ b/myapp/test_metrics.py
@@ -0,0 +1,204 @@
+#!/usr/bin/python +#-*- coding:utf-8 -*- + +''' +1、调整各个节点实际数据 +2、结合前端展示,分析进度 +3、有时间部署 grafana 剩余组件的 dashboard +''' +test=[ + { + "consul集群":{ + "集群状态":"健康", + "运行时长":157632021, + "节点个数":4.0 + } + }, + { + "consul节点1":{ + "节点状态":"健康", + "运行时长":157632021, + "cpu使用率":0.15, + "mem使用率":20 + } + }, + { + "consul节点2":{ + "节点状态":"健康", + "运行时长":157632021, + "cpu使用率":0.16, + "mem使用率":25 + } + }, + { + "consul节点3":{ + "节点状态":"异常", + "运行时长":0, + "cpu使用率":0, + "mem使用率":0 + } + }, + { + "consul节点4":{ + "节点状态":"健康", + "运行时长":152632021, + "cpu使用率":0.12, + "mem使用率":19 + } + }, + { + "nginx组件":{ + "组件状态":"健康", + "运行时长":1930284, + "节点个数":2.0 + } + }, + { + "nginx节点1":{ + "节点状态":"健康", + "运行时长":152632021, + "cpu使用率":0.18, + "mem使用率":11 + } + }, + { + "nginx节点2":{ + "节点状态":"健康", + "运行时长":112632021, + "cpu使用率":0.02, + "mem使用率":7.3 + } + }, + { + "tomcat组件":{ + "组件状态":"健康", + "运行时长":1930284, + "节点个数":3.0 + } + }, + { + "tomcat节点1":{ + "节点状态":"健康", + "运行时长":112632021, + "cpu使用率":0.02, + "mem使用率":7.3 + } + }, + { + "tomcat节点2":{ + "节点状态":"健康", + "运行时长":112632021, + "cpu使用率":0.02, + "mem使用率":7.3 + } + }, + { + "tomcat节点3":{ + "节点状态":"异常", + "运行时长":112632021, + "cpu使用率":0.02, + "mem使用率":7.3 + } + }, + { + "prometheus组件":{ + "服务状态":"健康", + "运行时长":12110235, + "节点个数":2.0 + } + }, + { + "prometheus节点1":{ + "节点状态":"健康", + "运行时长":112632021, + "cpu使用率":0.15, + "mem使用率":10 + } + }, + { + "prometheus节点2":{ + "节点状态":"异常", + "运行时长":112632021, + "cpu使用率":0.02, + "mem使用率":7.3 + } + }, + { + "grafana组件":{ + "服务状态":"健康", + "运行时长":12110235, + "节点个数":2.0 + } + }, + { + "grafana节点1":{ + "节点状态":"健康", + "运行时长":112632021, + "cpu使用率":0.15, + "mem使用率":10 + } + }, + { + "grafana节点2":{ + "节点状态":"异常", + "运行时长":112632021, + "cpu使用率":0.02, + "mem使用率":7.3 + } + }, + { + "mysql":{ + "节点状态":"健康", + "运行时长":112632021, + "cpu使用率":1.6, + "mem使用率":20 + } + }, + { + "knox":{ + "节点状态":"健康", + "运行时长":112632021, + "cpu使用率":1.6, + "mem使用率":20 + } + }, + { + "keycloak":{ + "节点状态":"健康", + "运行时长":112632021, + "cpu使用率":1.6, + "mem使用率":20 + } + }, + { + "ambari-server":{ + "节点状态":"健康", + "运行时长":112632021, + "cpu使用率":1.6, + "mem使用率":20 + } + }, + { + "ambari-agent组件":{ + "服务状态":"健康", + "运行时长":112632021, + "节点个数":2.0 + } + }, + { + "ambari-agent节点1":{ + "节点状态":"健康", + "运行时长":112632021, + "cpu使用率":0.15, + "mem使用率":10 + } + }, + { + "ambari-agent节点2":{ + "节点状态":"异常", + "运行时长":112632021, + "cpu使用率":0.02, + "mem使用率":7.3 + } + } +] \ No newline at end of file diff --git a/myapp/tests.py b/myapp/tests.py new file mode 100644 index 0000000..7ce503c --- /dev/null +++ b/myapp/tests.py @@ -0,0 +1,3 @@ +from django.test import TestCase + +# Create your tests here. diff --git a/myapp/urls.py b/myapp/urls.py new file mode 100644 index 0000000..2ea5c8a --- /dev/null +++ b/myapp/urls.py @@ -0,0 +1,12 @@ +#!/usr/bin/python +#-*- coding:utf-8 -*- + +from django.conf.urls import url +from . 
+
+urlpatterns = [
+    url(r'^api/v1/test/$', views.test),
+    url(r'^api/v1/monitor/$', views.module_list),
+    url(r'^api/v1/hosts/$', views.hosts_list),
+    url(r'^api/v1/hosts/(?P<pk>(?:(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\.){3}(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d):\d{1,5})/$', views.host_detail),
+]
diff --git a/myapp/views.py b/myapp/views.py
new file mode 100644
index 0000000..a37256d
--- /dev/null
+++ b/myapp/views.py
@@ -0,0 +1,62 @@
+#!/usr/bin/python
+#-*- coding:utf-8 -*-
+
+from django.http import HttpResponse, JsonResponse
+from django.views.decorators.csrf import csrf_exempt
+from rest_framework.renderers import JSONRenderer
+from rest_framework.parsers import JSONParser
+from .models import Hosts, Instances
+from .serializers import HostsSerializer, InstancesSerializer
+from .parse import ParseUtil, ScrapeMetrics
+from configparser import ConfigParser
+import json
+from rest_framework.decorators import api_view
+from . import params
+
+url = params.prometheus_url
+print('url=', url)
+h = ScrapeMetrics(url)
+
+import sys
+sys.path.append('..')
+import metrics.monitor
+
+@api_view(['GET'])
+def hosts_list(request):
+    """
+    List all hosts.
+    """
+    if request.method == 'GET':
+        return JsonResponse(h.hosts_list_metrics(), safe=False)
+
+
+@api_view(['GET'])
+def host_detail(request, pk):
+    """
+    Retrieve a host by pk = instance ("ip:port").
+    """
+    if request.method == 'GET':
+        try:
+            detail = h.host_detail_metrics(pk)
+        except KeyError:
+            return JsonResponse('KeyError: key {0} not found'.format(pk), status=404, safe=False)
+        else:
+            return JsonResponse(detail, safe=False)
+
+@api_view(['GET'])
+def module_list(request):
+    """
+    List all module metrics.
+    """
+    if request.method == 'GET':
+        return JsonResponse(metrics.monitor.monitor_metrics(), safe=False)
+
+@api_view(['GET'])
+def test(request):
+    """
+    Return the static mock metrics used for front-end development.
+    """
+    from . import test_metrics
+    if request.method == 'GET':
+        return JsonResponse(test_metrics.test, safe=False)
diff --git a/rest/__init__.py b/rest/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/rest/settings.py b/rest/settings.py
new file mode 100644
index 0000000..82874ab
--- /dev/null
+++ b/rest/settings.py
@@ -0,0 +1,129 @@
+"""
+Django settings for rest project.
+
+Generated by 'django-admin startproject' using Django 1.11.7.
+
+For more information on this file, see
+https://docs.djangoproject.com/en/1.11/topics/settings/
+
+For the full list of settings and their values, see
+https://docs.djangoproject.com/en/1.11/ref/settings/
+"""
+
+import os
+
+# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+# Quick-start development settings - unsuitable for production
+# See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/
+
+# SECURITY WARNING: keep the secret key used in production secret!
+SECRET_KEY = 'i406f)l-jvv(tock&o^)sg$ul44*&_i@q33*5f5=bv*@-fws+5'
+
+# SECURITY WARNING: don't run with debug turned on in production!
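+# With DEBUG off, Django only serves requests whose Host header matches an
+# entry in ALLOWED_HOSTS, hence the wildcard below; tighten it for production.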
+DEBUG = False
+
+# ALLOWED_HOSTS = []
+ALLOWED_HOSTS = ['*']
+
+# Application definition
+
+INSTALLED_APPS = [
+    'django.contrib.admin',
+    'django.contrib.auth',
+    'django.contrib.contenttypes',
+    'django.contrib.sessions',
+    'django.contrib.messages',
+    'django.contrib.staticfiles',
+    'rest_framework',
+    'myapp.apps.MyappConfig',
+]
+
+REST_FRAMEWORK = {
+    'DEFAULT_PERMISSION_CLASSES': (
+        'rest_framework.permissions.AllowAny',
+    ),
+}
+
+MIDDLEWARE = [
+    'django.middleware.security.SecurityMiddleware',
+    'django.contrib.sessions.middleware.SessionMiddleware',
+    'django.middleware.common.CommonMiddleware',
+    'django.middleware.csrf.CsrfViewMiddleware',
+    'django.contrib.auth.middleware.AuthenticationMiddleware',
+    'django.contrib.messages.middleware.MessageMiddleware',
+    'django.middleware.clickjacking.XFrameOptionsMiddleware',
+]
+
+ROOT_URLCONF = 'rest.urls'
+
+TEMPLATES = [
+    {
+        'BACKEND': 'django.template.backends.django.DjangoTemplates',
+        'DIRS': [],
+        'APP_DIRS': True,
+        'OPTIONS': {
+            'context_processors': [
+                'django.template.context_processors.debug',
+                'django.template.context_processors.request',
+                'django.contrib.auth.context_processors.auth',
+                'django.contrib.messages.context_processors.messages',
+            ],
+        },
+    },
+]
+
+WSGI_APPLICATION = 'rest.wsgi.application'
+
+
+# Database
+# https://docs.djangoproject.com/en/1.11/ref/settings/#databases
+
+DATABASES = {
+    'default': {
+        'ENGINE': 'django.db.backends.sqlite3',
+        'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
+    }
+}
+
+
+# Password validation
+# https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators
+
+AUTH_PASSWORD_VALIDATORS = [
+    {
+        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
+    },
+    {
+        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
+    },
+    {
+        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
+    },
+    {
+        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
+    },
+]
+
+
+# Internationalization
+# https://docs.djangoproject.com/en/1.11/topics/i18n/
+
+LANGUAGE_CODE = 'en-us'
+
+TIME_ZONE = 'UTC'
+
+USE_I18N = True
+
+USE_L10N = True
+
+USE_TZ = True
+
+
+# Static files (CSS, JavaScript, Images)
+# https://docs.djangoproject.com/en/1.11/howto/static-files/
+
+STATIC_URL = '/static/'
diff --git a/rest/urls.py b/rest/urls.py
new file mode 100644
index 0000000..64aa8ef
--- /dev/null
+++ b/rest/urls.py
@@ -0,0 +1,22 @@
+"""rest URL Configuration
+
+The `urlpatterns` list routes URLs to views. For more information please see:
+    https://docs.djangoproject.com/en/1.11/topics/http/urls/
+Examples:
+Function views
+    1. Add an import:  from my_app import views
+    2. Add a URL to urlpatterns:  url(r'^$', views.home, name='home')
+Class-based views
+    1. Add an import:  from other_app.views import Home
+    2. Add a URL to urlpatterns:  url(r'^$', Home.as_view(), name='home')
+Including another URLconf
+    1. Import the include() function: from django.conf.urls import url, include
+    2. Add a URL to urlpatterns:  url(r'^blog/', include('blog.urls'))
+"""
+from django.conf.urls import url, include
+from django.contrib import admin
+
+urlpatterns = [
+    url(r'^', include('myapp.urls')),
+    url(r'^admin/', admin.site.urls),
+]
diff --git a/rest/wsgi.py b/rest/wsgi.py
new file mode 100644
index 0000000..cf8b5ee
--- /dev/null
+++ b/rest/wsgi.py
@@ -0,0 +1,16 @@
+"""
+WSGI config for rest project.
+
+It exposes the WSGI callable as a module-level variable named ``application``.
+
+For more information on this file, see
+https://docs.djangoproject.com/en/1.11/howto/deployment/wsgi/
+"""
+
+import os
+
+from django.core.wsgi import get_wsgi_application
+
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "rest.settings")
+
+application = get_wsgi_application()
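For reviewers who want to poke at the new endpoints by hand, the short client below is a minimal smoke-test sketch, not part of the patch: the base URL assumes the default dev server started with `python manage.py runserver` on 127.0.0.1:8000, and it assumes Prometheus and Consul are reachable so the hosts list is non-empty; adjust BASE for a real deployment.

#!/usr/bin/python
# -*- coding:utf-8 -*-
# Hypothetical smoke test for the REST API added by this patch.
import requests

BASE = 'http://127.0.0.1:8000'  # assumption: default dev-server address

# The three list-style endpoints should each return JSON with HTTP 200.
for path in ('/api/v1/test/', '/api/v1/monitor/', '/api/v1/hosts/'):
    resp = requests.get(BASE + path)
    print(path, resp.status_code)

# The detail endpoint is keyed by an "ip:port" instance string, mirroring the
# URL regex in myapp/urls.py; take the first instance from the hosts list.
hosts = requests.get(BASE + '/api/v1/hosts/').json()
if hosts:
    instance = hosts[0]['instance']
    detail = requests.get('{0}/api/v1/hosts/{1}/'.format(BASE, instance))
    print(instance, detail.status_code)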