Skip to content

Commit

Permalink
Load configurations from the Consul cluster instead of from local files.
Browse files Browse the repository at this point in the history
  • Loading branch information
cauwulixuan committed Mar 14, 2018
1 parent 430c31c commit d1ef410
Show file tree
Hide file tree
Showing 17 changed files with 376 additions and 341 deletions.
Binary file removed db.sqlite3
Binary file not shown.
130 changes: 51 additions & 79 deletions metrics/common_metrics.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
#!/usr/bin/python
#-*- coding:utf-8 -*-
import os, sys
import sys
import re

import requests
import logging
import json

import monitor_params
from time import time
import utils
sys.path.append("..")
import myapp.params as params
from myapp.parse import ParseUtil
from time import time

Expand All @@ -21,46 +17,29 @@
logging.basicConfig(level=logging.INFO,
format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
datefmt='%a, %d %b %Y %H:%M:%S')
import sys

logger = logging.getLogger(sys.path[0] + 'common_metrics')

class CommonMetrics(object):
    '''
    Collect cluster/node metrics (state, CPU, memory, uptime) for a monitored
    service by querying Prometheus; instance lists are discovered via Consul
    (through `utils.instance_info`) rather than read from local config files.
    '''

    def __init__(self, process_exporter_name, service_name):
        # Name of the process exporter job that scrapes the service processes.
        self._process_name = process_exporter_name
        # Logical service name; used as the prefix of every emitted metric key.
        self._service_name = service_name
        # Discover service and process-exporter instances from Consul-backed config.
        self._instances = utils.instance_info(service_name, process_exporter_name)
        self._service_instance = self._instances['service_instance']
        self._process_instance = self._instances['process_instance']
        # Resolve endpoint URLs once; every query method below reuses them.
        self._prom_url = utils.prometheus_url(self._process_name)
        self._grafana_url = utils.grafana_floating_url(self._process_name)

def cluster_state(self, ip):
def cluster_state(self):
'''
@return cluster state and the numbers of healthy nodes.
'''
process_instances = self.process_instance(ip)
state = 0.0
success_count = 0.0

for i in range(len(process_instances)):
common_up = self.node_state(process_instances[i])
for i in range(len(self._process_instance)):
common_up = self.node_state(self._process_instance[i])
if common_up:
success_count +=1
else:
def node_state(self, process_instance):
    '''
    @return a float value 1 or 0, indicating the node state up or down.
    '''
    state = {}
    param = {
        "query": '{0}_process_up{{instance="{1}"}}'.format(self._service_name, process_instance)
    }
    response = ParseUtil.request_metrics(self._prom_url, param)
    # Index the returned samples by instance label; sample value is at ['value'][1].
    for sample in response:
        state.setdefault(sample['metric']['instance'], sample['value'][1])
    # BUG FIX: dict.has_key() was removed in Python 3; `in` works on 2 and 3.
    if process_instance in state:
        return float(state[process_instance])
    else:
        logging.error("No instance in the {0} cluster, node {1} down.".format(self._service_name, process_instance))
        return 0.0


def cpu_usage(self, process_instance):
    '''
    @return components cpu usage as a float, or None when the instance
    is absent from the Prometheus response.
    '''
    cpu_usage = {}
    param = {
        "query": '{0}_cpu_percentage{{instance="{1}"}}'.format(self._service_name, process_instance)
    }
    response = ParseUtil.request_metrics(self._prom_url, param)
    for sample in response:
        cpu_usage.setdefault(sample['metric']['instance'], sample['value'][1])
    # BUG FIX: dict.has_key() was removed in Python 3; `in` works on 2 and 3.
    if process_instance in cpu_usage:
        return float(cpu_usage[process_instance])
    else:
        logging.error("No instance in the {0} cluster, get {1} cpu usage failed.".format(self._service_name, process_instance))
        return None

def uptime(self, process_instance):
    '''
    @return a float value of create time (process running-time counter as
    exposed by {service}_running_time_seconds_total), or None when the
    instance is absent from the Prometheus response.
    '''
    uptime = {}
    param = {
        "query": '{0}_running_time_seconds_total{{instance="{1}"}}'.format(self._service_name, process_instance)
    }
    response = ParseUtil.request_metrics(self._prom_url, param)
    for sample in response:
        uptime.setdefault(sample['metric']['instance'], sample['value'][1])
    # BUG FIX: dict.has_key() was removed in Python 3; `in` works on 2 and 3.
    if process_instance in uptime:
        return float(uptime[process_instance])
    else:
        logging.error("No instance in the {0} cluster, get {1} uptime failed.".format(self._service_name, process_instance))
        return None

def mem_usage(self, process_instance):
    '''
    @return components memory usage in bytes (rss + vms + shared summed by
    instance), or None when the instance is absent from the response.
    '''
    mem_usage = {}
    param = {
        "query": 'sum by (instance)({0}_memory_usage_bytes_total{{instance="{1}", mode=~"rss|vms|shared"}})'.format(self._service_name, process_instance)
    }
    response = ParseUtil.request_metrics(self._prom_url, param)
    for sample in response:
        mem_usage.setdefault(sample['metric']['instance'], sample['value'][1])
    # BUG FIX: dict.has_key() was removed in Python 3; `in` works on 2 and 3.
    if process_instance in mem_usage:
        return float(mem_usage[process_instance])
    else:
        logging.error("No instance in the {0} cluster, get {1} memory usage failed.".format(self._service_name, process_instance))
        return None


def cluster_list(self):
    '''
    Aggregate cluster-level info: overall state, node counts, cluster uptime
    and a per-node detail list, keyed by the service-name prefix.
    '''
    # Default to "now" so uptime reads as 0 when no node is healthy.
    uptime = time()
    # Use the first healthy node's start counter as the cluster start time.
    for instance in self._process_instance:
        if self.node_state(instance):
            uptime = self.uptime(instance)
            break

    node_info = [self.node_detail(instance) for instance in self._process_instance]

    # PERF FIX: cluster_state() polls Prometheus for every node; the original
    # called it twice (once for [0], once for [1]). Call it once and reuse.
    cluster_state = self.cluster_state()
    cluster_info = {
        "{0}_cluster_state".format(self._service_name): cluster_state[0],
        "{0}_total_nodes".format(self._service_name): float(len(self._process_instance)),
        "{0}_healthy_nodes".format(self._service_name): cluster_state[1],
        "{0}_uptime".format(self._service_name): time() - uptime,
        "{0}_nodes_info".format(self._service_name): node_info
    }
    return cluster_info

def node_detail(self, process_instance):
    '''
    @return a dict of per-node metrics (state, uptime, cpu, mem, dashboard url).
    Down nodes get zeroed metrics and a None url.
    '''
    # Grafana dashboard slugs use dashes: "process_status" -> "process-status".
    board_name = re.sub('([a-z0-9])_([a-z0-9])', r'\1-\2', self._service_name).lower()
    # Query node state once instead of twice (it hits Prometheus each call).
    state = self.node_state(process_instance)
    if not state:
        node_info = {
            "{0}_node_state".format(self._service_name): 0.0,
            "{0}_uptime".format(self._service_name): 0.0,
            "{0}_cpu_usage".format(self._service_name): 0.0,
            "{0}_mem_usage".format(self._service_name): 0.0,
            # BUG FIX: key was "{0}_self._prom_url" (botched mechanical rename);
            # it must be "{0}_url" to match the healthy branch below so
            # consumers always see the same key.
            "{0}_url".format(self._service_name): None
        }
    else:
        node_info = {
            "{0}_node_state".format(self._service_name): state,
            "{0}_uptime".format(self._service_name): time() - self.uptime(process_instance),
            "{0}_cpu_usage".format(self._service_name): self.cpu_usage(process_instance),
            "{0}_mem_usage".format(self._service_name): self.mem_usage(process_instance),
            "{0}_url".format(self._service_name): 'http://{0}/dashboard/db/{1}-dashboard-for-prometheus?orgId=1&var-instance={2}&kiosk'.format(self._grafana_url, board_name, process_instance)
        }
    return node_info

def main():
    '''
    Smoke-test entry point: build CommonMetrics for the Prometheus service
    using instances discovered via Consul (construction resolves instance
    lists and endpoint URLs).
    '''
    service_name = "prometheus"
    process_name = "process_status_exporter"
    common = CommonMetrics(process_name, service_name)
    # NOTE(review): the instance is only constructed here; when debugging,
    # dump metrics with: from pprint import pprint; pprint(common.cluster_list())

if __name__ == '__main__':
    main()
68 changes: 49 additions & 19 deletions metrics/consul_metrics.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,11 @@
#!/usr/bin/python
#-*- coding:utf-8 -*-
import os, sys
import re
import time
import requests
import argparse
import sys
import logging
import json

import monitor_params
import utils
# import father directory, append father directory to the sys.path
sys.path.append("..")
from myapp.parse import ParseUtil
import myapp.params as params
from common_metrics import CommonMetrics
from time import time

Expand All @@ -23,22 +16,36 @@
logging.basicConfig(level=logging.INFO,
format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
datefmt='%a, %d %b %Y %H:%M:%S')
import sys

logger = logging.getLogger(sys.path[0] + 'consul_metrics')


class ConsulMetrics(CommonMetrics):
    '''
    Consul-specific metrics. Consul itself cannot be discovered through
    Consul, so its member list comes from a static ip list (utils.consul_ip_list).
    '''

    def __init__(self, process_exporter_name):
        # Service name is fixed to "consul"; everything else comes from the base class.
        CommonMetrics.__init__(self, process_exporter_name, "consul")

def consul_process_instance(self):
    '''
    @return a list of "ip:port" process-exporter instances, one per Consul
    member ip (the original docstring wrongly said "service ip").
    '''
    ip_list = utils.consul_ip_list()
    # The exporter port is identical on every member; look it up once.
    port = utils.get_process_port(self._process_name)
    return ["{0}:{1}".format(ip, port) for ip in ip_list]


def cluster_state(self):
'''
Once 1 leader down, more than half peers left in the cluster, the cluster can elected a new leader.
So the cluster can work well.
'''
process_instances = self.consul_process_instance()
state = 0.0
members_count = float(len(process_instances))
success_count = 0.0
members_count = len(self.ip_list(ip))
process_instances = self.process_instance(ip)

for i in range(len(process_instances)):
consul_up = self.node_state(process_instances[i])
Expand All @@ -53,10 +60,33 @@ def cluster_state(self, ip):
logging.info("success count is: {0}, and state is: {1}".format(success_count, state))
return [state,success_count]

def cluster_list(self):
    '''
    Aggregate Consul cluster info. Overrides the base implementation to use
    the statically configured Consul member list instead of Consul discovery.
    '''
    process_instances = self.consul_process_instance()
    # Default to "now" so uptime reads as 0 when no node is healthy.
    uptime = time()
    # Use the first healthy node's start counter as the cluster start time.
    for instance in process_instances:
        if self.node_state(instance):
            uptime = self.uptime(instance)
            break

    node_info = [self.node_detail(instance) for instance in process_instances]

    # PERF FIX: cluster_state() polls every node; call it once, not twice.
    cluster_state = self.cluster_state()
    cluster_info = {
        "{0}_cluster_state".format(self._service_name): cluster_state[0],
        "{0}_total_nodes".format(self._service_name): float(len(process_instances)),
        "{0}_healthy_nodes".format(self._service_name): cluster_state[1],
        "{0}_uptime".format(self._service_name): time() - uptime,
        "{0}_nodes_info".format(self._service_name): node_info
    }
    return cluster_info

def main():
    '''
    Smoke-test entry point: build ConsulMetrics with the process exporter name
    (construction resolves instance lists and endpoint URLs).
    '''
    process_name = "process_status_exporter"
    consul = ConsulMetrics(process_name)
    # NOTE(review): when debugging, dump metrics with:
    # from pprint import pprint; pprint(consul.cluster_list())

if __name__ == '__main__':
    main()
Loading

0 comments on commit d1ef410

Please sign in to comment.