From 0d419d4042db96c112a0f8969132f7ae3d53f61d Mon Sep 17 00:00:00 2001
From: Tommaso Bailetti
Date: Fri, 29 Nov 2024 14:34:23 +0100
Subject: [PATCH] refactor(ns-api): refactored dpi report stats reporting

---
 packages/ns-api/files/ns.dpireport | 215 ++++++++++++++++++-----------
 1 file changed, 136 insertions(+), 79 deletions(-)

diff --git a/packages/ns-api/files/ns.dpireport b/packages/ns-api/files/ns.dpireport
index 8c54bdec5..e2ef7a1d0 100755
--- a/packages/ns-api/files/ns.dpireport
+++ b/packages/ns-api/files/ns.dpireport
@@ -11,7 +11,9 @@ import sys
 import json
 import glob
 import socket
-from datetime import date
+import datetime
+
+BASE_PATH = '/var/run/dpireport'
 
 def reverse_dns(ip):
     try:
@@ -26,94 +28,149 @@ def list_days():
         ret.append((tmp[0], tmp[1], tmp[2]))
     return {"days": ret}
 
-def summary(year, month, day, client="*", limit=10):
-    hours = dict()
-    for i in range(24):
-        hours[f'{i:02}'] = 0
-    ret = {"total": 0, "clients": {}, "hours": hours, "names": {}, "protocol": {}, "host": {}, "application": {}}
-    # prepenad leading zero, if needed
-    month = f'{int(month):02}'
-    day = f'{int(day):02}'
-    for client_f in glob.glob(f'/var/run/dpireport/{year}/{month}/{day}/{client}'):
-        client = client_f.removeprefix(f'/var/run/dpireport/{year}/{month}/{day}/')
-        cdetails = _details(year, month, day, client)
-        ret["clients"][client] = cdetails["total"]
-        ret["names"][client] = cdetails["name"]
-        for hour in cdetails["hours"]:
-            try:
-                ret["hours"][hour] += cdetails["hours"][hour]["total"]
-            except:
-                pass
-        ret["total"] += cdetails["total"]
-
-        for key in ("protocol", "host", "application"):
-            for el in cdetails[key]:
-                if el not in ret[key]:
-                    ret[key][el] = 0
-                ret[key][el] += cdetails[key][el]
-    try:
-        tmp_c = sorted(ret['clients'].items(), key=lambda x:x[1], reverse=True)
-    except:
-        tmp_c = list()
-    try:
-        tmp_h = sorted(ret['hours'].items())
-    except:
-        tmp_h = list()
-
-    ret['clients'] = tmp_c
-    ret['hours'] = tmp_h
-
-    for key in ("protocol", "host", "application"):
-        try:
-            tmp = sorted(ret[key].items(), key=lambda x:x[1], reverse=True)
-        except:
-            tmp = list()
-        ret[key] = tmp[0:10]
-
-    return ret
-
-def _details(year, month, day, client):
-    hours = dict()
-    if not client:
-        return ret
-    for i in range(24):
-        hours[f'{i:02}'] = dict()
-    ret = {"hours": hours, "total": 0, "name": reverse_dns(client), "protocol" : {}, "host": {}, "application": {}}
-    ddir = f'/var/run/dpireport/{year}/{month}/{day}/{client}/'
-    for hour_f in glob.glob(f'{ddir}??.json'):
-        hour = hour_f.removesuffix(".json").removeprefix(ddir)
-        with open(hour_f, 'r') as fp:
-            ret["hours"][hour] = json.load(fp)
-            for key in ("protocol", "host", "application"):
-                for el in ret["hours"][hour][key]:
-                    if el not in ret[key]:
-                        ret[key][el] = 0
-                    ret[key][el] += ret["hours"][hour][key][el]
-            ret["total"] += ret["hours"][hour]["total"]
-    return ret
+
+def _load_data(year, month, day):
+    """Load every hourly DPI report stored for the given day.
+
+    Return a dict mapping each client directory name to a dict that maps
+    the hour (JSON file name without extension) to the _extract_data result.
+    """
+    search_path = f'{BASE_PATH}/{year}/{month}/{day}/'
+    clients = dict()
+    # search_path already ends with '/', do not add a second separator
+    for client_dir in glob.glob(f'{search_path}*'):
+        client_name = client_dir.removeprefix(search_path)
+        client_data = dict()
+        for data_file in glob.glob(f'{client_dir}/*.json'):
+            hour = data_file.removeprefix(f'{client_dir}/').removesuffix('.json')
+            client_data[hour] = _extract_data(data_file)
+        clients[client_name] = client_data
+
+    return clients
+
+
+def _extract_data(dpi_file: str):
+    """Read one hourly report and keep only the fields used by summary."""
+    with open(dpi_file, 'r') as file:
+        json_file = json.load(file)
+
+    return {
+        "total": json_file["total"],
+        "protocol": json_file["protocol"],
+        "application": json_file["application"],
+        "host": json_file["host"]
+    }
+
+
+def _accumulate(counters, values):
+    """Add every entry of values to the entry of counters with the same key."""
+    for key, value in values.items():
+        counters[key] = counters.get(key, 0) + value
+
+
+def _ranking(counters, limit, label=None):
+    """Return the first `limit` counters as a list of dicts, biggest first.
+
+    Each item holds the counter key as 'id' and its value as 'traffic';
+    when label is given, label(key) is stored as 'label'.
+    """
+    ranking = list()
+    for key, traffic in counters.items():
+        item = {'id': key}
+        if label is not None:
+            item['label'] = label(key)
+        item['traffic'] = traffic
+        ranking.append(item)
+    ranking.sort(key=lambda x: x['traffic'], reverse=True)
+    return ranking[:limit]
+
+
+def _application_label(application):
+    """Build the display label of an application id."""
+    if application == 'unknown':
+        return 'Unknown'
+    return application.removeprefix('netify.').capitalize()
+
+
+def summary(year=None, month=None, day=None, narrow_client=None,
+            narrow_application=None, limit=20):
+    """Summarize the traffic recorded for one day.
+
+    year, month and day default to today. narrow_client and
+    narrow_application restrict the report to a single client or
+    application; remote hosts and protocols are reported only when
+    narrow_application is not set. Every ranking holds at most `limit` items.
+    """
+    today = datetime.date.today()
+    year = today.year if year is None else year
+    # prepend leading zero, if needed
+    month = f'{int(today.month if month is None else month):02}'
+    day = f'{int(today.day if day is None else day):02}'
+    data = _load_data(year, month, day)
+
+    total_traffic = 0
+    raw_hourly_traffic = dict()
+    raw_applications = dict()
+    raw_clients = list()
+    raw_remote_hosts = dict()
+    raw_protocols = dict()
+
+    for client, hours in data.items():
+        if narrow_client is not None and narrow_client != client:
+            continue
+        client_traffic = 0
+
+        for hour, report in hours.items():
+            for application, traffic in report['application'].items():
+                if narrow_application is not None and application != narrow_application:
+                    continue
+                raw_applications[application] = raw_applications.get(application, 0) + traffic
+                raw_hourly_traffic[hour] = raw_hourly_traffic.get(hour, 0) + traffic
+                total_traffic += traffic
+                client_traffic += traffic
+
+            if narrow_application is None:
+                _accumulate(raw_remote_hosts, report['host'])
+                _accumulate(raw_protocols, report['protocol'])
+
+        raw_clients.append({'id': client, 'traffic': client_traffic})
+
+    raw_clients.sort(key=lambda x: x['traffic'], reverse=True)
+    # resolve names only for the clients that are actually returned
+    final_clients = [
+        {'id': item['id'], 'label': reverse_dns(item['id']), 'traffic': item['traffic']}
+        for item in raw_clients[:limit]
+    ]
+
+    response = {
+        'total_traffic': total_traffic,
+        # glob returns files in arbitrary order: sort by hour
+        'hourly_traffic': [{'id': hour, 'traffic': raw_hourly_traffic[hour]}
+                           for hour in sorted(raw_hourly_traffic)],
+        'applications': _ranking(raw_applications, limit, _application_label),
+        'clients': final_clients,
+    }
+
+    if narrow_application is None:
+        response['remote_hosts'] = _ranking(raw_remote_hosts, limit)
+        response['protocols'] = _ranking(raw_protocols, limit, str.upper)
+
+    return response
 
 cmd = sys.argv[1]
 
 if cmd == 'list':
     print(json.dumps({
-        "summary": {"year": "2023", "month": "06", "day": "02", "limit": 10},
-        "summary-by-client": {"year": "2023", "month": "06", "day": "02", "client": "192.168.1.1", "limit": 10},
-        "details": {"year": "2023", "month": "06", "day": "16", "client": "192.168.100.22"},
+        "summary": {"year": "2023", "month": "06", "day": "16", "client": "192.168.100.22",
+                    "application": "netify.ssh", "limit": 20},
         "days":{}
-        }))
+    }))
 else:
     action = sys.argv[2]
     if action == "days":
         print(json.dumps(list_days()))
     else:
         args = json.loads(sys.stdin.read())
-        year = args.get('year', f'{date.today().year:02}')
-        month = args.get('month', f'{date.today().month:02}')
-        day = args.get('day', f'{date.today().day:02}')
         if action == "summary":
-            limit = args.get('limit', 10)
-            print(json.dumps(summary(year, month, day, limit=limit)))
-        elif action == "summary-by-client":
-            client = args.get('client', '*')
-            limit = args.get('limit', 10)
-            print(json.dumps(summary(year, month, day, client, limit=limit)))
+            print(json.dumps(summary(args.get('year'), args.get('month'), args.get('day'), args.get('client'),
+                                     args.get('application'), args.get('limit', 20))))