forked from grihey/atsd-api-python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
nginx_access_log_tail.py
125 lines (101 loc) · 4.61 KB
/
nginx_access_log_tail.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
from __future__ import print_function
from atsd_client import connect, connect_url
from atsd_client.models import Message, Severity
from atsd_client.services import MessageService
import socket
from datetime import datetime
# Install sh module separately
from sh import tail
import csv
# nginx must be configured to write logs in CSV format:
#
# geoip_country /etc/nginx/geoip/GeoIP.dat; # country IP database
# geoip_city /etc/nginx/geoip/GeoLiteCity.dat; # city IP database
# geoip_org /etc/nginx/geoip/GeoIPASNum.dat; # org/network database
#
# log_format custom '"$remote_addr","$remote_user","$time_iso8601","$host","$request_method","$server_protocol",
# "$scheme","$request_uri",$status,$body_bytes_sent,"$http_referer","$http_user_agent","$http_x_forwarded_for",
# "$geoip_country_code","$geoip_region","$geoip_region_name","$geoip_city",$geoip_latitude,$geoip_longitude,
# "$geoip_org"'; access_log /var/log/nginx/access.log custom;
# Column names for the CSV access log, listed in the same order as the
# variables in the custom nginx log_format shown above.
fieldnames = [
    'remote_addr',
    'remote_user',
    'time_iso8601',
    'host',
    'request_method',
    'server_protocol',
    'scheme',
    'request_uri',
    'status',
    'body_bytes_sent',
    'http_referer',
    'http_user_agent',
    'http_x_forwarded_for',
    'geoip_country_code',
    'geoip_region',
    'geoip_region_name',
    'geoip_city',
    'geoip_latitude',
    'geoip_longitude',
    'geoip_org',
]

# Exclusion lists. A request is dropped when any entry occurs as a plain
# substring of the corresponding (lower-cased) log field.

# Matched against the User-Agent header to skip bots and monitoring clients.
ignore_user_agents = [
    'axibase', 'feedly', 'scollector', 'rome client',
    'bot', 'crawl', 'qwant', 'slurp', 'spider',
    'webmaster', 'faq', 'apache', 'scan',
]

# Matched against the request URI to skip static resources and feeds.
ignore_uris = [
    '.png', '.js', '.css', '.woff',
    '.jpg', '.gif', '.jpeg', '.ico', '.svg',
    'robots.txt', '/feed/',
    '.ttf', '.eot',
    'wp-content',
    '.lzma', '.xz', '.bz2',
]

# Matched against the remote address and the X-Forwarded-For value.
ignore_ip = [
    '192.0.2.1',
    '127.0.0.1',
]
# Connect to the ATSD server. The URL, user name and password passed to
# connect_url() below are placeholders; replace them with real values.
# Alternative (presumably reads the same settings from a properties file):
# connection = connect('/path/to/connection.properties')
connection = connect_url('https://atsd_hostname:8443', 'username', 'password')
# Message service used by the tail loop below to insert one message per
# accepted access-log record.
svc = MessageService(connection)
def lookup(addr):
    """Resolve an address to its primary host name via reverse DNS.

    The lookup is a best-effort enrichment of the log record: any failure
    to resolve the value yields ``None`` instead of an exception, so that a
    single bad record cannot stop the tail loop.

    :param addr: address string taken from the access log (``remote_addr``
        or ``http_x_forwarded_for``); the latter is supplied by the client
        and may be malformed.
    :return: the first element of the ``socket.gethostbyaddr()`` result
        (the primary host name), or ``None`` when resolution fails.
    """
    try:
        return socket.gethostbyaddr(addr)[0]
    except (socket.herror, socket.gaierror, UnicodeError):
        # herror:       no reverse DNS record exists for the address.
        # gaierror:     the value is not a valid or resolvable address.
        # UnicodeError: Python 3 IDNA-encodes the argument before resolving
        #               and rejects empty or over-long (>63 chars) labels.
        return None
def has_containing_element(lst, str):
    """Tell whether any item of *lst* occurs as a substring of *str*.

    :param lst: iterable of substrings to search for.
    :param str: text to search in; ``None`` is treated as "no match".
        NOTE: this parameter shadows the builtin ``str``; the name is kept
        so that existing callers are not affected.
    :return: ``True`` if at least one item of *lst* is contained in *str*,
        otherwise ``False`` (also for an empty *lst*).
    """
    # A missing field (None) cannot contain anything; the ``in`` operator
    # would raise TypeError on it.
    if str is None:
        return False
    return any(item in str for item in lst)
# The current user must have read permission on the access.log file, including after rollover
# sudo chmod 0644 /var/log/nginx/access.log
# sudo nano /etc/logrotate.d/nginx
# create 0644 www-data adm
# Follow the nginx access log and forward every request that passes the
# filters below to ATSD as a message. ``tail -F`` is used so that reading
# continues after the log file is rolled over.
for line in tail("-F", "/var/log/nginx/access.log", _iter=True):
    # Each tailed line is parsed as one CSV record keyed by fieldnames.
    reader = csv.DictReader([line.strip()], fieldnames=fieldnames)
    for row in reader:
        # print(row)
        # Ignore HEAD requests
        if row['request_method'] == 'HEAD':
            continue
        # Ignore HTTP>HTTPS requests
        if row['status'] == '301' and row['scheme'] == 'http':
            continue
        # Ignore bots. DictReader pads a truncated line with None values,
        # so fall back to '' instead of failing on None.lower().
        user_agent = (row['http_user_agent'] or '').lower()
        if has_containing_element(ignore_user_agents, user_agent):
            continue
        # Ignore static file requests
        ruri = (row['request_uri'] or '').lower()
        if has_containing_element(ignore_uris, ruri):
            continue
        # Ignore 404 requests for apt language translation files
        if row['status'] == '404' and '/repository/deb/./' in ruri:
            continue
        # Ignore requests from ignored IP addresses
        radr = row['remote_addr']
        if has_containing_element(ignore_ip, radr):
            continue
        # Add the reverse-DNS name of the peer unless it is the local host
        dns = lookup(radr)
        if dns is not None and 'localhost' not in dns:
            row['remote_host'] = dns
        # Handle proxy-forwarded requests
        xadr = row['http_x_forwarded_for']
        if xadr is not None and xadr != '-':
            if has_containing_element(ignore_ip, xadr):
                continue
            # X-Forwarded-For may hold a comma-separated chain
            # ("client, proxy1, proxy2"). Only a single address can be
            # resolved, so use the left-most (client) entry.
            client_adr = xadr.split(',')[0].strip()
            xdns = lookup(client_adr) if client_adr else None
            if xdns is not None and 'localhost' not in xdns:
                row['remote_x_host'] = xdns
        sev = Severity.NORMAL
        # Remove original timestamp in the access log
        row.pop('time_iso8601', None)
        # Remove tags with default (unknown) value '-'. Also drop what
        # DictReader adds for malformed lines: None values for missing
        # columns and the None key that collects surplus columns.
        tags = {k: v for k, v in row.items()
                if k is not None and v is not None and v != '-'}
        # Replace example.org with actual DNS name
        msg = Message('web', 'access.log', 'example.org', datetime.now(), sev, tags, '')
        try:
            res = svc.insert(msg)
        except IOError as ioe:
            print("IOError ({0}):{1}".format(ioe.errno, ioe.strerror))
            print(msg.date, msg.tags)
        except Exception as err:
            # Keep tailing on unexpected failures, but report what went
            # wrong. KeyboardInterrupt/SystemExit are deliberately not
            # caught so the script can still be stopped.
            print('Other error ({0}): {1}'.format(type(err).__name__, err))
            print(msg.date, msg.tags)
        else:
            print(res, msg.date, msg.tags)