18
18
from colors import color
19
19
20
20
from lookup import PROTOCOLS
21
- from dbutils import migrate_if_needed , get_db_cursor , DB_PREFIX , S_PER_PARTITION
21
+ from dbutils import migrate_if_needed , get_db_cursor , DB_PREFIX
22
22
from lookup import DIRECTION_INGRESS
23
23
24
24
41
41
# 11-byte signature (constructed in this way to detect possible mangled bytes), flags, header extension
42
42
# https://www.postgresql.org/docs/9.0/sql-copy.html#AEN59377
43
43
PG_COPYFROM_INIT = struct .pack ('!11sII' , b'PGCOPY\n \377 \r \n \0 ' , 0 , 0 )
44
+ # "To determine the appropriate binary format for the actual tuple data you should consult the PostgreSQL
45
+ # source, in particular the *send and *recv functions for each column's data type (typically these functions
46
+ # are found in the src/backend/utils/adt/ directory of the source distribution)."
44
47
# 4-byte INETv4/v6 prefix: family, netmask, is_cidr, n bytes
45
48
# https://doxygen.postgresql.org/network_8c_source.html#l00193
46
49
IPV4_ADDRESS_PREFIX = struct .pack ('!BBBB' , socket .AF_INET , 32 , 0 , 4 )
47
50
# Gotcha: IPv6 address family in Postgres is *not* socket.AF_INET6 (10),
48
51
# instead it is defined as socket.AF_INET + 1 (2 + 1 == 3)
49
52
# https://doxygen.postgresql.org/utils_2inet_8h_source.html#l00040
50
53
IPV6_ADDRESS_PREFIX = struct .pack ('!BBBB' , socket .AF_INET + 1 , 128 , 0 , 16 )
54
+ # Timestamp is encoded as signed number of microseconds from PG epoch
55
+ PG_EPOCH_TIMESTAMP = 946684800 # 2020-01-01T00:00:00Z
51
56
52
57
53
58
def _pgwriter_init ():
@@ -57,9 +62,9 @@ def _pgwriter_init():
57
62
58
63
59
64
def _pgwriter_write (pgwriter , ts , client_ip , IN_BYTES , PROTOCOL , DIRECTION , L4_DST_PORT , L4_SRC_PORT , INPUT_SNMP , OUTPUT_SNMP , address_family , IPVx_DST_ADDR , IPVx_SRC_ADDR ):
60
- buf = struct .pack ('!HiIi4s4siQiHiHiIiIiHiH ' ,
65
+ buf = struct .pack ('!Hiqi4s4siQiHiHiIiIiHiH ' ,
61
66
11 , # number of columns
62
- 4 , int (ts ), # integer - beware of Y2038 problem! :)
67
+ 8 , int (1000000 * ( ts - PG_EPOCH_TIMESTAMP )), # https://doxygen.postgresql.org/backend_2utils_2adt_2timestamp_8c_source.html#l00228
63
68
8 , IPV4_ADDRESS_PREFIX , socket .inet_aton (client_ip ), # 4 bytes prefix + 4 bytes IP
64
69
8 , IN_BYTES , # bigint
65
70
2 , PROTOCOL ,
@@ -86,7 +91,7 @@ def _pgwriter_finish(pgwriter):
86
91
with get_db_cursor () as c :
87
92
pgwriter .write (struct .pack ('!h' , - 1 ))
88
93
pgwriter .seek (0 )
89
- c .copy_expert (f"COPY { DB_PREFIX } flows FROM STDIN WITH BINARY" , pgwriter )
94
+ c .copy_expert (f"COPY { DB_PREFIX } flows2 FROM STDIN WITH BINARY" , pgwriter )
90
95
91
96
92
97
def process_named_pipe (named_pipe_filename ):
@@ -98,7 +103,6 @@ def process_named_pipe(named_pipe_filename):
98
103
99
104
templates = {}
100
105
last_record_seqs = {}
101
- last_partition_no = None
102
106
buffer = [] # we merge together writes to DB
103
107
known_exporters = set ()
104
108
MAX_BUFFER_SIZE = 5
@@ -122,14 +126,6 @@ def process_named_pipe(named_pipe_filename):
122
126
known_exporters .add (client_ip )
123
127
log .warning (f"[{ client_ip } ] New exporter!" )
124
128
125
- # sequence number of the (24h) day from UNIX epoch helps us determine the
126
- # DB partition we are working with:
127
- partition_no = int (ts // S_PER_PARTITION )
128
- if partition_no != last_partition_no :
129
- write_buffer (buffer , last_partition_no )
130
- ensure_flow_table_partition_exists (partition_no )
131
- last_partition_no = partition_no
132
-
133
129
try :
134
130
export = parse_packet (data , templates )
135
131
log .debug (f"[{ client_ip } ] Received record [{ export .header .sequence } ]: { datetime .utcfromtimestamp (ts )} " )
@@ -145,7 +141,7 @@ def process_named_pipe(named_pipe_filename):
145
141
# append the record to a buffer and write to DB when buffer is full enough:
146
142
buffer .append ((ts , client_ip , export ,))
147
143
if len (buffer ) > MAX_BUFFER_SIZE :
148
- write_buffer (buffer , partition_no )
144
+ write_buffer (buffer )
149
145
buffer = []
150
146
except UnknownNetFlowVersion :
151
147
log .warning ("Unknown NetFlow version" )
@@ -158,26 +154,12 @@ def process_named_pipe(named_pipe_filename):
158
154
log .exception ("Error writing line, skipping..." )
159
155
160
156
161
- # Based on timestamp, make sure that the partition exists:
162
- def ensure_flow_table_partition_exists (partition_no ):
163
- ts_start = partition_no * S_PER_PARTITION
164
- ts_end = ts_start + S_PER_PARTITION
165
- with get_db_cursor () as c :
166
- # "When creating a range partition, the lower bound specified with FROM is an inclusive bound, whereas
167
- # the upper bound specified with TO is an exclusive bound."
168
- # PARTITION OF: "Any indexes, constraints and user-defined row-level triggers that exist in the parent
169
- # table are cloned on the new partition."
170
- # https://www.postgresql.org/docs/12/sql-createtable.html
171
- c .execute (f"CREATE UNLOGGED TABLE IF NOT EXISTS { DB_PREFIX } flows_{ partition_no } PARTITION OF { DB_PREFIX } flows FOR VALUES FROM ({ ts_start } ) TO ({ ts_end } )" )
172
- return partition_no
173
-
174
-
175
157
def ensure_exporter (client_ip ):
176
158
with get_db_cursor () as c :
177
159
c .execute (f"INSERT INTO { DB_PREFIX } exporters (ip) VALUES (%s) ON CONFLICT DO NOTHING;" , (client_ip ,))
178
160
179
161
180
- def write_buffer (buffer , partition_no ):
162
+ def write_buffer (buffer ):
181
163
# {
182
164
# "DST_AS": 0,
183
165
# "SRC_AS": 0,
@@ -204,7 +186,7 @@ def write_buffer(buffer, partition_no):
204
186
# https://www.cisco.com/en/US/technologies/tk648/tk362/technologies_white_paper09186a00800a3db9.html#wp9001622
205
187
206
188
207
- log .debug (f"Writing { len (buffer )} records to DB, partition { partition_no } " )
189
+ log .debug (f"Writing { len (buffer )} records to DB" )
208
190
# save each of the flows within the record, but use execute_values() to perform bulk insert:
209
191
def _get_data (buffer ):
210
192
for ts , client_ip , export in buffer :
0 commit comments