Skip to content

Commit

Permalink
Merge pull request #186 from cisagov/v522_merge
Browse files Browse the repository at this point in the history
Fleshed out the Malcolm API and fixed how Zeek intel. files are managed on Hedgehog Linux.

* Added more capabilities to the API
    * added `/document/` API
    * added `filter` ability to `/agg/` and `/document/` API
    * added more documentation and examples
* For Zeek intel. files, changed location from `/opt/zeek/share/zeek/site/intel` to `/opt/sensor/sensor_ctl/zeek/intel` so that they aren't lost on reboot
  • Loading branch information
mmguero authored Jan 25, 2022
2 parents d138f2f + b5e376e commit 2c62e87
Show file tree
Hide file tree
Showing 13 changed files with 1,704 additions and 131 deletions.
1,603 changes: 1,523 additions & 80 deletions README.md

Large diffs are not rendered by default.

130 changes: 124 additions & 6 deletions api/project/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@
)
fields_to_urls.append([r'^zeek\.ntlm\.', ['DASH:543118a9-02d7-43fe-b669-b8652177fc37']])
fields_to_urls.append([r'^zeek\.ntp\.', ['DASH:af5df620-eeb6-11e9-bdef-65a192b7f586']])
fields_to_urls.append(
[r'^zeek\.opcua.*\.', ['DASH:dd87edd0-796a-11ec-9ce6-b395c1ff58f4', 'DASH:4a4bde20-4760-11ea-949c-bbb5a9feecbf']]
)
fields_to_urls.append([r'^zeek\.ospf\.', ['DASH:1cc01ff0-5205-11ec-a62c-7bc80e88f3f0']])
fields_to_urls.append([r'^zeek\.pe\.', ['DASH:0a490422-0ce9-44bf-9a2d-19329ddde8c3']])
fields_to_urls.append(
Expand Down Expand Up @@ -190,6 +193,33 @@ def gettimes(args):
return start_time, end_time


def getfilters(args):
    """Extract the user-supplied 'filter' argument as a dict of field filters.

    The 'filter' request argument is expected to be a JSON-encoded object, e.g.,
        https://localhost/mapi/agg?from=25 years ago&to=now&filter={"network.direction":"outbound"}

    Parameters
    ----------
    args : dict
        The request-arguments dictionary which may contain 'filter'.

    Returns
    -------
    dict or None
        The parsed filters (e.g., { "fieldname1": "value", "fieldname2": 1234,
        "fieldname3": ["abc", "123"] }), or None if 'filter' is absent, empty,
        not valid JSON, or not a (non-empty) JSON object.
    """
    raw = args.get("filter")
    if not raw:
        return None
    try:
        parsed = json.loads(raw)
    except ValueError:
        # malformed JSON is treated the same as "no filter supplied"
        return None
    return parsed if (parsed and isinstance(parsed, dict)) else None


def urls_for_field(fieldname, start_time=None, end_time=None):
"""looks up a list of URLs relevant to a particular database field
Expand Down Expand Up @@ -231,7 +261,7 @@ def urls_for_field(fieldname, start_time=None, end_time=None):
return list(set(translated))


def filtertime(search, args):
def filtertime(search, args, default_from="1 day ago", default_to="now"):
"""Applies a time filter (inclusive; extracted from request arguments) to an OpenSearch query and
returns the range as a tuple of integers representing the milliseconds since EPOCH. If
either end of the range is unspecified, the start and end times default to "1 day ago" and "now",
Expand All @@ -254,10 +284,11 @@ def filtertime(search, args):
"""
start_time, end_time = gettimes(args)
start_time_ms = int(
start_time.timestamp() * 1000 if start_time is not None else dateparser.parse("1 day ago").timestamp() * 1000
start_time.timestamp() * 1000 if start_time is not None else dateparser.parse(default_from).timestamp() * 1000
)
end_time_ms = int(
end_time.timestamp() * 1000 if end_time is not None else dateparser.parse(default_to).timestamp() * 1000
)
end_time_ms = int(end_time.timestamp() * 1000 if end_time is not None else datetime.now().timestamp() * 1000)
print(f'{app.config["ARKIME_INDEX_TIME_FIELD"]}, {start_time_ms}, {end_time_ms}')
return (
start_time_ms,
end_time_ms,
Expand All @@ -276,6 +307,53 @@ def filtertime(search, args):
)


def filtervalues(search, args):
    """Applies field value filters (logically AND-ing them) to an OpenSearch query. Using a !
    prefix on a field name effectively negates/excludes the filter. Using a 'null' value
    implies "does not exist."

    Parameters
    ----------
    search : opensearch_dsl.Search
        The object representing the OpenSearch Search query
    args : dict
        The dictionary which should contain 'filter' (see getfilters)

    Returns
    -------
    filters
        dict containing the filters, e.g., { "fieldname1": "value", "fieldname2": 1234, "fieldname3": ["abc", "123"] }
    search
        the (possibly) filtered search object
    """
    s = search
    # bind 'filters' unconditionally: the previous walrus-chain condition left it
    # unbound when 'search' was falsy, making the return below raise NameError
    filters = getfilters(args)
    if s and filters and isinstance(filters, dict):
        # loop over filters, AND'ing all of them
        for fieldname, filtervalue in filters.items():
            if fieldname.startswith('!'):
                # AND NOT filter
                if filtervalue is not None:
                    # field != value
                    s = s.exclude(
                        "terms",
                        **{fieldname[1:]: get_iterable(filtervalue)},
                    )
                else:
                    # field exists ("is not null")
                    s = s.filter("exists", field=fieldname[1:])
            else:
                # AND filter
                if filtervalue is not None:
                    # field == value
                    s = s.filter(
                        "terms",
                        **{fieldname: get_iterable(filtervalue)},
                    )
                else:
                    # field does not exist ("is null")
                    s = s.filter('bool', must_not=opensearch_dsl.Q('exists', field=fieldname))

    return (filters, s)


def bucketfield(fieldname, current_request, urls=None):
"""Returns a bucket aggregation for a particular field over a given time range
Expand All @@ -284,20 +362,25 @@ def bucketfield(fieldname, current_request, urls=None):
fieldname : string or Array of string
The name of the field(s) on which to perform the aggregation
current_request : Request
The flask Request object being processed (see gettimes and filtertime)
Uses 'from', 'to', and 'limit' from current_request.args
The flask Request object being processed (see gettimes/filtertime and getfilters/filtervalues)
Uses 'from', 'to', 'limit', and 'filter' from current_request.args
Returns
-------
values
list of dicts containing key and doc_count for each bucket
range
start_time (seconds since EPOCH) and end_time (seconds since EPOCH) of query
filter
dict containing the filters, e.g., { "fieldname1": "value", "fieldname2": 1234, "fieldname3": ["abc", "123"] }
fields
the name of the field(s) on which the aggregation was performed
"""
s = opensearch_dsl.Search(
using=opensearch_dsl.connections.get_connection(), index=app.config["ARKIME_INDEX_PATTERN"]
).extra(size=0)
start_time_ms, end_time_ms, s = filtertime(s, current_request.args)
filters, s = filtervalues(s, current_request.args)
bucket_limit = int(deep_get(current_request.args, ["limit"], app.config["RESULT_SET_LIMIT"]))
last_bucket = s.aggs
for fname in get_iterable(fieldname):
Expand All @@ -313,13 +396,15 @@ def bucketfield(fieldname, current_request, urls=None):
return jsonify(
values=response.aggregations.to_dict()["values"],
range=(start_time_ms // 1000, end_time_ms // 1000),
filter=filters,
fields=get_iterable(fieldname),
urls=urls,
)
else:
return jsonify(
values=response.aggregations.to_dict()["values"],
range=(start_time_ms // 1000, end_time_ms // 1000),
filter=filters,
fields=get_iterable(fieldname),
)

Expand Down Expand Up @@ -352,6 +437,39 @@ def aggregate(fieldname):
)


@app.route("/document", defaults={'index': app.config["ARKIME_INDEX_PATTERN"]})
@app.route("/document/<index>")
def document(index):
    """Returns the matching document(s) from the specified index.

    Parameters
    ----------
    index : string
        the name of the index from which to retrieve the document (defaults: arkime_sessions3-*)
    request : Request
        Uses 'from', 'to', 'limit', and 'filter' from request.args

    Returns
    -------
    filter
        dict containing the filters applied, e.g., {"_id":"210301-Cgnjsc2Tkdl38g25D6-iso_cotp-5485"}
    range
        start and end of the time window queried (seconds since EPOCH)
    results
        array of the documents retrieved (up to 'limit')
    """
    # cap the result set size from the 'limit' argument (falling back to the app default)
    result_limit = int(deep_get(request.args, ["limit"], app.config["RESULT_SET_LIMIT"]))
    search_query = opensearch_dsl.Search(
        using=opensearch_dsl.connections.get_connection(), index=index
    ).extra(size=result_limit)
    # default the time window to "everything" rather than the usual one day
    range_start_ms, range_end_ms, search_query = filtertime(
        search_query, request.args, default_from="1970-1-1", default_to="now"
    )
    applied_filters, search_query = filtervalues(search_query, request.args)
    return jsonify(
        results=search_query.execute().to_dict()['hits']['hits'],
        range=(range_start_ms // 1000, range_end_ms // 1000),
        filter=applied_filters,
    )


@app.route("/index")
@app.route("/indexes")
@app.route("/indices")
def indices():
"""Provide a list of indices in the OpenSearch data store
Expand Down
4 changes: 2 additions & 2 deletions arkime/wise/source.zeeklogs.js
Original file line number Diff line number Diff line change
Expand Up @@ -1206,7 +1206,7 @@ class MalcolmSource extends WISESource {
this.api.addValueAction("malcolm_dashboards_fields_zeek", {name:filterLabel, url:filterUrl, fields:allFieldsStr});

// add right-click for opening malcolm agg api
var apiLabel = "Malcolm Agg. API %DBFIELD%";
var apiLabel = "Aggregate %DBFIELD%";
var apiURL = "mapi/agg/%DBFIELD%?from=%ISOSTART%&to=%ISOSTOP%";
this.api.addValueAction("malcolm_mapi_cat_ip", {name:apiLabel, url:apiURL, category:"ip"});
this.api.addValueAction("malcolm_mapi_cat_port", {name:apiLabel, url:apiURL, category:"port"});
Expand All @@ -1217,7 +1217,7 @@ class MalcolmSource extends WISESource {
this.api.addValueAction("malcolm_mapi_fields_zeek", {name:apiLabel, url:apiURL, fields:allFieldsStr});

// add right-click for viewing original JSON document
this.api.addValueAction("malcolm_session_json_source", {name:"View JSON Document", url:"sessions.json?expression=id=%TEXT%&fields=*&%DATE%", fields:"id"});
this.api.addValueAction("malcolm_json_source", {name:"%DBFIELD% Document(s) JSON", url:"mapi/document?filter={\"%DBFIELD%\":\"%TEXT%\"}", fields:"communityId,event.id,id,network.community_id,rootId,zeek.fuid,zeek.uid"});

this.api.addView("malcolm_common",
"if (session.malcolmDocId)\n" +
Expand Down
32 changes: 16 additions & 16 deletions docker-compose-standalone.yml
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ x-pcap-capture-variables: &pcap-capture-variables

services:
opensearch:
image: malcolmnetsec/opensearch:5.2.1
image: malcolmnetsec/opensearch:5.2.2
restart: "no"
stdin_open: false
tty: true
Expand Down Expand Up @@ -165,7 +165,7 @@ services:
retries: 3
start_period: 180s
dashboards-helper:
image: malcolmnetsec/dashboards-helper:5.2.1
image: malcolmnetsec/dashboards-helper:5.2.2
restart: "no"
stdin_open: false
tty: true
Expand Down Expand Up @@ -193,7 +193,7 @@ services:
retries: 3
start_period: 30s
dashboards:
image: malcolmnetsec/dashboards:5.2.1
image: malcolmnetsec/dashboards:5.2.2
restart: "no"
stdin_open: false
tty: true
Expand All @@ -214,7 +214,7 @@ services:
retries: 3
start_period: 210s
logstash:
image: malcolmnetsec/logstash-oss:5.2.1
image: malcolmnetsec/logstash-oss:5.2.2
restart: "no"
stdin_open: false
tty: true
Expand Down Expand Up @@ -255,7 +255,7 @@ services:
retries: 3
start_period: 600s
filebeat:
image: malcolmnetsec/filebeat-oss:5.2.1
image: malcolmnetsec/filebeat-oss:5.2.2
restart: "no"
stdin_open: false
tty: true
Expand Down Expand Up @@ -292,7 +292,7 @@ services:
retries: 3
start_period: 60s
arkime:
image: malcolmnetsec/arkime:5.2.1
image: malcolmnetsec/arkime:5.2.2
restart: "no"
stdin_open: false
tty: true
Expand Down Expand Up @@ -330,7 +330,7 @@ services:
retries: 3
start_period: 210s
zeek:
image: malcolmnetsec/zeek:5.2.1
image: malcolmnetsec/zeek:5.2.2
restart: "no"
stdin_open: false
tty: true
Expand Down Expand Up @@ -359,7 +359,7 @@ services:
retries: 3
start_period: 60s
file-monitor:
image: malcolmnetsec/file-monitor:5.2.1
image: malcolmnetsec/file-monitor:5.2.2
restart: "no"
stdin_open: false
tty: true
Expand All @@ -382,7 +382,7 @@ services:
retries: 3
start_period: 60s
pcap-capture:
image: malcolmnetsec/pcap-capture:5.2.1
image: malcolmnetsec/pcap-capture:5.2.2
restart: "no"
stdin_open: false
tty: true
Expand All @@ -402,7 +402,7 @@ services:
volumes:
- ./pcap/upload:/pcap
pcap-monitor:
image: malcolmnetsec/pcap-monitor:5.2.1
image: malcolmnetsec/pcap-monitor:5.2.2
restart: "no"
stdin_open: false
tty: true
Expand All @@ -425,7 +425,7 @@ services:
retries: 3
start_period: 90s
upload:
image: malcolmnetsec/file-upload:5.2.1
image: malcolmnetsec/file-upload:5.2.2
restart: "no"
stdin_open: false
tty: true
Expand All @@ -451,7 +451,7 @@ services:
retries: 3
start_period: 60s
htadmin:
image: malcolmnetsec/htadmin:5.2.1
image: malcolmnetsec/htadmin:5.2.2
restart: "no"
stdin_open: false
tty: true
Expand All @@ -473,7 +473,7 @@ services:
retries: 3
start_period: 60s
freq:
image: malcolmnetsec/freq:5.2.1
image: malcolmnetsec/freq:5.2.2
restart: "no"
stdin_open: false
tty: true
Expand All @@ -491,7 +491,7 @@ services:
retries: 3
start_period: 60s
name-map-ui:
image: malcolmnetsec/name-map-ui:5.2.1
image: malcolmnetsec/name-map-ui:5.2.2
restart: "no"
stdin_open: false
tty: true
Expand All @@ -512,7 +512,7 @@ services:
retries: 3
start_period: 60s
api:
image: malcolmnetsec/api:5.2.1
image: malcolmnetsec/api:5.2.2
command: gunicorn --bind 0:5000 manage:app
restart: "no"
stdin_open: false
Expand All @@ -530,7 +530,7 @@ services:
retries: 3
start_period: 60s
nginx-proxy:
image: malcolmnetsec/nginx-proxy:5.2.1
image: malcolmnetsec/nginx-proxy:5.2.2
restart: "no"
stdin_open: false
tty: true
Expand Down
Loading

0 comments on commit 2c62e87

Please sign in to comment.