Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend Entity Mapping #524

Merged
merged 8 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 17 additions & 8 deletions packages/kestrel_core/src/kestrel/config/kestrel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,28 @@ debug:
cache_directory_path: "~/kestrel-debug-session" # put in user's home directory by default

# default identifier attribute(s) of an entity across all datasource interfaces
# always provide a list of identifiers, even if there is only a single identifier
# if multiple attributes are specified, a logical AND is applied between them
# each datasource interface config could have the same section to override this
entity_identifier:
file: "hashes[?algorithm_id == 3]" # sha256
group: uid
process: uid
endpoint: ip
device: ip
endpoint:
- uid
file: # "hashes[?algorithm_id == 3]" # sha256
- name
- endpoint.uid
group:
- uid
process:
- uid
- endpoint.uid
src_endpoint:
- ip
- port
dst_endpoint:
- ip
- port
certificate: serial_number
user: uid
certificate:
- serial_number
user:
- uid
email:
- uid
11 changes: 9 additions & 2 deletions packages/kestrel_core/src/kestrel/config/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import Mapping, Union

from kestrel.utils import update_nested_dict, load_data_file
from kestrel.exceptions import InvalidYamlInConfig
from kestrel.exceptions import InvalidYamlInConfig, InvalidKestrelConfig

CONFIG_DIR_DEFAULT = Path.home() / ".config" / "kestrel"
CONFIG_PATH_DEFAULT = CONFIG_DIR_DEFAULT / "kestrel.yaml"
Expand Down Expand Up @@ -70,4 +70,11 @@ def load_kestrel_config() -> Mapping:
config_user = load_user_config(CONFIG_PATH_ENV_VAR, CONFIG_PATH_DEFAULT)
_logger.debug(f"User configuration loaded: {config_user}")
_logger.debug(f"Updating default config with user config...")
return update_nested_dict(config_default, config_user)
full_config = update_nested_dict(config_default, config_user)

# validate the entity identifier section format
for entity, idx in full_config["entity_identifier"].items():
if not (isinstance(idx, list) and all((isinstance(x, str) for x in idx))):
raise InvalidKestrelConfig(f"Invalid entity_identifier for '{entity}'")

return full_config
8 changes: 8 additions & 0 deletions packages/kestrel_core/src/kestrel/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ class InvalidYamlInConfig(KestrelError):
pass


class InvalidKestrelConfig(KestrelError):
    """The loaded Kestrel config has an invalid section, e.g., an
    `entity_identifier` entry that is not a list of strings"""

    pass


class VariableNotFound(KestrelError):
pass

Expand Down Expand Up @@ -122,3 +126,7 @@ class UnsupportedOperatorError(KestrelError):
"""The data source doesn't support this operator"""

pass


class IncompleteDataMapping(KestrelError):
    """A data mapping lacks a required attribute, e.g., an attribute that is
    configured as an entity identifier"""

    pass
Empty file.
35 changes: 34 additions & 1 deletion packages/kestrel_core/src/kestrel/mapping/data_model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from functools import reduce
from typing import Optional, Union

import dpath
Expand All @@ -12,6 +13,7 @@
run_transformer_on_series,
)
from kestrel.utils import list_folder_files
from kestrel.exceptions import IncompleteDataMapping

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -51,7 +53,22 @@ def _add_attr(obj: dict, key: str, value: str):


def reverse_mapping(obj: dict, prefix: str = None, result: dict = None) -> dict:
"""Reverse the mapping; return native -> OCSF map"""
"""Reverse the mapping of `obj`

A mapping newly loaded from disk is an OCSF -> native mapping. This function
takes such a mapping and reverses it into a native -> OCSF mapping, which can
be used by the frontend. The resulting mapping is flattened.

To call the function: `reverse_mapping(ocsf_to_native_mapping)`

Parameters:
obj: mapping loaded from disk (OCSF -> native)
prefix: key path to `obj`; used by the recursive function itself
result: intermediate result mapping; used by the recursive function itself

Returns:
native -> OCSF mapping
"""
if result is None:
result = {}
for k, v in obj.items():
Expand Down Expand Up @@ -211,6 +228,22 @@ def load_default_mapping(
return result


@typechecked
def check_entity_identifier_existence_in_mapping(
    data_model_mapping: dict, entity_identifiers: dict
):
    """Verify that configured entity identifiers exist in a data mapping

    For every entity in `entity_identifiers` that also appears in
    `data_model_mapping`, each identifier is looked up in that entity's
    mapping. A dotted identifier such as `endpoint.uid` is resolved one level
    at a time through nested dictionaries. Entities absent from
    `data_model_mapping` are skipped.

    Parameters:
        data_model_mapping: entity name -> (possibly nested) attribute mapping
        entity_identifiers: entity name -> list of identifier attribute paths

    Raises:
        IncompleteDataMapping: an identifier path cannot be fully resolved in
            the mapping of its entity
    """
    for entity_name, ids in entity_identifiers.items():
        if entity_name not in data_model_mapping:
            continue
        entity = data_model_mapping[entity_name]
        for idx in ids:
            node = entity
            for key in idx.split("."):
                # A non-dict node (e.g., a leaf string or list) cannot hold the
                # rest of the path; report it as a missing identifier instead
                # of letting a TypeError escape from the lookup.
                if not isinstance(node, dict) or key not in node:
                    raise IncompleteDataMapping(
                        f"Identifier '{idx}' missing in data mapping"
                    )
                node = node[key]


@typechecked
def _get_from_mapping(mapping: Union[str, list, dict], key) -> list:
result = []
Expand Down
80 changes: 47 additions & 33 deletions packages/kestrel_core/src/kestrel/mapping/entityattribute/ecs.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@
# endpoint: see https://schema.ocsf.io/1.1.0/objects/endpoint
endpoint: &endpoint
uid: host.id
domain: host.domain
name: host.name
hostname: host.hostname
ip: host.ip
mac: host.mac


# https://schema.ocsf.io/1.1.0/objects/user
user:
domain: user.domain
full_name: user.full_name
name: user.name
uid: user.id


# https://schema.ocsf.io/1.1.0/objects/file
file:
endpoint: *endpoint
accessed_time: file.accessed
attributes: file.attributes
created_time: file.created
Expand Down Expand Up @@ -49,6 +68,7 @@ group:

# https://schema.ocsf.io/1.1.0/objects/process
process:
endpoint: *endpoint
cmd_line: process.command_line
name: process.name
pid: process.pid
Expand Down Expand Up @@ -120,6 +140,11 @@ process:
native_op: LIKE
native_value: posixpath_startswith
ocsf_value: dirname
user:
domain: process.user.domain
full_name: process.user.full_name
name: process.user.name
uid: process.user.id


# src_endpoint: see https://schema.ocsf.io/1.1.0/objects/endpoint
Expand All @@ -141,30 +166,6 @@ src_endpoint: &src_ref
- source.port


# endpoint: see https://schema.ocsf.io/1.1.0/objects/endpoint
endpoint:
domain:
- client.domain
- source.domain
- server.domain
- destination.domain
hostname:
- client.domain
- source.domain
- server.domain
- destination.domain
ip:
- client.ip
- source.ip
- server.ip
- destination.ip
mac:
- client.mac
- source.mac
- server.mac
- destination.mac


# dst_endpoint: see https://schema.ocsf.io/1.1.0/objects/endpoint
dst_endpoint: &dst_ref
domain:
Expand Down Expand Up @@ -225,15 +226,6 @@ certificate:
version: x509.version_number
issuer: x509.issuer.distinguished_name
subject: x509.subject.distinguished_name
#uid:


# https://schema.ocsf.io/1.1.0/objects/user
user:
domain: user.domain
full_name: user.full_name
name: user.name
uid: user.id


# https://schema.ocsf.io/1.1.0/classes/network_activity
Expand All @@ -242,3 +234,25 @@ network_activity:
src_endpoint: *src_ref
dst_endpoint: *dst_ref
traffic: *traffic


# https://schema.ocsf.io/1.2.0/objects/email
email:
uid: email.message_id
from: email.from.address
to: email.to.address
reply_to: email.reply_to.address
cc: email.cc.address
subject: email.subject


# https://schema.ocsf.io/1.2.0/objects/win/reg_key?extensions=win
reg_key:
endpoint: *endpoint
path: registry.key


# https://schema.ocsf.io/1.2.0/objects/win/reg_value?extensions=win
reg_value:
endpoint: *endpoint
path: registry.value
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Not updated for extended endpoint mapping (per process/file)

# https://schema.ocsf.io/1.1.0/objects/file
file:
name: file:name
Expand Down Expand Up @@ -141,3 +143,13 @@ user:
name: user-account:account_login
type: user-account:account_type
uid: user-account:user_id


# https://schema.ocsf.io/1.2.0/objects/win/reg_key?extensions=win
reg_key:
path: windows-registry-key.key


# https://schema.ocsf.io/1.2.0/objects/win/reg_value?extensions=win
reg_value:
path: windows-registry-key.value.data
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
event: base_event
activity: base_event
device: endpoint
registrykey: reg_key
registryvalue: reg_value

# Extended mapping to cover special field in OCSF activity
actor.user: user
# To simplify the mapping, we do not cover the parent process in actor.process
# - That semantic only appears in Process Activity [1007]; please use `process.parent_process` to specify it in that case
# - This mapping works in activities other than Process Activity [1007]
actor.process: process
17 changes: 17 additions & 0 deletions packages/kestrel_core/tests/test_mapping_data_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@

import pandas as pd

from kestrel.exceptions import IncompleteDataMapping
from kestrel.config.utils import load_kestrel_config
from kestrel.mapping.data_model import (
load_default_mapping,
reverse_mapping,
translate_comparison_to_native,
translate_comparison_to_ocsf,
translate_dataframe,
translate_projection_to_native,
check_entity_identifier_existence_in_mapping,
)


Expand Down Expand Up @@ -88,6 +91,14 @@
}


# Mapping for testing a missing identifier: the `process` entity maps only
# `pid`, so a check for any other `process` identifier attribute must fail
INCOMPLETE_MAPPING = {
    "process": {
        "pid": "process.pid"
    }
}


# Simplified subset of the standard mapping
STIX_MAPPING = {
"device": {
Expand Down Expand Up @@ -205,3 +216,9 @@ def test_translate_dataframe(): #TODO: more testing here
dmm = load_default_mapping("ecs")
df = translate_dataframe(df, dmm["process"])
#TODO:assert df["file.name"].iloc[0] == "cmd.exe"


def test_incomplete_mapping_no_identifier():
    """A mapping that lacks a configured identifier attribute is rejected"""
    entity_identifiers = load_kestrel_config()["entity_identifier"]
    with pytest.raises(IncompleteDataMapping):
        check_entity_identifier_existence_in_mapping(
            INCOMPLETE_MAPPING, entity_identifiers
        )
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
load_kestrel_config,
)
from kestrel.exceptions import InterfaceNotConfigured
from kestrel.mapping.data_model import load_default_mapping
from kestrel.mapping.data_model import (
load_default_mapping,
check_entity_identifier_existence_in_mapping,
)


PROFILE_PATH_DEFAULT = CONFIG_DIR_DEFAULT / "opensearch.yaml"
Expand Down Expand Up @@ -49,6 +52,11 @@ def __post_init__(self):
# Default to the built-in ECS mapping
self.data_model_map = load_default_mapping("ecs")

kestrel_config = load_kestrel_config()
check_entity_identifier_existence_in_mapping(
self.data_model_map, kestrel_config["entity_identifier"]
)


@dataclass
class Config(DataClassJSONMixin):
Expand All @@ -65,13 +73,6 @@ def load_config():
interface_config = Config(
**load_user_config(PROFILE_PATH_ENV_VAR, PROFILE_PATH_DEFAULT)
)

# load default entity identifier from main Kestrel config
kestrel_config = load_kestrel_config()
for ds in interface_config.datasources.values():
if not ds.entity_identifier:
ds.entity_identifier = kestrel_config["entity_identifier"]

return interface_config
except TypeError:
raise InterfaceNotConfigured()
1 change: 0 additions & 1 deletion packages/kestrel_interface_opensearch/tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,3 @@ def test_load_config(tmp_path):
assert read_config.connections["localhost"].url == config["connections"]["localhost"]["url"]
assert read_config.datasources["some_ds"].index_pattern == config["datasources"]["some_ds"]["index_pattern"]
assert read_config.datasources["some_ds"].data_model_map["some.field"] == "other.field"
assert read_config.datasources["some_ds"].entity_identifier["process"] == "uid"
Loading
Loading