Skip to content

Commit

Permalink
Merge pull request #524 from opencybersecurityalliance/extended-entit…
Browse files Browse the repository at this point in the history
…iy-mapping

Extend Entity Mapping
  • Loading branch information
subbyte authored Jun 13, 2024
2 parents 4a48a7d + eff71d7 commit abe19d9
Show file tree
Hide file tree
Showing 13 changed files with 173 additions and 64 deletions.
25 changes: 17 additions & 8 deletions packages/kestrel_core/src/kestrel/config/kestrel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,28 @@ debug:
cache_directory_path: "~/kestrel-debug-session" # put in user's home directory by default

# default identifier attribute(s) of an entity across all datasource interfaces
# always provide a list of identifiers, even if there is only a single identifier
# if multiple attributes are specified, a logical AND is applied between them
# each datasource interface config could have the same section to override this
entity_identifier:
file: "hashes[?algorithm_id == 3]" # sha256
group: uid
process: uid
endpoint: ip
device: ip
endpoint:
- uid
file: # "hashes[?algorithm_id == 3]" # sha256
- name
- endpoint.uid
group:
- uid
process:
- uid
- endpoint.uid
src_endpoint:
- ip
- port
dst_endpoint:
- ip
- port
certificate: serial_number
user: uid
certificate:
- serial_number
user:
- uid
email:
- uid
11 changes: 9 additions & 2 deletions packages/kestrel_core/src/kestrel/config/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import Mapping, Union

from kestrel.utils import update_nested_dict, load_data_file
from kestrel.exceptions import InvalidYamlInConfig
from kestrel.exceptions import InvalidYamlInConfig, InvalidKestrelConfig

CONFIG_DIR_DEFAULT = Path.home() / ".config" / "kestrel"
CONFIG_PATH_DEFAULT = CONFIG_DIR_DEFAULT / "kestrel.yaml"
Expand Down Expand Up @@ -70,4 +70,11 @@ def load_kestrel_config() -> Mapping:
config_user = load_user_config(CONFIG_PATH_ENV_VAR, CONFIG_PATH_DEFAULT)
_logger.debug(f"User configuration loaded: {config_user}")
_logger.debug(f"Updating default config with user config...")
return update_nested_dict(config_default, config_user)
full_config = update_nested_dict(config_default, config_user)

# validate the format of the entity_identifier section
for entity, idx in full_config["entity_identifier"].items():
if not (isinstance(idx, list) and all((isinstance(x, str) for x in idx))):
raise InvalidKestrelConfig(f"Invalid entity_identifier for '{entity}'")

return full_config
8 changes: 8 additions & 0 deletions packages/kestrel_core/src/kestrel/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ class InvalidYamlInConfig(KestrelError):
pass


class InvalidKestrelConfig(KestrelError):
    """The loaded Kestrel config is malformed.

    Raised, for example, when an `entity_identifier` entry is not a list of
    strings.
    """


class VariableNotFound(KestrelError):
pass

Expand Down Expand Up @@ -122,3 +126,7 @@ class UnsupportedOperatorError(KestrelError):
"""The data source doesn't support this operator"""

pass


class IncompleteDataMapping(KestrelError):
    """A data model mapping lacks an attribute configured as an entity identifier."""
Empty file.
35 changes: 34 additions & 1 deletion packages/kestrel_core/src/kestrel/mapping/data_model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from functools import reduce
from typing import Optional, Union

import dpath
Expand All @@ -12,6 +13,7 @@
run_transformer_on_series,
)
from kestrel.utils import list_folder_files
from kestrel.exceptions import IncompleteDataMapping

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -51,7 +53,22 @@ def _add_attr(obj: dict, key: str, value: str):


def reverse_mapping(obj: dict, prefix: str = None, result: dict = None) -> dict:
"""Reverse the mapping; return native -> OCSF map"""
"""Reverse the mapping of `obj`
Newly loaded mapping from disk is OCSF -> native mapping. This function
takes in such mapping, and reverse it to native -> OCSF mapping, which can
be used by the frontend. The result mapping is flattened.
To call the function: `reverse_mapping(ocsf_to_native_mapping)`
Parameters:
obj: mapping loaded from disk (OCSF -> native)
prefix: key path to `obj`; used by the recursive function itself
result: intermediate result mapping; used by the recursive function itself
Returns:
native -> OCSF mapping
"""
if result is None:
result = {}
for k, v in obj.items():
Expand Down Expand Up @@ -211,6 +228,22 @@ def load_default_mapping(
return result


@typechecked
def check_entity_identifier_existence_in_mapping(
    data_model_mapping: dict, entity_identifiers: dict
):
    """Verify that every configured entity identifier exists in the mapping.

    Entities listed in `entity_identifiers` but absent from
    `data_model_mapping` are ignored; only entities the mapping actually
    defines are checked.

    Parameters:
        data_model_mapping: nested mapping keyed by entity name
        entity_identifiers: entity name -> list of identifier attributes;
            an identifier may be a dotted path such as `endpoint.uid`

    Raises:
        IncompleteDataMapping: an identifier path cannot be resolved in the
            mapping of its entity
    """
    for entity_name, ids in entity_identifiers.items():
        if entity_name not in data_model_mapping:
            continue
        entity = data_model_mapping[entity_name]
        for idx in ids:
            try:
                # Walk the dotted path one key at a time through nested dicts.
                reduce(dict.__getitem__, idx.split("."), entity)
            except (KeyError, TypeError) as err:
                # KeyError: a key on the path is absent.
                # TypeError: an intermediate node is not a dict (e.g. the
                # path continues past a leaf string), so the identifier is
                # just as unresolvable as a missing key.
                raise IncompleteDataMapping(
                    f"Identifier '{idx}' missing in data mapping"
                ) from err


@typechecked
def _get_from_mapping(mapping: Union[str, list, dict], key) -> list:
result = []
Expand Down
80 changes: 47 additions & 33 deletions packages/kestrel_core/src/kestrel/mapping/entityattribute/ecs.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@
# endpoint: see https://schema.ocsf.io/1.1.0/objects/endpoint
endpoint: &endpoint
uid: host.id
domain: host.domain
name: host.name
hostname: host.hostname
ip: host.ip
mac: host.mac


# https://schema.ocsf.io/1.1.0/objects/user
user:
domain: user.domain
full_name: user.full_name
name: user.name
uid: user.id


# https://schema.ocsf.io/1.1.0/objects/file
file:
endpoint: *endpoint
accessed_time: file.accessed
attributes: file.attributes
created_time: file.created
Expand Down Expand Up @@ -49,6 +68,7 @@ group:

# https://schema.ocsf.io/1.1.0/objects/process
process:
endpoint: *endpoint
cmd_line: process.command_line
name: process.name
pid: process.pid
Expand Down Expand Up @@ -120,6 +140,11 @@ process:
native_op: LIKE
native_value: posixpath_startswith
ocsf_value: dirname
user:
domain: process.user.domain
full_name: process.user.full_name
name: process.user.name
uid: process.user.id


# src_endpoint: see https://schema.ocsf.io/1.1.0/objects/endpoint
Expand All @@ -141,30 +166,6 @@ src_endpoint: &src_ref
- source.port


# endpoint: see https://schema.ocsf.io/1.1.0/objects/endpoint
endpoint:
domain:
- client.domain
- source.domain
- server.domain
- destination.domain
hostname:
- client.domain
- source.domain
- server.domain
- destination.domain
ip:
- client.ip
- source.ip
- server.ip
- destination.ip
mac:
- client.mac
- source.mac
- server.mac
- destination.mac


# dst_endpoint: see https://schema.ocsf.io/1.1.0/objects/endpoint
dst_endpoint: &dst_ref
domain:
Expand Down Expand Up @@ -225,15 +226,6 @@ certificate:
version: x509.version_number
issuer: x509.issuer.distinguished_name
subject: x509.subject.distinguished_name
#uid:


# https://schema.ocsf.io/1.1.0/objects/user
user:
domain: user.domain
full_name: user.full_name
name: user.name
uid: user.id


# https://schema.ocsf.io/1.1.0/classes/network_activity
Expand All @@ -242,3 +234,25 @@ network_activity:
src_endpoint: *src_ref
dst_endpoint: *dst_ref
traffic: *traffic


# https://schema.ocsf.io/1.2.0/objects/email
email:
uid: email.message_id
from: email.from.address
to: email.to.address
reply_to: email.reply_to.address
cc: email.cc.address
subject: email.subject


# https://schema.ocsf.io/1.2.0/objects/win/reg_key?extensions=win
reg_key:
endpoint: *endpoint
path: registry.key


# https://schema.ocsf.io/1.2.0/objects/win/reg_value?extensions=win
reg_value:
endpoint: *endpoint
path: registry.value
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Not updated for extended endpoint mapping (per process/file)

# https://schema.ocsf.io/1.1.0/objects/file
file:
name: file:name
Expand Down Expand Up @@ -141,3 +143,13 @@ user:
name: user-account:account_login
type: user-account:account_type
uid: user-account:user_id


# https://schema.ocsf.io/1.2.0/objects/win/reg_key?extensions=win
reg_key:
path: windows-registry-key.key


# https://schema.ocsf.io/1.2.0/objects/win/reg_value?extensions=win
reg_value:
path: windows-registry-key.value.data
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
event: base_event
activity: base_event
device: endpoint
registrykey: reg_key
registryvalue: reg_value

# Extended mapping to cover special field in OCSF activity
actor.user: user
# To simplify the mapping, we do not cover the parent process in actor.process
# - These semantics only appear in Process Activity [1007]; please use `process.parent_process` to specify it in that case
# - This works in activities other than Process Activity [1007]
actor.process: process
17 changes: 17 additions & 0 deletions packages/kestrel_core/tests/test_mapping_data_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@

import pandas as pd

from kestrel.exceptions import IncompleteDataMapping
from kestrel.config.utils import load_kestrel_config
from kestrel.mapping.data_model import (
load_default_mapping,
reverse_mapping,
translate_comparison_to_native,
translate_comparison_to_ocsf,
translate_dataframe,
translate_projection_to_native,
check_entity_identifier_existence_in_mapping,
)


Expand Down Expand Up @@ -88,6 +91,14 @@
}


# Mapping that lacks the `uid` attribute for `process`; used to test the
# missing-identifier check
INCOMPLETE_MAPPING = {"process": {"pid": "process.pid"}}


# Simplified subset of the standard mapping
STIX_MAPPING = {
"device": {
Expand Down Expand Up @@ -205,3 +216,9 @@ def test_translate_dataframe(): #TODO: more testing here
dmm = load_default_mapping("ecs")
df = translate_dataframe(df, dmm["process"])
#TODO:assert df["file.name"].iloc[0] == "cmd.exe"


def test_incomplete_mapping_no_identifier():
    """A mapping without the configured identifiers must be rejected."""
    default_identifiers = load_kestrel_config()["entity_identifier"]
    with pytest.raises(IncompleteDataMapping):
        check_entity_identifier_existence_in_mapping(
            INCOMPLETE_MAPPING, default_identifiers
        )
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
load_kestrel_config,
)
from kestrel.exceptions import InterfaceNotConfigured
from kestrel.mapping.data_model import load_default_mapping
from kestrel.mapping.data_model import (
load_default_mapping,
check_entity_identifier_existence_in_mapping,
)


PROFILE_PATH_DEFAULT = CONFIG_DIR_DEFAULT / "opensearch.yaml"
Expand Down Expand Up @@ -49,6 +52,11 @@ def __post_init__(self):
# Default to the built-in ECS mapping
self.data_model_map = load_default_mapping("ecs")

kestrel_config = load_kestrel_config()
check_entity_identifier_existence_in_mapping(
self.data_model_map, kestrel_config["entity_identifier"]
)


@dataclass
class Config(DataClassJSONMixin):
Expand All @@ -65,13 +73,6 @@ def load_config():
interface_config = Config(
**load_user_config(PROFILE_PATH_ENV_VAR, PROFILE_PATH_DEFAULT)
)

# load default entity identifier from main Kestrel config
kestrel_config = load_kestrel_config()
for ds in interface_config.datasources.values():
if not ds.entity_identifier:
ds.entity_identifier = kestrel_config["entity_identifier"]

return interface_config
except TypeError:
raise InterfaceNotConfigured()
1 change: 0 additions & 1 deletion packages/kestrel_interface_opensearch/tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,3 @@ def test_load_config(tmp_path):
assert read_config.connections["localhost"].url == config["connections"]["localhost"]["url"]
assert read_config.datasources["some_ds"].index_pattern == config["datasources"]["some_ds"]["index_pattern"]
assert read_config.datasources["some_ds"].data_model_map["some.field"] == "other.field"
assert read_config.datasources["some_ds"].entity_identifier["process"] == "uid"
Loading

0 comments on commit abe19d9

Please sign in to comment.