Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add pre-commit #5

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,4 @@ dmypy.json
.pytype/

# Cython debug symbols
cython_debug/
cython_debug/
17 changes: 17 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-toml
- id: detect-private-key
- id: check-added-large-files

# ruff
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: "v0.3.4"
hooks:
- id: ruff
args: ["--fix"]
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@

Microsoft Defender for Cloud [threat matrix for Kubernetes (TMFK)](https://github.com/microsoft/Threat-Matrix-for-Kubernetes) contains attack tactics, techniques and mitigations relevant to Kubernetes environments.

This repository contains the TMFK dataset represented in STIX 2.1 JSON collections.
This repository contains the TMFK dataset represented in STIX 2.1 JSON collections.

## Repository Structure

```
.
├─ build ∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙ Collection folder
├─ build ∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙ Collection folder
│ ├─ tmfk_strict.json ∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙ Most recent strict TMFK release
│ ├─ tmfk_attack_compatible.json ∙∙∙∙∙∙∙∙∙∙∙∙∙∙ Most recent ATT&CK compatible TMFK release
│ ├─ tmfk_strict_b885d18.json ∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙∙ TMFK strict collection for commit hash b885d18 of site repo
Expand Down
3,374 changes: 1,687 additions & 1,687 deletions build/tmfk_attack_compatible.json

Large diffs are not rendered by default.

3,374 changes: 1,687 additions & 1,687 deletions build/tmfk_attack_compatible_b885d18.json

Large diffs are not rendered by default.

3,374 changes: 1,687 additions & 1,687 deletions build/tmfk_strict.json

Large diffs are not rendered by default.

3,374 changes: 1,687 additions & 1,687 deletions build/tmfk_strict_b885d18.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion index.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,4 @@
"description": "STIX 2.1 Threat Matrix for Kubernetes collection bundle with it's own source name, killchain name and domain"
}
]
}
}
2 changes: 1 addition & 1 deletion make.bat
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ popd

CALL pipenv install
mkdir build
CALL pipenv run python ./src/parse.py
CALL pipenv run python ./src/parse.py
2 changes: 1 addition & 1 deletion make.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ popd

pipenv install
mkdir -p build
pipenv run python ./src/parse.py
pipenv run python ./src/parse.py
43 changes: 43 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
[tool.ruff]
line-length = 99
src = ["src"]

[tool.ruff.lint]
select = ["ALL"]
ignore = [
"ARG",
"ANN",
"D",
"EM101",
"EM102",
"PT001",
"PT023",
"SIM108",
"SIM114",
"TRY003",
"PLW2901",
"RET505",
"PLR0913",
"FA",
"S101",
"PLR2004",
"TCH001",
"PGH003",
"TD001",
"TD002",
"TD003",
"FIX001",
"FIX002",
"TCH002",
"ERA001",
"N818",
"E501",
"PTH118",
"PERF401",

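# TODO: fix these in the code instead of ignoring them
# (DTZ005: datetime.now() called without a timezone; PTH123: builtin open() instead of Path.open())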
"DTZ005",
"PTH123",


]
31 changes: 26 additions & 5 deletions src/custom_tmfk_objects.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""The classes found here are how ATRM objects can be represented as custom STIX objects instead of python dictionaries."""

from collections import OrderedDict
from datetime import datetime
from typing import ClassVar

from constants import Mode, get_tmfk_source
from stix2 import CustomObject, KillChainPhase
from stix2.properties import (
BooleanProperty,
Expand All @@ -15,6 +16,8 @@
)
from stix2.v21.base import _STIXBase21

from constants import Mode, get_tmfk_source


class CustomStixObject:
"""Custom STIX object used for ATRM objects."""
Expand Down Expand Up @@ -60,7 +63,7 @@ def get_id(self, mode: Mode):
if external_references:
for reference in external_references:
if reference.get("external_id") and reference.get(
"source_name"
"source_name",
) == get_tmfk_source(mode=mode):
return reference["external_id"]
return None
Expand All @@ -75,6 +78,22 @@ def get_id(self, mode: Mode):
"x_mitre_modified_by_ref",
ReferenceProperty(valid_types="identity", spec_version="2.1"),
),
(
"created",
TimestampProperty(
default=datetime.now,
precision="millisecond",
precision_constraint="min",
),
),
(
"modified",
TimestampProperty(
default=datetime.now,
precision="millisecond",
precision_constraint="min",
),
),
("description", StringProperty()),
("x_mitre_version", StringProperty()),
("x_mitre_domains", ListProperty(StringProperty())),
Expand Down Expand Up @@ -115,16 +134,18 @@ class Relationship(CustomStixObject):


class ObjectRef(_STIXBase21):
_properties = OrderedDict(
_properties: ClassVar[OrderedDict] = OrderedDict(
[
("object_ref", StringProperty(required=True)),
(
"object_modified",
TimestampProperty(
precision="millisecond", precision_constraint="min", required=True
precision="millisecond",
precision_constraint="min",
required=True,
),
),
]
],
)


Expand Down
35 changes: 27 additions & 8 deletions src/git_tools.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from contextlib import contextmanager
from datetime import datetime
from io import BytesIO
from typing import Generator, Iterator

import git

Expand All @@ -13,17 +16,33 @@ def get_last_commit_hash(repo_path: str):
return repo.commit("main").hexsha[:7]


def get_file_creation_date(repo_path: str, file_path: str) -> datetime:
def iter_file_commits(repo_path: str, file_path: str) -> Iterator[git.Commit]:
repo = git.Repo(repo_path)
commits = list(repo.iter_commits(paths=file_path))
return repo.iter_commits(paths=file_path)


def get_file_creation_date(repo_path: str, file_path: str) -> datetime | None:
    """Return the commit time of the last commit listed for ``file_path``.

    The full history produced by ``iter_file_commits`` is materialised and the
    final entry is used (presumably the oldest commit, i.e. the one that
    created the file -- confirm the iteration order against GitPython).

    Args:
        repo_path: Path of the git repository to inspect.
        file_path: Path of the file whose history is queried.

    Returns:
        The ``committed_datetime`` of the last commit in the history, or
        ``None`` when no commit touches ``file_path``.
    """
    history = list(iter_file_commits(repo_path, file_path))
    if not history:
        return None
    return history[-1].committed_datetime


def get_file_modification_date(repo_path: str, file_path: str) -> datetime:
repo = git.Repo(repo_path)
commits = list(repo.iter_commits(paths=file_path))
if commits and len(commits):
return commits[0].committed_datetime
return None
def get_file_modification_date(repo_path: str, file_path: str) -> datetime | None:
    """Return the commit time of the first commit listed for ``file_path``.

    Only the first item of ``iter_file_commits`` is consumed, so the rest of
    the history is never walked.

    Args:
        repo_path: Path of the git repository to inspect.
        file_path: Path of the file whose history is queried.

    Returns:
        The ``committed_datetime`` of the first commit yielded, or ``None``
        when no commit touches ``file_path``.
    """
    newest = next(iter_file_commits(repo_path, file_path), None)
    if newest is None:
        return None
    return newest.committed_datetime


@contextmanager
def open_file_at_commit(
    commit: git.Commit,
    file_path: str,
) -> Generator[BytesIO, None, None]:
    """Yield an in-memory binary copy of ``file_path`` as stored in ``commit``.

    The blob is looked up with ``commit.tree / file_path`` and its data stream
    is read completely into a ``BytesIO`` buffer, which is closed when the
    ``with`` block exits.

    Args:
        commit: Commit whose tree contains the file.
        file_path: Path of the file, passed to the tree lookup.

    Yields:
        A ``BytesIO`` positioned at the start of the file's content.

    Raises:
        Any error raised by the tree lookup or by reading the blob propagates
        unchanged.
    """
    blob = commit.tree / file_path
    # Build the buffer *before* entering ``try``: if the read fails there is
    # nothing to close, and the ``finally`` clause must not mask the real
    # error with an UnboundLocalError on the buffer name.
    buffer = BytesIO(blob.data_stream.read())
    try:
        yield buffer
    finally:
        buffer.close()
55 changes: 37 additions & 18 deletions src/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
from datetime import datetime
from pathlib import Path

from mitreattack.stix20.custom_attack_objects import Matrix
from stix2 import Bundle, parse

from constants import (
ATTACK_SPEC_VERSION,
CREATOR_IDENTITY,
Expand All @@ -19,12 +22,14 @@
get_tmfk_source,
)
from custom_tmfk_objects import Collection, ObjectRef, Relationship
from git_tools import get_last_commit_hash, get_first_commit_date
from mitreattack.stix20.custom_attack_objects import Matrix
from parse_mitigation import handle_folder, parse_mitigation
from git_tools import get_first_commit_date, get_last_commit_hash
from parse_mitigation import (
handle_folder,
parse_mitigation,
parse_relationship_created_modified_fields,
)
from parse_tactic import parse_tactic
from parse_technique import parse_technique
from stix2 import Bundle, parse


def parse_tmfk(mode: ModeEnumAttribute) -> None:
Expand All @@ -49,7 +54,7 @@ def parse_tmfk(mode: ModeEnumAttribute) -> None:
filter(
lambda x: x.endswith(".md") and x != "index.md",
os.listdir(MITIGATIONS_PATH),
)
),
)

folders = list(filter(lambda x: "." not in x, os.listdir(MITIGATIONS_PATH)))
Expand All @@ -59,18 +64,28 @@ def parse_tmfk(mode: ModeEnumAttribute) -> None:
objects.append(mitigation)

for idx in ids:
technique = techniques[idx]

relationship_dt = parse_relationship_created_modified_fields(
repo_path=TMFK_PATH,
file_path=file_path,
technique=technique,
)

objects.append(
Relationship(
source_ref=mitigation.id,
description=mitigation.description.split(".")[0],
relationship_type="mitigates",
target_ref=techniques[idx].id,
target_ref=technique.id,
created_by_ref=CREATOR_IDENTITY,
x_mitre_version=TMFK_VERSION,
x_mitre_modified_by_ref=CREATOR_IDENTITY,
x_mitre_attack_spec_version="2.1.0",
x_mitre_domains=[get_tmfk_domain(mode=mode)],
)
created=relationship_dt.created,
modified=relationship_dt.modified,
),
)

for folder in folders:
Expand All @@ -80,18 +95,27 @@ def parse_tmfk(mode: ModeEnumAttribute) -> None:

for idx in ids:
for t in ids[idx]:
technique = techniques[t]

relationship_dt = parse_relationship_created_modified_fields(
repo_path=TMFK_PATH,
file_path=file_path,
technique=technique,
)
objects.append(
Relationship(
source_ref=idx,
description=mitigations[idx].description.split(".")[0],
relationship_type="mitigates",
target_ref=techniques[t].id,
target_ref=technique,
created_by_ref=CREATOR_IDENTITY,
x_mitre_version=TMFK_VERSION,
x_mitre_modified_by_ref=CREATOR_IDENTITY,
x_mitre_attack_spec_version="2.1.0",
x_mitre_domains=[get_tmfk_domain(mode=mode)],
)
created=relationship_dt.created,
modified=relationship_dt.modified,
),
)

matrix = Matrix(
Expand All @@ -104,7 +128,7 @@ def parse_tmfk(mode: ModeEnumAttribute) -> None:
"external_id": "tmfk",
"source_name": get_tmfk_source(mode=mode),
"url": "https://microsoft.github.io/Threat-Matrix-for-Kubernetes",
}
},
],
name="Threat Matrix for Kubernetes",
description="The purpose of the threat matrix for Kubernetes is to conceptualize the known tactics, techniques, and procedures (TTP) that adversaries may use against Kubernetes environments. Inspired from MITRE ATT&CK, the threat matrix for Kubernetes is designed to give quick insight into a potential TTP that an adversary may be using in their attack campaign. The threat matrix for Kubernetes contains also mitigations specific to Kubernetes environments and attack techniques.",
Expand All @@ -131,23 +155,18 @@ def parse_tmfk(mode: ModeEnumAttribute) -> None:
x_mitre_version=TMFK_VERSION,
created_by_ref=CREATOR_IDENTITY,
x_mitre_contents=[
ObjectRef(object_ref=obj.id, object_modified=obj.modified)
for obj in objects
ObjectRef(object_ref=obj.id, object_modified=obj.modified) for obj in objects
],
)

bundle = Bundle(collection, objects, allow_custom=True)
commit_hash = get_last_commit_hash(TMFK_PATH)
output_file_last = (
Path(__file__).parent.parent / "build" / f"tmfk_{mode.name.lower()}.json"
)
output_file_last = Path(__file__).parent.parent / "build" / f"tmfk_{mode.name.lower()}.json"
with open(output_file_last, "w", encoding="utf-8") as f:
f.write(bundle.serialize(pretty=True))

output_file_versioned = (
Path(__file__).parent.parent
/ "build"
/ f"tmfk_{mode.name.lower()}_{commit_hash}.json"
Path(__file__).parent.parent / "build" / f"tmfk_{mode.name.lower()}_{commit_hash}.json"
)
with open(output_file_versioned, "w", encoding="utf-8") as f:
f.write(bundle.serialize(pretty=True))
Expand Down
Loading