Skip to content

Commit

Permalink
Add faster recurse method to filesystem (#749)
Browse files Browse the repository at this point in the history
Co-authored-by: Erik Schamper <[email protected]>
  • Loading branch information
JSCU-CNI and Schamper authored Aug 22, 2024
1 parent 6894023 commit d5c7315
Show file tree
Hide file tree
Showing 6 changed files with 198 additions and 141 deletions.
58 changes: 36 additions & 22 deletions dissect/target/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,10 +228,9 @@ def walk(
topdown: bool = True,
onerror: Optional[Callable] = None,
followlinks: bool = False,
) -> Iterator[str]:
"""Walk a directory pointed to by ``path``, returning the string representation of both files and directories.
It walks across all the files inside ``path`` recursively.
) -> Iterator[tuple[str, list[str], list[str]]]:
"""Recursively walk a directory pointed to by ``path``, returning the string representation of both files
and directories.
Args:
path: The path to walk on the filesystem.
Expand All @@ -250,10 +249,9 @@ def walk_ext(
topdown: bool = True,
onerror: Optional[Callable] = None,
followlinks: bool = False,
) -> Iterator[FilesystemEntry]:
"""Walk a directory pointed to by ``path``, returning FilesystemEntry's of both files and directories.
It walks across all the files inside ``path`` recursively.
) -> Iterator[tuple[list[FilesystemEntry], list[FilesystemEntry], list[FilesystemEntry]]]:
"""Recursively walk a directory pointed to by ``path``, returning :class:`FilesystemEntry` of files
and directories.
Args:
path: The path to walk on the filesystem.
Expand All @@ -266,6 +264,19 @@ def walk_ext(
"""
return self.get(path).walk_ext(topdown, onerror, followlinks)

def recurse(self, path: str) -> Iterator[FilesystemEntry]:
    """Recursively walk the directory at ``path`` and yield its contents as :class:`FilesystemEntry`.

    ``path`` is first resolved to an entry with :meth:`get`; the walk itself is delegated to the
    ``recurse()`` method of that entry.

    Does not follow symbolic links.

    Args:
        path: The path to recursively walk on the target filesystem.

    Returns:
        An iterator of :class:`FilesystemEntry`.
    """
    start_entry = self.get(path)
    return start_entry.recurse()

def glob(self, pattern: str) -> Iterator[str]:
"""Iterate over the directory part of ``pattern``, returning entries matching ``pattern`` as strings.
Expand Down Expand Up @@ -578,10 +589,9 @@ def walk(
topdown: bool = True,
onerror: Optional[Callable] = None,
followlinks: bool = False,
) -> Iterator[str]:
"""Walk a directory and list its contents as strings.
It walks across all the files inside the entry recursively.
) -> Iterator[tuple[str, list[str], list[str]]]:
"""Recursively walk a directory and yield its contents as strings split in a tuple
of lists of files, directories and symlinks.
These contents include::
- files
Expand All @@ -603,26 +613,30 @@ def walk_ext(
topdown: bool = True,
onerror: Optional[Callable] = None,
followlinks: bool = False,
) -> Iterator[FilesystemEntry]:
"""Walk a directory and show its contents as :class:`FilesystemEntry`.
It walks across all the files inside the entry recursively.
These contents include::
- files
- directories
- symboliclinks
) -> Iterator[tuple[list[FilesystemEntry], list[FilesystemEntry], list[FilesystemEntry]]]:
"""Recursively walk a directory and yield its contents as :class:`FilesystemEntry` split in a tuple of
lists of files, directories and symlinks.
Args:
topdown: ``True`` puts this entry at the top of the list, ``False`` puts this entry at the bottom.
onerror: A method to execute when an error occurs.
followlinks: ``True`` if we want to follow any symbolic link
Returns:
An iterator of :class:`FilesystemEntry`.
An iterator of tuples :class:`FilesystemEntry`.
"""
yield from fsutil.walk_ext(self, topdown, onerror, followlinks)

def recurse(self) -> Iterator[FilesystemEntry]:
    """Recursively walk this entry and yield what is found as :class:`FilesystemEntry`.

    The actual traversal is implemented by ``fsutil.recurse``, this method only relays its results.

    Does not follow symbolic links.

    Returns:
        An iterator of :class:`FilesystemEntry`.
    """
    for found_entry in fsutil.recurse(self):
        yield found_entry

def glob(self, pattern: str) -> Iterator[str]:
"""Iterate over this directory part of ``patern``, returning entries matching ``pattern`` as strings.
Expand Down
15 changes: 15 additions & 0 deletions dissect/target/helpers/fsutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@
"TargetPath",
"walk_ext",
"walk",
"recurse",
]


Expand Down Expand Up @@ -291,6 +292,20 @@ def walk_ext(path_entry, topdown=True, onerror=None, followlinks=False):
yield [path_entry], dirs, files


def recurse(path_entry: filesystem.FilesystemEntry) -> Iterator[filesystem.FilesystemEntry]:
    """Recursively walk the given :class:`FilesystemEntry`, yields :class:`FilesystemEntry` instances.

    The walk is depth-first and pre-order: ``path_entry`` itself is yielded first and every directory is
    yielded before its children. Symbolic links found while walking are yielded, but never descended into.

    An explicit stack of directory listings is used instead of nested generators, so a deeply nested
    directory tree can not run into the interpreter's recursion limit.

    Args:
        path_entry: The entry to start walking from.

    Yields:
        ``path_entry``, followed by every entry found below it.
    """
    yield path_entry

    if not path_entry.is_dir():
        return

    # Every item is the partially consumed listing of one directory on the branch currently being walked.
    pending = [iter(path_entry.scandir())]

    while pending:
        for child_entry in pending[-1]:
            # Classify the child before handing it to the consumer, then yield it in pre-order.
            descend = child_entry.is_dir() and not child_entry.is_symlink()
            yield child_entry

            if descend:
                # Pause the parent listing and continue inside the child; the parent listing is
                # resumed where it left off once the child listing has been popped again.
                pending.append(iter(child_entry.scandir()))
                break
        else:
            # The listing on top of the stack is exhausted, go back up one level.
            pending.pop()


def glob_split(pattern: str, alt_separator: str = "") -> tuple[str, str]:
"""Split a pattern on path part boundaries on the first path part with a glob pattern.
Expand Down
189 changes: 102 additions & 87 deletions dissect/target/plugins/filesystem/unix/capability.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
import struct
from enum import IntEnum
from io import BytesIO
from typing import Iterator

from dissect.target.exceptions import FileNotFoundError, UnsupportedPluginError
from dissect.target.exceptions import UnsupportedPluginError
from dissect.target.helpers.record import TargetRecordDescriptor
from dissect.target.plugin import Plugin, export
from dissect.target.plugins.filesystem.walkfs import generate_record

CapabilityRecord = TargetRecordDescriptor(
"filesystem/unix/capability",
[
("record", "record"),
("datetime", "ts_mtime"),
("path", "path"),
("string[]", "permitted"),
("string[]", "inheritable"),
("boolean", "effective"),
("uint32", "rootid"),
("uint32", "root_id"),
],
)

Expand Down Expand Up @@ -82,88 +82,103 @@ class CapabilityPlugin(Plugin):
"""Plugin to yield files with capabilites set."""

def check_compatible(self) -> None:
    """Verify that this plugin can run on the target.

    Raises:
        UnsupportedPluginError: If the target does not provide the ``walkfs`` function, or if none of
            the target filesystems is of type ``extfs`` or ``xfs``.
    """
    if not self.target.has_function("walkfs"):
        raise UnsupportedPluginError("Need walkfs plugin")

    # The plugin reads the ``security.capability`` xattr, so at least one EXT or XFS filesystem is required.
    supported_types = ("extfs", "xfs")
    if not any(fs.__type__ in supported_types for fs in self.target.filesystems):
        raise UnsupportedPluginError("Capability plugin only works on EXT and XFS filesystems")

@export(record=CapabilityRecord)
def capability_binaries(self) -> Iterator[CapabilityRecord]:
    """Find all files that have capabilities set on files.

    Recurses the target filesystem from ``/`` and inspects every regular file that is not a symbolic
    link. Each ``security.capability`` extended attribute of such a file is parsed with
    :func:`parse_attr` and yielded as a record. Entries whose attributes can not be read, and
    attributes that can not be parsed, are logged and skipped.

    Resources:
        - https://github.com/torvalds/linux/blob/master/include/uapi/linux/capability.h

    Yields:
        One ``CapabilityRecord`` per successfully parsed ``security.capability`` attribute.
    """

    for entry in self.target.fs.recurse("/"):
        if not entry.is_file() or entry.is_symlink():
            continue

        try:
            attrs = [attr for attr in entry.lattr() if attr.name == "security.capability"]
        except Exception as e:
            # Best effort: one unreadable entry should not abort the walk over the whole filesystem.
            self.target.log.warning("Failed to get attrs for entry %s", entry)
            self.target.log.debug("", exc_info=e)
            continue

        for attr in attrs:
            try:
                permitted, inheritable, effective, root_id = parse_attr(attr.value)
            except ValueError as e:
                self.target.log.warning("Could not parse attributes for entry %s: %s", entry, e)
                self.target.log.debug("", exc_info=e)
                # Nothing was parsed for this attribute, so there is nothing valid to put in a record.
                continue

            yield CapabilityRecord(
                ts_mtime=entry.lstat().st_mtime,
                path=self.target.fs.path(entry.path),
                permitted=permitted,
                inheritable=inheritable,
                effective=effective,
                root_id=root_id,
                _target=self.target,
            )


def parse_attr(attr: bytes) -> tuple[list[str], list[str], bool, int]:
    """Efficiently parse a Linux xattr capability struct.

    The value is read as little-endian 32-bit integers: first ``magic_etc``, which holds the revision
    and the effective flag, then one permitted/inheritable pair per capability word of that revision.
    A revision 3 value ends with one extra integer, the root id.

    Args:
        attr: The raw value of a ``security.capability`` extended attribute.

    Returns:
        A tuple of permitted capability names, inheritable capability names, effective flag and ``root_id``.
        ``root_id`` is ``None`` unless the value is a revision 3 struct.

    Raises:
        ValueError: If the revision is not known or if the length of ``attr`` does not match the revision.
    """
    buf = BytesIO(attr)

    # The struct is small enough we can just use int.from_bytes
    magic_etc = int.from_bytes(buf.read(4), "little")
    effective = magic_etc & VFS_CAP_FLAGS_EFFECTIVE != 0
    cap_revision = magic_etc & VFS_CAP_REVISION_MASK

    root_id = None

    # data_len is counted in 32-bit words: magic_etc, the capability pairs and, for revision 3, the root id.
    if cap_revision == VFS_CAP_REVISION_1:
        num_caps = VFS_CAP_U32_1
        data_len = (1 + 2 * VFS_CAP_U32_1) * 4

    elif cap_revision == VFS_CAP_REVISION_2:
        num_caps = VFS_CAP_U32_2
        data_len = (1 + 2 * VFS_CAP_U32_2) * 4

    elif cap_revision == VFS_CAP_REVISION_3:
        num_caps = VFS_CAP_U32_3
        data_len = (2 + 2 * VFS_CAP_U32_3) * 4

    else:
        raise ValueError(f"Unexpected capability revision '{cap_revision}'")

    if data_len != (actual_len := len(attr)):
        raise ValueError(f"Unexpected capability length ({data_len} vs {actual_len})")

    permitted_caps = []
    inheritable_caps = []

    for _ in range(num_caps):
        permitted_caps.append(int.from_bytes(buf.read(4), "little"))
        inheritable_caps.append(int.from_bytes(buf.read(4), "little"))

    if cap_revision == VFS_CAP_REVISION_3:
        root_id = int.from_bytes(buf.read(4), "little")

    permitted = []
    inheritable = []

    for capability in Capabilities:
        # CAP_TO_INDEX and CAP_TO_MASK: which 32-bit word holds the capability and which bit inside it
        cap_index = capability.value >> 5
        cap_mask = 1 << (capability.value & 31)

        for caps, results in [(permitted_caps, permitted), (inheritable_caps, inheritable)]:
            if cap_index >= len(caps):
                # We loop over all capabilities, but might only have a version 1 caps list
                continue

            if caps[cap_index] & cap_mask != 0:
                results.append(capability.name)

    return permitted, inheritable, effective, root_id
Loading

0 comments on commit d5c7315

Please sign in to comment.