Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

extract dynamic capabilities #1644

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
e3f60ea
initial commit
yelhamer Jul 17, 2023
4af84e5
bugfixes
yelhamer Jul 17, 2023
bc46bf3
add vverbose rendering
yelhamer Jul 18, 2023
e5d7903
add removed tests
yelhamer Jul 18, 2023
4e4b123
mypy.ini: ignore proto issues
yelhamer Jul 18, 2023
c5d08ec
update extractors and tests
yelhamer Jul 19, 2023
7de223f
Update capa/features/extractors/ida/extractor.py: add call to get_inp…
yelhamer Jul 19, 2023
1029b36
Merge remote-tracking branch 'parentrepo/dynamic-feature-extraction' …
yelhamer Jul 20, 2023
8ac9caf
fix bugs
yelhamer Jul 20, 2023
0a4fe58
fix tests
yelhamer Jul 20, 2023
d99b16e
add copyright and remove old test
yelhamer Jul 20, 2023
482e0d3
use pathlib.Path() in binja and ida extractors
yelhamer Jul 20, 2023
fd7b926
Update capa/features/extractors/base_extractor.py
yelhamer Jul 20, 2023
2b2b2b6
Update capa/features/extractors/base_extractor.py
yelhamer Jul 20, 2023
b4cf50f
fix mypy issues
yelhamer Jul 20, 2023
ab092cb
add sample_hashes attribute to the base extractors
yelhamer Jul 20, 2023
6ee1dfd
address review comments: rename SampleHashes's from_sample() method t…
yelhamer Jul 20, 2023
806bc18
Update mypy.ini: add TODO comment
yelhamer Jul 20, 2023
24b3abd
add get_sample_hashes() to base extractor
yelhamer Jul 21, 2023
6d1a885
update static freeze test
yelhamer Jul 21, 2023
b1e468d
add tests for the get_sample_hashes() method
yelhamer Jul 21, 2023
da4e887
fix comment typo
yelhamer Jul 21, 2023
6f3fb42
update compute_dynamic_layout with the appropriate type
yelhamer Jul 21, 2023
bd83316
update compute_static_layout with the appropriate types
yelhamer Jul 21, 2023
736b2cd
address @mr-tz main.py review comments
yelhamer Jul 21, 2023
3ab3c61
use ida's hash-extraction functions
yelhamer Jul 21, 2023
8085cae
remove the usage of SampleHashes's __iter__() method
yelhamer Jul 21, 2023
6741229
migrate the `get_sample_hashes()` function to each individual extractor
yelhamer Jul 21, 2023
ab585ef
add the `skipif` mark back
yelhamer Jul 21, 2023
4ec39d4
fix linting issues
yelhamer Jul 21, 2023
c4ba5af
replace `: FeatureSet` annotations with a comment type annotation
yelhamer Jul 21, 2023
830bad5
fix bugs
yelhamer Jul 21, 2023
3d1a1fb
add get_sample_hashes() to NullFeatureExtractor
yelhamer Jul 21, 2023
90298fe
Update capa/features/extractors/base_extractor.py
yelhamer Jul 21, 2023
d13114e
remove SampleHashes __iter__method
yelhamer Jul 21, 2023
c32ac19
Update capa/features/extractors/ida/extractor.py
yelhamer Jul 21, 2023
344b3e9
Update capa/features/extractors/base_extractor.py
yelhamer Jul 21, 2023
d8c28e8
add get_sample_hashes() to elf extractor
yelhamer Jul 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions capa/features/extractors/viv/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import hashlib
import logging
from typing import Any, Dict, List, Tuple, Iterator

Expand All @@ -19,19 +20,25 @@
import capa.features.extractors.viv.basicblock
from capa.features.common import Feature
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, StaticFeatureExtractor
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, FeatureExtractor

logger = logging.getLogger(__name__)


class VivisectFeatureExtractor(StaticFeatureExtractor):
class VivisectFeatureExtractor(FeatureExtractor):
def __init__(self, vw, path, os):
super().__init__()
self.vw = vw
self.path = path
with open(self.path, "rb") as f:
self.buf = f.read()

# hash the raw sample bytes once, up front; tuple order is (md5, sha1, sha256).
# note: a hashlib object's update() returns None, so it cannot be chained with
# hexdigest() - hand the buffer to the constructor instead.
self.sample_hashes = (
    hashlib.md5(self.buf).hexdigest(),
    hashlib.sha1(self.buf).hexdigest(),
    hashlib.sha256(self.buf).hexdigest(),
)
yelhamer marked this conversation as resolved.
Show resolved Hide resolved

# pre-compute these because we'll yield them at *every* scope.
self.global_features: List[Tuple[Feature, Address]] = []
self.global_features.extend(capa.features.extractors.viv.file.extract_file_format(self.buf))
Expand All @@ -42,6 +49,9 @@ def get_base_address(self):
# assume there is only one file loaded into the vw
return AbsoluteVirtualAddress(list(self.vw.filemeta.values())[0]["imagebase"])

# return the sample's hashes as stored in self.sample_hashes by __init__:
# a tuple of hex digests in the order (md5, sha1, sha256).
def get_sample_hashes(self) -> Tuple[str, str, str]:
return self.sample_hashes

# yield the (feature, address) pairs that __init__ pre-computed into
# self.global_features; these are the features emitted at every scope.
def extract_global_features(self):
yield from self.global_features

Expand Down
2 changes: 1 addition & 1 deletion capa/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1022,7 +1022,7 @@ def compute_dynamic_layout(rules, extractor, capabilities) -> rdoc.Layout:
matched_threads = set()
for rule_name, matches in capabilities.items():
rule = rules[rule_name]
if capa.rules.BASIC_BLOCK_SCOPE in rule.meta.get("scopes")["dynamic"]:
if capa.rules.THREAD_SCOPE in rule.meta.get("scopes")["dynamic"]:
for addr, _ in matches:
assert addr in processes_by_thread
matched_threads.add(addr)
Expand Down
1 change: 1 addition & 0 deletions capa/render/proto/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ def scope_to_pb2(scope: capa.rules.Scope) -> capa_pb2.Scope.ValueType:


def metadata_to_pb2(meta: rd.Metadata) -> capa_pb2.Metadata:
assert isinstance(meta.analysis, rd.StaticAnalysis)
return capa_pb2.Metadata(
timestamp=str(meta.timestamp),
version=meta.version,
Expand Down
44 changes: 34 additions & 10 deletions capa/render/vverbose.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,8 @@ def render_rules(ostream, doc: rd.ResultDocument):
check for OutputDebugString error
namespace anti-analysis/anti-debugging/debugger-detection
author [email protected]
scope function
static scope: function
dynamic scope: process
mbc Anti-Behavioral Analysis::Detect Debugger::OutputDebugString
function @ 0x10004706
and:
Expand All @@ -268,14 +269,24 @@ def render_rules(ostream, doc: rd.ResultDocument):
api: kernel32.OutputDebugString @ 0x10004767, 0x10004787, 0x10004816, 0x10004895
"""

assert isinstance(doc.meta.analysis, rd.StaticAnalysis)
functions_by_bb: Dict[capa.features.address.Address, capa.features.address.Address] = {}
for finfo in doc.meta.analysis.layout.functions:
faddress = finfo.address.to_capa()

for bb in finfo.matched_basic_blocks:
bbaddress = bb.address.to_capa()
functions_by_bb[bbaddress] = faddress
processes_by_thread: Dict[capa.features.address.Address, capa.features.address.Address] = {}
if isinstance(doc.meta.analysis, rd.StaticAnalysis):
for finfo in doc.meta.analysis.layout.functions:
faddress = finfo.address.to_capa()

for bb in finfo.matched_basic_blocks:
bbaddress = bb.address.to_capa()
functions_by_bb[bbaddress] = faddress
elif isinstance(doc.meta.analysis, rd.DynamicAnalysis):
for pinfo in doc.meta.analysis.layout.processes:
paddress = pinfo.address.to_capa()

for thread in pinfo.matched_threads:
taddress = thread.address.to_capa()
processes_by_thread[taddress] = paddress
else:
raise ValueError("invalid analysis field in the document's meta")

had_match = False

Expand Down Expand Up @@ -324,7 +335,11 @@ def render_rules(ostream, doc: rd.ResultDocument):

rows.append(("author", ", ".join(rule.meta.authors)))

rows.append(("scopes", str(rule.meta.scopes)))
if rule.meta.scopes.static:
rows.append(("static scope:", str(rule.meta.scopes.static)))

if rule.meta.scopes.dynamic:
rows.append(("dynamic scope:", str(rule.meta.scopes.dynamic)))

if rule.meta.attack:
rows.append(("att&ck", ", ".join([rutils.format_parts_id(v) for v in rule.meta.attack])))
Expand Down Expand Up @@ -352,7 +367,8 @@ def render_rules(ostream, doc: rd.ResultDocument):
render_match(ostream, first_match, indent=0)
else:
for location, match in sorted(doc.rules[rule.meta.name].matches):
ostream.write(rule.meta.scopes)
ostream.write(f"static scope: {rule.meta.scopes.static}")
ostream.write(f"dynamic scope: {rule.meta.scopes.dynamic}")
ostream.write(" @ ")
ostream.write(capa.render.verbose.format_address(location))

Expand All @@ -362,6 +378,14 @@ def render_rules(ostream, doc: rd.ResultDocument):
+ capa.render.verbose.format_address(frz.Address.from_capa(functions_by_bb[location.to_capa()]))
)

if capa.rules.THREAD_SCOPE in rule.meta.scopes:
ostream.write(
" in process "
+ capa.render.verbose.format_address(
frz.Address.from_capa(processes_by_thread[location.to_capa()])
)
)

ostream.write("\n")
render_match(ostream, match, indent=1)
if rule.meta.lib:
Expand Down
Loading