Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update main logic to recognize CAPE analysis reports and invoke the right extractor #1541

Closed
williballenthin opened this issue Jun 12, 2023 · 1 comment · Fixed by #1644
Closed
Labels
dynamic related to dynamic analysis flavor enhancement New feature or request

Comments

@williballenthin
Copy link
Collaborator

when capa encounters a CAPE dynamic analysis report, it should recognize the file format and enter "dynamic analysis flavor" mode. it should load the appropriate feature extractor and invoke it such that capa can recognize the processes, threads, etc.

this includes adding parallel logic around here:

capa/capa/main.py

Lines 119 to 242 in 2d7e20f

def find_instruction_capabilities(
ruleset: RuleSet, extractor: FeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
) -> Tuple[FeatureSet, MatchResults]:
"""
find matches for the given rules for the given instruction.
returns: tuple containing (features for instruction, match results for instruction)
"""
# all features found for the instruction.
features = collections.defaultdict(set) # type: FeatureSet
for feature, addr in itertools.chain(
extractor.extract_insn_features(f, bb, insn), extractor.extract_global_features()
):
features[feature].add(addr)
# matches found at this instruction.
_, matches = ruleset.match(Scope.INSTRUCTION, features, insn.address)
for rule_name, res in matches.items():
rule = ruleset[rule_name]
for addr, _ in res:
capa.engine.index_rule_matches(features, rule, [addr])
return features, matches
def find_basic_block_capabilities(
ruleset: RuleSet, extractor: FeatureExtractor, f: FunctionHandle, bb: BBHandle
) -> Tuple[FeatureSet, MatchResults, MatchResults]:
"""
find matches for the given rules within the given basic block.
returns: tuple containing (features for basic block, match results for basic block, match results for instructions)
"""
# all features found within this basic block,
# includes features found within instructions.
features = collections.defaultdict(set) # type: FeatureSet
# matches found at the instruction scope.
# might be found at different instructions, thats ok.
insn_matches = collections.defaultdict(list) # type: MatchResults
for insn in extractor.get_instructions(f, bb):
ifeatures, imatches = find_instruction_capabilities(ruleset, extractor, f, bb, insn)
for feature, vas in ifeatures.items():
features[feature].update(vas)
for rule_name, res in imatches.items():
insn_matches[rule_name].extend(res)
for feature, va in itertools.chain(
extractor.extract_basic_block_features(f, bb), extractor.extract_global_features()
):
features[feature].add(va)
# matches found within this basic block.
_, matches = ruleset.match(Scope.BASIC_BLOCK, features, bb.address)
for rule_name, res in matches.items():
rule = ruleset[rule_name]
for va, _ in res:
capa.engine.index_rule_matches(features, rule, [va])
return features, matches, insn_matches
def find_code_capabilities(
ruleset: RuleSet, extractor: FeatureExtractor, fh: FunctionHandle
) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
"""
find matches for the given rules within the given function.
returns: tuple containing (match results for function, match results for basic blocks, match results for instructions, number of features)
"""
# all features found within this function,
# includes features found within basic blocks (and instructions).
function_features = collections.defaultdict(set) # type: FeatureSet
# matches found at the basic block scope.
# might be found at different basic blocks, thats ok.
bb_matches = collections.defaultdict(list) # type: MatchResults
# matches found at the instruction scope.
# might be found at different instructions, thats ok.
insn_matches = collections.defaultdict(list) # type: MatchResults
for bb in extractor.get_basic_blocks(fh):
features, bmatches, imatches = find_basic_block_capabilities(ruleset, extractor, fh, bb)
for feature, vas in features.items():
function_features[feature].update(vas)
for rule_name, res in bmatches.items():
bb_matches[rule_name].extend(res)
for rule_name, res in imatches.items():
insn_matches[rule_name].extend(res)
for feature, va in itertools.chain(extractor.extract_function_features(fh), extractor.extract_global_features()):
function_features[feature].add(va)
_, function_matches = ruleset.match(Scope.FUNCTION, function_features, fh.address)
return function_matches, bb_matches, insn_matches, len(function_features)
def find_file_capabilities(ruleset: RuleSet, extractor: FeatureExtractor, function_features: FeatureSet):
file_features = collections.defaultdict(set) # type: FeatureSet
for feature, va in itertools.chain(extractor.extract_file_features(), extractor.extract_global_features()):
# not all file features may have virtual addresses.
# if not, then at least ensure the feature shows up in the index.
# the set of addresses will still be empty.
if va:
file_features[feature].add(va)
else:
if feature not in file_features:
file_features[feature] = set()
logger.debug("analyzed file and extracted %d features", len(file_features))
file_features.update(function_features)
_, matches = ruleset.match(Scope.FILE, file_features, NO_ADDRESS)
return matches, len(file_features)

that walks the dynamic flavor scopes and merges the features appropriately.

@williballenthin williballenthin added enhancement New feature or request dynamic related to dynamic analysis flavor labels Jun 12, 2023
@yelhamer yelhamer linked a pull request Aug 3, 2023 that will close this issue
3 tasks
@yelhamer
Copy link
Collaborator

yelhamer commented Aug 3, 2023

Addressed in #1644

@yelhamer yelhamer closed this as completed Aug 3, 2023
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
dynamic related to dynamic analysis flavor enhancement New feature or request
Projects
Status: done
Development

Successfully merging a pull request may close this issue.

2 participants