From 3b75c812632a13a69b576ba3bfec78d8665bc1c3 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 20 Jan 2025 22:00:52 +0000 Subject: [PATCH] analysis: Change analysis interface to allow passing in properties (#1993) * analysis: Change analysis interface to allow passing in properties Signed-off-by: Arthur Chan * Fix formatting Signed-off-by: Arthur Chan * Fix formatting Signed-off-by: Arthur Chan * Fix logic Signed-off-by: Arthur Chan * Fix formatting Signed-off-by: Arthur Chan * Fix formatting Signed-off-by: Arthur Chan * Fix formatting Signed-off-by: Arthur Chan * Fix logic to allow separate CLI for json only analysis Signed-off-by: Arthur Chan * Fix logic with new api in cli Signed-off-by: Arthur Chan * Fix logic and formatting Signed-off-by: Arthur Chan * Fix formatting Signed-off-by: Arthur Chan * Fix formatting Signed-off-by: Arthur Chan * Fix formatting Signed-off-by: Arthur Chan --------- Signed-off-by: Arthur Chan --- src/fuzz_introspector/analyses/__init__.py | 25 +++- .../analyses/source_code_line_analyser.py | 135 ++++++++++++++++++ src/fuzz_introspector/analysis.py | 17 ++- src/fuzz_introspector/cli.py | 48 ++++++- src/fuzz_introspector/commands.py | 71 ++++++++- .../datatypes/function_profile.py | 39 +++++ 6 files changed, 324 insertions(+), 11 deletions(-) create mode 100644 src/fuzz_introspector/analyses/source_code_line_analyser.py diff --git a/src/fuzz_introspector/analyses/__init__.py b/src/fuzz_introspector/analyses/__init__.py index 2c1ded4f4..164164ccc 100644 --- a/src/fuzz_introspector/analyses/__init__.py +++ b/src/fuzz_introspector/analyses/__init__.py @@ -1,3 +1,19 @@ +# Copyright 2025 Fuzz Introspector Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Initialisation of AnalysisInterface instances""" + +from fuzz_introspector import analysis from fuzz_introspector.analyses import bug_digestor from fuzz_introspector.analyses import driver_synthesizer from fuzz_introspector.analyses import engine_input @@ -8,10 +24,11 @@ from fuzz_introspector.analyses import runtime_coverage_analysis from fuzz_introspector.analyses import sinks_analyser from fuzz_introspector.analyses import annotated_cfg +from fuzz_introspector.analyses import source_code_line_analyser # All optional analyses. # Ordering here is important as top analysis will be shown first in the report -all_analyses = [ +all_analyses: list[type[analysis.AnalysisInterface]] = [ optimal_targets.OptimalTargets, engine_input.EngineInput, runtime_coverage_analysis.RuntimeCoverageAnalysis, @@ -23,3 +40,9 @@ sinks_analyser.SinkCoverageAnalyser, annotated_cfg.FuzzAnnotatedCFG, ] + +# This is the list of analyses that are meant to run +# directly from CLI without the need to generate HTML reports +standalone_analyses: list[type[analysis.AnalysisInterface]] = [ + source_code_line_analyser.SourceCodeLineAnalyser, +] diff --git a/src/fuzz_introspector/analyses/source_code_line_analyser.py b/src/fuzz_introspector/analyses/source_code_line_analyser.py new file mode 100644 index 000000000..8274d3f95 --- /dev/null +++ b/src/fuzz_introspector/analyses/source_code_line_analyser.py @@ -0,0 +1,135 @@ +# Copyright 2025 Fuzz Introspector Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with 
class SourceCodeLineAnalyser(analysis.AnalysisInterface):
    """Locate the function(s) that cover a given line of a source file.

    The target file and line are configured with
    :meth:`set_source_file_line`. Running :meth:`analysis_func` then stores
    the matching functions under the ``functions`` key of ``json_results``
    and dumps them to ``functions.json`` in the output directory.
    """

    name: str = 'SourceCodeLineAnalyser'

    def __init__(self) -> None:
        self.json_results: Dict[str, Any] = {}
        self.json_string_result = ''
        # Sentinel values that analysis_func rejects, so running the
        # analysis before set_source_file_line() logs an error instead of
        # raising AttributeError on a missing attribute.
        self.source_file: str = ''
        self.source_line: int = -1

    @classmethod
    def get_name(cls) -> str:
        """Return the analyser identifying name for processing.

        :return: The identifying name of this analyser
        :rtype: str
        """
        return cls.name

    def get_json_string_result(self) -> str:
        """Return the stored json string result.

        An explicitly stored string (see :meth:`set_json_string_result`)
        takes precedence; otherwise ``json_results`` is serialised.

        :return: The json string result processed and stored
            by this analyser
        :rtype: str
        """
        if self.json_string_result:
            return self.json_string_result
        return json.dumps(self.json_results)

    def set_json_string_result(self, string: str) -> None:
        """Store the result of this analyser as json string result
        for further processing in a later time.

        :param string: A json string variable storing the
            processing result of the analyser for future use
        :type string: str
        """
        self.json_string_result = string

    def set_source_file_line(self, source_file: str,
                             source_line: int) -> None:
        """Configure the source file and source line for this analyser.

        :param source_file: Path or bare file name of the target source
            file
        :type source_file: str
        :param source_line: Target line number; must be positive for the
            analysis to run
        :type source_line: int
        """
        self.source_file = source_file
        self.source_line = source_line

    def analysis_func(self,
                      table_of_contents: html_helpers.HtmlTableOfContents,
                      tables: List[str],
                      proj_profile: project_profile.MergedProjectProfile,
                      profiles: List[fuzzer_profile.FuzzerProfile],
                      basefolder: str, coverage_url: str,
                      conclusions: List[html_helpers.HTMLConclusion],
                      out_dir: str) -> str:
        """Find the functions covering the configured file and line.

        Only ``proj_profile`` and ``out_dir`` are read by this analyser;
        the remaining parameters exist to satisfy the shared analysis
        interface and are ignored.

        If the configured source file contains ``os.sep`` it is treated as
        a path and must equal a function's ``function_source_file``
        exactly. Otherwise it is treated as a file name and compared with
        the base name of every known source file.

        :param proj_profile: Merged project profile whose
            ``all_functions`` and ``all_constructors`` are searched
        :param out_dir: Directory that receives ``functions.json`` when at
            least one function is found
        :return: Always an empty string; this analyser produces no HTML
        :rtype: str
        """
        logger.info(' - Running analysis %s', self.get_name())

        if not self.source_file or self.source_line <= 0:
            logger.error('No valid source code or target line are provided')
            return ''

        # Get all functions (including constructors) from the profiles
        all_functions = list(proj_profile.all_functions.values())
        all_functions.extend(proj_profile.all_constructors.values())

        # Group the function profiles by the source file declaring them
        func_file_map: dict[str, list[function_profile.FunctionProfile]] = {}
        for function in all_functions:
            func_file_map.setdefault(function.function_source_file,
                                     []).append(function)

        if os.sep in self.source_file:
            # File path: exact match on the recorded source file
            target_func_list = func_file_map.get(self.source_file, [])
        else:
            # File name: match against the base name of every source file
            target_func_list = []
            for path, functions in func_file_map.items():
                if os.path.basename(path) == self.source_file:
                    target_func_list.extend(functions)

        if not target_func_list:
            logger.error(
                'Failed to locate the target source file %s from the project.',
                self.source_file)
            # Nothing to search; stop here rather than also reporting a
            # misleading "No functions found" message below.
            return ''

        result_list = []
        for func in target_func_list:
            start = func.function_linenumber
            end = func.function_line_number_end
            if start <= self.source_line <= end:
                logger.info('Found function %s from line %d in %s',
                            func.function_name, self.source_line,
                            self.source_file)
                result_list.append(func.to_dict())

        if result_list:
            self.json_results['functions'] = result_list
            result_json_path = os.path.join(out_dir, 'functions.json')
            logger.info('Dumping result to %s', result_json_path)
            # Explicit encoding keeps the output independent of the locale
            with open(result_json_path, 'w', encoding='utf-8') as f:
                json.dump(self.json_results, f)
        else:
            logger.info('No functions found from line %d in %s',
                        self.source_line, self.source_file)

        return ''
Available options: - OptimalTargets, FuzzEngineInput, ThirdPartyAPICoverageAnalyser + AnnotatedCFG, BugDigestorAnalysis, FuzzCalltreeAnalysis, + FuzzDriverSynthesizerAnalysis, FuzzEngineInputAnalysis, + FilePathAnalyser, ThirdPartyAPICoverageAnalyser, + MetadataAnalysis, OptimalTargets, RuntimeCoverageAnalysis, + SinkCoverageAnalyser """) report_parser.add_argument("--enable-all-analyses", action='store_true', @@ -134,6 +138,46 @@ def get_cmdline_parser() -> argparse.ArgumentParser: required=True, help='Path to the second report') + # Standalone analyser + analyse_parser = subparsers.add_parser( + 'analyse', + help='Standlone analyser commands to run on the target project.') + + analyser_parser = analyse_parser.add_subparsers( + dest='analyser', + required=True, + help='Available analyser: SourceCodeLineAnalyser') + + source_code_line_analyser_parser = analyser_parser.add_parser( + 'SourceCodeLineAnalyser', + help=('Provide information in out-dir/function.json for the function' + ' found in the given target file and line number')) + source_code_line_analyser_parser.add_argument( + '--source-file', + default='', + type=str, + help='Target file path or name for SourceCodeLineAnalyser') + source_code_line_analyser_parser.add_argument( + '--source-line', + default=-1, + type=int, + help='Target line for SourceCodeLineAnalyser') + source_code_line_analyser_parser.add_argument( + '--target-dir', + type=str, + help='Directory holding source to analyse.', + required=True) + source_code_line_analyser_parser.add_argument( + '--language', + type=str, + help='Programming of the source code to analyse.', + choices=constants.LANGUAGES_SUPPORTED) + source_code_line_analyser_parser.add_argument( + '--out-dir', + default='', + type=str, + help='Folder to store analysis results.') + return parser @@ -176,6 +220,8 @@ def main() -> int: return_code = commands.light_analysis(args) elif args.command == 'full': return_code = commands.end_to_end(args) + elif args.command == 'analyse': + 
def analyse(args) -> int:
    """Run the chosen standalone analyser on a target project.

    The analyser named by ``args.analyser`` is looked up among the
    standalone analyses. The frontend is then run on ``args.target_dir``
    with its output written to the out directory, the resulting data files
    are loaded into an introspection project, and the analyser is executed
    on that project.

    :param args: Parsed command line arguments. Reads ``analyser``,
        ``language``, ``target_dir`` and ``out_dir``; for
        ``SourceCodeLineAnalyser`` also ``source_file`` and
        ``source_line``. ``args.language`` is overwritten with the detected
        language when it is not provided.
    :return: ``constants.APP_EXIT_SUCCESS`` when the analysis was run,
        ``constants.APP_EXIT_ERROR`` when the analyser is unknown or its
        required arguments are invalid
    :rtype: int
    """
    # Retrieve the correct analyser
    target_analyser = None
    for analyser in analysis.get_all_standalone_analyses():
        if analyser.get_name() == args.analyser:
            target_analyser = analysis.instantiate_analysis_interface(analyser)
            break

    # Return error if analyser not found
    if target_analyser is None:
        logger.error('Analyser %s not found.', args.analyser)
        return constants.APP_EXIT_ERROR

    # Validate analyser specific arguments up front so that invalid input
    # is reported as an error before the costly frontend run, instead of
    # being logged by the analyser while the command still reports success.
    is_source_line_analysis = (
        target_analyser.get_name() == 'SourceCodeLineAnalyser')
    if is_source_line_analysis and (not args.source_file
                                    or args.source_line <= 0):
        logger.error(
            'A valid --source-file and a positive --source-line are '
            'required for %s.', args.analyser)
        return constants.APP_EXIT_ERROR

    # Auto detect project language if not provided
    if not args.language:
        args.language = utils.detect_language(args.target_dir)

    # Prepare out directory. makedirs also creates missing parent
    # directories and does not fail if the directory already exists.
    out_dir = args.out_dir if args.out_dir else os.getcwd()
    os.makedirs(out_dir, exist_ok=True)

    # Fix entrypoint default for languages
    if args.language == constants.LANGUAGES.JAVA:
        entrypoint = 'fuzzerTestOneInput'
    else:
        entrypoint = 'LLVMFuzzerTestOneInput'

    # Run the frontend
    oss_fuzz.analyse_folder(language=args.language,
                            directory=args.target_dir,
                            entrypoint=entrypoint,
                            out=out_dir)

    # Perform the FI backend project analysis from the frontend
    introspection_proj = analysis.IntrospectionProject(args.language, out_dir,
                                                       '')
    introspection_proj.load_data_files(True, '', out_dir)

    # Perform the chosen standalone analysis
    if is_source_line_analysis:
        target_analyser.set_source_file_line(args.source_file,
                                             args.source_line)
        # Only the project profile and out_dir matter to this analyser;
        # the HTML related arguments are passed as empty placeholders.
        target_analyser.analysis_func(html_helpers.HtmlTableOfContents(), [],
                                      introspection_proj.proj_profile,
                                      introspection_proj.profiles, '', '', [],
                                      out_dir)

    # TODO Add more analysers for standalone run

    return constants.APP_EXIT_SUCCESS
self.function_line_number_end, + "return_type": self.return_type, + "arg_count": self.arg_count, + "arg_types": self.arg_types, + "arg_names": self.arg_names, + "bb_count": self.bb_count, + "i_count": self.i_count, + "edge_count": self.edge_count, + "cyclomatic_complexity": self.cyclomatic_complexity, + "functions_reached": self.functions_reached, + "function_uses": self.function_uses, + "function_depth": self.function_depth, + "constants_touched": self.constants_touched, + "branch_profiles": + {k: str(v) + for k, v in self.branch_profiles.items()}, + "signature": self.signature, + "functions_called": self.functions_called, + "is_accessible": self.is_accessible, + "is_jvm_library": self.is_jvm_library, + "is_enum": self.is_enum, + "is_static": self.is_static, + "exceptions": self.exceptions, + "need_close": self.need_close, + "callsite": self.callsite, + "hitcount": self.hitcount, + "reached_by_fuzzers": self.reached_by_fuzzers, + "incoming_references": self.incoming_references, + "new_unreached_complexity": self.new_unreached_complexity, + "total_cyclomatic_complexity": self.total_cyclomatic_complexity + } + @property def has_source_file(self) -> bool: return len(self.function_source_file.strip()) > 0