Skip to content

Commit

Permalink
Merge branch 'ossf:main' into fix-api-for-analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
arthurscchan authored Jan 20, 2025
2 parents 8afcd97 + 8cf9aef commit 34ae096
Show file tree
Hide file tree
Showing 7 changed files with 211 additions and 322 deletions.
77 changes: 71 additions & 6 deletions src/fuzz_introspector/frontends/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,78 @@
#
################################################################################

from typing import Any, Optional
from typing import Any, Optional, Generic, TypeVar

from tree_sitter import Language, Parser
import tree_sitter_c
import tree_sitter_cpp
import tree_sitter_go
import tree_sitter_java
import tree_sitter_rust

class Project():
import logging

logger = logging.getLogger(name=__name__)

T = TypeVar('T', bound='SourceCodeFile')


class SourceCodeFile():
    """Class for holding file-specific information.

    On construction the source bytes are taken from ``source_content`` or
    read from ``source_file``, parsed with the tree-sitter parser for the
    requested language, and then handed to the
    ``language_specific_process`` hook that subclasses can override.
    """
    # Maps the supported language identifiers to tree-sitter languages.
    LANGUAGE: dict[str, Language] = {
        'c': Language(tree_sitter_c.language()),
        'cpp': Language(tree_sitter_cpp.language()),
        'c++': Language(tree_sitter_cpp.language()),
        'go': Language(tree_sitter_go.language()),
        'jvm': Language(tree_sitter_java.language()),
        'rust': Language(tree_sitter_rust.language()),
    }

    def __init__(self,
                 language: str,
                 source_file: str,
                 entrypoint: str = '',
                 source_content: Optional[bytes] = None):
        """Load and parse a single source file.

        Args:
            language: Key into ``LANGUAGE`` selecting the tree-sitter grammar.
            source_file: Path of the file; only read from disk when
                ``source_content`` is not given.
            entrypoint: Stored as-is on the instance.
            source_content: Raw source bytes. When ``None`` the content is
                read from ``source_file``.
        """
        # Pass the argument lazily so formatting only happens if the
        # record is actually emitted.
        logger.info('Processing %s', source_file)

        self.root = None
        self.source_file = source_file
        self.language = language
        self.entrypoint = entrypoint
        # `.get` yields None for a language that is not in LANGUAGE.
        self.tree_sitter_lang = self.LANGUAGE.get(language)
        self.parser = Parser(self.tree_sitter_lang)

        # Compare against None explicitly: empty in-memory content (b'') is
        # still valid content and must not trigger a read of `source_file`,
        # which may not be a real path in that case.
        if source_content is not None:
            self.source_content = source_content
        else:
            with open(self.source_file, 'rb') as f:
                self.source_content = f.read()

        # Initialization routines
        self.load_tree()

        # Language specific process
        self.language_specific_process()

    def load_tree(self) -> None:
        """Load the source code into a treesitter tree, and set
        the root node. Does nothing when the root node is already set."""
        if self.root is None:
            self.root = self.parser.parse(self.source_content).root_node

    def language_specific_process(self) -> None:
        """Dummy function to perform some specific processes in subclasses."""

    def has_libfuzzer_harness(self) -> bool:
        """Dummy function for source code files."""
        return False


class Project(Generic[T]):
"""Wrapper for doing analysis of a collection of source files."""

def __init__(self, source_code_files: list[Any]):
def __init__(self, source_code_files: list[T]):
self.source_code_files = source_code_files

def dump_module_logic(self,
Expand All @@ -35,7 +100,7 @@ def dump_module_logic(self,

def extract_calltree(self,
source_file: str = '',
source_code: Optional[Any] = None,
source_code: Optional[T] = None,
function: Optional[str] = None,
visited_functions: Optional[set[str]] = None,
depth: int = 0,
Expand All @@ -48,14 +113,14 @@ def extract_calltree(self,
def get_reachable_functions(
self,
source_file: str = '',
source_code: Optional[Any] = None,
source_code: Optional[T] = None,
function: Optional[str] = None,
visited_functions: Optional[set[str]] = None) -> set[str]:
"""Get a list of reachable functions for a provided function name."""
# Dummy function for subclasses
return set()

def get_source_codes_with_harnesses(self) -> list[Any]:
def get_source_codes_with_harnesses(self) -> list[T]:
"""Gets the source codes that holds libfuzzer harnesses."""
harnesses = []
for source_code in self.source_code_files:
Expand Down
69 changes: 25 additions & 44 deletions src/fuzz_introspector/frontends/frontend_c.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,28 +15,23 @@
################################################################################
"""Fuzz Introspector Light frontend"""

import os
from typing import Any, Optional

import os
import logging

from tree_sitter import Language, Parser
import tree_sitter_c
import yaml

from typing import Any, Optional, Set

from fuzz_introspector.frontends.datatypes import Project
from fuzz_introspector.frontends.datatypes import Project, SourceCodeFile

logger = logging.getLogger(name=__name__)

tree_sitter_languages = {'c': Language(tree_sitter_c.language())}

language_parsers = {'c': Parser(Language(tree_sitter_c.language()))}


class CProject(Project):
class CProject(Project['CSourceCodeFile']):
"""Wrapper for doing analysis of a collection of source files."""

def __init__(self, source_code_files: list['CSourceCodeFile']):
super().__init__(source_code_files)

def dump_module_logic(self,
report_name,
entry_function: str = '',
Expand Down Expand Up @@ -86,8 +81,8 @@ def dump_module_logic(self,
'functionLinenumberEnd'] = func_def.root.end_point.row
func_dict['linkageType'] = ''
func_dict['func_position'] = {
'start': source_code.root.start_point.row,
'end': source_code.root.end_point.row,
'start': func_def.root.start_point.row,
'end': func_def.root.end_point.row,
}
cc_str = 'CyclomaticComplexity'
func_dict[cc_str] = func_def.get_function_complexity()
Expand Down Expand Up @@ -130,9 +125,12 @@ def get_source_code_with_target(self, target_func_name):
return source_code
return None

def get_source_codes_with_harnesses(self) -> list['CSourceCodeFile']:
return super().get_source_codes_with_harnesses()

def extract_calltree(self,
source_file: str = '',
source_code: Optional[Any] = None,
source_code: Optional['CSourceCodeFile'] = None,
function: Optional[str] = None,
visited_functions: Optional[set[str]] = None,
depth: int = 0,
Expand Down Expand Up @@ -182,9 +180,9 @@ def extract_calltree(self,
def get_reachable_functions(
self,
source_file: str = '',
source_code: Optional[Any] = None,
source_code: Optional['CSourceCodeFile'] = None,
function: Optional[str] = None,
visited_functions: Optional[set[str]] = None) -> Set[str]:
visited_functions: Optional[set[str]] = None) -> set[str]:
"""Gets the reachable functions from a given function."""
# Create calltree from a given function
# Find the function in the source code
Expand Down Expand Up @@ -456,28 +454,17 @@ def callsites(self):
return callsites


class SourceCodeFile():
class CSourceCodeFile(SourceCodeFile):
"""Class for holding file-specific information."""

def __init__(self, source_file, language, source_content=""):
self.source_file = source_file
self.language = language
self.parser = language_parsers.get(self.language)
self.tree_sitter_lang = tree_sitter_languages[self.language]

self.root = None
def language_specific_process(self):
"""Perform some language specific processes in subclasses."""
self.function_names = []
self.line_range_pairs = []
self.struct_defs = []
self.typedefs = []
self.includes = set()

if source_content:
self.source_content = source_content
else:
with open(self.source_file, 'rb') as f:
self.source_content = f.read()

# List of function definitions in the source file.
self.func_defs = []

Expand All @@ -488,12 +475,6 @@ def __init__(self, source_file, language, source_content=""):
self._set_function_defintions()
self.extract_types()

def load_tree(self) -> None:
"""Load the source code into a treesitter tree, and set
the root node."""
if self.language == 'c' and not self.root:
self.root = self.parser.parse(self.source_content).root_node

def extract_types(self):
"""Extracts the types of the source code"""
# Extract all structs
Expand Down Expand Up @@ -640,28 +621,28 @@ def get_linenumber(self, bytepos):


def load_treesitter_trees(source_files: list[str],
is_log: bool = True) -> list[SourceCodeFile]:
is_log: bool = True) -> CProject:
"""Creates treesitter trees for all files in a given list of source files."""
results = []

for code_file in source_files:
if not os.path.isfile(code_file):
continue

source_cls = SourceCodeFile(code_file, 'c')
source_cls = CSourceCodeFile('c', code_file)

if is_log:
if source_cls.has_libfuzzer_harness():
logger.info('harness: %s', code_file)

results.append(source_cls)

return results
return CProject(results)


def analyse_source_code(source_content: str) -> SourceCodeFile:
def analyse_source_code(source_content: str) -> CSourceCodeFile:
"""Returns a source abstraction based on a single source string."""
source_code = SourceCodeFile(source_file='in-memory string',
language='c',
source_content=source_content.encode())
source_code = CSourceCodeFile('c',
source_file='in-memory string',
source_content=source_content.encode())
return source_code
Loading

0 comments on commit 34ae096

Please sign in to comment.