-
Notifications
You must be signed in to change notification settings - Fork 250
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #94 from ennamarie19/feat/oss-fuzz-integration
OSS Fuzz Integration
- Loading branch information
Showing
4 changed files
with
249 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# CIFuzz: build the project's OSS-Fuzz fuzzers and run them for a short time
# on every push to master and on every pull request.
name: CIFuzz
on:
  push:
    branches:
      - master
  pull_request:
# Start from no token permissions; the job below opts in to what it needs.
permissions: {}
jobs:
  Fuzzing:
    runs-on: ubuntu-latest
    permissions:
      # Required by the "Upload Sarif" step to publish code-scanning results.
      security-events: write
    steps:
      - name: Build Fuzzers
        id: build
        uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
        with:
          oss-fuzz-project-name: 'textdistance'
          language: python
      - name: Run Fuzzers
        uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
        with:
          oss-fuzz-project-name: 'textdistance'
          language: python
          fuzz-seconds: 800
          output-sarif: true
      - name: Upload Crash
        # upload-artifact v3 has been retired by GitHub; v4 is the supported major.
        uses: actions/upload-artifact@v4
        # Only runs when fuzzing failed after a successful build.
        if: failure() && steps.build.outcome == 'success'
        with:
          name: artifacts
          path: ./out/artifacts
      - name: Upload Sarif
        # Runs even when fuzzing failed, as long as the fuzzers were built.
        if: always() && steps.build.outcome == 'success'
        # codeql-action v2 is deprecated; v3 accepts the same inputs used here.
        uses: github/codeql-action/upload-sarif@v3
        with:
          # Path to SARIF file relative to the root of the repository
          sarif_file: cifuzz-sarif/results.sarif
          checkout_path: cifuzz-sarif
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Install the package from its checked-out source tree.
cd "${SRC}/textdistance"
pip3 install .

# Build every harness under fuzzing/ whose file name ends in _fuzzer.py
# (outputs go to $OUT).
for harness in $(find fuzzing -name '*_fuzzer.py'); do
  compile_python_fuzzer "$harness"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
import contextlib
import datetime
import io
import tempfile
from typing import Iterator, List, TypeVar, Union
|
||
import atheris | ||
|
||
T = TypeVar("T")


class EnhancedFuzzedDataProvider(atheris.FuzzedDataProvider):
    """
    FuzzedDataProvider with convenience helpers for common input shapes.

    Every helper consumes from the same underlying fuzz input, so the order in
    which helpers are called determines how that input is split up.
    """

    def ConsumeRandomBytes(self) -> bytes:
        """Consume a fuzzer-chosen number (possibly zero) of the remaining bytes."""
        return self.ConsumeBytes(self.ConsumeIntInRange(0, self.remaining_bytes()))

    def ConsumeRandomString(self) -> str:
        """Consume a surrogate-free string whose length bound is fuzzer-chosen."""
        return self.ConsumeUnicodeNoSurrogates(
            self.ConsumeIntInRange(0, self.remaining_bytes())
        )

    def ConsumeRemainingString(self) -> str:
        """Consume everything that is left as a surrogate-free string."""
        return self.ConsumeUnicodeNoSurrogates(self.remaining_bytes())

    def ConsumeRemainingBytes(self) -> bytes:
        """Consume everything that is left as raw bytes."""
        return self.ConsumeBytes(self.remaining_bytes())

    def ConsumeSublist(self, source: List[T]) -> List[T]:
        """
        Returns a shuffled sub-list of the given list of len [1, len(source)].

        An empty ``source`` yields an empty list, since there is nothing to pick.
        """
        if not source:
            return []

        chosen = [elem for elem in source if self.ConsumeBool()]

        # Fisher-Yates shuffle. The walk must include index 1 (stop value 0),
        # otherwise the first two elements can never trade places.
        for i in range(len(chosen) - 1, 0, -1):
            j = self.ConsumeIntInRange(0, i)
            chosen[i], chosen[j] = chosen[j], chosen[i]

        # Guarantee at least one element when every coin flip came up False.
        return chosen or [self.PickValueInList(source)]

    def ConsumeDate(self) -> datetime.datetime:
        """
        Return a datetime built from a fuzzer-chosen float timestamp.

        Falls back to 1970-01-01 when the float cannot be converted.
        """
        try:
            return datetime.datetime.fromtimestamp(self.ConsumeFloat())
        except (OverflowError, OSError, ValueError):
            return datetime.datetime(year=1970, month=1, day=1)

    def _consume_file_data(self, all_data: bool, as_bytes: bool) -> Union[bytes, str]:
        """Consume file contents: all remaining input or a fuzzer-chosen amount."""
        if all_data:
            return (
                self.ConsumeRemainingBytes()
                if as_bytes
                else self.ConsumeRemainingString()
            )
        return self.ConsumeRandomBytes() if as_bytes else self.ConsumeRandomString()

    @contextlib.contextmanager
    def ConsumeMemoryFile(
        self, all_data: bool = False, as_bytes: bool = True
    ) -> Iterator[Union[io.BytesIO, io.StringIO]]:
        """
        Yield an in-memory file filled with fuzz data; it is closed on exit.

        :param all_data: use all remaining input instead of a fuzzer-chosen amount
        :param as_bytes: yield ``io.BytesIO`` when true, ``io.StringIO`` otherwise
        """
        file_data = self._consume_file_data(all_data, as_bytes)
        file = io.BytesIO(file_data) if as_bytes else io.StringIO(file_data)
        try:
            yield file
        finally:
            # Close even when the code using the file raises.
            file.close()

    @contextlib.contextmanager
    def ConsumeTemporaryFile(
        self, suffix: str, all_data: bool = False, as_bytes: bool = True
    ) -> Iterator[str]:
        """
        Yield the path of a named temporary file filled with fuzz data.

        The file is closed when the context exits, including when the body raises.

        :param suffix: file name suffix passed to ``tempfile.NamedTemporaryFile``
        :param all_data: use all remaining input instead of a fuzzer-chosen amount
        :param as_bytes: write bytes (mode ``w+b``) when true, text (``w+``) otherwise
        """
        file_data = self._consume_file_data(all_data, as_bytes)

        mode = "w+b" if as_bytes else "w+"
        with tempfile.NamedTemporaryFile(mode=mode, suffix=suffix) as tfile:
            tfile.write(file_data)
            tfile.seek(0)
            # Flush so that code opening the file by name sees the contents.
            tfile.flush()
            yield tfile.name
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
import itertools | ||
import sys | ||
from collections import defaultdict | ||
from typing import List, Dict, Type | ||
|
||
import atheris | ||
|
||
from fuzz_helpers import EnhancedFuzzedDataProvider | ||
from dataclasses import dataclass, field | ||
|
||
with atheris.instrument_imports(): | ||
import textdistance | ||
|
||
|
||
@dataclass
class InitializationConstraints:
    """
    Remembers which of the optional constructor keywords (qval, external) a class accepts
    """
    # Both flags start out True and are switched off individually by the caller.
    HAS_QVAL: bool = True
    HAS_EXTERNAL: bool = True
|
||
|
||
@dataclass
class FuzzTarget:
    """
    One fuzz candidate: a class paired with the name of a method to call on its instances
    """
    algo_cls: type  # class that gets instantiated
    fuzz_func_name: str  # attribute name of the method to invoke
|
||
|
||
# Algorithm classes offered to the fuzzer. The order matters: a target is
# selected by position, so reordering changes which input maps to which class.
ALGORITHMS = [
    textdistance.Hamming,
    textdistance.Bag,
    textdistance.Gotoh,
    textdistance.MLIPNS,
    textdistance.Levenshtein,
    textdistance.DamerauLevenshtein,
    textdistance.Jaro,
    textdistance.JaroWinkler,
    textdistance.StrCmp95,
    textdistance.NeedlemanWunsch,
    textdistance.SmithWaterman,
    textdistance.Jaccard,
    textdistance.Sorensen,
    textdistance.Tversky,
    textdistance.Overlap,
    textdistance.Cosine,
    textdistance.Tanimoto,
    textdistance.MongeElkan,
    textdistance.LCSSeq,
    textdistance.LCSStr,
    textdistance.RatcliffObershelp,
    textdistance.ArithNCD,
    textdistance.RLENCD,
    textdistance.BWTRLENCD,
    textdistance.SqrtNCD,
    textdistance.BZ2NCD,
    textdistance.LZMANCD,
    textdistance.ZLIBNCD,
    textdistance.MRA,
    textdistance.Editex,
    textdistance.Prefix,
    textdistance.Length,
    textdistance.Identity,
    textdistance.Matrix,
]

# Method names tried on each algorithm class.
FUZZ_METHODS = [
    "__call__",
    "distance",
    "similarity",
    "normalized_distance",
    "normalized_similarity",
]

# Filled in by initialize_fuzz_options().
FUZZ_TARGETS: List[FuzzTarget] = []

# Per-class record of accepted constructor keywords; unseen classes get a
# default InitializationConstraints on first lookup.
CONSTRAINT_MEMORY: Dict[Type, InitializationConstraints] = defaultdict(InitializationConstraints)
|
||
|
||
def initialize_fuzz_options():
    """
    Rebuilds FUZZ_TARGETS as every (algorithm, method name) pair where the class has that attribute
    """
    global FUZZ_TARGETS

    targets = []
    # Algorithms vary slowest, method names fastest.
    for algo_cls in ALGORITHMS:
        for method_name in FUZZ_METHODS:
            if hasattr(algo_cls, method_name):
                targets.append(FuzzTarget(algo_cls, method_name))

    FUZZ_TARGETS = targets
|
||
|
||
def pick_qval(fdp: EnhancedFuzzedDataProvider):
    """
    Draws the qval for this iteration from the fuzz input: an int in [1, 100], or None
    """
    # The bool is always consumed first; the int is consumed only when it is True.
    return fdp.ConsumeIntInRange(1, 100) if fdp.ConsumeBool() else None
|
||
|
||
def TestOneInput(data):
    """
    Fuzz entry point: builds one algorithm instance and calls one of its methods on two fuzzer-chosen strings

    Returns -1 when the input could not be exercised (the constructor rejected a keyword, the known
    'split' AttributeError fired, or an optional dependency is missing); otherwise returns None.
    Any other exception propagates so the fuzzer can report it.
    """
    fdp = EnhancedFuzzedDataProvider(data)

    # Pick a target
    fuzz_target: FuzzTarget = fdp.PickValueInList(FUZZ_TARGETS)
    constraints = CONSTRAINT_MEMORY[fuzz_target.algo_cls]

    # Pass only the keywords this class is still believed to accept.
    # qval is drawn before the constructor call, exactly when HAS_QVAL is set.
    init_kwargs = {}
    if constraints.HAS_QVAL:
        init_kwargs["qval"] = pick_qval(fdp)
    if constraints.HAS_EXTERNAL:
        init_kwargs["external"] = False

    try:
        algo = fuzz_target.algo_cls(**init_kwargs)
    except TypeError as e:
        # Update our memory on if a given parameter is invalid
        message = str(e)
        if 'qval' in message:
            constraints.HAS_QVAL = False
        elif 'external' in message:
            constraints.HAS_EXTERNAL = False
        return -1

    try:
        getattr(algo, fuzz_target.fuzz_func_name)(fdp.ConsumeRandomString(), fdp.ConsumeRandomString())
    except AttributeError as e:
        # The 'split' failure pops too often, just catch and ignore it
        if 'split' in str(e):
            return -1
        # Anything else is a genuine finding; do not hide it from the fuzzer.
        raise
    except ImportError:
        # Remove this algorithm from the list, since we don't have pre-reqs to use it
        FUZZ_TARGETS.remove(fuzz_target)
        return -1
|
||
|
||
def main():
    """
    Builds the target list, then hands TestOneInput and the command line to atheris and starts fuzzing
    """
    # Targets must exist before the first TestOneInput call picks from them.
    initialize_fuzz_options()

    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()


# Start fuzzing only when run as a script, not on import.
if __name__ == "__main__":
    main()