Skip to content

Commit

Permalink
minor general fixes, adapted and fixed pytest, updated CHANGES_RELEVA…
Browse files Browse the repository at this point in the history
…NT_FILE rule relevance to 8
  • Loading branch information
sacca97 authored and copernico committed Oct 7, 2022
1 parent 7779249 commit ccb8022
Show file tree
Hide file tree
Showing 10 changed files with 61 additions and 59 deletions.
8 changes: 6 additions & 2 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@ name: Python

on:
push:
branches: [fix-pytest]
branches:
- main
- test-fix-nlp
pull_request:
branches: [fix-pytest]
branches:
- main
- text-fix-nlp

jobs:
build:
Expand Down
16 changes: 6 additions & 10 deletions prospector/client/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,10 +230,8 @@ def main(argv): # noqa: C901

vulnerability_id = args.vulnerability_id
repository_url = args.repository

vuln_descr = args.descr

filter_extensions = "*." + args.filter_extensions
filter_extensions = args.filter_extensions

# if no backend the filters on the advisory do not work
use_nvd = False
Expand All @@ -255,18 +253,16 @@ def main(argv): # noqa: C901
max_candidates = args.max_candidates
modified_files = args.modified_files.split(",") if args.modified_files else []
advisory_keywords = (
args.advisory_keywords.split(",")
if args.advisory_keywords is not None
else []
args.advisory_keywords.split(",") if args.advisory_keywords else []
)

publication_date = ""
if args.pub_date != "":
publication_date = args.pub_date + "T00:00Z"
# if the date is forced manually, the time interval can
# be restricted
# time_limit_before = int(time_limit_before / 5)
# time_limit_after = int(time_limit_after / 2)
# if the date is forced manually, the time interval can
# be restricted
# time_limit_before = int(time_limit_before / 5)
# time_limit_after = int(time_limit_after / 2)

git_cache = os.getenv("GIT_CACHE", default=GIT_CACHE)

Expand Down
17 changes: 9 additions & 8 deletions prospector/client/cli/prospector_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,13 +252,13 @@ def save_preprocessed_commits(backend_address, payload):

# TODO: Cleanup many parameters should be recovered from the advisory record object
def get_candidates(
advisory_record,
repository,
tag_interval,
version_interval,
time_limit_before,
time_limit_after,
filter_extensions,
advisory_record: AdvisoryRecord,
repository: Git,
tag_interval: str,
version_interval: str,
time_limit_before: int,
time_limit_after: int,
filter_extensions: str,
) -> List[str]:
with ExecutionTimer(
core_statistics.sub_collection(name="retrieval of commit candidates")
Expand All @@ -277,6 +277,7 @@ def get_candidates(
with ConsoleWriter("Candidate commit retrieval"):
prev_tag = None
following_tag = None

if tag_interval != "":
prev_tag, following_tag = tag_interval.split(":")
elif version_interval != "":
Expand All @@ -289,7 +290,7 @@ def get_candidates(
if advisory_record.published_timestamp:
since = advisory_record.published_timestamp - time_limit_before
until = advisory_record.published_timestamp + time_limit_after

# Here i need to strip the github tags of useless stuff
candidates = repository.get_commits(
since=since,
until=until,
Expand Down
7 changes: 2 additions & 5 deletions prospector/datamodel/advisory.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
LOCAL_NVD_REST_ENDPOINT = "http://localhost:8000/nvd/vulnerabilities/"
NVD_REST_ENDPOINT = "https://services.nvd.nist.gov/rest/json/cves/2.0?cveId="


# TODO: refactor and clean
class AdvisoryRecord(BaseModel):
"""
Expand Down Expand Up @@ -86,9 +87,7 @@ def analyze(

if self.from_nvd:
self.get_advisory(self.vulnerability_id, self.nvd_rest_endpoint)

self.versions = union_of(self.versions, extract_versions(self.description))

self.affected_products = union_of(
self.affected_products, extract_products(self.description)
)
Expand Down Expand Up @@ -216,9 +215,7 @@ def build_advisory_record(
advisory_record.analyze(
use_nvd=use_nvd,
fetch_references=fetch_references,
# relevant_extensions=filter_extensions.split(".")[
# 1
# ], # the *. is added early in the main and is needed multiple times in the git so let's leave it there
relevant_extensions=filter_extensions,
)
_logger.debug(f"{advisory_record.keywords=}")

Expand Down
11 changes: 1 addition & 10 deletions prospector/datamodel/advisory_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,22 +96,13 @@ def test_adv_record_keywords():

def test_build():
record = build_advisory_record(
"CVE-2014-0050", "", "", "", "", True, "", "", "", "*.java"
"CVE-2014-0050", "", "", "", "", True, "", "", "", "java"
)
assert "MultipartStream" in record.paths
assert record.vulnerability_id == "CVE-2014-0050"


@skip(reason="Slow connections make it fail")
def test_filenames_extraction():
cve = {
"CVE-2014-0050": "MultipartStream",
"CVE-2021-22696": "JwtRequestCodeFilter", # Should match JwtRequestCodeFilter
"CVE-2021-27582": "OAuthConfirmationController",
"CVE-2021-29425": "FileNameUtils",
"CVE-2021-30468": "JsonMapObjectReaderWriter",
}

result1 = build_advisory_record(
"CVE-2014-0050", "", "", "", "", True, "", "", "", ""
)
Expand Down
20 changes: 8 additions & 12 deletions prospector/datamodel/nlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ def extract_versions(text: str) -> List[str]:
"""
Extract all versions mentioned in the text
"""
return re.findall(r"[0-9]+\.[0-9]+[0-9a-z.]*", text)
return list(set(re.findall(r"(\d+(?:\.\d+)+)", text))) # Should be more accurate
# return re.findall(r"[0-9]+\.[0-9]+[0-9a-z.]*", text)


def extract_products(text: str) -> List[str]:
Expand All @@ -50,7 +51,7 @@ def extract_affected_filenames(
) -> List[str]:
paths = set()
for word in text.split():
res = word.strip("_,.:;-+!?()]}'\"")
res = word.strip("_,.:;-+!?()]}@'\"")
res = extract_filename_from_path(res)
res = check_file_class_method_names(res, extensions)
if res:
Expand All @@ -59,18 +60,14 @@ def extract_affected_filenames(
return list(paths)


# TODO: enhanche this with extensions
# If looks like a path-to-file try to get the filename.extension or just filename
# TODO: enhanche this
# Now we just try a split by / and then we pass everything to the other checker, it might be done better
def extract_filename_from_path(text: str) -> str:
return text.split("/")[-1]
# Pattern //path//to//file or \\path\\to\\file, extract file
# res = re.search(r"^(?:(?:\/{,2}|\\{,2})([\w\-\.]+))+$", text)
# if res:
# return res.group(1), True
# # Else simply return the text
# return text, False
res = text.split("/")

return res[-1] # , len(res) > 1
# return res.group(1)


def check_file_class_method_names(text: str, relevant_extensions: List[str]) -> str:
Expand Down Expand Up @@ -158,8 +155,7 @@ def extract_jira_references(repository: str, text: str) -> Dict[str, str]:
refs = dict()
for result in re.finditer(r"[A-Z]+-\d+", text):
id = result.group()
url = JIRA_ISSUE_URL + id
content = fetch_url(url, False)
content = fetch_url(JIRA_ISSUE_URL + id, False)
if not content:
return {"": ""}
refs[id] = "".join(
Expand Down
3 changes: 2 additions & 1 deletion prospector/datamodel/test_nlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@ def test_extract_special_terms():
)


@pytest.mark.skip(reason="Outdated")
def test_adv_record_path_extraction_no_real_paths():
result = extract_affected_filenames(ADVISORY_TEXT)
result = extract_affected_filenames(ADVISORY_TEXT_1)

assert result == []

Expand Down
20 changes: 18 additions & 2 deletions prospector/git/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ def get_commits(
cmd.append("^" + exclude_ancestors_of)

if filter_files:
cmd.append(filter_files)
cmd.append("*." + filter_files)

if find_in_code:
cmd.append('-S"%s"' % find_in_code)
Expand All @@ -261,11 +261,25 @@ def get_commits(
try:
_logger.debug(" ".join(cmd))
out = self._exec.run(cmd, cache=True)
# cmd_test = ["git", "diff", "--name-only", self._id + "^.." + self._id]
# out = self._exec.run(cmd, cache=True)
except Exception:
_logger.error("Git command failed, cannot get commits", exc_info=True)
out = []

out = [l.strip() for l in out]
# res = []
# try:
# for id in out:
# cmd = ["git", "diff", "--name-only", id + "^.." + id]
# o = self._exec.run(cmd, cache=True)
# for f in o:
# if "mod_auth_digest" in f:
# res.append(id)
# except Exception:
# _logger.error("Changed files retrieval failed", exc_info=True)
# res = out

return out

def get_commits_between_two_commit(self, commit_id_from: str, commit_id_to: str):
Expand Down Expand Up @@ -360,6 +374,8 @@ def get_commit_id_for_tag(self, tag):
except subprocess.CalledProcessError as exc:
_logger.error("Git command failed." + str(exc.output), exc_info=True)
sys.exit(1)
# else:
# return commit_id.strip()
if not commit_id:
return None
return commit_id.strip()
Expand Down Expand Up @@ -458,7 +474,7 @@ def get_diff(self, context_size: int = 1, filter_files: str = ""):
self._id + "^.." + self._id,
]
if filter_files:
cmd.append(filter_files)
cmd.append("*." + filter_files)
self._attributes["diff"] = self._exec.run(cmd, cache=True)
except Exception:
_logger.error(
Expand Down
2 changes: 1 addition & 1 deletion prospector/git/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def test_get_commits_in_time_interval_filter_extension():
repo.clone()

results = repo.get_commits(
since="1615441712", until="1617441712", filter_files="*.xml"
since="1615441712", until="1617441712", filter_files="xml"
)

print("Found %d commits" % len(results))
Expand Down
16 changes: 8 additions & 8 deletions prospector/rules/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def apply_rule_references_jira_issue(candidate: Commit, _) -> str:
return None


def apply_rule_changes_relevant_path(
def apply_rule_changes_relevant_file(
candidate: Commit, advisory_record: AdvisoryRecord
) -> str:
"""
Expand All @@ -154,16 +154,16 @@ def apply_rule_changes_relevant_path(
"""
explanation_template = "This commit touches the following relevant paths: {}"

relevant_paths = set(
relevant_files = set(
[
path
for path in candidate.changed_files
file
for file in candidate.changed_files
for adv_path in advisory_record.paths
if adv_path in path
if adv_path in file
]
)
if len(relevant_paths):
return explanation_template.format(", ".join(relevant_paths))
if len(relevant_files):
return explanation_template.format(", ".join(relevant_files))

return None

Expand Down Expand Up @@ -356,7 +356,7 @@ def apply_rule_small_commit(candidate: Commit, advisory_record: AdvisoryRecord)
"SEC_KEYWORD_IN_COMMIT_MSG": Rule(apply_rule_security_keyword_in_msg, 5),
"GH_ISSUE_IN_COMMIT_MSG": Rule(apply_rule_references_ghissue, 3),
"JIRA_ISSUE_IN_COMMIT_MSG": Rule(apply_rule_references_jira_issue, 3),
"CH_REL_PATH": Rule(apply_rule_changes_relevant_path, 5),
"CHANGES_RELEVANT_FILE": Rule(apply_rule_changes_relevant_file, 8),
"COMMIT_IN_ADV": Rule(apply_rule_commit_mentioned_in_adv, 10),
"COMMIT_IN_REFERENCE": Rule(apply_rule_commit_mentioned_in_reference, 8),
"VULN_IN_LINKED_ISSUE": Rule(apply_rule_vuln_mentioned_in_linked_issue, 8),
Expand Down

0 comments on commit ccb8022

Please sign in to comment.