Skip to content

Commit

Permalink
Allow repology usage as library
Browse files Browse the repository at this point in the history
- Make it possible to call repology-related code from the repology
  package.
- Change vulnxscan and nix_outdated so they call the repology-related
  code from the repology package instead of creating a new process
  with exec_cmd to invoke repology_cli or repology_cve.

Signed-off-by: Henri Rosten <[email protected]>
  • Loading branch information
henrirosten committed Nov 30, 2023
1 parent 8865646 commit 80ae584
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 88 deletions.
8 changes: 4 additions & 4 deletions src/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,14 +200,14 @@ def version_distance(v1, v2):
re_vsplit = re.compile(r".*?(?P<ver_beg>[0-9][0-9]*)(?P<ver_end>.*)$")
match = re.match(re_vsplit, v1_clean)
if not match:
LOG.warning("Unexpected v1 version '%s'", v1)
LOG.debug("Unexpected v1 version '%s'", v1)
return 0.0
v1_major = match.group("ver_beg")
v1_minor = match.group("ver_end").replace(".", "")
v1_float = float(v1_major + "." + v1_minor)
match = re.match(re_vsplit, v2_clean)
if not match:
LOG.warning("Unexpected v2 version '%s'", v2)
LOG.debug("Unexpected v2 version '%s'", v2)
return 0.0
v2_major = match.group("ver_beg")
v2_minor = match.group("ver_end").replace(".", "")
Expand All @@ -226,7 +226,7 @@ def parse_version(ver_str):
re_ver = re.compile(r".*?(?P<ver_beg>[0-9][0-9.]*)(?P<ver_end>.*)$")
match = re_ver.match(ver_str)
if not match:
LOG.warning("Unable to parse version '%s'", ver_str)
LOG.debug("Unable to parse version '%s'", ver_str)
return None
ver_beg = match.group("ver_beg").rstrip(".")
ver_end = match.group("ver_end")
Expand All @@ -240,7 +240,7 @@ def parse_version(ver_str):
ver = re.sub(r"\.+", ".", ver)
LOG.log(LOG_SPAM, "%s --> %s", ver_str, ver)
if not ver:
LOG.warning("Invalid version '%s'", ver_str)
LOG.debug("Invalid version '%s'", ver_str)
return None
return packaging.version.parse(ver)

Expand Down
23 changes: 11 additions & 12 deletions src/nixupdate/nix_outdated.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from argparse import ArgumentParser
from tabulate import tabulate
from sbomnix.sbomdb import SbomDb
import repology.repology_cli
from common.utils import (
LOG,
LOG_SPAM,
Expand Down Expand Up @@ -86,15 +87,15 @@ def _generate_sbom(target_path, runtime=True, buildtime=False):

def _run_repology_cli(sbompath):
LOG.info("Running repology_cli")
prefix = "repology_"
suffix = ".csv"
with NamedTemporaryFile(delete=False, prefix=prefix, suffix=suffix) as f:
cmd = (
"repology_cli "
f"--sbom_cdx={sbompath} --repository=nix_unstable --out={f.name}"
)
exec_cmd(cmd.split())
return f.name
repology_cli = repology.repology_cli.Repology()
args = []
args.append("--repository=nix_unstable")
args.append(f"--sbom_cdx={sbompath}")
return repology_cli.query(
repology.repology_cli.getargs(args),
stdout_report=False,
file_report=False,
)


def _run_nix_visualize(targt_path):
Expand Down Expand Up @@ -258,9 +259,7 @@ def main():
sbom_path = _generate_sbom(target_path_abs, runtime, args.buildtime)
LOG.info("Using SBOM '%s'", sbom_path)

repology_out_path = _run_repology_cli(sbom_path)
LOG.info("Using repology out: '%s'", repology_out_path)
df_repology = df_from_csv_file(repology_out_path)
df_repology = _run_repology_cli(sbom_path)
df_log(df_repology, LOG_SPAM)

if not args.buildtime:
Expand Down
25 changes: 25 additions & 0 deletions src/repology/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# SPDX-FileCopyrightText: 2023 Technology Innovation Institute (TII)
#
# SPDX-License-Identifier: Apache-2.0

# pylint: disable=unnecessary-pass

"""Repology exceptions"""


class RepologyError(Exception):
    """
    Root of the repology exception hierarchy.

    The other exception classes defined in this module derive from it,
    so catching this type covers all of them.
    """


class RepologyNoMatchingPackages(RepologyError):
    """Signals that no matching repology packages were found."""


class RepologyUnexpectedResponse(RepologyError):
    """Signals that repology sent a response that was not expected."""
86 changes: 51 additions & 35 deletions src/repology/repology_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
""" Command-line interface to repology.org """

import os
import sys
import pathlib
import json
import re
Expand All @@ -24,6 +23,7 @@
import numpy as np
import pandas as pd
from tabulate import tabulate
import repology.exceptions
from common.utils import (
LOG,
LOG_SPAM,
Expand All @@ -43,8 +43,14 @@ def _pkg_str(str_obj):
raise ArgumentTypeError("Value must be a non-empty string")


def getargs():
"""Parse command line arguments"""
def getargs(args=None):
"""
Parse arguments: by default parses the sys.argv if `args` is not
specified, otherwise, parses arguments from the `args` list of strings.
This is simply a wrapper for function ArgumentParser.parse_args(),
returning argument attributes in argparse.Namespace object.
"""
desc = "Command line client to query repology.org for package information."
epil = (
f"Example: ./{os.path.basename(__file__)} --pkg_search 'firef' "
Expand Down Expand Up @@ -90,6 +96,8 @@ def getargs():
optional.add_argument("--verbose", help=helps, type=int, default=1)
helps = "Path to output report file (default: ./repology_report.csv)"
optional.add_argument("--out", help=helps, default="repology_report.csv")
if args:
return parser.parse_args(args)
return parser.parse_args()


Expand Down Expand Up @@ -174,20 +182,20 @@ def _sbom_fields(self):
self.df.drop("name", axis=1, inplace=True)

def _get_resp(self, query):
LOG.info("GET: %s", query)
LOG.debug("GET: %s", query)
resp = self.session.get(query, headers=self.headers)
LOG.debug("resp.status_code: %s", resp.status_code)
if resp.status_code == 404:
LOG.fatal("No matching packages found")
sys.exit(1)
raise repology.exceptions.RepologyNoMatchingPackages
resp.raise_for_status()
return resp

def _report(self, args):
def _report(self, args, console_report=True):
"""Generate result report to console and to csv file"""
if self.df.empty:
LOG.warning("No matching packages found")
sys.exit(1)
LOG.debug("No matching packages found")
raise repology.exceptions.RepologyNoMatchingPackages
if self.df_sbom is not None:
self._sbom_fields()
self.df["sbom_version_classify"] = self.df.apply(_sbom_row_classify, axis=1)
Expand All @@ -201,26 +209,28 @@ def _report(self, args):
df = df[~df.status.isin(["IGNORED", "NO_VERSION"])]
df = df.drop_duplicates(keep="first")
# Write the console report
table = tabulate(
df,
headers="keys",
tablefmt="orgtbl",
numalign="center",
showindex=False,
)
LOG.info(
"Repology package info, packages:%s\n\n%s\n\n"
"For more details, see: %s\n",
df.shape[0],
table,
self.urlq,
)
if args.stats:
self._stats_repology()
if self.df_sbom is not None:
self._stats_sbom()
# Write the full report to csv file
df_to_csv_file(self.df, args.out)
if console_report:
table = tabulate(
df,
headers="keys",
tablefmt="orgtbl",
numalign="center",
showindex=False,
)
LOG.info(
"Repology package info, packages:%s\n\n%s\n\n"
"For more details, see: %s\n",
df.shape[0],
table,
self.urlq,
)
if args.stats:
self._stats_repology()
if self.df_sbom is not None:
self._stats_sbom()
if args.out is not None:
# Write the full report to csv file
df_to_csv_file(self.df, args.out)

def _stats_sbom(self):
df = self.df.copy()
Expand Down Expand Up @@ -332,8 +342,8 @@ def _parse_pkg_search_resp(self, resp, repo, pkg_stop=None):
for idx, header in enumerate(projects_table.thead.find_all("th")):
headers[header.text] = idx
if not headers:
LOG.fatal("Unexpected response")
sys.exit(1)
LOG.fatal("Unexpected response, missing headers")
raise repology.exceptions.RepologyUnexpectedResponse
LOG.log(LOG_SPAM, headers)
projects_table_rows = projects_table.tbody.find_all("tr")
rows = 0
Expand Down Expand Up @@ -475,7 +485,7 @@ def _query_sbom_cdx(self, args):
LOG.debug("Package: %s", cmp)
if not cmp.name:
LOG.fatal("Missing package name: %s", cmp)
sys.exit(1)
raise repology.exceptions.RepologyUnexpectedResponse
pkg_id = f"{args.repository}:{cmp.name}"
if pkg_id in self.processed:
LOG.debug("Package '%s' in sbom already processed", cmp.name)
Expand Down Expand Up @@ -513,16 +523,19 @@ def _query_sbom_cdx(self, args):
self._packages_to_df(args, re_pkg_internal=cmp.name)
self.urlq = self.url_projects

def query(self, args):
def query(self, args, stdout_report=True, file_report=True):
"""Query package information from repology.org"""
if not file_report:
args.out = None
if args.pkg_search:
self._query_pkg_search(args)
elif args.pkg_exact:
self._query_pkg_exact(args)
elif args.sbom_cdx:
self._query_sbom_cdx(args)
self._packages_to_df(args, re_pkg_internal=args.pkg_exact)
self._report(args)
self._report(args, console_report=stdout_report)
return self.df.copy(deep=True)


################################################################################
Expand Down Expand Up @@ -555,8 +568,11 @@ def main():
"""main entry point"""
args = getargs()
set_log_verbosity(args.verbose)
repology = Repology()
repology.query(args)
repology_cli = Repology()
try:
repology_cli.query(args)
except repology.exceptions.RepologyNoMatchingPackages:
LOG.warning("No matching packages found")


################################################################################
Expand Down
23 changes: 14 additions & 9 deletions src/repology/repology_cve.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import numpy as np
import pandas as pd
from tabulate import tabulate
import repology.exceptions
from common.utils import (
LOG,
LOG_SPAM,
Expand Down Expand Up @@ -77,7 +78,7 @@ def _parse_cve_resp(resp, pkg_name, pkg_version):
headers[header.text] = idx
if not headers or "CVE ID" not in headers:
LOG.fatal("Unexpected response")
sys.exit(1)
raise repology.exceptions.RepologyUnexpectedResponse
LOG.log(LOG_SPAM, headers)
cve_table_rows = cve_table.tbody.find_all("tr")
cve_dict = {}
Expand Down Expand Up @@ -118,7 +119,7 @@ def _is_affected(version, affected_ver_str):
version_local = parse_version(version)
if not version_local:
LOG.fatal("Unexpected local version string: %s", version)
sys.exit(1)
raise repology.exceptions.RepologyError
# Pad with spaces to simplify regexps
affected_ver_str = f" {affected_ver_str} "
# Match version group
Expand All @@ -131,7 +132,7 @@ def _is_affected(version, affected_ver_str):
for impacted_group in matches:
if len(impacted_group) != 4:
LOG.fatal("Unexpected version group: %s", affected_ver_str)
sys.exit(1)
raise repology.exceptions.RepologyUnexpectedResponse
# impacted_group[0] = beg
beg_ind = impacted_group[0]
# impacted_group[1] = begver
Expand Down Expand Up @@ -168,7 +169,7 @@ def _is_affected(version, affected_ver_str):


def _report(df):
if df.empty:
if df is None or df.empty:
LOG.warning("No matching vulnerabilities found")
sys.exit(0)
# Write the console report
Expand All @@ -182,20 +183,24 @@ def _report(df):
LOG.info("Repology affected CVE(s)\n\n%s\n\n", table)


def _query_cve(pkg_name, pkg_version):
def query_cve(pkg_name, pkg_version):
"""
Return vulnerabilities known to repology that impact the given package name
and version. Results are returned in pandas dataframe.
"""
session = CachedLimiterSession(per_second=1, expire_after=7200)
ua_product = "repology_cli/0"
ua_comment = "(https://github.com/tiiuae/sbomnix/tree/main/scripts/repology)"
headers = {"User-Agent": f"{ua_product} {ua_comment}"}
pkg = urllib.parse.quote(pkg_name)
ver = urllib.parse.quote(pkg_version)
query = f"https://repology.org/project/{pkg}/cves?version={ver}"
LOG.info("GET: %s", query)
LOG.debug("GET: %s", query)
resp = session.get(query, headers=headers)
LOG.debug("resp.status_code: %s", resp.status_code)
if resp.status_code == 404:
LOG.fatal("Package '%s' not found", pkg_name)
sys.exit(1)
LOG.warning("Repology package '%s' not found", pkg_name)
return None
resp.raise_for_status()
return _parse_cve_resp(resp, pkg_name, pkg_version)

Expand All @@ -207,7 +212,7 @@ def main():
"""main entry point"""
args = getargs()
set_log_verbosity(args.verbose)
df = _query_cve(args.PKG_NAME, args.PKG_VERSION)
df = query_cve(args.PKG_NAME, args.PKG_VERSION)
_report(df)
df_to_csv_file(df, args.out)

Expand Down
Loading

0 comments on commit 80ae584

Please sign in to comment.