From 595e891f18913afff4b5cd26ecf042b2f8b962fe Mon Sep 17 00:00:00 2001 From: Rex P <106129829+another-rex@users.noreply.github.com> Date: Thu, 28 Nov 2024 11:30:44 +1100 Subject: [PATCH] fix: oss-fuzz + exporter error (#2925) For some reason, the [EMPTY] ecosystem exporter recently started failing. The failure occurs because the oss-fuzz special case in the to_vulnerability() code always assumes that there is a project name available. This is not the case for withdrawn, public, INVALID-status entries, which causes an exception to be thrown. I'm not clear on why it suddenly started happening today, when the record has not been changed since 2021. It could have something to do with the Key change done recently; perhaps it was previously failing on an earlier error because the key was invalid. Also added some additional logging to make it easier to identify the exact record that caused this error next time. --- docker/exporter/exporter.py | 14 +++++++++----- osv/models.py | 2 +- osv/sources.py | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/docker/exporter/exporter.py b/docker/exporter/exporter.py index 222fef5dba9..8d708979502 100755 --- a/docker/exporter/exporter.py +++ b/docker/exporter/exporter.py @@ -102,16 +102,20 @@ def _export_ecosystem_to_bucket(self, ecosystem: str, work_dir: str): files_to_zip = [] @ndb.tasklet - def _export_to_file_and_zipfile(bug): + def _export_to_file_and_zipfile(bug: osv.Bug): """Write out a bug record to both a single file and the zip file.""" if not bug.public or bug.status == osv.BugStatus.UNPROCESSED: return - file_path = os.path.join(ecosystem_dir, bug.id() + '.json') - vulnerability = yield bug.to_vulnerability_async(include_source=True) - osv.write_vulnerability(vulnerability, file_path) + try: + file_path = os.path.join(ecosystem_dir, bug.id() + '.json') + vulnerability = yield bug.to_vulnerability_async(include_source=True) + osv.write_vulnerability(vulnerability, file_path) - files_to_zip.append(file_path) + files_to_zip.append(file_path) 
+ except Exception: + logging.error('Failed to export bug: "%s"', bug.id()) + raise # This *should* pause here until # all the exports have been written to disk. diff --git a/osv/models.py b/osv/models.py index 6eb997eaa31..659509de35a 100644 --- a/osv/models.py +++ b/osv/models.py @@ -503,7 +503,7 @@ def _pre_put_hook(self): # pylint: disable=arguments-differ if not self.key: # pylint: disable=access-member-before-definition source_repo = get_source_repository(self.source) if not source_repo: - raise ValueError(f'Invalid source {self.source}') + raise ValueError(f'{self.db_id} has invalid source {self.source}') if source_repo.db_prefix and not any( self.db_id.startswith(prefix) for prefix in source_repo.db_prefix): diff --git a/osv/sources.py b/osv/sources.py index a064f52e125..9b27cb9dc88 100644 --- a/osv/sources.py +++ b/osv/sources.py @@ -334,7 +334,7 @@ def sha256_bytes(data): def source_path(source_repo, bug): """Get the source path for an osv.Bug.""" source_name, source_id = parse_source_id(bug.source_id) - if source_name == 'oss-fuzz': + if source_name == 'oss-fuzz' and len(bug.project) > 0: path = os.path.join(bug.project[0], bug.id() + source_repo.extension) if source_repo.directory_path: path = os.path.join(source_repo.directory_path, path)