diff --git a/conftest.py b/conftest.py index 81dee94bf..16153bd38 100644 --- a/conftest.py +++ b/conftest.py @@ -276,6 +276,18 @@ def mock_storage(mocker): return storage_server +@pytest.fixture +def mock_archive_storage(mocker): + m = mocker.patch("shared.api_archive.archive.StorageService") + use_archive = mocker.patch( + "shared.django_apps.core.models.should_write_data_to_storage_config_check" + ) + use_archive.return_value = True + storage_server = MemoryStorageService({}) + m.return_value = storage_server + return storage_server + + @pytest.fixture def mock_smtp(mocker): m = mocker.patch("services.smtp.SMTPService") diff --git a/database/models/core.py b/database/models/core.py index ba410d68b..85010bc3b 100644 --- a/database/models/core.py +++ b/database/models/core.py @@ -447,7 +447,6 @@ class Pull(CodecovBaseModel): commentid = Column(types.Text) bundle_analysis_commentid = Column(types.Text) diff = Column(postgresql.JSON) - flare = Column(postgresql.JSON) author_id = Column("author", types.Integer, ForeignKey("owners.ownerid")) behind_by = Column(types.Integer) behind_by_commit = Column(types.Text) @@ -457,6 +456,22 @@ class Pull(CodecovBaseModel): Repository, backref=backref("pulls", cascade="delete", lazy="dynamic") ) + def should_write_to_storage(self) -> bool: + if self.repository is None or self.repository.owner is None: + return False + is_codecov_repo = self.repository.owner.username == "codecov" + return should_write_data_to_storage_config_check( + master_switch_key="pull_flare", + is_codecov_repo=is_codecov_repo, + repoid=self.repository.repoid, + ) + + _flare = Column("flare", postgresql.JSON) + _flare_storage_path = Column("flare_storage_path", types.Text, nullable=True) + flare = ArchiveField( + should_write_to_storage_fn=should_write_to_storage, default_value_class=dict + ) + __table_args__ = (Index("pulls_repoid_pullid", "repoid", "pullid", unique=True),) def __repr__(self): @@ -503,16 +518,6 @@ def external_id(self): def id(self): return self.id_ - def should_write_to_storage(self) -> bool: - if self.repository is None or self.repository.owner is None: - return False - is_codecov_repo = self.repository.owner.username == "codecov" - return should_write_data_to_storage_config_check( - master_switch_key="pull_flare", - is_codecov_repo=is_codecov_repo, - repoid=self.repository.repoid, - ) - @cached_property def is_first_coverage_pull(self): """ @@ -536,12 +541,6 @@ def is_first_coverage_pull(self): return first_pull_with_coverage.id_ == self.id_ return True - _flare = Column("flare", postgresql.JSON) - _flare_storage_path = Column("flare_storage_path", types.Text, nullable=True) - flare = ArchiveField( - should_write_to_storage_fn=should_write_to_storage, default_value_class=dict - ) - class CommitNotification(CodecovBaseModel): __tablename__ = "commit_notifications" diff --git a/tasks/flare_cleanup.py b/tasks/flare_cleanup.py index 610ad8f47..207731d8c 100644 --- a/tasks/flare_cleanup.py +++ b/tasks/flare_cleanup.py @@ -38,12 +38,13 @@ def run_cron_task(self, db_session, batch_size=1000, limit=10000, *args, **kwarg _flare__isnull=False ).exclude(_flare={}) - # Process in batches using an offset + # Process in batches total_updated = 0 - offset = 0 - while offset < limit: + start = 0 + while start < limit: + stop = start + batch_size if start + batch_size < limit else limit batch = non_open_pulls_with_flare_in_db.values_list("id", flat=True)[ - offset : offset + batch_size + start:stop ] if not batch: break @@ -51,7 +52,7 @@ def run_cron_task(self, db_session, batch_size=1000, limit=10000, *args, **kwarg _flare=None ) total_updated += n_updated - offset += batch_size + start = stop log.info(f"FlareCleanupTask cleared {total_updated} database flares") @@ -60,12 +61,13 @@ def run_cron_task(self, db_session, batch_size=1000, limit=10000, *args, **kwarg _flare_storage_path__isnull=False ) - # Process archive deletions in batches using an offset + # Process archive deletions in batches total_updated = 0 - offset = 0 - while offset < limit: + start = 0 + while start < limit: + stop = start + batch_size if start + batch_size < limit else limit batch = non_open_pulls_with_flare_in_archive.values_list("id", flat=True)[ - offset : offset + batch_size + start:stop ] if not batch: break @@ -86,7 +88,7 @@ def run_cron_task(self, db_session, batch_size=1000, limit=10000, *args, **kwarg _flare_storage_path=None ) total_updated += n_updated - offset += batch_size + start = stop log.info(f"FlareCleanupTask cleared {total_updated} Archive flares") diff --git a/tasks/tests/unit/test_flare_cleanup.py b/tasks/tests/unit/test_flare_cleanup.py index 984084c75..06d3968ab 100644 --- a/tasks/tests/unit/test_flare_cleanup.py +++ b/tasks/tests/unit/test_flare_cleanup.py @@ -1,4 +1,3 @@ -import json from unittest.mock import call from shared.django_apps.core.models import Pull, PullStates @@ -15,25 +14,11 @@ def test_get_min_seconds_interval_between_executions(self): ) assert FlareCleanupTask.get_min_seconds_interval_between_executions() > 17000 - def test_successful_run(self, transactional_db, mocker): + def test_successful_run(self, transactional_db, mocker, mock_archive_storage): mock_logs = mocker.patch("logging.Logger.info") - mock_archive_service = mocker.patch( - "shared.django_apps.utils.model_utils.ArchiveService" - ) archive_value_for_flare = {"some": "data"} - mock_archive_service.return_value.read_file.return_value = json.dumps( - archive_value_for_flare - ) - mock_path = "path/to/written/object" - mock_archive_service.return_value.write_json_data_to_storage.return_value = ( - mock_path - ) - mock_archive_service_in_task = mocker.patch( - "tasks.flare_cleanup.ArchiveService" - ) - mock_archive_service_in_task.return_value.delete_file.return_value = None - local_value_for_flare = {"test": "test"} + open_pull_with_local_flare = PullFactory( state=PullStates.OPEN.value, _flare=local_value_for_flare, @@ -55,25 +40,29 @@ def test_successful_run(self, transactional_db, mocker): open_pull_with_archive_flare = PullFactory( state=PullStates.OPEN.value, _flare=None, - _flare_storage_path=mock_path, repository=RepositoryFactory(), ) + open_pull_with_archive_flare.flare = archive_value_for_flare + open_pull_with_archive_flare.save() + open_pull_with_archive_flare.refresh_from_db() assert open_pull_with_archive_flare.flare == archive_value_for_flare assert open_pull_with_archive_flare._flare is None - assert open_pull_with_archive_flare._flare_storage_path == mock_path + assert open_pull_with_archive_flare._flare_storage_path is not None merged_pull_with_archive_flare = PullFactory( state=PullStates.MERGED.value, _flare=None, - _flare_storage_path=mock_path, repository=RepositoryFactory(), ) + merged_pull_with_archive_flare.flare = archive_value_for_flare + merged_pull_with_archive_flare.save() + merged_pull_with_archive_flare.refresh_from_db() assert merged_pull_with_archive_flare.flare == archive_value_for_flare assert merged_pull_with_archive_flare._flare is None - assert merged_pull_with_archive_flare._flare_storage_path == mock_path + assert merged_pull_with_archive_flare._flare_storage_path is not None task = FlareCleanupTask() - task.run_cron_task(transactional_db) + task.manual_run() mock_logs.assert_has_calls( [ @@ -102,7 +91,7 @@ def test_successful_run(self, transactional_db, mocker): ) assert open_pull_with_archive_flare.flare == archive_value_for_flare assert open_pull_with_archive_flare._flare is None - assert open_pull_with_archive_flare._flare_storage_path == mock_path + assert open_pull_with_archive_flare._flare_storage_path is not None merged_pull_with_archive_flare = Pull.objects.get( id=merged_pull_with_archive_flare.id @@ -114,7 +103,7 @@ def test_successful_run(self, transactional_db, mocker): mock_logs.reset_mock() # check that once these pulls are corrected they are not corrected again task = FlareCleanupTask() - task.run_cron_task(transactional_db) + task.manual_run() mock_logs.assert_has_calls( [ @@ -123,3 +112,93 @@ def test_successful_run(self, transactional_db, mocker): call("FlareCleanupTask cleared 0 Archive flares"), ] ) + + def test_limits_on_manual_run(self, transactional_db, mocker, mock_archive_storage): + mock_logs = mocker.patch("logging.Logger.info") + local_value_for_flare = {"test": "test"} + archive_value_for_flare = {"some": "data"} + + oldest_to_newest_pulls_with_local_flare = [] + for i in range(5): + merged_pull_with_local_flare = PullFactory( + state=PullStates.MERGED.value, + _flare=local_value_for_flare, + repository=RepositoryFactory(), + ) + assert merged_pull_with_local_flare.flare == local_value_for_flare + assert merged_pull_with_local_flare._flare == local_value_for_flare + assert merged_pull_with_local_flare._flare_storage_path is None + oldest_to_newest_pulls_with_local_flare.append( + merged_pull_with_local_flare.id + ) + + oldest_to_newest_pulls_with_archive_flare = [] + for i in range(5): + merged_pull_with_archive_flare = PullFactory( + state=PullStates.MERGED.value, + _flare=None, + repository=RepositoryFactory(), + ) + merged_pull_with_archive_flare.flare = archive_value_for_flare + merged_pull_with_archive_flare.save() + assert merged_pull_with_archive_flare.flare == archive_value_for_flare + assert merged_pull_with_archive_flare._flare is None + assert merged_pull_with_archive_flare._flare_storage_path is not None + oldest_to_newest_pulls_with_archive_flare.append( + merged_pull_with_archive_flare.id + ) + + everything_in_archive_storage = mock_archive_storage.list_folder_contents( + bucket_name="archive" + ) + assert len(everything_in_archive_storage) == 5 + + task = FlareCleanupTask() + task.manual_run(limit=3) + + mock_logs.assert_has_calls( + [ + call("Starting FlareCleanupTask"), + call("FlareCleanupTask cleared 3 database flares"), + call("FlareCleanupTask cleared 3 Archive flares"), + ] + ) + + # there is a cache for flare on the object (all ArchiveFields have this), + # so get a fresh copy of each object without the cached value + should_be_cleared = oldest_to_newest_pulls_with_local_flare[:3] + should_not_be_cleared = oldest_to_newest_pulls_with_local_flare[3:] + for pull_id in should_be_cleared: + pull = Pull.objects.get(id=pull_id) + assert pull.flare == {} + assert pull._flare is None + assert pull._flare_storage_path is None + + for pull_id in should_not_be_cleared: + pull = Pull.objects.get(id=pull_id) + assert pull.flare == local_value_for_flare + assert pull._flare == local_value_for_flare + assert pull._flare_storage_path is None + + everything_in_archive_storage = mock_archive_storage.list_folder_contents( + bucket_name="archive" + ) + assert len(everything_in_archive_storage) == 2 + file_names_in_archive_storage = [ + file["name"] for file in everything_in_archive_storage + ] + + should_be_cleared = oldest_to_newest_pulls_with_archive_flare[:3] + should_not_be_cleared = oldest_to_newest_pulls_with_archive_flare[3:] + for pull_id in should_be_cleared: + pull = Pull.objects.get(id=pull_id) + assert pull.flare == {} + assert pull._flare is None + assert pull._flare_storage_path is None + + for pull_id in should_not_be_cleared: + pull = Pull.objects.get(id=pull_id) + assert pull.flare == archive_value_for_flare + assert pull._flare is None + assert pull._flare_storage_path is not None + assert pull._flare_storage_path in file_names_in_archive_storage