From 544f7104f6338825a1eb0bcb65852cd34ea5f218 Mon Sep 17 00:00:00 2001
From: Aidan McMahon-Smith
Date: Tue, 17 Oct 2023 15:18:13 +0200
Subject: [PATCH] Add limit to unassociate [RHELDST-20725]

We want to perform garbage collection in batches to reduce the amount
of resources it consumes and avoid out of memory exceptions. Unit
association criteria has a limit field which we could use for batching
requests.
---
Review note: the fake client compares with `<` (not `<=`) so that a
single unassociate call removes at most `limit` units; with `<=` the
fake would remove `limit + 1` units.

 pubtools/pulplib/_impl/client/client.py         | 5 ++++-
 pubtools/pulplib/_impl/fake/client.py           | 6 ++++--
 pubtools/pulplib/_impl/model/repository/base.py | 8 +++++++-
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/pubtools/pulplib/_impl/client/client.py b/pubtools/pulplib/_impl/client/client.py
index 33affacd..673189d5 100644
--- a/pubtools/pulplib/_impl/client/client.py
+++ b/pubtools/pulplib/_impl/client/client.py
@@ -775,7 +775,7 @@ def _do_associate(self, src_repo_id, dest_repo_id, criteria=None, raw_options=No
             self._do_request, method="POST", url=url, json=body
         )
 
-    def _do_unassociate(self, repo_id, criteria=None):
+    def _do_unassociate(self, repo_id, criteria=None, limit=None):
         url = os.path.join(
             self._url, "pulp/api/v2/repositories/%s/actions/unassociate/" % repo_id
         )
@@ -800,6 +800,9 @@ def _do_unassociate(self, repo_id, criteria=None):
         else:
             body["criteria"]["filters"] = {"unit": pulp_search.filters}
 
+        if limit:
+            body["criteria"]["limit"] = limit
+
         LOG.debug("Submitting %s unassociate: %s", url, body)
 
         return self._task_executor.submit(
diff --git a/pubtools/pulplib/_impl/fake/client.py b/pubtools/pulplib/_impl/fake/client.py
index 05489472..4622e086 100644
--- a/pubtools/pulplib/_impl/fake/client.py
+++ b/pubtools/pulplib/_impl/fake/client.py
@@ -404,7 +404,7 @@ def do_next_upload(checksum, size):
 
         return out
 
-    def _do_unassociate(self, repo_id, criteria=None):
+    def _do_unassociate(self, repo_id, criteria=None, limit=None):
         repo_f = self.get_repository(repo_id)
         if repo_f.exception():
             return repo_f
@@ -431,7 +431,9 @@ def _do_unassociate(self, repo_id, criteria=None):
 
         for unit_with_key in units_with_key:
             unit = unit_with_key["unit"]
-            if match_object(criteria, unit):
+            if match_object(criteria, unit) and (
+                not limit or len(removed_units) < limit
+            ):
                 removed_units.add(unit)
             else:
                 kept_keys.add(unit_with_key["key"])
diff --git a/pubtools/pulplib/_impl/model/repository/base.py b/pubtools/pulplib/_impl/model/repository/base.py
index 508a0ace..92e96f49 100644
--- a/pubtools/pulplib/_impl/model/repository/base.py
+++ b/pubtools/pulplib/_impl/model/repository/base.py
@@ -696,7 +696,13 @@ def remove_content(self, criteria=None, **kwargs):
                 Matcher.in_(type_ids),  # Criteria.with_field_in is deprecated
             )
 
-        return f_proxy(self._client._do_unassociate(self.id, criteria=criteria))
+        return f_proxy(
+            self._client._do_unassociate(
+                self.id,
+                criteria=criteria,
+                limit=kwargs.get("limit"),
+            )
+        )
 
     @classmethod
     def from_data(cls, data):