Skip to content

Commit

Permalink
Add limit to unassociate [RHELDST-20725]
Browse files Browse the repository at this point in the history
We want to perform garbage collection in batches to reduce the amount of
resources it consumes and avoid out of memory exceptions. Unit
association criteria has a limit field which we could use for
batching requests.
  • Loading branch information
amcmahon-rh committed Oct 18, 2023
1 parent 6f26dd3 commit 971533f
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 5 deletions.
5 changes: 4 additions & 1 deletion pubtools/pulplib/_impl/client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,7 @@ def _do_associate(self, src_repo_id, dest_repo_id, criteria=None, raw_options=No
self._do_request, method="POST", url=url, json=body
)

def _do_unassociate(self, repo_id, criteria=None):
def _do_unassociate(self, repo_id, criteria=None, limit=None):
url = os.path.join(
self._url, "pulp/api/v2/repositories/%s/actions/unassociate/" % repo_id
)
Expand All @@ -800,6 +800,9 @@ def _do_unassociate(self, repo_id, criteria=None):
else:
body["criteria"]["filters"] = {"unit": pulp_search.filters}

if limit:
body["criteria"]["limit"] = limit

LOG.debug("Submitting %s unassociate: %s", url, body)

return self._task_executor.submit(
Expand Down
6 changes: 4 additions & 2 deletions pubtools/pulplib/_impl/fake/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ def do_next_upload(checksum, size):

return out

def _do_unassociate(self, repo_id, criteria=None):
def _do_unassociate(self, repo_id, criteria=None, limit=None):
repo_f = self.get_repository(repo_id)
if repo_f.exception():
return repo_f
Expand All @@ -431,7 +431,9 @@ def _do_unassociate(self, repo_id, criteria=None):

for unit_with_key in units_with_key:
unit = unit_with_key["unit"]
if match_object(criteria, unit):
if match_object(criteria, unit) and (
not limit or len(removed_units) < limit
):
removed_units.add(unit)
else:
kept_keys.add(unit_with_key["key"])
Expand Down
8 changes: 7 additions & 1 deletion pubtools/pulplib/_impl/model/repository/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,7 +696,13 @@ def remove_content(self, criteria=None, **kwargs):
Matcher.in_(type_ids), # Criteria.with_field_in is deprecated
)

return f_proxy(self._client._do_unassociate(self.id, criteria=criteria))
return f_proxy(
self._client._do_unassociate(
self.id,
criteria=criteria,
limit=kwargs.get("limit"),
)
)

@classmethod
def from_data(cls, data):
Expand Down
33 changes: 32 additions & 1 deletion tests/fake/test_fake_remove_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,42 @@ def test_can_remove_empty():
assert not task.units


def test_can_remove_content():
def test_limited_remove_content():
"""repo.remove() succeeds and removes expected units inserted via controller."""
controller = FakeController()
client = controller.client

rpm_units = [
RpmUnit(name="gliba", version="1.0", release="1", arch="x86_64"),
RpmUnit(name="glibb", version="1.0", release="1", arch="x86_64"),
RpmUnit(name="glibc", version="1.0", release="1", arch="x86_64"),
RpmUnit(name="glibd", version="1.0", release="1", arch="x86_64"),
]

repo = YumRepository(id="repo1")
controller.insert_repository(repo)
controller.insert_units(repo, rpm_units)

remove_rpms = client.get_repository("repo1").remove_content(
type_ids=["rpm"], limit=3
)

assert len(remove_rpms) == 1
task = remove_rpms[0]

# It should have completed successfully
assert task.completed
assert task.succeeded

# It should have removed (only) RPM units
assert len(task.units) == 3


def test_can_remove_content():
"""repo.remove() can remove a limited number of units."""
controller = FakeController()
client = controller.client

rpm_units = [
RpmUnit(name="bash", version="4.0", release="1", arch="x86_64"),
RpmUnit(name="glibc", version="5.0", release="1", arch="x86_64"),
Expand Down
32 changes: 32 additions & 0 deletions tests/repository/test_remove_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,38 @@ def test_remove_with_criteria(fast_poller, requests_mocker, client):
}


def test_remove_with_limit(fast_poller, requests_mocker, client):
"""Remove succeeds when given a critria/filter for removal"""
repo = Repository(id="some-repo")
repo.__dict__["_client"] = client

requests_mocker.post(
"https://pulp.example.com/pulp/api/v2/repositories/some-repo/actions/unassociate/",
[
{"json": {"spawned_tasks": [{"task_id": "task1"}]}},
],
)

requests_mocker.post(
"https://pulp.example.com/pulp/api/v2/tasks/search/",
[
{"json": [{"task_id": "task1", "state": "finished"}]},
],
)

assert repo.remove_content(type_ids=["type1", "type2"], limit=1).result() == [
Task(id="task1", completed=True, succeeded=True)
]

# It should have included the limit in the post request
assert requests_mocker.request_history[0].json() == {
"criteria": {
"type_ids": ["type1", "type2"],
"limit": 1,
}
}


def test_remove_fail_without_type_id(fast_poller, client):
"""Remove fails when a critria is provided without unit type"""
repo = Repository(id="some-repo")
Expand Down

0 comments on commit 971533f

Please sign in to comment.