Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix IPU being blocked by resource limitations #1256

Merged
merged 1 commit into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions commands/command_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import os
import re
import resource

from leapp.exceptions import CommandError
from leapp.utils import path
Expand Down Expand Up @@ -140,3 +141,40 @@ def vet_upgrade_path(args):
flavor=flavor,
choices=','.join(supported_target_versions)))
return (target_release, flavor)


def set_resource_limits():
    """
    Raise the resource limits of the current process that could otherwise block the upgrade.

    The soft limit for the maximum number of open file descriptors (`RLIMIT_NOFILE`) is raised to 16384
    when it is lower than that. The hard limit is raised only as far as needed and is never lowered.
    The maximum writable file size (`RLIMIT_FSIZE`) is set to unlimited (soft and hard) when the soft
    limit is not unlimited already.

    :raises: `CommandError` if the resource limits cannot be set
    """

    def is_lower(current, wanted):
        # An unlimited value is never "lower" than a finite one. RLIM_INFINITY is checked explicitly
        # because, depending on the Python version/platform, it may be exposed as a negative number
        # and a plain `<` comparison would then treat "unlimited" as the smallest possible limit.
        return current != resource.RLIM_INFINITY and current < wanted

    def set_resource_limit(resource_type, soft, hard):
        # Human readable name of the resource, used only in the error messages below.
        rtype_string = (
            'open file descriptors' if resource_type == resource.RLIMIT_NOFILE
            else 'writable file size' if resource_type == resource.RLIMIT_FSIZE
            else 'unknown resource'
        )
        try:
            resource.setrlimit(resource_type, (soft, hard))
        except ValueError as err:
            raise CommandError(
                'Failure occurred while attempting to set soft limit higher than the hard limit. '
                'Resource type: {}, error: {}'.format(rtype_string, err)
            )
        except OSError as err:
            raise CommandError(
                'Failed to set resource limit. Resource type: {}, error: {}'.format(rtype_string, err)
            )

    nofile_limit = 1024*16
    fsize_limit = resource.RLIM_INFINITY

    soft_nofile, hard_nofile = resource.getrlimit(resource.RLIMIT_NOFILE)
    if is_lower(soft_nofile, nofile_limit):
        # Keep a hard limit that is already high enough (or unlimited) instead of reducing it
        # to the requested soft value.
        new_hard_nofile = nofile_limit if is_lower(hard_nofile, nofile_limit) else hard_nofile
        set_resource_limit(resource.RLIMIT_NOFILE, nofile_limit, new_hard_nofile)

    soft_fsize, _ = resource.getrlimit(resource.RLIMIT_FSIZE)
    if soft_fsize != fsize_limit:
        set_resource_limit(resource.RLIMIT_FSIZE, fsize_limit, fsize_limit)
2 changes: 2 additions & 0 deletions commands/preupgrade/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ def preupgrade(args, breadcrumbs):
except LeappError as exc:
raise CommandError(exc.message)

command_utils.set_resource_limits()

workflow = repositories.lookup_workflow('IPUWorkflow')()
util.warn_if_unsupported(configuration)
util.process_whitelist_experimental(repositories, workflow, configuration, logger)
Expand Down
60 changes: 60 additions & 0 deletions commands/tests/test_upgrade_paths.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import resource

import mock
import pytest

Expand Down Expand Up @@ -50,3 +52,61 @@ def test_vet_upgrade_path(mock_open, monkeypatch):
monkeypatch.setenv('LEAPP_DEVEL_TARGET_RELEASE', '9.0')
args = mock.Mock(target='1.2')
assert command_utils.vet_upgrade_path(args) == ('9.0', 'default')


def _mock_getrlimit_factory(nofile_limits=(1024, 4096), fsize_limits=(1024, 4096)):
    """
    Build a stand-in for `resource.getrlimit` that answers with preconfigured limits.

    By default both resources report limits lower than the values the tested code asks for.

    :param nofile_limits: (soft, hard) tuple returned for `RLIMIT_NOFILE`
    :param fsize_limits: (soft, hard) tuple returned for `RLIMIT_FSIZE`
    :return: A function usable in place of `resource.getrlimit`
    """
    configured = (
        (resource.RLIMIT_NOFILE, nofile_limits),
        (resource.RLIMIT_FSIZE, fsize_limits),
    )

    def mock_getrlimit(resource_type):
        for known_type, limits in configured:
            if resource_type == known_type:
                return limits
        # Any other resource type reports zero limits.
        return (0, 0)

    return mock_getrlimit


@pytest.mark.parametrize("nofile_limits, fsize_limits, expected_calls", [
    # Both limits are too low: each one has to be raised exactly once
    (
        (1024, 4096),
        (1024, 4096),
        [
            (resource.RLIMIT_NOFILE, (1024*16, 1024*16)),
            (resource.RLIMIT_FSIZE, (resource.RLIM_INFINITY, resource.RLIM_INFINITY)),
        ],
    ),
    # Both limits already match the requested values: no setrlimit call at all
    (
        (1024*16, 1024*16),
        (resource.RLIM_INFINITY, resource.RLIM_INFINITY),
        [],
    ),
])
def test_set_resource_limits_increase(monkeypatch, nofile_limits, fsize_limits, expected_calls):
    """
    Check which `setrlimit` calls `set_resource_limits` issues for the given current limits.
    """
    recorded = []

    def record_setrlimit(resource_type, limits):
        # Only remember the request; the limits of the test process stay untouched.
        recorded.append((resource_type, limits))

    fake_getrlimit = _mock_getrlimit_factory(nofile_limits, fsize_limits)
    monkeypatch.setattr(resource, "getrlimit", fake_getrlimit)
    monkeypatch.setattr(resource, "setrlimit", record_setrlimit)

    command_utils.set_resource_limits()

    assert recorded == expected_calls


@pytest.mark.parametrize("errortype, expected_message", [
    (OSError, "Failed to set resource limit"),
    (ValueError, "Failure occurred while attempting to set soft limit higher than the hard limit"),
])
def test_set_resource_limits_exceptions(monkeypatch, errortype, expected_message):
    """
    Check that errors raised by `setrlimit` are reported as `CommandError` with the expected message.
    """
    def failing_setrlimit(*args, **kwargs):
        raise errortype("mocked error")

    # The default mocked limits are lower than requested, so setrlimit is guaranteed to be called.
    monkeypatch.setattr(resource, "setrlimit", failing_setrlimit)
    monkeypatch.setattr(resource, "getrlimit", _mock_getrlimit_factory())

    with pytest.raises(CommandError, match=expected_message):
        command_utils.set_resource_limits()
3 changes: 3 additions & 0 deletions commands/upgrade/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ def upgrade(args, breadcrumbs):
repositories = util.load_repositories()
except LeappError as exc:
raise CommandError(exc.message)

command_utils.set_resource_limits()

workflow = repositories.lookup_workflow('IPUWorkflow')(auto_reboot=args.reboot)
util.process_whitelist_experimental(repositories, workflow, configuration, logger)
util.warn_if_unsupported(configuration)
Expand Down