Skip to content

Commit

Permalink
sshdriver: Prevent timeout from deadlock
Browse files Browse the repository at this point in the history
Using Popen.wait() on a process that has output sent to a pipe can
potentially deadlock if the process produces enough output to fill the
pipe, since it will stall and never terminate waiting for the pipe to
have more space.

Instead, use Popen.communicate() with the timeout parameter. This will
consume all output until EOF (preventing the process from stalling due
to a full pipe), and then check the return code. In the event of a
timeout error, Popen.communicate() doesn't loose any data, so it's safe
to call it again after the Popen.kill() in the exception handler.

This likely was done this way because the timeout parameter was new in
Python 3.3, but this shouldn't be a concern anymore

Signed-off-by: Joshua Watt <[email protected]>
  • Loading branch information
JoshuaWatt committed Sep 7, 2023
1 parent f9ca024 commit b8f059f
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 5 deletions.
11 changes: 6 additions & 5 deletions labgrid/driver/sshdriver.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,8 @@ def _start_own_master_once(self, timeout):

try:
subprocess_timeout = timeout + 5
return_value = self.process.wait(timeout=subprocess_timeout)
if return_value != 0:
stdout, _ = self.process.communicate(timeout=subprocess_timeout)
stdout, _ = self.process.communicate(timeout=subprocess_timeout)
if self.process.returncode != 0:
stdout = stdout.split(b"\n")
for line in stdout:
self.logger.warning("ssh: %s", line.rstrip().decode(encoding="utf-8", errors="replace"))
Expand All @@ -155,12 +154,14 @@ def _start_own_master_once(self, timeout):
pass

raise ExecutionError(
f"Failed to connect to {self.networkservice.address} with {' '.join(args)}: return code {return_value}", # pylint: disable=line-too-long
f"Failed to connect to {self.networkservice.address} with {' '.join(args)}: return code {self.process.returncode}", # pylint: disable=line-too-long
stdout=stdout,
)
except subprocess.TimeoutExpired:
self.process.kill()
stdout, _ = self.process.communicate()
raise ExecutionError(
f"Subprocess timed out [{subprocess_timeout}s] while executing {args}",
f"Subprocess timed out [{subprocess_timeout}s] while executing {args}: {stdout}",
)
finally:
if self.networkservice.password and os.path.exists(pass_file):
Expand Down
4 changes: 4 additions & 0 deletions tests/test_sshdriver.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ def ssh_driver_mocked_and_activated(target, mocker):
instance_mock = mocker.MagicMock()
popen.return_value = instance_mock
instance_mock.wait = mocker.MagicMock(return_value=0)
instance_mock.communicate = mocker.MagicMock(return_value=(b"", b""))
instance_mock.returncode = 0
SSHDriver(target, "ssh")
s = target.get_driver("SSHDriver")
return s
Expand All @@ -35,6 +37,8 @@ def test_create(target, mocker):
instance_mock = mocker.MagicMock()
popen.return_value = instance_mock
instance_mock.wait = mocker.MagicMock(return_value=0)
instance_mock.communicate = mocker.MagicMock(return_value=(b"", b""))
instance_mock.returncode = 0
s = SSHDriver(target, "ssh")
assert isinstance(s, SSHDriver)

Expand Down

0 comments on commit b8f059f

Please sign in to comment.