Skip to content

Commit

Permalink
Improvement to long_running method
Browse files Browse the repository at this point in the history
During unit testing, it has been observed that the method does not
honor the timeout when there is a continuous stream of reads.

For example, if the below command is executed
```
node.exec(cmd="while [ -f /ceph-qe-ready ]; do echo 'Writing...' ; done",
    long_running=True,
    timeout=10
)
```
then the timeout of 10 seconds is never honored because the reads
never end.

In this patch, the loop around data reading is removed and a
single controlled loop is implemented.

Signed-off-by: Pragadeeswaran Sathyanarayanan <[email protected]>
  • Loading branch information
psathyan committed Jul 18, 2024
1 parent df4b17e commit c83af7c
Showing 1 changed file with 20 additions and 9 deletions.
29 changes: 20 additions & 9 deletions ceph/ceph.py
Original file line number Diff line number Diff line change
Expand Up @@ -1145,6 +1145,18 @@ class CommandFailed(Exception):
pass


class TimeoutException(Exception):
    """Signal that an operation ran past its allotted time.

    A plain ``Exception`` subclass with no extra state; the message passed
    to the constructor is the only payload.
    """


def check_timeout(end_time, timeout):
    """Raise ``TimeoutException`` once the deadline has been reached.

    Args:
        end_time: Deadline compared against ``datetime.datetime.now()``.
            It is only evaluated when ``timeout`` is truthy.
            NOTE(review): presumably a naive ``datetime`` built from
            ``datetime.datetime.now()`` by the caller -- confirm.
        timeout: Timeout configured by the caller. A falsy value (``None``
            or ``0``) disables the check entirely.

    Raises:
        TimeoutException: When ``timeout`` is truthy and the current time
            is equal to or later than ``end_time``.
    """
    # No timeout configured: nothing to enforce, and end_time is not touched.
    if not timeout:
        return

    deadline_reached = datetime.datetime.now() >= end_time
    if deadline_reached:
        raise TimeoutException("Command exceed the allocated execution time.")


class RolesContainer(object):
"""
Container for single or multiple node roles.
Expand Down Expand Up @@ -1521,7 +1533,7 @@ def long_running(self, **kw):
channel.settimeout(timeout)

# A mismatch between stdout and stderr streams have been observed hence
# combining the streams and logging is set to debug level only.
# combining the streams and logging is set to debug level.
channel.set_combine_stderr(True)
channel.exec_command(cmd)

Expand All @@ -1532,29 +1544,28 @@ def long_running(self, **kw):

while not channel.exit_status_ready():
# Prevent high resource consumption
sleep(2)

sleep(1)
if channel.recv_ready():
data = channel.recv(1024)
while data:
for line in data.splitlines():
logger.debug(line)

check_timeout(_end_time, timeout)
data = channel.recv(1024)

# time check - raise exception when exceeded.
if timeout and datetime.datetime.now() > _end_time:
channel.close()
raise SocketTimeoutException(
f"{cmd} failed to complete within {timeout}s"
)
check_timeout(_end_time, timeout)

logger.info(f"Command completed on {datetime.datetime.now()}")
return channel.recv_exit_status()

except socket.timeout as terr:
logger.error(f"Command failed to execute within {timeout} seconds.")
raise SocketTimeoutException(terr)
except TimeoutException as tex:
channel.close()
logger.error(f"{cmd} failed to execute within {timeout}s.")
raise CommandFailed(tex)
except BaseException as be: # noqa
logger.exception(be)
raise CommandFailed(be)
Expand Down

0 comments on commit c83af7c

Please sign in to comment.