Skip to content

Commit

Permalink
Long running command force timeout
Browse files Browse the repository at this point in the history
Previously, the timeout was applied to each individual operation on the
channel. This patch removes the per-operation timeout and instead applies
the timeout to the execution of the command as a whole.

Signed-off-by: Pragadeeswaran Sathyanarayanan <[email protected]>
  • Loading branch information
psathyan committed Jul 17, 2024
1 parent 2aec92d commit 9464db6
Showing 1 changed file with 16 additions and 3 deletions.
19 changes: 16 additions & 3 deletions ceph/ceph.py
Original file line number Diff line number Diff line change
Expand Up @@ -1270,14 +1270,14 @@ def __connect(self):
self.__outage_start_time = None
return
except Exception as e:
logger.warning(f"Connection outage to {self.ip_address}: \n{e}")
logger.warning(f"Error in connecting to {self.ip_address}: \n{e}")
if not self.__outage_start_time:
self.__outage_start_time = datetime.datetime.now()

logger.debug("Retrying connection in 10 seconds")
sleep(10)

raise AssertionError(f"Unable to establish connection with {self.ip_address}")
raise AssertionError(f"Unable to establish a connection with {self.ip_address}")

@property
def transport(self):
Expand Down Expand Up @@ -1518,13 +1518,19 @@ def long_running(self, **kw):

try:
channel = ssh().get_transport().open_session()
channel.settimeout(timeout)

# A mismatch between stdout and stderr streams have been observed hence
# combining the streams and logging is set to debug level only.
channel.set_combine_stderr(True)
channel.exec_command(cmd)

# Channel timeout is per operation. Waiting for a specified duration or
# command execution completion.
if timeout:
_end_time = datetime.datetime.now() + datetime.timedelta(
seconds=timeout
)

while not channel.exit_status_ready():
# Prevent high resource consumption
sleep(2)
Expand All @@ -1537,8 +1543,15 @@ def long_running(self, **kw):

data = channel.recv(1024)

if not timeout and _end_time > datetime.datetime.now():
channel.close()
raise SocketTimeoutException(
f"{cmd} failed to complete within {timeout}s"
)

logger.info(f"Command completed on {datetime.datetime.now()}")
return channel.recv_exit_status()

except socket.timeout as terr:
logger.error(f"Command failed to execute within {timeout} seconds.")
raise SocketTimeoutException(terr)
Expand Down

0 comments on commit 9464db6

Please sign in to comment.