Skip to content

Commit

Permalink
Merge pull request red-hat-storage#3895 from psathyan/fixSSHTimeout
Browse files Browse the repository at this point in the history
Fix long_running method timeout
  • Loading branch information
mergify[bot] authored Jul 17, 2024
2 parents 1116f48 + 9464db6 commit 93d0509
Showing 1 changed file with 16 additions and 3 deletions.
19 changes: 16 additions & 3 deletions ceph/ceph.py
Original file line number Diff line number Diff line change
Expand Up @@ -1270,14 +1270,14 @@ def __connect(self):
self.__outage_start_time = None
return
except Exception as e:
logger.warning(f"Connection outage to {self.ip_address}: \n{e}")
logger.warning(f"Error in connecting to {self.ip_address}: \n{e}")
if not self.__outage_start_time:
self.__outage_start_time = datetime.datetime.now()

logger.debug("Retrying connection in 10 seconds")
sleep(10)

raise AssertionError(f"Unable to establish connection with {self.ip_address}")
raise AssertionError(f"Unable to establish a connection with {self.ip_address}")

@property
def transport(self):
Expand Down Expand Up @@ -1518,13 +1518,19 @@ def long_running(self, **kw):

try:
channel = ssh().get_transport().open_session()
channel.settimeout(timeout)

# A mismatch between the stdout and stderr streams has been observed; hence
# the streams are combined and the output is logged at debug level only.
channel.set_combine_stderr(True)
channel.exec_command(cmd)

# The channel timeout applies to each individual operation, so an overall
# deadline is tracked separately: wait until the command completes or the
# specified duration elapses.
if timeout:
_end_time = datetime.datetime.now() + datetime.timedelta(
seconds=timeout
)

while not channel.exit_status_ready():
# Prevent high resource consumption
sleep(2)
Expand All @@ -1537,8 +1543,15 @@ def long_running(self, **kw):

data = channel.recv(1024)

if not timeout and _end_time > datetime.datetime.now():
channel.close()
raise SocketTimeoutException(
f"{cmd} failed to complete within {timeout}s"
)

logger.info(f"Command completed on {datetime.datetime.now()}")
return channel.recv_exit_status()

except socket.timeout as terr:
logger.error(f"Command failed to execute within {timeout} seconds.")
raise SocketTimeoutException(terr)
Expand Down

0 comments on commit 93d0509

Please sign in to comment.