From 5b82fd7d379067f59414efce2140c2219ce80adb Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Tue, 21 May 2024 08:11:56 +0000 Subject: [PATCH 1/7] Quote the command correctly when source_bashrc is not set --- sky/utils/command_runner.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sky/utils/command_runner.py b/sky/utils/command_runner.py index 3aa87eda138..2f71e4a6592 100644 --- a/sky/utils/command_runner.py +++ b/sky/utils/command_runner.py @@ -184,8 +184,9 @@ def _get_command_to_run( # cluster by 1 second. # sourcing ~/.bashrc is not required for internal executions command += [ - 'true && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore' - f' && ({cmd})' + shlex.quote( + 'true && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore' + f' && ({cmd})') ] if not separate_stderr: command.append('2>&1') From 9822e4465a2f9f31cb5e7ddfb6ce1a1adc89a5a9 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Tue, 21 May 2024 08:44:33 +0000 Subject: [PATCH 2/7] Remove unnecessary source bashrc --- sky/backends/cloud_vm_ray_backend.py | 5 ++--- sky/provision/instance_setup.py | 11 +++++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index 6d2447fe89b..6ff9731033e 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -3175,7 +3175,8 @@ def _setup_node(node_id: int) -> None: f'{create_script_code} && {setup_cmd}', log_path=setup_log_path, process_stream=False, - source_bashrc=True, + # We do not source bashrc for setup, since bashrc is sourced + # in the script already. ) def error_message() -> str: @@ -3724,7 +3725,6 @@ def tail_managed_job_logs(self, process_stream=False, ssh_mode=command_runner.SshMode.INTERACTIVE, stdin=subprocess.DEVNULL, - source_bashrc=True, ) def tail_serve_logs(self, handle: CloudVmRayResourceHandle, @@ -3762,7 +3762,6 @@ def tail_serve_logs(self, handle: CloudVmRayResourceHandle, process_stream=False, ssh_mode=command_runner.SshMode.INTERACTIVE, stdin=subprocess.DEVNULL, - source_bashrc=True, ) def teardown_no_lock(self, diff --git a/sky/provision/instance_setup.py b/sky/provision/instance_setup.py index 1e5e6285fef..6de916b2d1c 100644 --- a/sky/provision/instance_setup.py +++ b/sky/provision/instance_setup.py @@ -197,10 +197,10 @@ def _setup_node(runner: command_runner.CommandRunner, log_path: str): cmd, stream_logs=False, log_path=log_path, - require_outputs=True, - # Installing depencies requires source bashrc to access the PATH - # in bashrc. - source_bashrc=True) + require_outputs=True + # No need to source bashrc, as it should be sourced in the setup + # commands already. + ) retry_cnt = 0 while returncode == 255 and retry_cnt < _MAX_RETRY: # Got network connection issue occur during setup. This could @@ -214,8 +214,7 @@ def _setup_node(runner: command_runner.CommandRunner, log_path: str): returncode, stdout, stderr = runner.run(cmd, stream_logs=False, log_path=log_path, - require_outputs=True, - source_bashrc=True) + require_outputs=True) if not returncode: break From 4b045ea4c92dbb38bad552400985b1c9d5ae7957 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Tue, 21 May 2024 08:50:48 +0000 Subject: [PATCH 3/7] format --- sky/provision/instance_setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/provision/instance_setup.py b/sky/provision/instance_setup.py index 6de916b2d1c..150ce9c3846 100644 --- a/sky/provision/instance_setup.py +++ b/sky/provision/instance_setup.py @@ -200,7 +200,7 @@ def _setup_node(runner: command_runner.CommandRunner, log_path: str): require_outputs=True # No need to source bashrc, as it should be sourced in the setup # commands already. - ) + ) retry_cnt = 0 while returncode == 255 and retry_cnt < _MAX_RETRY: # Got network connection issue occur during setup. This could From 1e2576eeec079d961c665c30054cc83e8437d0a3 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Tue, 21 May 2024 09:23:51 +0000 Subject: [PATCH 4/7] Fix setup script for conda --- sky/provision/instance_setup.py | 11 ++++++----- sky/skylet/constants.py | 11 +++++++---- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/sky/provision/instance_setup.py b/sky/provision/instance_setup.py index 150ce9c3846..2e07f026616 100644 --- a/sky/provision/instance_setup.py +++ b/sky/provision/instance_setup.py @@ -197,10 +197,10 @@ def _setup_node(runner: command_runner.CommandRunner, log_path: str): cmd, stream_logs=False, log_path=log_path, - require_outputs=True - # No need to source bashrc, as it should be sourced in the setup - # commands already. - ) + require_outputs=True, + # Installing dependencies requires source bashrc to access + # conda. + source_bashrc=True) retry_cnt = 0 while returncode == 255 and retry_cnt < _MAX_RETRY: # Got network connection issue occur during setup. This could @@ -214,7 +214,8 @@ def _setup_node(runner: command_runner.CommandRunner, log_path: str): returncode, stdout, stderr = runner.run(cmd, stream_logs=False, log_path=log_path, - require_outputs=True) + require_outputs=True, + source_bashrc=True) if not returncode: break diff --git a/sky/skylet/constants.py b/sky/skylet/constants.py index 578629ea3e2..0f2d7540007 100644 --- a/sky/skylet/constants.py +++ b/sky/skylet/constants.py @@ -91,15 +91,18 @@ # AWS's Deep Learning AMI's default conda environment. CONDA_INSTALLATION_COMMANDS = ( 'which conda > /dev/null 2>&1 || ' - '(wget -nc https://repo.anaconda.com/miniconda/Miniconda3-py310_23.11.0-2-Linux-x86_64.sh -O Miniconda3-Linux-x86_64.sh && ' # pylint: disable=line-too-long + '{ wget -nc https://repo.anaconda.com/miniconda/Miniconda3-py310_23.11.0-2-Linux-x86_64.sh -O Miniconda3-Linux-x86_64.sh && ' # pylint: disable=line-too-long 'bash Miniconda3-Linux-x86_64.sh -b && ' 'eval "$(~/miniconda3/bin/conda shell.bash hook)" && conda init && ' - 'conda config --set auto_activate_base true); ' - 'grep "# >>> conda initialize >>>" ~/.bashrc || conda init;' + 'conda config --set auto_activate_base true && ' + # Use $(echo ~) instead of ~ to avoid the error "no such file or directory". + # Also, not using $HOME to avoid the error HOME variable not set. + f'echo "$(echo ~)/miniconda3/bin/python" > {SKY_PYTHON_PATH_FILE}; }}; ' + 'grep "# >>> conda initialize >>>" ~/.bashrc || ' + '{ conda init && source ~/.bashrc; };' '(type -a python | grep -q python3) || ' 'echo \'alias python=python3\' >> ~/.bashrc;' '(type -a pip | grep -q pip3) || echo \'alias pip=pip3\' >> ~/.bashrc;' - 'source ~/.bashrc;' # Writes Python path to file if it does not exist or the file is empty. f'[ -s {SKY_PYTHON_PATH_FILE} ] || which python3 > {SKY_PYTHON_PATH_FILE};') From f25f623548350a88d7d21ca94b6361506d53b131 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Tue, 21 May 2024 09:29:46 +0000 Subject: [PATCH 5/7] Add comment --- sky/utils/command_runner.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sky/utils/command_runner.py b/sky/utils/command_runner.py index 2f71e4a6592..ee9e75860ec 100644 --- a/sky/utils/command_runner.py +++ b/sky/utils/command_runner.py @@ -432,10 +432,12 @@ def run( cmd, process_stream, separate_stderr, - # A hack to remove the following bash warnings (twice): + # A hack to remove the following SSH warning+bash warnings (twice): + # Warning: Permanently added 'xx.xx.xx.xx' to the list of known hosts. # bash: cannot set terminal process group # bash: no job control in this shell - skip_lines=5 if source_bashrc else 0, + # When not source_bashrc, the bash warning will only show once. + skip_lines=5 if source_bashrc else 3, source_bashrc=source_bashrc) command = base_ssh_command + [shlex.quote(command_str)] From 68f7ebd95e38f823d0152e3e1881d05e69dcfb4c Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Tue, 21 May 2024 17:22:11 +0000 Subject: [PATCH 6/7] format --- sky/utils/command_runner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sky/utils/command_runner.py b/sky/utils/command_runner.py index ee9e75860ec..0b8ffc74171 100644 --- a/sky/utils/command_runner.py +++ b/sky/utils/command_runner.py @@ -433,7 +433,8 @@ def run( process_stream, separate_stderr, # A hack to remove the following SSH warning+bash warnings (twice): - # Warning: Permanently added 'xx.xx.xx.xx' to the list of known hosts. + # Warning: Permanently added 'xx.xx.xx.xx' to the list of known + # hosts. # bash: cannot set terminal process group # bash: no job control in this shell # When not source_bashrc, the bash warning will only show once. From ac0a57a06e57b1af94d77904d989c5610ee8a7d6 Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Wed, 22 May 2024 09:40:31 +0000 Subject: [PATCH 7/7] address comments --- sky/utils/command_runner.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sky/utils/command_runner.py b/sky/utils/command_runner.py index 0b8ffc74171..3ed69d0e2a1 100644 --- a/sky/utils/command_runner.py +++ b/sky/utils/command_runner.py @@ -184,9 +184,8 @@ def _get_command_to_run( # cluster by 1 second. # sourcing ~/.bashrc is not required for internal executions command += [ - shlex.quote( - 'true && export OMP_NUM_THREADS=1 PYTHONWARNINGS=ignore' - f' && ({cmd})') + shlex.quote('true && export OMP_NUM_THREADS=1 ' + f'PYTHONWARNINGS=ignore && ({cmd})') ] if not separate_stderr: command.append('2>&1') @@ -433,8 +432,7 @@ def run( process_stream, separate_stderr, # A hack to remove the following SSH warning+bash warnings (twice): - # Warning: Permanently added 'xx.xx.xx.xx' to the list of known - # hosts. + # Warning: Permanently added 'xx.xx.xx.xx' to the list of known... # bash: cannot set terminal process group # bash: no job control in this shell # When not source_bashrc, the bash warning will only show once.