Skip to content

Commit f07a018

Browse files
authored
Move sub-slicing validation to kueue-manager (#756)
* feat: move sub-slicing validation to kueue-manager * style: remove unused import * style: remove unused import * feat: refactor to use free function
1 parent d1906ac commit f07a018

File tree

4 files changed

+56
-18
lines changed

4 files changed

+56
-18
lines changed

src/xpk/commands/workload.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@
2626
get_cluster_credentials,
2727
setup_k8s_env,
2828
)
29-
from ..core.commands import run_command_with_updates, run_commands, run_command_for_value
30-
from ..core.kueue_manager import KueueManager, SUB_SLICE_TOPOLOGY_NAME
29+
from ..core.commands import run_command_with_updates, run_commands
30+
from ..core.kueue_manager import KueueManager, has_sub_slicing_enabled
3131
from ..core.config import (VERTEX_TENSORBOARD_FEATURE_FLAG, XPK_CURRENT_VERSION)
3232
from ..core.docker_container import (
3333
get_main_container_docker_image,
@@ -683,17 +683,14 @@ def workload_create(args) -> None:
683683

684684

685685
def _validate_sub_slicing_availability():
686-
return_code, value = run_command_for_value(
687-
command='kubectl get topology', task='Get defined topologies'
688-
)
689-
686+
return_code, sub_slicing_enabled = has_sub_slicing_enabled()
690687
if return_code != 0:
691688
xpk_print(
692689
'Error: Unable to validate sub-slicing support on a given cluster.'
693690
)
694691
xpk_exit(1)
695692

696-
if SUB_SLICE_TOPOLOGY_NAME not in value:
693+
if not sub_slicing_enabled:
697694
xpk_print(
698695
'Error: Cluster has not been not set up for Sub-slicing. Please enable'
699696
' --sub-slicing in "cluster create" command first.'

src/xpk/commands/workload_test.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,8 @@ def test_validate_sub_slicing_availability_exits_when_getting_topologies_fails(
7171
xpk_print: MagicMock, mocker
7272
):
7373
mocker.patch(
74-
'xpk.commands.workload.run_command_for_value',
75-
return_value=(1, ''),
74+
'xpk.commands.workload.has_sub_slicing_enabled',
75+
return_value=(1, None),
7676
)
7777
with pytest.raises(SystemExit):
7878
_validate_sub_slicing_availability()
@@ -87,8 +87,8 @@ def test_validate_sub_slicing_availability_exits_when_subslicing_topology_is_not
8787
xpk_print: MagicMock, mocker
8888
):
8989
mocker.patch(
90-
'xpk.commands.workload.run_command_for_value',
91-
return_value=(0, ''),
90+
'xpk.commands.workload.has_sub_slicing_enabled',
91+
return_value=(0, False),
9292
)
9393
with pytest.raises(SystemExit):
9494
_validate_sub_slicing_availability()
@@ -103,8 +103,8 @@ def test_validate_sub_slicing_availability_exits_when_kueue_version_cannot_be_de
103103
xpk_print: MagicMock, mocker
104104
):
105105
mocker.patch(
106-
'xpk.commands.workload.run_command_for_value',
107-
return_value=(0, 'sub-slice-topology'),
106+
'xpk.commands.workload.has_sub_slicing_enabled',
107+
return_value=(0, True),
108108
)
109109
mocker.patch(
110110
'xpk.commands.workload.KueueManager.get_installed_kueue_version',
@@ -120,8 +120,8 @@ def test_validate_sub_slicing_availability_exits_when_kueue_version_does_not_mee
120120
xpk_print: MagicMock, mocker
121121
):
122122
mocker.patch(
123-
'xpk.commands.workload.run_command_for_value',
124-
return_value=(0, 'sub-slice-topology'),
123+
'xpk.commands.workload.has_sub_slicing_enabled',
124+
return_value=(0, True),
125125
)
126126
mocker.patch(
127127
'xpk.commands.workload.KueueManager.get_installed_kueue_version',
@@ -137,8 +137,8 @@ def test_validate_sub_slicing_availability_does_nothing_when_cluster_is_correctl
137137
mocker,
138138
):
139139
mocker.patch(
140-
'xpk.commands.workload.run_command_for_value',
141-
return_value=(0, 'sub-slice-topology'),
140+
'xpk.commands.workload.has_sub_slicing_enabled',
141+
return_value=(0, True),
142142
)
143143
mocker.patch(
144144
'xpk.commands.workload.KueueManager.get_installed_kueue_version',

src/xpk/core/kueue_manager.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,3 +421,14 @@ def __update_kueue_resources_if_necessary(self) -> int:
421421
if return_code != 0:
422422
xpk_print(f"{task} returned ERROR {return_code}")
423423
return return_code
424+
425+
426+
def has_sub_slicing_enabled() -> tuple[int, bool | None]:
427+
return_code, value = run_command_for_value(
428+
command="kubectl get topology", task="Get defined topologies"
429+
)
430+
431+
if return_code != 0:
432+
return return_code, None
433+
434+
return return_code, SUB_SLICE_TOPOLOGY_NAME in value

src/xpk/core/kueue_manager_test.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import yaml
2020
from unittest.mock import MagicMock, patch
2121

22-
from xpk.core.kueue_manager import KueueConfig, KueueManager
22+
from xpk.core.kueue_manager import KueueConfig, KueueManager, has_sub_slicing_enabled
2323
from xpk.core.system_characteristics import AcceleratorType, SystemCharacteristics
2424
from packaging.version import Version
2525

@@ -562,6 +562,36 @@ def _trigger_installation(self, kueue_config: KueueConfig) -> str:
562562
return manifest
563563

564564

565+
@patch("xpk.core.kueue_manager.run_command_for_value")
566+
def test_has_sub_slicing_enabled_returns_exit_code_when_command_fails(
567+
mock_run_for_value,
568+
):
569+
mock_run_for_value.return_value = (1, None)
570+
return_code, result = has_sub_slicing_enabled()
571+
assert return_code == 1
572+
assert result is None
573+
574+
575+
@patch("xpk.core.kueue_manager.run_command_for_value")
576+
def test_has_sub_slicing_enabled_returns_false_when_sub_slicing_topology_is_not_present(
577+
mock_run_for_value,
578+
):
579+
mock_run_for_value.return_value = (0, "")
580+
return_code, result = has_sub_slicing_enabled()
581+
assert return_code == 0
582+
assert result is False
583+
584+
585+
@patch("xpk.core.kueue_manager.run_command_for_value")
586+
def test_has_sub_slicing_enabled_returns_true_when_sub_slicing_topology_is_not_present(
587+
mock_run_for_value,
588+
):
589+
mock_run_for_value.return_value = (0, "sub-slice-topology")
590+
return_code, result = has_sub_slicing_enabled()
591+
assert return_code == 0
592+
assert result is True
593+
594+
565595
T = TypeVar("T")
566596

567597

0 commit comments

Comments
 (0)