Skip to content

Commit

Permalink
fix: move to Blocked state when HugePages are not available (#69)
Browse files Browse the repository at this point in the history
  • Loading branch information
dariofaccin authored Jan 3, 2024
1 parent bea95b6 commit 6649241
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 20 deletions.
24 changes: 12 additions & 12 deletions lib/charms/kubernetes_charm_libraries/v0/hugepages_volumes_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def _hugepages_volumes_func_from_config(self) -> list[HugePagesVolume]:

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 2
LIBPATCH = 3

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -215,11 +215,11 @@ def _statefulset_contains_requested_volumes(
Returns:
bool: Whether the StatefulSet contains the given volumes.
"""
if not statefulset_spec.template.spec.volumes:
if not statefulset_spec.template.spec.volumes: # type: ignore[union-attr]
return False
return all(
[
requested_volume in statefulset_spec.template.spec.volumes
requested_volume in statefulset_spec.template.spec.volumes # type: ignore[union-attr] # noqa E501
for requested_volume in requested_volumes
]
)
Expand All @@ -243,7 +243,7 @@ def _pod_contains_requested_volumemounts(
container = self._get_container(container_name=container_name, containers=containers)
return all(
[
requested_volumemount in container.volumeMounts
requested_volumemount in container.volumeMounts # type: ignore[operator]
for requested_volumemount in requested_volumemounts
]
)
Expand All @@ -267,15 +267,15 @@ def _pod_resources_are_set(
container = self._get_container(container_name=container_name, containers=containers)
if requested_resources.limits:
for limit, value in requested_resources.limits.items():
if not container.resources.limits:
if not container.resources.limits: # type: ignore[union-attr]
return False
if container.resources.limits.get(limit) != value:
if container.resources.limits.get(limit) != value: # type: ignore[union-attr]
return False
if requested_resources.requests:
for request, value in requested_resources.requests.items():
if not container.resources.requests:
if not container.resources.requests: # type: ignore[union-attr]
return False
if container.resources.requests.get(request) != value:
if container.resources.requests.get(request) != value: # type: ignore[union-attr]
return False
return True

Expand Down Expand Up @@ -368,7 +368,7 @@ def list_volumemounts(self, statefulset_name: str, container_name: str) -> list[
)
containers: Iterable[Container] = statefulset.spec.template.spec.containers # type: ignore[attr-defined] # noqa: E501
container = self._get_container(container_name=container_name, containers=containers)
return container.volumeMounts
return container.volumeMounts # type: ignore[return-value]

def list_container_resources(
self, statefulset_name: str, container_name: str
Expand Down Expand Up @@ -398,7 +398,7 @@ def list_container_resources(
Container
] = statefulset.spec.template.spec.containers # type: ignore[attr-defined] # noqa: E501
container = self._get_container(container_name=container_name, containers=containers)
return container.resources
return container.resources # type: ignore[return-value]


class KubernetesHugePagesPatchCharmLib(Object):
Expand Down Expand Up @@ -672,8 +672,8 @@ def _generate_resource_requirements_to_be_replaced(self) -> ResourceRequirements
if current_resources.requests
else {}
)
new_limits = dict(new_limits.items() | additional_resources.limits.items())
new_requests = dict(new_requests.items() | additional_resources.requests.items())
new_limits = dict(new_limits.items() | additional_resources.limits.items()) # type: ignore[union-attr] # noqa E501
new_requests = dict(new_requests.items() | additional_resources.requests.items()) # type: ignore[union-attr] # noqa E501
new_resources = ResourceRequirements(
limits=new_limits, requests=new_requests, claims=current_resources.claims
)
Expand Down
37 changes: 31 additions & 6 deletions lib/charms/kubernetes_charm_libraries/v0/multus.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def _on_config_changed(self, event: EventBase):

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 10
LIBPATCH = 12


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -351,7 +351,7 @@ def patch_statefulset(
)
)
if privileged:
container.securityContext.privileged = True
container.securityContext.privileged = True # type: ignore[union-attr]
statefulset_delta = StatefulSet(
spec=StatefulSetSpec(
selector=statefulset.spec.selector, # type: ignore[attr-defined]
Expand Down Expand Up @@ -489,12 +489,12 @@ def _pod_is_patched(
bool
"""
if not self._annotations_contains_multus_networks(
annotations=pod.metadata.annotations,
annotations=pod.metadata.annotations, # type: ignore[arg-type,union-attr]
network_annotations=network_annotations,
):
return False
if not self._container_security_context_is_set(
containers=pod.spec.containers,
containers=pod.spec.containers, # type: ignore[union-attr]
container_name=container_name,
cap_net_admin=cap_net_admin,
privileged=privileged,
Expand Down Expand Up @@ -537,12 +537,29 @@ def _container_security_context_is_set(
"""
for container in containers:
if container.name == container_name:
if cap_net_admin and "NET_ADMIN" not in container.securityContext.capabilities.add:
if cap_net_admin and "NET_ADMIN" not in container.securityContext.capabilities.add: # type: ignore[operator,union-attr] # noqa E501
return False
if privileged and not container.securityContext.privileged:
if privileged and not container.securityContext.privileged: # type: ignore[union-attr] # noqa E501
return False
return True

def multus_is_available(self) -> bool:
    """Check whether Multus is enabled leveraging existence of NAD custom resource.

    The NetworkAttachmentDefinition resources of ``self.namespace`` are listed;
    an HTTP 404 answer is interpreted as Multus not being available.

    Returns:
        bool: Whether Multus is enabled

    Raises:
        KubernetesMultusError: If listing fails with an HTTP status other than 404.
    """
    try:
        # list() consumes the result; presumably the listing is lazy, so this
        # forces any HTTP error to surface inside this try block.
        list(self.client.list(res=NetworkAttachmentDefinition, namespace=self.namespace))
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 404:
            return False
        # Chain the original error so the real HTTP status stays in the traceback.
        raise KubernetesMultusError(
            "Unexpected outcome when checking for Multus availability"
        ) from e
    return True


class KubernetesMultusCharmLib(Object):
"""Class to be instantiated by charms requiring Multus networking."""
Expand Down Expand Up @@ -726,3 +743,11 @@ def _on_remove(self, event: RemoveEvent) -> None:
def delete_pod(self) -> None:
"""Delete the pod."""
self.kubernetes.delete_pod(self._pod)

def multus_is_available(self) -> bool:
    """Report whether Multus is enabled, as seen through the NAD custom resource.

    The check itself is delegated to ``self.kubernetes.multus_is_available()``.

    Returns:
        bool: Whether Multus is enabled
    """
    availability = self.kubernetes.multus_is_available()
    return availability
33 changes: 32 additions & 1 deletion src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from lightkube.core.client import Client
from lightkube.models.core_v1 import ServicePort, ServiceSpec
from lightkube.models.meta_v1 import ObjectMeta
from lightkube.resources.core_v1 import Service
from lightkube.resources.core_v1 import Node, Service
from ops import RemoveEvent
from ops.charm import CharmBase, CharmEvents
from ops.framework import EventBase, EventSource
Expand All @@ -55,6 +55,7 @@
PROMETHEUS_PORT = 8080
PFCP_PORT = 8805
REQUIRED_CPU_EXTENSIONS = ["avx2", "rdrand"]
REQUIRED_CPU_EXTENSIONS_HUGEPAGES = ["pdpe1gb"]

# The default field manager set when using kubectl to create resources
DEFAULT_FIELD_MANAGER = "controller"
Expand Down Expand Up @@ -135,6 +136,7 @@ def __init__(self, *args):
hugepages_volumes_func=self._volumes_request_func_from_config,
refresh_event=self.on.hugepages_volumes_config_changed,
)
self.framework.observe(self.on.update_status, self._on_config_changed)
self.framework.observe(self.on.config_changed, self._on_config_changed)
self.framework.observe(self.on.bessd_pebble_ready, self._on_bessd_pebble_ready)
self.framework.observe(self.on.config_storage_attached, self._on_bessd_pebble_ready)
Expand Down Expand Up @@ -367,6 +369,16 @@ def _on_config_changed(self, event: EventBase):
"""Handler for config changed events."""
if not self.unit.is_leader():
return
if self._hugepages_is_enabled():
if not self._cpu_is_compatible_for_hugepages():
raise IncompatibleCPUError(
"\nCPU is not compatible!\n"
"Please use a CPU that has the following capabilities: "
f"{', '.join(REQUIRED_CPU_EXTENSIONS + REQUIRED_CPU_EXTENSIONS_HUGEPAGES)}"
)
if not self._hugepages_are_available():
self.unit.status = BlockedStatus("Not enough HugePages available")
return
if invalid_configs := self._get_invalid_configs():
self.unit.status = BlockedStatus(
f"The following configurations are not valid: {invalid_configs}"
Expand Down Expand Up @@ -788,6 +800,25 @@ def _get_cpu_extensions() -> list[str]:
del cpu_flags[0]
return cpu_flags

def _cpu_is_compatible_for_hugepages(self) -> bool:
    """Check whether the CPU offers the extensions required for HugePages.

    Returns:
        bool: Whether every entry of REQUIRED_CPU_EXTENSIONS_HUGEPAGES is among
            the extensions returned by ``_get_cpu_extensions``.
    """
    # Query the CPU extensions once instead of once per required extension.
    available_extensions = set(self._get_cpu_extensions())
    return available_extensions.issuperset(REQUIRED_CPU_EXTENSIONS_HUGEPAGES)

@staticmethod
def _hugepages_are_available() -> bool:
    """Check whether HugePages are available in the K8S nodes.

    Every node returned by the Kubernetes API must report at least 2Gi of
    allocatable ``hugepages-1Gi``. Quantities are converted to a number of
    bytes before being compared: a plain string comparison would rank
    "10Gi" below "2Gi" and "512Mi" above it.

    Returns:
        bool: Whether HugePages are available in the K8S nodes. False when no
            node is listed.
    """
    required_quantity = "2Gi"
    multipliers = {
        "Ki": 2**10,
        "Mi": 2**20,
        "Gi": 2**30,
        "Ti": 2**40,
        "Pi": 2**50,
        "Ei": 2**60,
        "k": 1e3,
        "M": 1e6,
        "G": 1e9,
        "T": 1e12,
        "P": 1e15,
        "E": 1e18,
    }

    def to_bytes(quantity: str) -> float:
        """Convert a Kubernetes quantity string such as "2Gi" into bytes."""
        text = str(quantity).strip()
        try:
            for suffix, multiplier in multipliers.items():
                if text.endswith(suffix):
                    return float(text[: -len(suffix)]) * multiplier
            return float(text)
        except ValueError:
            # An unparsable quantity counts as "no HugePages": the charm then
            # reports the shortage instead of crashing the hook.
            return 0.0

    client = Client()
    # Materialise the listing: a lazy iterator is always truthy, which would
    # make the emptiness check below a no-op.
    nodes = list(client.list(Node))
    if not nodes:
        return False
    minimum = to_bytes(required_quantity)
    for node in nodes:
        # Both `status` and `allocatable` are optional on the Node model.
        allocatable = (node.status.allocatable if node.status else None) or {}
        if to_bytes(allocatable.get("hugepages-1Gi", "0")) < minimum:
            return False
    return True

def _get_access_nad_config(self) -> Dict[Any, Any]:
"""Get access interface NAD config.
Expand Down
80 changes: 79 additions & 1 deletion tests/unit/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from charms.kubernetes_charm_libraries.v0.multus import ( # type: ignore[import]
NetworkAttachmentDefinition,
)
from lightkube.models.core_v1 import ServicePort, ServiceSpec
from lightkube.models.core_v1 import Node, NodeStatus, ServicePort, ServiceSpec
from lightkube.models.meta_v1 import ObjectMeta
from lightkube.resources.core_v1 import Service
from ops import testing
Expand Down Expand Up @@ -770,6 +770,84 @@ def test_when_remove_then_external_service_is_deleted(self, patch_client):
namespace=self.namespace,
)

@patch("charm.check_output")
@patch("charm.Client", new=Mock)
@patch(f"{HUGEPAGES_LIBRARY_PATH}.KubernetesHugePagesPatchCharmLib.is_patched")
def test_given_cpu_not_supporting_required_hugepages_instructions_when_hugepages_enabled_then_incompatiblecpuerror_is_raised(  # noqa: E501
    self, patch_hugepages_is_patched, patched_check_output
):
    """Enabling HugePages raises IncompatibleCPUError when `pdpe1gb` is not in the CPU flags."""
    # The reported CPU flags deliberately lack "pdpe1gb".
    patched_check_output.return_value = b"Flags: ssse3 fma cx16 rdrand"
    patch_hugepages_is_patched.return_value = False
    hugepages_config = {"enable-hugepages": True}

    with self.assertRaises(IncompatibleCPUError):
        self.harness.update_config(key_values=hugepages_config)

@patch("charm.check_output")
@patch("lightkube.core.client.GenericSyncClient", new=Mock)
@patch("lightkube.core.client.Client.list")
@patch(f"{HUGEPAGES_LIBRARY_PATH}.KubernetesHugePagesPatchCharmLib.is_patched")
def test_given_cpu_supporting_required_hugepages_instructions_when_hugepages_enabled_then_charm_goes_to_waiting_status(  # noqa: E501
    self, patch_hugepages_is_patched, patch_list, patched_check_output
):
    """With a compatible CPU and 3Gi of HugePages, the unit waits for the bessd container."""
    node_with_hugepages = Node(status=NodeStatus(allocatable={"hugepages-1Gi": "3Gi"}))
    # First `Client.list` call yields the node, the second one an empty list.
    patch_list.side_effect = [[node_with_hugepages], []]
    patched_check_output.return_value = b"Flags: avx2 ssse3 fma cx16 rdrand pdpe1gb"
    patch_hugepages_is_patched.return_value = True

    self.harness.update_config(key_values={"enable-hugepages": True})

    expected_status = WaitingStatus("Waiting for bessd container to be ready")
    self.assertEqual(self.harness.model.unit.status, expected_status)

@patch("charm.check_output")
@patch("lightkube.core.client.GenericSyncClient", new=Mock)
@patch("lightkube.core.client.Client.list")
@patch(f"{HUGEPAGES_LIBRARY_PATH}.KubernetesHugePagesPatchCharmLib.is_patched")
def test_given_cpu_supporting_required_hugepages_instructions_and_not_available_hugepages_when_hugepages_enabled_then_charm_goes_to_blocked_status(  # noqa: E501
    self, patch_hugepages_is_patched, patch_list, patched_check_output
):
    """With a compatible CPU but only 1Gi of HugePages, the unit goes to Blocked."""
    node_short_on_hugepages = Node(status=NodeStatus(allocatable={"hugepages-1Gi": "1Gi"}))
    patch_list.return_value = [node_short_on_hugepages]
    patched_check_output.return_value = b"Flags: avx2 ssse3 fma cx16 rdrand pdpe1gb"
    patch_hugepages_is_patched.return_value = False

    self.harness.update_config(key_values={"enable-hugepages": True})

    expected_status = BlockedStatus("Not enough HugePages available")
    self.assertEqual(self.harness.model.unit.status, expected_status)

@patch("charm.check_output")
@patch("lightkube.core.client.GenericSyncClient", new=Mock)
@patch("lightkube.core.client.Client.list")
@patch(f"{HUGEPAGES_LIBRARY_PATH}.KubernetesHugePagesPatchCharmLib.is_patched")
def test_given_hugepages_not_available_then_hugepages_available_when_update_status_then_charm_goes_to_waiting_status(  # noqa: E501
    self, patch_hugepages_is_patched, patch_list, patched_check_output
):
    """The unit leaves Blocked on update-status once the node reports 2Gi of HugePages."""
    node_before = Node(status=NodeStatus(allocatable={"hugepages-1Gi": "1Gi"}))
    node_after = Node(status=NodeStatus(allocatable={"hugepages-1Gi": "2Gi"}))
    # Successive `Client.list` results: 1Gi node, then 2Gi node, then an empty list.
    patch_list.side_effect = [[node_before], [node_after], []]
    patched_check_output.return_value = b"Flags: avx2 ssse3 fma cx16 rdrand pdpe1gb"
    patch_hugepages_is_patched.return_value = True

    self.harness.update_config(key_values={"enable-hugepages": True})

    blocked_status = BlockedStatus("Not enough HugePages available")
    self.assertEqual(self.harness.model.unit.status, blocked_status)

    self.harness.charm.on.update_status.emit()

    waiting_status = WaitingStatus("Waiting for bessd container to be ready")
    self.assertEqual(self.harness.model.unit.status, waiting_status)

@patch(f"{HUGEPAGES_LIBRARY_PATH}.KubernetesHugePagesPatchCharmLib.is_patched")
def test_given_default_config_when_network_attachment_definitions_from_config_is_called_then_no_interface_mtu_specified_in_nad( # noqa: E501
self,
Expand Down

0 comments on commit 6649241

Please sign in to comment.