Skip to content

Commit

Permalink
the waiter is buggy for a nodegroup
Browse files Browse the repository at this point in the history
The waiter will continue waiting even when the nodes are ready.
I had pointed this out and wrote a custom function, and it
got replaced by the waiter at some point. :/ This change will
revert back to the custom function, but I need to test it first.

Signed-off-by: vsoch <[email protected]>
  • Loading branch information
vsoch committed Dec 5, 2023
1 parent 27a3ff1 commit 9d068ce
Showing 1 changed file with 16 additions and 5 deletions.
21 changes: 16 additions & 5 deletions kubescaler/scaler/aws/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,22 @@ def _create_nodegroup(self, node_group, nodegroup_name):
if node_group is None:
raise ValueError("Could not create nodegroup")

# DO NOT USE THE WAITER or uncomment, it is buggy and does not work.
# self.wait_for_nodegroup(nodegroup_name)
self.wait_for_nodes()

# Retrieve the same metadata if we had retrieved it
return self.eks.describe_nodegroup(
clusterName=self.cluster_name, nodegroupName=nodegroup_name
)

def wait_for_nodegroup(self, nodegroup_name):
"""
Wait for the nodegroup to create.
In practice, this waiter is very buggy. It's easier sometimes
to query the nodes and check their status is Ready.
"""
try:
print(f"Waiting for {nodegroup_name} nodegroup...")
waiter = self.eks.get_waiter("nodegroup_active")
Expand All @@ -788,11 +804,6 @@ def _create_nodegroup(self, node_group, nodegroup_name):
print(f"Waiting for nodegroup creation exceeded wait time: {e}")
time.sleep(180)

# Retrieve the same metadata if we had retrieved it
return self.eks.describe_nodegroup(
clusterName=self.cluster_name, nodegroupName=nodegroup_name
)

def delete_stack(self, stack_name):
"""
Delete a stack and wait for it to be deleted
Expand Down

0 comments on commit 9d068ce

Please sign in to comment.