Skip to content

Commit

Permalink
Add resource leaks integ test
Browse files Browse the repository at this point in the history
Signed-off-by: Judy Ng <[email protected]>
  • Loading branch information
judysng committed Jan 12, 2024
1 parent 70ebb8c commit fa13db4
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 0 deletions.
7 changes: 7 additions & 0 deletions tests/integration-tests/configs/develop.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,10 @@ test-suites:
- regions: ["eu-west-1"]
oss: ["alinux2"]
schedulers: ["slurm"]
# Resource-leak suite: a single dimension combination (one region, instance type, OS and scheduler).
resource_leaks:
  test_resource_leaks.py::test_resource_leaks:
    dimensions:
      - regions: [ "us-east-1" ]
        instances: [ "t2.micro" ]
        oss: [ "alinux2" ]
        schedulers: [ "slurm" ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import logging

import pytest
from assertpy import assert_that
from remote_command_executor import RemoteCommandExecutor
from utils import get_compute_nodes_instance_ips

from tests.common.assertions import assert_head_node_is_running


def _count_open_files(remote_command_executor, lsof_cmd):
    """Run ``lsof_cmd`` on the cluster and return the line count it prints as an int.

    The command is expected to end in ``wc -l``, so its stdout is a single number,
    possibly surrounded by whitespace.

    Raises:
        ValueError: if the command output is not an integer.
    """
    raw_output = remote_command_executor.run_remote_command(lsof_cmd).stdout
    # Strip before parsing so a trailing newline or padding never affects the comparison.
    return int(raw_output.strip())


@pytest.mark.usefixtures("instance", "os", "scheduler")
def test_resource_leaks(
    region,
    pcluster_config_reader,
    s3_bucket_factory,
    clusters_factory,
    test_datadir,
    scheduler_commands_factory,
):
    """Check that computemgtd on a compute node does not change its number of open files over time.

    The test creates a cluster, counts the lines reported by ``lsof`` for the
    computemgtd process on the first compute node, then re-counts every 5 minutes
    for 30 minutes. The count taken after the last sleep must equal the initial one.
    """
    total_sleep_time = 1800  # 30 minutes
    loop_sleep_time = 300  # 5 minutes

    cluster_config = pcluster_config_reader()
    cluster = clusters_factory(cluster_config)
    assert_head_node_is_running(region, cluster)
    remote_command_executor = RemoteCommandExecutor(cluster)

    # Commands are sent through the executor and hop to the compute node via ssh.
    compute_node_instance_ip = get_compute_nodes_instance_ips(cluster.name, region)[0]
    lsof_cmd = f"ssh -q {compute_node_instance_ip} 'sudo lsof -p $(pgrep computemgtd) | wc -l'"
    sleep_cmd = f"ssh -q {compute_node_instance_ip} 'sleep {loop_sleep_time}'"

    logging.info("Checking the number of file descriptors...")
    initial_no_file_descs = _count_open_files(remote_command_executor, lsof_cmd)
    logging.info("Initial number of file descriptors: %s", initial_no_file_descs)

    # NOTE(review): if pgrep matches no process the pipeline presumably still exits
    # successfully with a count of 0, which would let the final comparison pass
    # without measuring anything. Require a non-empty baseline to rule that out.
    assert_that(initial_no_file_descs).described_as(
        "lsof line count for computemgtd (0 suggests the process was not found)"
    ).is_greater_than(0)

    curr_no_file_descs = initial_no_file_descs
    for _ in range(total_sleep_time // loop_sleep_time):
        remote_command_executor.run_remote_command(sleep_cmd)
        curr_no_file_descs = _count_open_files(remote_command_executor, lsof_cmd)
        logging.info("Number of file descriptors after sleeping: %s", curr_no_file_descs)

    # Actual value first, expected second, so the failure message reads correctly.
    assert_that(curr_no_file_descs).described_as(
        "computemgtd open file count after sleeping vs. initial count"
    ).is_equal_to(initial_no_file_descs)
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Cluster configuration template for the resource leaks test.
# Values in {{ ... }} are template placeholders filled in when the config is rendered.
Image:
  Os: {{ os }}
HeadNode:
  InstanceType: {{ instance }}
  Networking:
    SubnetId: {{ public_subnet_id }}
  Ssh:
    KeyName: {{ key_name }}
  Imds:
    Secured: {{ imds_secured }}
Scheduling:
  Scheduler: slurm
  SlurmQueues:
    - Name: queue-1
      ComputeResources:
        - Name: compute-resource-1
          Instances:
            - InstanceType: c5.large
          # MinCount equals MaxCount: exactly one compute node.
          MinCount: 1
          MaxCount: 1
      Networking:
        SubnetIds:
          - {{ private_subnet_id }}

0 comments on commit fa13db4

Please sign in to comment.