Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix scheduler integration tests #517

Merged
merged 2 commits into from
Feb 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 14 additions & 20 deletions python/lib/scheduler/dmod/test/it_RedisBackedJobManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ def test_save_job_1_c(self):
self._job_manager.save_job(job)
original_cpu_count = job.cpu_count
# Then update the record
job._cpu_count += 100
job.cpu_count += 100
self._job_manager.save_job(job)
updated_cpu_count = job.cpu_count
# Get the updated record
Expand Down Expand Up @@ -419,16 +419,16 @@ def test_request_allocations_1_c(self):
# Should be one allocation for single-node
self.assertTrue(isinstance(created_job.allocations, tuple))

# Test request_allocations for a job with a single-node allocation paradigm gets back a tuple with one node
def test_request_allocations_1_d(self):
    """ Test request_allocations for job w/ single-node paradigm gets back tuple with only one allocation host. """
    example_index = 0
    expected_job, created_job = self._exec_job_manager_create_from_expected(example_index)
    # We will need to adjust the status
    created_job.status = JobStatus(JobExecPhase.MODEL_EXEC, JobExecStep.AWAITING_ALLOCATION)
    self._job_manager.request_allocations(created_job)
    allocations = created_job.allocations
    # Should be only one unique host value among all allocations for single-node.
    # Count distinct hostnames rather than allocation objects, so the check does not
    # depend on how many allocation entries the manager creates on that one host.
    self.assertEqual(1, len({alloc.hostname for alloc in allocations}))

# Test request_allocations for a job with a single-node allocation paradigm gets back a proper allocation of cpus
def test_request_allocations_1_e(self):
Expand All @@ -438,8 +438,7 @@ def test_request_allocations_1_e(self):
created_job.status = JobStatus(JobExecPhase.MODEL_EXEC, JobExecStep.AWAITING_ALLOCATION)
self._job_manager.request_allocations(created_job)
allocations = created_job.allocations
# Should be one allocation for single-node, with right number of cpus
self.assertEqual(allocations[0].cpu_count, created_job.cpu_count)
self.assertEqual(created_job.cpu_count, sum(alloc.cpu_count for alloc in allocations))

# Test request_allocations for a job with a single-node allocation paradigm gets back a proper allocation of memory
def test_request_allocations_1_f(self):
Expand All @@ -449,8 +448,7 @@ def test_request_allocations_1_f(self):
created_job.status = JobStatus(JobExecPhase.MODEL_EXEC, JobExecStep.AWAITING_ALLOCATION)
self._job_manager.request_allocations(created_job)
allocations = created_job.allocations
# Should be one allocation for single-node, with right number of cpus
self.assertEqual(allocations[0].memory, created_job.memory_size)
self.assertEqual(created_job.memory_size, sum(alloc.memory for alloc in allocations))

# Test request_allocations for a job with a fill-nodes allocation paradigm succeeds
def test_request_allocations_2_a(self):
Expand All @@ -473,14 +471,15 @@ def test_request_allocations_2_b(self):

# Test request_allocations for a job with a fill-nodes allocation paradigm gets back a tuple with one node
def test_request_allocations_2_c(self):
    """ Test request_allocations for job w/ fill-nodes gets back allocations on same host when they all fit. """
    example_index = 2
    expected_job, created_job = self._exec_job_manager_create_from_expected(example_index)
    # We will need to adjust the status
    created_job.status = JobStatus(JobExecPhase.MODEL_EXEC, JobExecStep.AWAITING_ALLOCATION)
    self._job_manager.request_allocations(created_job)
    allocations = created_job.allocations
    # Should be only one unique host value among all allocations for fill-nodes in this
    # example (they should all fit). Count distinct hostnames rather than allocation
    # objects, so the check does not depend on how many allocation entries are created.
    self.assertEqual(1, len({alloc.hostname for alloc in allocations}))

# Test request_allocations for a job with a fill-nodes allocation paradigm gets back a proper allocation of cpus
def test_request_allocations_2_d(self):
Expand All @@ -490,8 +489,7 @@ def test_request_allocations_2_d(self):
created_job.status = JobStatus(JobExecPhase.MODEL_EXEC, JobExecStep.AWAITING_ALLOCATION)
self._job_manager.request_allocations(created_job)
allocations = created_job.allocations
# Should be one allocation for fill-nodes, with right number of cpus
self.assertEqual(allocations[0].cpu_count, created_job.cpu_count)
self.assertEqual(created_job.cpu_count, sum(alloc.cpu_count for alloc in allocations))

# Test request_allocations for a job with a fill-nodes allocation paradigm gets back a proper allocation of memory
def test_request_allocations_2_e(self):
Expand All @@ -501,8 +499,7 @@ def test_request_allocations_2_e(self):
created_job.status = JobStatus(JobExecPhase.MODEL_EXEC, JobExecStep.AWAITING_ALLOCATION)
self._job_manager.request_allocations(created_job)
allocations = created_job.allocations
# Should be one allocation for fill-nodes, with right number of cpus
self.assertEqual(allocations[0].memory, created_job.memory_size)
self.assertEqual(created_job.memory_size, sum(alloc.memory for alloc in allocations))

# Test request_allocations for a job with a round-robin allocation paradigm succeeds
def test_request_allocations_3_a(self):
Expand Down Expand Up @@ -530,7 +527,7 @@ def test_request_allocations_3_c(self):
created_job.status = JobStatus(JobExecPhase.MODEL_EXEC, JobExecStep.AWAITING_ALLOCATION)
self._job_manager.request_allocations(created_job)
allocations = created_job.allocations
self.assertEqual(len(allocations), 3)
self.assertEqual(3, len(set(alloc.hostname for alloc in allocations)))

# Test request_allocations for a job with a round-robin allocation paradigm gets back a proper allocation of cpus
def test_request_allocations_3_d(self):
Expand All @@ -540,10 +537,7 @@ def test_request_allocations_3_d(self):
created_job.status = JobStatus(JobExecPhase.MODEL_EXEC, JobExecStep.AWAITING_ALLOCATION)
self._job_manager.request_allocations(created_job)
allocations = created_job.allocations
cpu_total = 0
for a in allocations:
cpu_total += a.cpu_count
self.assertEqual(cpu_total, created_job.cpu_count)
self.assertEqual(created_job.cpu_count, sum(alloc.cpu_count for alloc in allocations))

# Test request_allocations for a job with a round-robin allocation paradigm gets back a proper allocation of memory
def test_request_allocations_3_e(self):
Expand All @@ -559,7 +553,7 @@ def test_request_allocations_3_e(self):
#for a in allocations:
# mem_total += a.memory
mem_total = allocations[0].memory
self.assertEqual(mem_total, created_job.memory_size)
self.assertEqual(created_job.memory_size, sum(alloc.memory for alloc in allocations))

def test_release_allocations_1_a(self):
example_index = 0
Expand Down
20 changes: 15 additions & 5 deletions python/lib/scheduler/dmod/test/setup_it_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,25 @@ it_redis_startup()

do_setup()
{
    # Prepare the environment for the integration tests: initialise the Docker networks
    # the tests rely on, then start the Redis container.
    #
    # Reads from the environment (typically supplied by .test_env):
    #   DOCKER_MPI_NET_SUBNET, DOCKER_MPI_NET_GATEWAY            - required
    #   DOCKER_REQUESTS_NET_SUBNET, DOCKER_REQUESTS_NET_GATEWAY  - required
    #   DOCKER_MPI_NET_NAME, DOCKER_REQUESTS_NET_NAME            - optional, defaulted below
    #   DOCKER_MPI_NET_DRIVER, DOCKER_REQUESTS_NET_DRIVER        - optional, defaulted below
    #   DOCKER_MPI_NET_VXLAN_ID                                  - optional, selects the branch below
    # A missing required value aborts via the ${VAR:?message} expansion.
    # NOTE(review): docker_dev_init_swarm_network is not defined in this view; it is
    # presumably provided by a sourced helper script - confirm its argument order.

    # Make sure the necessary Docker networks have been set up, as the tests will fail otherwise
    if [ -z "${DOCKER_MPI_NET_VXLAN_ID:-}" ]; then
        # No VXLAN id configured: pass only the driver, defaulting to overlay
        docker_dev_init_swarm_network "${DOCKER_MPI_NET_NAME:=mpi-net}" \
            "${DOCKER_MPI_NET_SUBNET:?Need to set MPI net subnet value for testing within .test_env}" \
            "${DOCKER_MPI_NET_GATEWAY:?Need to set MPI net gateway value for testing within .test_env}" \
            "${DOCKER_MPI_NET_DRIVER:-overlay}"
    else
        # VXLAN id configured: default the driver to macvlan and pass the id as a further argument
        docker_dev_init_swarm_network "${DOCKER_MPI_NET_NAME:=mpi-net}" \
            "${DOCKER_MPI_NET_SUBNET:?Need to set MPI net subnet value for testing within .test_env}" \
            "${DOCKER_MPI_NET_GATEWAY:?Need to set MPI net gateway value for testing within .test_env}" \
            "${DOCKER_MPI_NET_DRIVER:=macvlan}" \
            "${DOCKER_MPI_NET_VXLAN_ID}"
    fi
    # Then the requests-net
    docker_dev_init_swarm_network "${DOCKER_REQUESTS_NET_NAME:=requests-net}" \
        "${DOCKER_REQUESTS_NET_SUBNET:?Need to set requests net subnet value for testing within .test_env}" \
        "${DOCKER_REQUESTS_NET_GATEWAY:?Need to set requests net gateway value for testing within .test_env}" \
        "${DOCKER_REQUESTS_NET_DRIVER:-overlay}"
    # Need Docker container with Redis instance
    it_redis_startup
}
Expand Down
Loading