Skip to content

Commit

Permalink
Fixing ngen worker entrypoint and image scripts.
Browse files Browse the repository at this point in the history
  • Loading branch information
robertbartel authored and aaraney committed May 21, 2024
1 parent 1686247 commit 26eae74
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 19 deletions.
16 changes: 8 additions & 8 deletions docker/main/ngen/funcs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ init_script_mpi_vars()

# Set the canonical install paths for the ngen executables inside the
# worker image.  Globals written: NGEN_SERIAL_EXECUTABLE,
# NGEN_PARALLEL_EXECUTABLE, NGEN_EXECUTABLE.
init_ngen_executable_paths()
{
    NGEN_SERIAL_EXECUTABLE="/dmod/bin/ngen-serial"
    NGEN_PARALLEL_EXECUTABLE="/dmod/bin/ngen-parallel"
    # This will be symlinked to the parallel one currently
    NGEN_EXECUTABLE="/dmod/bin/ngen"
}

check_for_dataset_dir()
Expand Down Expand Up @@ -154,27 +154,27 @@ ngen_sanity_checks_and_derived_init()
# Run some sanity checks
# Use complement of valid range like this in a few places to catch non-integer values
if ! [ "${MPI_NODE_COUNT:-1}" -gt 0 ] 2>/dev/null; then
echo "Error: invalid value '${MPI_NODE_COUNT}' given for MPI node count" > 2>&1
>&2 echo "Error: invalid value '${MPI_NODE_COUNT}' given for MPI node count"
exit 1
fi
if ! [ "${WORKER_INDEX:-0}" -ge 0 ] 2>/dev/null; then
echo "Error: invalid value '${WORKER_INDEX}' given for MPI worker index/rank" > 2>&1
>&2 echo "Error: invalid value '${WORKER_INDEX}' given for MPI worker index/rank"
exit 1
fi

# Assume that any of these being present implies the job will run via multiple MPI processes
if [ -n "${MPI_NODE_COUNT:-}" ] || [ -n "${MPI_HOST_STRING:-}" ] || [ -n "${WORKER_INDEX:-}" ]; then
# ... and as such, they all must be present
if [ -z "${MPI_HOST_STRING:-}" ]; then
echo "Error: MPI host string not provided for job that will utilize MPI" > 2>&1
>&2 echo "Error: MPI host string not provided for job that will utilize MPI"
exit 1
fi
if [ -z "${MPI_NODE_COUNT:-}" ]; then
echo "Error: MPI node count not provided for job that will utilize MPI" > 2>&1
>&2 echo "Error: MPI node count not provided for job that will utilize MPI"
exit 1
fi
if [ -z "${WORKER_INDEX:-}" ]; then
echo "Error: MPI worker index not provided for job that will utilize MPI" > 2>&1
>&2 echo "Error: MPI worker index not provided for job that will utilize MPI"
exit 1
fi
# Also, require a partitioning config for any MPI job
Expand Down
22 changes: 11 additions & 11 deletions docker/main/ngen/ngen_entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,43 +4,43 @@
# Parse worker CLI options.  Each recognized option consumes one value
# argument (required: "${2:?}" aborts with an error if it is missing or
# empty).  Values are set with 'declare -x' so they are exported to
# sourced helper scripts and any child processes (e.g. mpirun).
# Unrecognized arguments are silently skipped (existing behavior).
while [ ${#} -gt 0 ]; do
    case "${1}" in
        --config-dataset)
            declare -x CONFIG_DATASET_NAME="${2:?}"
            shift
            ;;
        --host-string)
            declare -x MPI_HOST_STRING="${2:?}"
            shift
            ;;
        --hydrofabric-dataset)
            declare -x HYDROFABRIC_DATASET_NAME="${2:?}"
            shift
            ;;
        --job-id)
            declare -x JOB_ID="${2:?}"
            shift
            ;;
        --node-count)
            declare -x MPI_NODE_COUNT="${2:?}"
            shift
            ;;
        --output-dataset)
            declare -x OUTPUT_DATASET_NAME="${2:?}"
            shift
            ;;
        --partition-dataset)
            declare -x PARTITION_DATASET_NAME="${2:?}"
            shift
            ;;
        --worker-index)
            declare -x WORKER_INDEX="${2:?}"
            shift
            ;;
    esac
    # Consume the option name itself (the value, when present, was
    # consumed by the 'shift' inside the matching case arm).
    shift
done

# Get some universally applicable functions and constants
source ./funcs.sh
source /ngen/funcs.sh

ngen_sanity_checks_and_derived_init
init_script_mpi_vars
Expand All @@ -49,8 +49,8 @@ init_ngen_executable_paths
# Move to the output dataset mounted directory
cd ${OUTPUT_DATASET_DIR:?Output dataset directory not defined}
#Needed for routing
if [ ! -e /dmod/dataset/experiment_output ]; then
ln -s $(pwd) /dmod/dataset/experiment_output
if [ ! -e /dmod/datasets/linked_job_output ]; then
ln -s $(pwd) /dmod/datasets/linked_job_output
fi

# We can allow worker index to not be supplied when executing serially
Expand Down

0 comments on commit 26eae74

Please sign in to comment.