Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Job manager #9

Open
wants to merge 10 commits into
base: shifter
Choose a base branch
from
Open
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ RUN useradd -u $VC3USERID vc3user
RUN apt-get update && \
apt-get install -y vim-tiny && \
pip install --upgrade pip && \
pip install htcondor retrying
pip install htcondor==8.9.1 retrying

COPY CHANGES.rst README.rst setup.py /code/
COPY reana_job_controller/version.py /code/reana_job_controller/
Expand Down
30 changes: 19 additions & 11 deletions files/job_wrapper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ populate(){
if [ ! -x "$_CONDOR_SCRATCH_DIR/parrot_static_run" ]; then get_parrot; fi
mkdir -p "$_CONDOR_SCRATCH_DIR/$REANA_WORKFLOW_DIR"
local parent="$(dirname $REANA_WORKFLOW_DIR)"
$_CONDOR_SCRATCH_DIR/parrot_static_run -T 30 cp --no-clobber -r "/chirp/CONDOR/$REANA_WORKFLOW_DIR" "$_CONDOR_SCRATCH_DIR/$parent"
$_CONDOR_SCRATCH_DIR/parrot_static_run -T 4 cp --no-clobber -r "/chirp/CONDOR/$REANA_WORKFLOW_DIR" "$_CONDOR_SCRATCH_DIR/$parent"
}

find_module(){
Expand All @@ -51,7 +51,7 @@ find_module(){
find_container(){
declare -a search_list=("singularity" "shifter")
declare -a found_list=()
local default="singularity"
local default="shifter"
local cont_found=false


Expand Down Expand Up @@ -101,20 +101,24 @@ find_container(){
# Set up the command-line arguments for singularity.
# Reads:  SCRATCH, _CONDOR_SCRATCH_DIR, REANA_WORKFLOW_DIR, DOCKER_IMG
# Writes: SINGULARITY_CACHEDIR (exported), CONTAINER_ENV, CNTR_ARGUMENTS
#         (none are declared local, so all are visible to the caller)
# Output: prints an "exec -B ..." argument line to stdout
# NOTE(review): this listing appears to show removed and added diff lines
# together (the export/CONTAINER_ENV pairs and the echo/CNTR_ARGUMENTS pair
# bind the workflow directory to different targets) -- confirm which lines
# are live before relying on this description.
setup_singularity(){
# TODO: Cleanup calling of this function

# Send cache to $SCRATCH or to the condor scratch directory
# otherwise
if [ -z "$SCRATCH" ]; then
export SINGULARITY_CACHEDIR="$_CONDOR_SCRATCH_DIR"
# The escaped \$ keeps the variable reference literal: CONTAINER_ENV holds
# the unexpanded assignment text, not the directory path itself.
CONTAINER_ENV="SINGULARITY_CACHEDIR=\"\$_CONDOR_SCRATCH_DIR\""
else
export SINGULARITY_CACHEDIR="$SCRATCH"
# Same literal-assignment form as above, pointing at $SCRATCH instead.
CONTAINER_ENV="SINGULARITY_CACHEDIR=\"\$SCRATCH\""
fi
# Binds the workflow directory to /reana inside the container.
echo "exec -B $REANA_WORKFLOW_DIR:/reana docker://$DOCKER_IMG"

# Binds ./$REANA_WORKFLOW_DIR (relative to the current directory) to the
# same $REANA_WORKFLOW_DIR path inside the container.
CNTR_ARGUMENTS="exec -B ./$REANA_WORKFLOW_DIR:$REANA_WORKFLOW_DIR docker://$DOCKER_IMG"

}

# Set up shifter: pull the docker_img into the shifter image gateway and
# print the required arguments to stdout so the caller can capture them.
setup_shifter(){
#TODO: Cleanup calling of this function
# Check for shifterimg
if [[ ! $(command -v shifterimg 2>/dev/null) ]]; then
echo "Error: shifterimg not found..." >&2
Expand All @@ -127,8 +131,8 @@ setup_shifter(){
exit 127
fi

# Put arguments into stdout to collect
echo "--image=docker:${DOCKER_IMG} --volume=${REANA_WORKFLOW_DIR}:/reana -- "
# Put arguments into stdout to collect.
echo "--image=docker:${DOCKER_IMG} --volume=$(pwd -P)/reana:/reana -- "
}

# Setting up the arguments to pass to a container technology.
Expand All @@ -141,9 +145,9 @@ setup_container(){
local container=$(basename "$CONTAINER_PATH")

if [ "$container" == "singularity" ]; then
cntr_arguments=$(setup_singularity)
setup_singularity
elif [ "$container" == "shifter" ]; then
cntr_arguments=$(setup_shifter)
CNTR_ARGUMENTS=$(setup_shifter)
else
echo "Error: Unrecognized container: $(basename $CONTAINER_PATH)" >&2
exit 127
Expand Down Expand Up @@ -171,7 +175,11 @@ setup_container
# temporary wrapper file named tmpjob.
tmpjob=$(mktemp -p .)
chmod +x $tmpjob
echo "$CONTAINER_PATH" "$cntr_arguments" "bash -c \"cd /reana; ${@:3}\"" > $tmpjob
if command -v aprun; then
echo -n "aprun -b -n 1 -- " > $tmpjob
fi

echo "$CONTAINER_ENV" "$CONTAINER_PATH" "$CNTR_ARGUMENTS" "${@:3} " >> $tmpjob
bash $tmpjob
res=$?
rm $tmpjob
Expand Down Expand Up @@ -202,6 +210,6 @@ fi

parent="$(dirname $REANA_WORKFLOW_DIR)"
# TODO: Check for parrot exit code and propagate it in case of errors.
./parrot_static_run -T 30 cp --no-clobber -r "$_CONDOR_SCRATCH_DIR/$REANA_WORKFLOW_DIR" "/chirp/CONDOR/$parent"
./parrot_static_run -T 4 cp --no-clobber -r "$_CONDOR_SCRATCH_DIR/$REANA_WORKFLOW_DIR" "/chirp/CONDOR/$parent"

exit $res
1 change: 1 addition & 0 deletions reana_job_controller/htcondor_job_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ def execute(self):
for key, value in self.env_vars.items():
job_env += '; {0}={1}'.format(key, value)
sub['environment'] = job_env
sub['on_exit_remove'] = '(ExitBySignal == False) && ((ExitCode == 0) || (ExitCode !=0 && NumJobStarts > {0}))'.format(MAX_JOB_RESTARTS)
clusterid = submit(self.schedd, sub)
logging.warning("Submitting job clusterid: {0}".format(clusterid))
return str(clusterid)
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,11 @@
'apispec>=0.21.0,<0.40',
'Flask>=0.11',
'kubernetes>=9.0.0',
'marshmallow>=2.13',
'marshmallow>2.13.0,<=2.20.1',
'reana-commons[kubernetes]>=0.5.0,<0.6.0',
'reana-db>=0.5.0,<0.6.0',
'urllib3<1.25,>=1.21.1',
'htcondor',
'htcondor==8.9.1',
]

packages = find_packages()
Expand Down