forked from ekfriis/farmout
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdag-wrapper.sh
executable file
·74 lines (50 loc) · 2.03 KB
/
dag-wrapper.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/bin/sh
#
# dag-wrapper.sh -- mail the user a summary of a finished farmout DAG workflow.
#
# Usage: dag-wrapper.sh JOBID RETURN [NAME=VALUE ...]
#   JOBID       first positional argument (kept in $JOBID)
#   RETURN      return value of the DAG workflow; reported in the mail and
#               used as this script's own exit status
#   NAME=VALUE  exported into the environment; the script reads
#               OUTPUT_DAG_FILE and OUTPUT_DIR from these pairs
JOBID=$1
RETURN=$2
shift 2

# The rest of the arguments are name=value pairs.  Export them one at a time
# and quoted so that values containing whitespace or glob characters survive
# intact (an unquoted `export $*` would word-split and glob-expand them).
for pair in "$@"; do
  case "$pair" in
    [A-Za-z_]*)
      # shellcheck disable=SC2163  # we do mean to export the NAME=VALUE in $pair
      export "$pair"
      ;;
    *)
      # Not a plausible variable name; exporting it would be an error.
      echo "dag-wrapper.sh: ignoring malformed argument: $pair" >&2
      ;;
  esac
done

# Scratch directory in which the notification mail is assembled.  It lives
# under $TMPDIR (or /tmp) rather than the current directory, which may not be
# writable.  Without it no mail can be built, so give up early but still
# report the DAG's own status to the caller.
tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/farmout-dag-mail-XXXXXXXX") || {
  echo "dag-wrapper.sh: cannot create temporary directory" >&2
  exit "${RETURN:-1}"
}

# Remove the scratch directory however the script ends.  Single quotes defer
# the expansion of $tmpdir until the trap runs, and the path stays quoted.
# ERR is deliberately not trapped: it is not a POSIX sh condition (dash
# rejects it), and it would delete the directory after any failing command.
trap 'rm -rf -- "$tmpdir"' EXIT
# Turn common termination signals into a normal exit so the EXIT trap runs.
trap 'exit 1' HUP INT TERM
# Write the notification body to "$tmpdir/mail".  The here-document delimiter
# is left unquoted on purpose: the ${...} references are expanded while the
# file is written, and "\$" produces a literal dollar sign.
# NOTE(review): the contact address near the end of the message reads
# "[email protected]", which looks like web e-mail obfuscation residue --
# confirm and restore the real address.
dag_host=$(hostname)
cat > "$tmpdir/mail" <<EOF
Hello-
The following farmout DAG workflow on ${dag_host} returned ${RETURN}:
${OUTPUT_DAG_FILE}
You can find the final output in:
${OUTPUT_DIR}
The status of all of the jobs in this workflow can be found here:
${OUTPUT_DAG_FILE}.status
A list of files with the error output of your failed jobs can be found at the
bottom of this message.
If your DAG workflow exited with a status other than 0, you can resubmit only
the jobs that failed (and their dependencies) with the following command:
\$ farmoutAnalysisJobs --rescue-dag-file ${OUTPUT_DAG_FILE}
If some of the failed jobs are unrecoverable, you'll need to edit the DAG
workflow file and mark the unrecoverable jobs 'NOOP'. If the jobs that depend on
the unrecoverable job require its output, you may have to modify the dependent
jobs' submit files as well.
For more information about DAG workflows, please see the Condor manual:
http://www.cs.wisc.edu/condor/manual/v7.6/2_10DAGMan_Applications.html
For questions about farmout and its DAG support or for help debugging your jobs,
please contact [email protected].
Thanks!
EOF
# Print, one per line, the error-output file of every failed DAG node.
#   $1  node status file, with lines of the form "JOB <name> STATUS_<state>"
#   $2  DAG file, with lines of the form "JOB <name> <dir>/submit"
# For every node whose status line contains STATUS_ERROR, the matching DAG
# line is rewritten to "<dir>/<name>.err".  Nothing is printed when either
# file is unreadable or when no node failed.
# The function body is a subshell "( ... )" so its variables do not leak into
# the rest of the script (POSIX sh has no `local`).
list_failed_job_errs() (
  status_file=$1
  dag_file=$2

  if [ ! -r "$status_file" ] || [ ! -r "$dag_file" ]; then
    exit 0
  fi

  # Collect the failed node names, backslash-escape regex metacharacters (and
  # ';', the delimiter of the sed command below) so each name matches only
  # itself, then join the names with '|' into an ERE alternation.
  failed=$(
    sed -n -E -e '/STATUS_ERROR/!d; s/^JOB (.*) STATUS_.*/\1/p' < "$status_file" |
      sed -e 's/[][\.*^$(){}?+|;]/\\&/g' |
      tr '\n' '|'
  )
  # tr leaves a trailing '|', which would add an empty alternative.
  failed=${failed%|}
  if [ -z "$failed" ]; then
    exit 0
  fi

  sed -n -E -e "s;^JOB ($failed) (.*)/submit;\2/\1.err;p" < "$dag_file"
)

# Append the list of error-output files to the mail, but only if some job
# actually failed.
failed_errs=$(list_failed_job_errs "${OUTPUT_DAG_FILE}.status" "${OUTPUT_DAG_FILE}")
if [ -n "$failed_errs" ]; then
  {
    echo
    echo "The following files contain the error output for your failed jobs:"
    echo
    printf '%s\n' "$failed_errs"
  } >> "$tmpdir/mail"
fi
# Describe the DAG's return value for the mail subject.  A case statement
# also copes with a missing or non-numeric value, which `[ ... -eq ... ]`
# would reject with an error.
case "${RETURN}" in
  0) completed="successfully" ;;
  1) completed="with errors" ;;
  *) completed="abnormally" ;;
esac

# Fall back to the effective user name if $USER is not set in the environment.
recipient=${USER:-$(id -un)}
logfile="${OUTPUT_DAG_FILE}.dagman.out"
# The format contains a space, so it must be quoted to reach date as a single
# operand.
stamp=$(date "+%Y/%m/%d %H:%M:%S")

# Send the assembled message and record the outcome in the DAGMan log; only
# claim the notification was sent if mail actually succeeded.
if mail -s "farmout DAG completed $completed (${RETURN})" "$recipient" < "$tmpdir/mail"; then
  echo "$stamp **** sent DAG notification to $recipient" >> "$logfile"
else
  echo "$stamp **** failed to send DAG notification to $recipient" >> "$logfile"
fi

# Hand the DAG's own return value back to the caller; a missing value is
# treated as a failure.
exit "${RETURN:-1}"