Skip to content

Commit

Permalink
Stop making symlinks in ~/.blah_jobproxy_dir/. HTCONDOR-520
Browse files Browse the repository at this point in the history
To allow proxy refresh to work when the blahp makes a modified copy of
the proxy on submit, it creates a symlink to the "live" proxy file under
~/.blah_jobproxy_dir/, named with the job id. These symlinks are
frequently not cleaned up, and it's hard to guarantee that they are
always cleaned up.

The name for the modified proxy file is always the name of the original
file plus one of several well-known suffixes. We already assume that the
proxy refresh command will use the same proxy filename as the original
submit command. So instead of finding the live proxy file via a symlink
under HOME, we can check for the existence of the filename we would have
created at submit time.
  • Loading branch information
JaimeFrey committed May 26, 2021
1 parent a45daa9 commit 60ac754
Show file tree
Hide file tree
Showing 10 changed files with 54 additions and 186 deletions.
4 changes: 4 additions & 0 deletions src/BLClient.c
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,10 @@ main(int argc, char *argv[])

fgets(buffer, MAX_LINE-1, stdin);

/* The pbs_status.sh and lsf_status.sh scripts now ignore the proxy
* removal message after the '/', since we no longer make symlinks in
* ~/.blah_jobproxy_dir.
*/
if(strstr(buffer,"/")==NULL){
if ((cp = strrchr (buffer, '\n')) != NULL){
*cp = '\0';
Expand Down
8 changes: 8 additions & 0 deletions src/BLParserLSF.c
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,10 @@ LookupAndSend(int m_sock)
}else{
t_wnode=make_message("WorkerNode=%s;",j2wn[id]);
}
/* This proxy removal message is now ignored by the
* lsf_status.sh script, since we no longer make symlinks in
* ~/.blah_jobproxy_dir.
*/
if(j2js[id] && ((strcmp(j2js[id],"3")==0) || (strcmp(j2js[id],"4")==0))){
pr_removal="Yes";
} else {
Expand Down Expand Up @@ -887,6 +891,10 @@ LookupAndSend(int m_sock)
}else{
t_wnode=make_message("WorkerNode=%s;",j2wn[id]);
}
/* This proxy removal message is now ignored by the
* lsf_status.sh script, since we no longer make symlinks in
* ~/.blah_jobproxy_dir.
*/
if(j2js[id] && ((strcmp(j2js[id],"3")==0) || (strcmp(j2js[id],"4")==0))){
pr_removal="Yes";
} else {
Expand Down
8 changes: 8 additions & 0 deletions src/BLParserPBS.c
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,10 @@ LookupAndSend(int m_sock)
pthread_mutex_lock(&write_mutex);
if(id>0 && j2js[id]!=NULL){

/* This proxy removal message is now ignored by the
* pbs_status.sh script, since we no longer make symlinks in
* ~/.blah_jobproxy_dir.
*/
if(j2js[id] && ((strcmp(j2js[id],"3")==0) || (strcmp(j2js[id],"4")==0))){
pr_removal="Yes";
} else {
Expand Down Expand Up @@ -1030,6 +1034,10 @@ LookupAndSend(int m_sock)
sysfatal("can't malloc out_buf in LookupAndSend: %r");
}

/* This proxy removal message is now ignored by the
* pbs_status.sh script, since we no longer make symlinks in
* ~/.blah_jobproxy_dir.
*/
if(j2js[id] && ((strcmp(j2js[id],"3")==0) || (strcmp(j2js[id],"4")==0))){
pr_removal="Yes";
} else {
Expand Down
11 changes: 0 additions & 11 deletions src/scripts/blah_common_submit_functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -283,8 +283,6 @@ function bls_parse_submit_options ()
bls_opt_proxyrenew="no"
fi

bls_proxy_dir=~/.blah_jobproxy_dir

bls_opt_workdir=$PWD

#default values for polling interval and min proxy lifetime
Expand Down Expand Up @@ -518,7 +516,6 @@ function bls_setup_all_files ()
if [ "x$bls_opt_stgproxy" == "xyes" ] ; then
bls_proxy_local_file=${bls_opt_workdir}"/"`basename "$bls_opt_proxy_string"`;
[ -r "$bls_proxy_local_file" -a -f "$bls_proxy_local_file" ] || bls_proxy_local_file="$bls_opt_proxy_string"
[ -r "$bls_proxy_local_file" -a -f "$bls_proxy_local_file" ] || bls_proxy_local_file=/tmp/x509up_u`id -u`
if [ -r "$bls_proxy_local_file" -a -f "$bls_proxy_local_file" ] ; then
bls_proxy_remote_file=${bls_tmp_name}.proxy
bls_test_shared_dir "$bls_proxy_local_file"
Expand Down Expand Up @@ -888,12 +885,4 @@ function bls_wrap_up_submit ()
cd $bls_opt_temp_dir
# DEBUG: cp $bls_tmp_file /tmp
rm -f $bls_tmp_file
if [ "x$job_registry" == "x" ]; then
# Create a softlink to proxy file for proxy renewal
if [ -r "$bls_proxy_local_file" -a -f "$bls_proxy_local_file" ] ; then
[ -d "$bls_proxy_dir" ] || mkdir $bls_proxy_dir
ln -s $bls_proxy_local_file $bls_proxy_dir/$jobID.proxy
fi
fi
}
7 changes: 0 additions & 7 deletions src/scripts/condor_status.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@
# limitations under the License.
#

proxy_dir=~/.blah_jobproxy_dir

. `dirname $0`/blah_load_config.sh

if [ "x$job_registry" != "x" ] ; then
Expand Down Expand Up @@ -196,11 +194,6 @@ function make_ad {
local exit_by_signal=$(echo $line | awk -F ',' '{print $8}')
local code_or_signal=$(echo $line | awk -F ',' '{print $9}')

# Clean up proxy renewal links if applicable
if [ "$status" == "3" -o "$status" == "4" ]; then
/bin/rm -f $proxy_dir/$job.proxy.norenew 2>/dev/null
fi

echo -n "[BatchjobId=\"$job\";JobStatus=$status;RemoteSysCpu=${remote_sys_cpu:-0};RemoteUserCpu=${remote_user_cpu:-0};BytesSent=${bytes_sent:-0};BytesRecvd=${bytes_recvd:-0};RemoteWallClockTime=${remote_wall_clock_time:-0};"
if [ "$status" == "4" ] ; then
if [ "$exit_by_signal" == "0" ] ; then
Expand Down
10 changes: 0 additions & 10 deletions src/scripts/condor_submit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -278,14 +278,4 @@ fi
# Clean temporary files -- The only temp file is the one we submit
rm -f $submit_file

# Create a softlink to proxy file for proxy renewal - local renewal
# of limited proxy only.

if [ "x$job_registry" == "x" ]; then
if [ -r "$bls_opt_proxy_string" -a -f "$bls_opt_proxy_string" ] ; then
[ -d "$bls_proxy_dir" ] || mkdir "$bls_proxy_dir"
ln -s "$bls_opt_proxy_string" "$bls_proxy_dir/$jobID.proxy.norenew"
fi
fi

exit $return_code
14 changes: 1 addition & 13 deletions src/scripts/lsf_status.sh
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ if [ "x$getcreamport" == "xyes" ] ; then
exit $retcode
fi

proxy_dir=~/.blah_jobproxy_dir
pars=$*

for reqfull in $pars ; do
Expand Down Expand Up @@ -185,10 +184,6 @@ END {
print "ExitCode=" exitcode ";"
}
print "]"
if (jobstatus == 3 || jobstatus == 4) {
system("rm " proxyDir "/" jobId ".proxy 2>/dev/null")
}
}
'
`
Expand Down Expand Up @@ -268,7 +263,7 @@ END {

job_data=`grep "$requested" $logs`

result=`echo "$job_data" | awk -v jobId=$requested -v proxyDir=$proxy_dir '
result=`echo "$job_data" | awk -v jobId=$requested '
BEGIN {
rex_queued = "\"JOB_NEW\" \"[0-9\.]+\" [0-9]+ " jobId
rex_running = "\"JOB_START\" \"[0-9\.]+\" [0-9]+ " jobId
Expand Down Expand Up @@ -356,9 +351,6 @@ END {
}
}
print "]"
if (jobstatus == 3 || jobstatus == 4) {
system("rm " proxyDir "/" jobId ".proxy 2>/dev/null")
}
}
' `

Expand All @@ -371,12 +363,8 @@ END {

if [ "x$usedBLParser" == "xyes" ] ; then

pr_removal=`echo $result | sed -e 's/^.*\///'`
result=`echo $result | sed 's/\/.*//'`
echo "0"$result
if [ "x$pr_removal" == "xYes" ] ; then
rm -f ${proxy_dir}/${requested}.proxy 2>/dev/null
fi
usedBLParser="no"
fi
logs=""
Expand Down
13 changes: 1 addition & 12 deletions src/scripts/pbs_status.sh
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@ if [ "x$getcreamport" == "xyes" ] ; then
fi

pars=$*
proxy_dir=~/.blah_jobproxy_dir

for reqfull in $pars ; do
requested=""
Expand Down Expand Up @@ -228,9 +227,6 @@ END {
print "ExitCode=" exitcode ";"
}
print "]"
if (jobstatus == 3 || jobstatus == 4) {
system("rm " proxyDir "/" jobId ".proxy 2>/dev/null")
}
}
'
Expand Down Expand Up @@ -278,7 +274,7 @@ END {
usedBLParser="no"
logs="$logpath/$logfile `find $logpath -type f -newer $logpath/$logfile`"
log_data=`grep "$reqjob" $logs`
result=`echo "$log_data" | awk -v jobId="$reqjob" -v wn="$workernode" -v proxyDir="$proxy_dir" '
result=`echo "$log_data" | awk -v jobId="$reqjob" -v wn="$workernode" '
BEGIN {
rex_queued = jobId ";Job Queued "
rex_running = jobId ";Job Run "
Expand Down Expand Up @@ -335,9 +331,6 @@ END {
print "ExitCode = " exitcode ";"
}
print "]"
if (jobstatus == 3 || jobstatus == 4) {
system("rm " proxyDir "/" jobId ".proxy 2>/dev/null")
}
}
' `

Expand All @@ -350,7 +343,6 @@ END {
fi
fi #close if on pbs_BLParser
if [ "x$usedBLParser" == "xyes" ] ; then
pr_removal=`echo $result | sed -e 's/^.*\///'`
result=`echo $result | sed 's/\/.*//'`

resstatus=`echo $result|sed "s/\[.*JobStatus=\([^;]*\).*/\1/"`;
Expand All @@ -370,9 +362,6 @@ END {
echo "0"$result "Workernode=\"$workernode\";]"
fi

if [ "x$pr_removal" == "xYes" ] ; then
rm -f ${proxy_dir}/${reqjob}.proxy 2>/dev/null
fi
usedBLParser="no"
fi
fi #close of if-else on $pbs_nologaccess
Expand Down
6 changes: 1 addition & 5 deletions src/scripts/slurm_status.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ done
shift `expr $OPTIND - 1`

pars=$*
proxy_dir=~/.blah_jobproxy_dir

for reqfull in $pars ; do
reqjob=`echo $reqfull | sed -e 's/^.*\///'`
Expand All @@ -63,7 +62,7 @@ for reqfull in $pars ; do

result=`${slurm_binpath}/scontrol $cluster_arg show job $reqjob 2>$staterr`
stat_exit_code=$?
result=`echo "$result" | awk -v job_id=$reqjob -v proxy_dir=$proxy_dir '
result=`echo "$result" | awk -v job_id=$reqjob '
BEGIN {
blah_status = 4
slurm_status = ""
Expand Down Expand Up @@ -101,9 +100,6 @@ END {
print "ExitCode=" exit_code ";"
}
print "]\n"
if ( blah_status == 3 || blah_status == 4 ) {
#system( "rm " proxy_dir "/" job_id ".proxy 2>/dev/null" )
}
}
'
`
Expand Down
Loading

0 comments on commit 60ac754

Please sign in to comment.