Skip to content

Commit

Permalink
Merge pull request #5 from RoanKanninga/master
Browse files Browse the repository at this point in the history
cleaning up scripts when pipeline is finished
  • Loading branch information
RoanKanninga authored Jan 5, 2017
2 parents 40dce3a + c271b80 commit d76cda0
Show file tree
Hide file tree
Showing 14 changed files with 395 additions and 70 deletions.
65 changes: 65 additions & 0 deletions automated_RNA_generate_template.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/bin/bash
#
# Generates the NGS_RNA pipeline job scripts for a single project by
# converting the pipeline's parameter files and handing them to
# Molgenis Compute.
#
# Usage:
#   automated_RNA_generate_template.sh <project> <build> <species> <pipeline>
#
# The group name and tmp directory name are derived from the current working
# directory (basename of ../../../ and ../../ respectively), so the script
# must be started from inside the project's generatedscripts directory tree.
#
# Reads from the environment (set by the loaded modules):
#   EBROOTNGS_RNA, EBROOTMOLGENISMINCOMPUTE
# NOTE(review): GAF is read below but never set in this script; every other
# path here is built from WORKDIR. Presumably it comes from the environment
# -- confirm, or switch to WORKDIR.

# Fail early with a usage message instead of silently generating paths such
# as "generatedscripts//parameters.csv" when an argument is missing.
USAGE="usage: $0 <project> <build> <species> <pipeline>"
PROJECT="${1:?${USAGE}}"
BUILD="${2:?${USAGE}}"
SPECIES="${3:?${USAGE}}"
PIPELINE="${4:?${USAGE}}"
RUNID="run01"

# NOTE(review): VERSIONFROMSTARTPIPELINESCRIPT looks like a placeholder that
# is substituted before this script runs -- confirm against the caller.
module load NGS_RNA/VERSIONFROMSTARTPIPELINESCRIPT
module list

## Derive the environment name (short hostname) and the directory layout.
HOST=$(hostname)
ENVIRONMENT="${HOST%%.*}"

# Named TMPDIRECTORY (as in automated_generate_template.sh) rather than
# TMPDIR: assigning TMPDIR would change the temp-file location inherited by
# every child process started below.
TMPDIRECTORY=$(basename "$(cd ../../ && pwd)")
GROUP=$(basename "$(cd ../../../ && pwd)")

WORKDIR="/groups/${GROUP}/${TMPDIRECTORY}"
PROJECTDIR="${WORKDIR}/generatedscripts/${PROJECT}"
WORKFLOW="${EBROOTNGS_RNA}/workflow_${PIPELINE}.csv"

## Remove leftovers from a previous generation run.
if [ -f .compute.properties ]; then
  rm .compute.properties
fi

if [ -f "${GAF}/generatedscripts/${PROJECT}/out.csv" ]; then
  rm -rf "${GAF}/generatedscripts/${PROJECT}/out.csv"
fi

## Convert each parameter file from the git layout to the Molgenis layout.
for parameterFile in \
  "parameters.csv" \
  "parameters.${BUILD}.csv" \
  "parameters.${SPECIES}.csv" \
  "parameters.${ENVIRONMENT}.csv"; do
  perl "${EBROOTNGS_RNA}/convertParametersGitToMolgenis.pl" \
    "${EBROOTNGS_RNA}/${parameterFile}" \
    > "${PROJECTDIR}/${parameterFile}"
done

## Assemble the ';'-separated override list passed to Molgenis Compute via -o.
# NOTE(review): many 'module' implementations print 'module list' to stderr,
# in which case ngsversion would be empty -- confirm on the target cluster.
computeOptions="workflowpath=${WORKFLOW};outputdir=scripts/jobs;"
computeOptions+="groupname=${GROUP};"
computeOptions+="mainParameters=${PROJECTDIR}/parameters.csv;"
computeOptions+="ngsversion=$(module list | grep -o -P 'NGS_RNA(.+)');"
computeOptions+="worksheet=${PROJECTDIR}/${PROJECT}.csv;"
computeOptions+="parameters_build=${PROJECTDIR}/parameters.${BUILD}.csv;"
computeOptions+="parameters_species=${PROJECTDIR}/parameters.${SPECIES}.csv;"
computeOptions+="parameters_chromosomes=${EBROOTNGS_RNA}/chromosomes.${SPECIES}.csv;"
computeOptions+="parameters_environment=${PROJECTDIR}/parameters.${ENVIRONMENT}.csv;"

sh "${EBROOTMOLGENISMINCOMPUTE}/molgenis_compute.sh" \
  -p "${PROJECTDIR}/parameters.csv" \
  -p "${PROJECTDIR}/parameters.${BUILD}.csv" \
  -p "${PROJECTDIR}/parameters.${SPECIES}.csv" \
  -p "${PROJECTDIR}/parameters.${ENVIRONMENT}.csv" \
  -p "${PROJECTDIR}/${PROJECT}.csv" \
  -p "${EBROOTNGS_RNA}/chromosomes.${SPECIES}.csv" \
  -w "${EBROOTNGS_RNA}/create_in-house_ngs_projects_workflow.csv" \
  -rundir "${PROJECTDIR}/scripts" \
  --runid "${RUNID}" \
  --weave \
  --generate \
  -o "${computeOptions}"
2 changes: 1 addition & 1 deletion automated_generate_template.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ BATCH=$2
##Some error handling
function errorExitandCleanUp()
{
echo "TRAPPED"
echo "${PROJECT} TRAPPED"
if [ ! -f /groups/${GROUP}/${TMPDIRECTORY}/logs/${PROJECT}.generating.failed.mailed ]
then
mailTo="[email protected]"
Expand Down
Empty file modified calculon.cfg
100644 → 100755
Empty file.
138 changes: 138 additions & 0 deletions copyRawDataToCluster.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#!/bin/bash
#
# Copies demultiplexed raw sequencing data from a gattaca server to this
# cluster, one sample sheet (= one sequence run) at a time, and verifies the
# copy with the md5 files shipped alongside the data.
#
# Usage:
#   copyRawDataToCluster.sh <gattaca-short-hostname> <groupname>
#
# Variables expected from the sourced config files (not defined here):
#   GATTACA, SAMPLESHEETSDIR, LOGDIR, RAWDATADIR
#
# Per run, marker files under ${LOGDIR}/<run>/ track progress:
#   <run>.copyToCluster.locked   - a copy is in progress (lock)
#   <run>.SampleSheetCopied      - sample sheet has been fetched
#   <run>.dataCopiedToCluster    - data copied and md5-verified

set -e
set -u

GAT=$1
groupname=$2
gattacaAddress="${GAT}.gcc.rug.nl"
echo "${gattacaAddress}"
MYINSTALLATIONDIR=$( cd -P "$( dirname "$0" )" && pwd )

## Source config files (zinc-finger.gcc.rug.nl.cfg, leucine-zipper.gcc.rug.nl, calculon.hpc.rug.nl OR gattaca.cfg)
myhost=$(hostname)
. "${MYINSTALLATIONDIR}/${groupname}.cfg"
. "${MYINSTALLATIONDIR}/${myhost}.cfg"
. "${MYINSTALLATIONDIR}/sharedConfig.cfg"

### REPLACE WITH UMCG-ATEAMBOT USER
remoteUser="${groupname}-ateambot@${gattacaAddress}"

ssh "${remoteUser}" "ls ${GATTACA}/Samplesheets/*.csv" > "${SAMPLESHEETSDIR}/allSampleSheets_${GAT}.txt"

## Load the list into an array first, so the ssh calls in the main loop
## cannot consume the list from stdin.
gattacaSamplesheets=()
while IFS= read -r line || [[ -n "${line}" ]]; do
  if [[ -n "${line}" ]]; then
    gattacaSamplesheets+=("${line}")
  fi
done < "${SAMPLESHEETSDIR}/allSampleSheets_${GAT}.txt"

echo "Logfiles will be written to ${LOGDIR}"

## Path of the lock file created by THIS process (empty when none is held).
heldLock=""
filePrefix=""

#######################################
# Trap handler: releases the lock this process holds, if any, and aborts.
# Defined and installed before the loop so that it exists whenever the trap
# can fire. It never touches a lock created by another running instance.
# Globals: heldLock (read/written), filePrefix (read)
#######################################
function finish {
  if [[ -n "${heldLock}" && -f "${heldLock}" ]]; then
    echo "${filePrefix} TRAPPED"
    rm -f -- "${heldLock}"
    heldLock=""
    exit 1
  fi
}
trap finish HUP INT QUIT TERM EXIT ERR

# The ${arr[@]+...} form keeps 'set -u' happy on older bash when the array is empty.
for line in ${gattacaSamplesheets[@]+"${gattacaSamplesheets[@]}"}; do
  echo "working on ${line}"
  csvFile=$(basename "${line}")
  filePrefix="${csvFile%.*}"
  runLogDir="${LOGDIR}/${filePrefix}"
  LOGGER="${runLogDir}/${filePrefix}.copyToCluster.logger"
  lockFile="${runLogDir}/${filePrefix}.copyToCluster.locked"
  copiedFlag="${runLogDir}/${filePrefix}.dataCopiedToCluster"
  sampleSheetFlag="${runLogDir}/${filePrefix}.SampleSheetCopied"

  ## The 2nd and 3rd '_'-separated fields of the sample sheet name are the
  ## sequencer and the run number. 'read' is used instead of 'set' so the
  ## script's positional parameters are left alone.
  sequencer=""
  run=""
  IFS=_ read -r firstField sequencer run remainder <<< "${filePrefix}"
  if [[ -z "${sequencer}" || -z "${run}" ]]; then
    echo "skipping ${csvFile}: name does not contain a sequencer and run field" >&2
    continue
  fi

  ## Only runs for which demultiplexing has finished are copied.
  if ! ssh "${remoteUser}" ls "${GATTACA}/logs/${filePrefix}_Demultiplexing.finished" 1> /dev/null 2>&1; then
    continue
  fi
  mkdir -p "${runLogDir}/"

  if [[ -f "${copiedFlag}" ]]; then
    continue
  fi

  ## Another instance is already copying this run: stop, leaving its lock in place.
  if [[ -f "${lockFile}" ]]; then
    exit 0
  fi
  touch "${lockFile}"
  heldLock="${lockFile}"

  ## The trailing '*' is expanded on the remote side by rsync, so it must
  ## reach rsync unexpanded (hence the quoting where it is used).
  rawSource="${remoteUser}:${GATTACA}/runs/run_${run}_${sequencer}/results/${filePrefix}*"
  rawTarget="${RAWDATADIR}/${filePrefix}"

  ## Check if samplesheet is copied
  if [[ ! -f "${SAMPLESHEETSDIR}/${csvFile}" || ! -f "${sampleSheetFlag}" ]]; then
    scp "${remoteUser}:${GATTACA}/Samplesheets/${csvFile}" "${SAMPLESHEETSDIR}"
    touch "${sampleSheetFlag}"
  fi

  ## Check if data is already copied to Cluster
  if [[ ! -d "${rawTarget}" ]]; then
    mkdir -p "${rawTarget}/Info"
    echo "Copying data to Cluster.." >> "${LOGGER}"
    rsync -r -a "${rawSource}" "${rawTarget}"
  fi

  if [[ -d "${rawTarget}" && ! -f "${copiedFlag}" ]]; then
    ## Compare how many files are on both the servers in the directory.
    ## 'ls | wc -l' is kept on both sides so the two counts are computed the same way.
    countFilesRawDataDirTmp=$(ls "${rawTarget}/${filePrefix}"* | wc -l)
    countFilesRawDataDirGattaca=$(ssh "${remoteUser}" "ls ${GATTACA}/runs/run_${run}_${sequencer}/results/${filePrefix}* | wc -l ")

    ## Fetch the run metadata into Info/.
    mkdir -p "${rawTarget}/Info"
    sequencerDir="${remoteUser}:/groups/umcg-lab/scr01/sequencers/${filePrefix}"
    rsync -r "${sequencerDir}/InterOp" "${rawTarget}/Info/"
    rsync "${sequencerDir}/RunInfo.xml" "${rawTarget}/Info/"
    rsync "${sequencerDir}/*unParameters.xml" "${rawTarget}/Info/"

    if [[ "${countFilesRawDataDirTmp}" -eq "${countFilesRawDataDirGattaca}" ]]; then
      md5Failed="no"
      for md5File in "${rawTarget}"/*.fq.gz.md5; do
        # An unmatched glob stays literal; skip it.
        [[ -e "${md5File}" ]] || continue
        md5Name=$(basename "${md5File}")
        # md5sum resolves the listed file names relative to the cwd, so run
        # the check from inside the data directory (in a subshell, to leave
        # the script's own cwd untouched).
        if ( cd "${rawTarget}" && md5sum -c "${md5Name}" ); then
          awk '{print $2" CHECKED, and is correct"}' "${md5File}" >> "${LOGGER}"
        else
          md5Failed="yes"
          echo "md5sum check failed, the copying will start again" >> "${LOGGER}"
        fi
      done

      if [[ "${md5Failed}" == "yes" ]]; then
        ## Re-copy, but do NOT mark the run as copied: the markers are only
        ## written after a fully successful verification on a later pass.
        rsync -r -a "${rawSource}" "${rawTarget}"
        echo -e "data copied to Cluster \n" >> "${LOGGER}"
      else
        touch "${copiedFlag}"
        touch "${rawTarget}/${filePrefix}.md5sums.checked"
      fi
    else
      echo "Retry: Copying data to Cluster" >> "${LOGGER}"
      rsync -r -a "${rawSource}" "${rawTarget}"
      echo "data copied to Cluster" >> "${LOGGER}"
    fi
  fi

  rm -f -- "${lockFile}"
  heldLock=""
done

trap - EXIT
exit 0
27 changes: 15 additions & 12 deletions copyRawDataToDiagnosticsCluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,6 @@ do
filePrefix="${csvFile%.*}"
LOGGER=${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.logger

if [ ! -d ${LOGDIR}/${filePrefix}/ ]
then
mkdir ${LOGDIR}/${filePrefix}/
fi

function finish {
if [ -f ${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.locked ]
then
echo "TRAPPED"
rm ${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.locked
fi
}
trap finish HUP INT QUIT TERM EXIT ERR

FINISHED="no"
Expand All @@ -58,11 +46,26 @@ do
if ssh umcg-ateambot@${gattacaAddress} ls ${GATTACA}/logs/${filePrefix}_Demultiplexing.finished 1> /dev/null 2>&1
then
### Demultiplexing is finished
if [ ! -d ${LOGDIR}/${filePrefix}/ ]
then
mkdir ${LOGDIR}/${filePrefix}/
fi

printf ""
else
continue;
fi

function finish {
if [ -f ${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.locked ]
then
echo "${filePrefix} TRAPPED"
rm ${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.locked
exit 1
fi

}

if [ -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToDiagnosticsCluster ]
then
continue;
Expand Down
52 changes: 40 additions & 12 deletions copyRawDataToPrm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,6 @@ myhost=$(hostname)
ls ${SAMPLESHEETSDIR}/*.csv > ${SAMPLESHEETSDIR}/allSampleSheets_DiagnosticsCluster.txt
pipeline="dna"

function finish {
echo "TRAPPED"
rm -f ${LOGDIR}/copyDataToPrm.sh.locked
}
trap finish HUP INT QUIT TERM EXIT ERR

ARR=()
Expand Down Expand Up @@ -79,12 +75,24 @@ do

done<${LOGDIR}/TMP/${filePrefix}.unique.projects

function finish {
echo "${filePrefix} TRAPPED"
rm -f ${LOGDIR}/copyDataToPrm.sh.locked
}


copyRawDiagnosticsClusterToPrm=""
makeRawDataDir=""

copyRawDiagnosticsClusterToPrm="${RAWDATADIR}/${filePrefix}/* ${groupname}[email protected]:${RAWDATADIRPRM}/${filePrefix}"
makeRawDataDir=$(ssh ${groupname}[email protected] "sh ${RAWDATADIRPRM}/../checkRawData.sh ${RAWDATADIRPRM} ${filePrefix}")

if [[ -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToDiagnosticsCluster && ! -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToPrm ]]
if [ ${myhost} == "calculon" ]
then
copyRawDiagnosticsClusterToPrm="${RAWDATADIR}/${filePrefix}/* ${RAWDATADIRPRM}/${filePrefix}"
makeRawDataDir=$(sh ${RAWDATADIRPRM}/../checkRawData.sh ${RAWDATADIRPRM} ${filePrefix})
else
copyRawDiagnosticsClusterToPrm="${RAWDATADIR}/${filePrefix}/* ${groupname}[email protected]:${RAWDATADIRPRM}/${filePrefix}"
makeRawDataDir=$(ssh ${groupname}[email protected] "sh ${RAWDATADIRPRM}/../checkRawData.sh ${RAWDATADIRPRM} ${filePrefix}")
fi
if [[ -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToCluster && ! -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToPrm ]]
then
echo "working on ${filePrefix}"
countFilesRawDataDirTmp=$(ls ${RAWDATADIR}/${filePrefix}/${filePrefix}* | wc -l)
Expand All @@ -96,19 +104,39 @@ do
fi
if [ "${makeRawDataDir}" == "t" ]
then
countFilesRawDataDirPrm=$(ssh ${groupname}[email protected] "ls ${RAWDATADIRPRM}/${filePrefix}/${filePrefix}* | wc -l")
countFilesRawDataDirPrm=""
if [ ${myhost} == "calculon" ]
then
countFilesRawDataDirPrm=$(ls ${RAWDATADIRPRM}/${filePrefix}/${filePrefix}* | wc -l)
else
countFilesRawDataDirPrm=$(ssh ${groupname}[email protected] "ls ${RAWDATADIRPRM}/${filePrefix}/${filePrefix}* | wc -l")
fi
if [ ${countFilesRawDataDirTmp} -eq ${countFilesRawDataDirPrm} ]
then
COPIEDTOPRM=$(ssh ${groupname}[email protected] "sh ${RAWDATADIRPRM}/../check.sh ${RAWDATADIRPRM} ${filePrefix}")
COPIEDTOPRM=""
if [ ${myhost} == "calculon" ]
then
COPIEDTOPRM=$(sh ${RAWDATADIRPRM}/../check.sh ${RAWDATADIRPRM} ${filePrefix})
else
COPIEDTOPRM=$(ssh ${groupname}[email protected] "sh ${RAWDATADIRPRM}/../check.sh ${RAWDATADIRPRM} ${filePrefix}")
fi

if [[ "${COPIEDTOPRM}" == *"FAILED"* ]]
then
echo "md5sum check failed, the copying will start again" >> ${LOGGER}
rsync -r -av ${copyRawDiagnosticsClusterToPrm} >> $LOGGER 2>&1
echo "copy failed" >> $LOGDIR/${filePrefix}/${filePrefix}.failed
elif [[ "${COPIEDTOPRM}" == *"PASS"* ]]
then
scp ${SAMPLESHEETSDIR}/${csvFile} ${groupname}[email protected]:${RAWDATADIRPRM}/${filePrefix}/
scp ${SAMPLESHEETSDIR}/${csvFile} ${groupname}[email protected]:${SAMPLESHEETSPRMDIR}
if [ ${myhost} == "calculon" ]
then
scp ${SAMPLESHEETSDIR}/${csvFile} ${groupname}-dm@localhost:${RAWDATADIRPRM}/${filePrefix}/
scp ${SAMPLESHEETSDIR}/${csvFile} ${groupname}-dm@localhost:${SAMPLESHEETSPRMDIR}
else
scp ${SAMPLESHEETSDIR}/${csvFile} ${groupname}[email protected]:${RAWDATADIRPRM}/${filePrefix}/
scp ${SAMPLESHEETSDIR}/${csvFile} ${groupname}[email protected]:${SAMPLESHEETSPRMDIR}

fi
echo "finished copying data to calculon" >> ${LOGGER}

echo "finished with rawdata" >> ${LOGDIR}/${filePrefix}/${filePrefix}.copyToPrm.logger
Expand Down
Empty file modified leucine-zipper.gcc.rug.nl.cfg
100644 → 100755
Empty file.
6 changes: 3 additions & 3 deletions mailError.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ do
elif [ "${groupname}" == "umcg-gd" ]
then
echo "mailTo is umcg-gd"
if [ -f /groups/umcg-gd/${tmpDirectory}/logs/mailinglistDiagnostiek.txt ]
if [ -f /groups/umcg-gd/${tmpDirectory}/logs/mailinglistDiagnostiekCrash.txt ]
then
mailTo=$(cat /groups/umcg-gd/${tmpDirectory}/logs/mailinglistDiagnostiek.txt)
mailTo=$(cat /groups/umcg-gd/${tmpDirectory}/logs/mailinglistDiagnostiekCrash.txt)
else
echo "mailingListDiagnostiek.txt bestaat niet!!"
echo "mailingListDiagnostiekCrash.txt bestaat niet!!"
exit 0
fi
fi
Expand Down
11 changes: 11 additions & 0 deletions pipelineFinished.sh
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,17 @@ do
exit 0
fi
fi
cd ${PROJECTSDIR}/${projectName}/*/jobs/
zip -gr ${PROJECTSDIR}/${projectName}/*/jobs//allJobs.zip *.err
zip -gr ${PROJECTSDIR}/${projectName}/*/jobs//allJobs.zip *.out
zip -gr ${PROJECTSDIR}/${projectName}/*/jobs//allJobs.zip *.sh.finished
zip -gr ${PROJECTSDIR}/${projectName}/*/jobs//allJobs.zip *.env
zip -gr ${PROJECTSDIR}/${projectName}/*/jobs//allJobs.zip *.sh
zip -gr ${PROJECTSDIR}/${projectName}/*/jobs//allJobs.zip molgenis.*

echo "all files in the jobs directory are now zipped into one file"

rm ${PROJECTSDIR}/${projectName}/*/jobs/*{err,out,sh.finished,env,sh,CORRECT}
printf "The results can be found: ${PROJECTSDIR}/${projectName} \n\nCheers from the GCC :)"| mail -s "NGS_DNA pipeline is finished for project ${projectName} on `date +%d/%m/%Y` `date +%H:%M`" ${mailTo}
touch ${LOGDIR}/${projectName}/${projectName}.pipeline.finished.mailed

Expand Down
2 changes: 0 additions & 2 deletions sharedConfig.cfg
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,3 @@ SAMPLESHEETSPRMDIR="/groups/${GROUP}/prm02/rawdata/Samplesheets"
SAMPLESHEETSDIR="${WORKDIR}/Samplesheets"
GATTACA="/groups/${GROUP}/scr01/"
ONTVANGER="[email protected]"
DNA="NGS_DNA/3.2.2-Molgenis-Compute-v16.04.1-Java-1.8.0_45"
RNA="NGS_RNA/3.2.2-Molgenis-Compute-v15.12.4-Java-1.8.0_45"
Loading

0 comments on commit d76cda0

Please sign in to comment.