# ---------------------------------------------------------------------------
# Patch overview (git unified diff touching several NGS_Automated scripts).
#
# NOTE: in this copy the original line breaks and indentation are collapsed to
# single spaces, so an added blank line followed by a context line cannot be
# told apart from an added indented line. Restore the original whitespace
# before attempting to apply this patch.
#
# What each file diff does (as visible in the hunks below):
#   automated_RNA_generate_template.sh (new file)
#     - Loads NGS_RNA/VERSIONFROMSTARTPIPELINESCRIPT; startPipeline.sh replaces
#       that placeholder with ${NGS_RNA} via perl -pi.
#     - Positional arguments: $1 PROJECT, $2 BUILD, $3 SPECIES, $4 PIPELINE.
#     - Derives TMPDIR and GROUP from the directories two and three levels
#       above the current working directory.
#     - Converts the parameters*.csv files with convertParametersGitToMolgenis.pl
#       and calls molgenis_compute.sh with --weave --generate.
#   automated_generate_template.sh
#     - The trap message now includes ${PROJECT}.
#   copyRawDataToCluster.sh (new file)
#     - Positional arguments: $1 GAT (host prefix of <GAT>.gcc.rug.nl), $2 groupname.
#     - For each samplesheet listed over ssh: waits for the
#       <prefix>_Demultiplexing.finished marker, takes a .copyToCluster.locked
#       lock, copies the samplesheet, rsyncs the raw data, compares file
#       counts, runs md5sum -c on *.fq.gz.md5 and touches .dataCopiedToCluster.
#   copyRawDataToDiagnosticsCluster.sh
#     - Moves the log-dir mkdir into the "Demultiplexing is finished" branch
#       and moves the finish function below that check; finish now prints the
#       file prefix and calls exit 1 after removing the lock.
#   copyRawDataToPrm.sh
#     - Moves the finish function into the loop body.
#     - When ${myhost} is "calculon", runs checkRawData.sh / check.sh / ls
#       locally instead of over ssh and scp's to localhost.
#     - The marker tested before copying changes from
#       .dataCopiedToDiagnosticsCluster to .dataCopiedToCluster.
#   mailError.sh
#     - Reads the recipient list from mailinglistDiagnostiekCrash.txt.
#   pipelineFinished.sh
#     - Zips job files into allJobs.zip and removes them before mailing.
#   sharedConfig.cfg
#     - Removes the DNA and RNA variables.
#   startPipeline.sh
#     - Reads the SampleType, specie and capturingKit columns in one pass over
#       the header, loads NGS_RNA or NGS_DNA accordingly, adds an RNA
#       generation branch, replaces ${run}_${sequencer} with ${filePrefix} in
#       the generated-scripts paths, wraps submit.sh in a .pipeline.locked
#       file, renames the per-project trap handler to finishProject, adds
#       FH_v / MYO_v / TID_v to the capturing-kit test and ends with
#       "trap - EXIT; exit 0".
#   *.cfg files: mode changes from 100644 to 100755.
#
# Review notes (hedged; confirm against the real, un-collapsed files):
#   NOTE(review): startPipeline.sh now defaults pipeline="DNA" and tests
#     "DNA" for the module load, but the generation branch kept from the old
#     file still tests [ "${pipeline}" == "dna" ]; with the new default that
#     branch looks unreachable - confirm intended casing.
#   NOTE(review): copyRawDataToCluster.sh touches .dataCopiedToCluster, while
#     startPipeline.sh tests .dataCopiedToDiagnosticsCluster or
#     .dataCopiedToCalculonCluster; nothing visible in this patch creates
#     .dataCopiedToCalculonCluster - confirm the marker names agree.
#   NOTE(review): copyRawDataToPrm.sh now tests only .dataCopiedToCluster,
#     whereas copyRawDataToDiagnosticsCluster.sh (context lines) still uses
#     .dataCopiedToDiagnosticsCluster - confirm.
#   NOTE(review): in the three copyRawData* scripts "trap finish ..." is
#     registered before "function finish" is defined; if the trap fires in
#     between (e.g. under set -e), finish would not exist yet - confirm.
#   NOTE(review): startPipeline.sh hard-codes
#     EBROOTNGS_AUTOMATED=/home/umcg-rkanninga/github/NGS_Automated/ under a
#     "CHANGE WHEN FINISHED TESTING" banner.
#   NOTE(review): automated_RNA_generate_template.sh tests and removes
#     ${GAF}/generatedscripts/${PROJECT}/out.csv, but GAF is not assigned in
#     that script (all other paths use ${WORKDIR}) - presumably a leftover.
#   NOTE(review): copyRawDataToCluster.sh re-runs rsync when md5sum -c fails
#     but still touches .dataCopiedToCluster after the loop without
#     re-verifying - confirm this is intended.
#   NOTE(review): copyRawDataToCluster.sh rsyncs InterOp, RunInfo.xml and
#     *unParameters.xml from the hard-coded path
#     /groups/umcg-lab/scr01/sequencers/ rather than from ${GATTACA} or a
#     ${groupname}-derived path - confirm.
#   NOTE(review): pipelineFinished.sh removes *CORRECT files that none of the
#     zip commands add to allJobs.zip, and "cd .../*/jobs/" relies on the glob
#     matching exactly one run directory - confirm.
#   NOTE(review): mailError.sh tests mailinglistDiagnostiekCrash.txt but the
#     message prints mailingListDiagnostiekCrash.txt (capital L).
# ---------------------------------------------------------------------------
diff --git a/automated_RNA_generate_template.sh b/automated_RNA_generate_template.sh new file mode 100644 index 00000000..e6c7f261 --- /dev/null +++ b/automated_RNA_generate_template.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +module load NGS_RNA/VERSIONFROMSTARTPIPELINESCRIPT +module list + +HOST=$(hostname) +##Running script for checking the environment variables + +ENVIRONMENT="${HOST%%.*}" +TMPDIR=$(basename $(cd ../../ && pwd )) +GROUP=$(basename $(cd ../../../ && pwd )) + +PROJECT=$1 +RUNID="run01" + +WORKDIR="/groups/${GROUP}/${TMPDIR}" +BUILD=$2 +SPECIES=$3 +PIPELINE=$4 + +WORKFLOW=${EBROOTNGS_RNA}/workflow_${PIPELINE}.csv + +if [ -f .compute.properties ]; +then + rm .compute.properties +fi + +if [ -f ${GAF}/generatedscripts/${PROJECT}/out.csv ]; +then + rm -rf ${GAF}/generatedscripts/${PROJECT}/out.csv +fi + +perl ${EBROOTNGS_RNA}/convertParametersGitToMolgenis.pl ${EBROOTNGS_RNA}/parameters.csv > \ +${WORKDIR}/generatedscripts/${PROJECT}/parameters.csv + +perl ${EBROOTNGS_RNA}/convertParametersGitToMolgenis.pl ${EBROOTNGS_RNA}/parameters.${BUILD}.csv > \ +${WORKDIR}/generatedscripts/${PROJECT}/parameters.${BUILD}.csv + +perl ${EBROOTNGS_RNA}/convertParametersGitToMolgenis.pl ${EBROOTNGS_RNA}/parameters.${SPECIES}.csv > \ +${WORKDIR}/generatedscripts/${PROJECT}/parameters.${SPECIES}.csv + +perl ${EBROOTNGS_RNA}/convertParametersGitToMolgenis.pl ${EBROOTNGS_RNA}/parameters.${ENVIRONMENT}.csv > \ +${WORKDIR}/generatedscripts/${PROJECT}/parameters.${ENVIRONMENT}.csv + +sh ${EBROOTMOLGENISMINCOMPUTE}/molgenis_compute.sh \ +-p ${WORKDIR}/generatedscripts/${PROJECT}/parameters.csv \ +-p ${WORKDIR}/generatedscripts/${PROJECT}/parameters.${BUILD}.csv \ +-p ${WORKDIR}/generatedscripts/${PROJECT}/parameters.${SPECIES}.csv \ +-p ${WORKDIR}/generatedscripts/${PROJECT}/parameters.${ENVIRONMENT}.csv \ +-p ${WORKDIR}/generatedscripts/${PROJECT}/${PROJECT}.csv \ +-p ${EBROOTNGS_RNA}/chromosomes.${SPECIES}.csv \ +-w ${EBROOTNGS_RNA}/create_in-house_ngs_projects_workflow.csv \ 
+-rundir ${WORKDIR}/generatedscripts/${PROJECT}/scripts \ +--runid ${RUNID} \ +--weave \ +--generate \ +-o "workflowpath=${WORKFLOW};outputdir=scripts/jobs;\ +groupname=${GROUP};\ +mainParameters=${WORKDIR}/generatedscripts/${PROJECT}/parameters.csv;\ +ngsversion=$(module list | grep -o -P 'NGS_RNA(.+)');\ +worksheet=${WORKDIR}/generatedscripts/${PROJECT}/${PROJECT}.csv;\ +parameters_build=${WORKDIR}/generatedscripts/${PROJECT}/parameters.${BUILD}.csv;\ +parameters_species=${WORKDIR}/generatedscripts/${PROJECT}/parameters.${SPECIES}.csv;\ +parameters_chromosomes=${EBROOTNGS_RNA}/chromosomes.${SPECIES}.csv;\ +parameters_environment=${WORKDIR}/generatedscripts/${PROJECT}/parameters.${ENVIRONMENT}.csv;" diff --git a/automated_generate_template.sh b/automated_generate_template.sh index 9d15945f..092b9d84 100755 --- a/automated_generate_template.sh +++ b/automated_generate_template.sh @@ -25,7 +25,7 @@ BATCH=$2 ##Some error handling function errorExitandCleanUp() { - echo "TRAPPED" + echo "${PROJECT} TRAPPED" if [ ! -f /groups/${GROUP}/${TMPDIRECTORY}/logs/${PROJECT}.generating.failed.mailed ] then mailTo="helpdesk.gcc.groningen@gmail.com" diff --git a/calculon.cfg b/calculon.cfg old mode 100644 new mode 100755 diff --git a/copyRawDataToCluster.sh b/copyRawDataToCluster.sh new file mode 100755 index 00000000..51162ffd --- /dev/null +++ b/copyRawDataToCluster.sh @@ -0,0 +1,138 @@ +#!/bin/bash + +set -e +set -u + +GAT=$1 +groupname=$2 +gattacaAddress="${GAT}.gcc.rug.nl" +echo $gattacaAddress +MYINSTALLATIONDIR=$( cd -P "$( dirname "$0" )" && pwd ) + +##source config file (zinc-finger.gcc.rug.nl.cfg, leucine-zipper.gcc.rug.nl, calculon.hpc.rug.nl OR gattaca.cfg) +myhost=$(hostname) +. ${MYINSTALLATIONDIR}/${groupname}.cfg +. ${MYINSTALLATIONDIR}/${myhost}.cfg +. 
${MYINSTALLATIONDIR}/sharedConfig.cfg + +### REPLACE WITH UMCG-ATEAMBOT USER +ssh ${groupname}-ateambot@${gattacaAddress} "ls ${GATTACA}/Samplesheets/*.csv" > ${SAMPLESHEETSDIR}/allSampleSheets_${GAT}.txt + +gattacaSamplesheets=() + +while read line +do + gattacaSamplesheets+=("${line} ") +done<${SAMPLESHEETSDIR}/allSampleSheets_${GAT}.txt + +echo "Logfiles will be written to $LOGDIR" + +for line in ${gattacaSamplesheets[@]} +do +echo "working on $line" + csvFile=$(basename $line) + filePrefix="${csvFile%.*}" + LOGGER=${LOGDIR}/${filePrefix}/${filePrefix}.copyToCluster.logger + + trap finish HUP INT QUIT TERM EXIT ERR + + FINISHED="no" + OLDIFS=$IFS + IFS=_ + set $filePrefix + sequencer=$2 + run=$3 + IFS=$OLDIFS + + if ssh ${groupname}-ateambot@${gattacaAddress} ls ${GATTACA}/logs/${filePrefix}_Demultiplexing.finished 1> /dev/null 2>&1 + then + ### Demultiplexing is finished + if [ ! -d ${LOGDIR}/${filePrefix}/ ] + then + mkdir ${LOGDIR}/${filePrefix}/ + fi + + printf "" + else + continue; + fi + + function finish { + if [ -f ${LOGDIR}/${filePrefix}/${filePrefix}.copyToCluster.locked ] + then + echo "${filePrefix} TRAPPED" + rm ${LOGDIR}/${filePrefix}/${filePrefix}.copyToCluster.locked + exit 1 + fi + + } + + if [ -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToCluster ] + then + continue; + fi + + if [ -f ${LOGDIR}/${filePrefix}/${filePrefix}.copyToCluster.locked ] + then + exit 0 + fi + touch ${LOGDIR}/${filePrefix}/${filePrefix}.copyToCluster.locked + + ## Check if samplesheet is copied + copyRawGatToCluster="${groupname}-ateambot@${gattacaAddress}:${GATTACA}/runs/run_${run}_${sequencer}/results/${filePrefix}* ${RAWDATADIR}/$filePrefix" + + if [[ ! -f ${SAMPLESHEETSDIR}/$csvFile || ! 
-f $LOGDIR/${filePrefix}/${filePrefix}.SampleSheetCopied ]] + then + scp ${groupname}-ateambot@${gattacaAddress}:${GATTACA}/Samplesheets/${csvFile} ${SAMPLESHEETSDIR} + touch $LOGDIR/${filePrefix}/${filePrefix}.SampleSheetCopied + fi + ## Check if data is already copied to Cluster + + if [ ! -d ${RAWDATADIR}/$filePrefix ] + then + mkdir -p ${RAWDATADIR}/${filePrefix}/Info + echo "Copying data to Cluster.." >> $LOGGER + rsync -r -a ${copyRawGatToCluster} + fi + + + if [[ -d ${RAWDATADIR}/$filePrefix && ! -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToCluster ]] + then + ##Compare how many files are on both the servers in the directory + countFilesRawDataDirTmp=$(ls ${RAWDATADIR}/${filePrefix}/${filePrefix}* | wc -l) + countFilesRawDataDirGattaca=$(ssh ${groupname}-ateambot@${gattacaAddress} "ls ${GATTACA}/runs/run_${run}_${sequencer}/results/${filePrefix}* | wc -l ") + + rsync -r ${groupname}-ateambot@${gattacaAddress}:/groups/umcg-lab/scr01/sequencers/${filePrefix}/InterOp ${RAWDATADIR}/${filePrefix}/Info/ + rsync ${groupname}-ateambot@${gattacaAddress}:/groups/umcg-lab/scr01/sequencers/${filePrefix}/RunInfo.xml ${RAWDATADIR}/${filePrefix}/Info/ + rsync ${groupname}-ateambot@${gattacaAddress}:/groups/umcg-lab/scr01/sequencers/${filePrefix}/*unParameters.xml ${RAWDATADIR}/${filePrefix}/Info/ + + if [ ${countFilesRawDataDirTmp} -eq ${countFilesRawDataDirGattaca} ] + then + cd ${RAWDATADIR}/${filePrefix}/ + for i in $(ls *.fq.gz.md5 ) + do + if md5sum -c $i + then + + awk '{print $2" CHECKED, and is correct"}' $i >> $LOGGER + else + echo "md5sum check failed, the copying will start again" >> $LOGGER + rsync -r -a ${copyRawGatToCluster} + echo -e "data copied to Cluster \n" >> $LOGGER + + fi + done + touch $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToCluster + touch ${filePrefix}.md5sums.checked + + else + echo "Retry: Copying data to Cluster" >> $LOGGER + rsync -r -a ${copyRawGatToCluster} + echo "data copied to Cluster" >> $LOGGER + fi + fi +rm 
${LOGDIR}/${filePrefix}/${filePrefix}.copyToCluster.locked +done + +trap - EXIT +exit 0 diff --git a/copyRawDataToDiagnosticsCluster.sh b/copyRawDataToDiagnosticsCluster.sh index 6e213d2a..456c5781 100755 --- a/copyRawDataToDiagnosticsCluster.sh +++ b/copyRawDataToDiagnosticsCluster.sh @@ -33,18 +33,6 @@ do filePrefix="${csvFile%.*}" LOGGER=${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.logger - if [ ! -d ${LOGDIR}/${filePrefix}/ ] - then - mkdir ${LOGDIR}/${filePrefix}/ - fi - - function finish { - if [ -f ${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.locked ] - then - echo "TRAPPED" - rm ${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.locked - fi - } trap finish HUP INT QUIT TERM EXIT ERR FINISHED="no" @@ -58,11 +46,26 @@ do if ssh umcg-ateambot@${gattacaAddress} ls ${GATTACA}/logs/${filePrefix}_Demultiplexing.finished 1> /dev/null 2>&1 then ### Demultiplexing is finished + if [ ! -d ${LOGDIR}/${filePrefix}/ ] + then + mkdir ${LOGDIR}/${filePrefix}/ + fi + printf "" else continue; fi + function finish { + if [ -f ${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.locked ] + then + echo "${filePrefix} TRAPPED" + rm ${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.locked + exit 1 + fi + + } + if [ -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToDiagnosticsCluster ] then continue; diff --git a/copyRawDataToPrm.sh b/copyRawDataToPrm.sh index aa3f0bfe..929ab093 100755 --- a/copyRawDataToPrm.sh +++ b/copyRawDataToPrm.sh @@ -17,10 +17,6 @@ myhost=$(hostname) ls ${SAMPLESHEETSDIR}/*.csv > ${SAMPLESHEETSDIR}/allSampleSheets_DiagnosticsCluster.txt pipeline="dna" -function finish { - echo "TRAPPED" - rm -f ${LOGDIR}/copyDataToPrm.sh.locked -} trap finish HUP INT QUIT TERM EXIT ERR ARR=() @@ -79,12 +75,24 @@ do done<${LOGDIR}/TMP/${filePrefix}.unique.projects + function finish { + echo "${filePrefix} TRAPPED" + rm -f ${LOGDIR}/copyDataToPrm.sh.locked + } + + copyRawDiagnosticsClusterToPrm="" + 
makeRawDataDir="" - copyRawDiagnosticsClusterToPrm="${RAWDATADIR}/${filePrefix}/* ${groupname}-dm@calculon.hpc.rug.nl:${RAWDATADIRPRM}/${filePrefix}" - makeRawDataDir=$(ssh ${groupname}-dm@calculon.hpc.rug.nl "sh ${RAWDATADIRPRM}/../checkRawData.sh ${RAWDATADIRPRM} ${filePrefix}") - - if [[ -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToDiagnosticsCluster && ! -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToPrm ]] + if [ ${myhost} == "calculon" ] + then + copyRawDiagnosticsClusterToPrm="${RAWDATADIR}/${filePrefix}/* ${RAWDATADIRPRM}/${filePrefix}" + makeRawDataDir=$(sh ${RAWDATADIRPRM}/../checkRawData.sh ${RAWDATADIRPRM} ${filePrefix}) + else + copyRawDiagnosticsClusterToPrm="${RAWDATADIR}/${filePrefix}/* ${groupname}-dm@calculon.hpc.rug.nl:${RAWDATADIRPRM}/${filePrefix}" + makeRawDataDir=$(ssh ${groupname}-dm@calculon.hpc.rug.nl "sh ${RAWDATADIRPRM}/../checkRawData.sh ${RAWDATADIRPRM} ${filePrefix}") + fi + if [[ -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToCluster && ! -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToPrm ]] then echo "working on ${filePrefix}" countFilesRawDataDirTmp=$(ls ${RAWDATADIR}/${filePrefix}/${filePrefix}* | wc -l) @@ -96,10 +104,23 @@ do fi if [ "${makeRawDataDir}" == "t" ] then - countFilesRawDataDirPrm=$(ssh ${groupname}-dm@calculon.hpc.rug.nl "ls ${RAWDATADIRPRM}/${filePrefix}/${filePrefix}* | wc -l") + countFilesRawDataDirPrm="" + if [ ${myhost} == "calculon" ] + then + countFilesRawDataDirPrm=$(ls ${RAWDATADIRPRM}/${filePrefix}/${filePrefix}* | wc -l) + else + countFilesRawDataDirPrm=$(ssh ${groupname}-dm@calculon.hpc.rug.nl "ls ${RAWDATADIRPRM}/${filePrefix}/${filePrefix}* | wc -l") + fi if [ ${countFilesRawDataDirTmp} -eq ${countFilesRawDataDirPrm} ] then - COPIEDTOPRM=$(ssh ${groupname}-dm@calculon.hpc.rug.nl "sh ${RAWDATADIRPRM}/../check.sh ${RAWDATADIRPRM} ${filePrefix}") + COPIEDTOPRM="" + if [ ${myhost} == "calculon" ] + then + COPIEDTOPRM=$(sh ${RAWDATADIRPRM}/../check.sh ${RAWDATADIRPRM} ${filePrefix}) + else 
+ COPIEDTOPRM=$(ssh ${groupname}-dm@calculon.hpc.rug.nl "sh ${RAWDATADIRPRM}/../check.sh ${RAWDATADIRPRM} ${filePrefix}") + fi + if [[ "${COPIEDTOPRM}" == *"FAILED"* ]] then echo "md5sum check failed, the copying will start again" >> ${LOGGER} @@ -107,8 +128,15 @@ do echo "copy failed" >> $LOGDIR/${filePrefix}/${filePrefix}.failed elif [[ "${COPIEDTOPRM}" == *"PASS"* ]] then - scp ${SAMPLESHEETSDIR}/${csvFile} ${groupname}-dm@calculon.hpc.rug.nl:${RAWDATADIRPRM}/${filePrefix}/ - scp ${SAMPLESHEETSDIR}/${csvFile} ${groupname}-dm@calculon.hpc.rug.nl:${SAMPLESHEETSPRMDIR} + if [ ${myhost} == "calculon" ] + then + scp ${SAMPLESHEETSDIR}/${csvFile} ${groupname}-dm@localhost:${RAWDATADIRPRM}/${filePrefix}/ + scp ${SAMPLESHEETSDIR}/${csvFile} ${groupname}-dm@localhost:${SAMPLESHEETSPRMDIR} + else + scp ${SAMPLESHEETSDIR}/${csvFile} ${groupname}-dm@calculon.hpc.rug.nl:${RAWDATADIRPRM}/${filePrefix}/ + scp ${SAMPLESHEETSDIR}/${csvFile} ${groupname}-dm@calculon.hpc.rug.nl:${SAMPLESHEETSPRMDIR} + + fi echo "finished copying data to calculon" >> ${LOGGER} echo "finished with rawdata" >> ${LOGDIR}/${filePrefix}/${filePrefix}.copyToPrm.logger diff --git a/leucine-zipper.gcc.rug.nl.cfg b/leucine-zipper.gcc.rug.nl.cfg old mode 100644 new mode 100755 diff --git a/mailError.sh b/mailError.sh index 2ed29ee8..8f8ddbfa 100755 --- a/mailError.sh +++ b/mailError.sh @@ -37,11 +37,11 @@ do elif [ "${groupname}" == "umcg-gd" ] then echo "mailTo is umcg-gd" - if [ -f /groups/umcg-gd/${tmpDirectory}/logs/mailinglistDiagnostiek.txt ] + if [ -f /groups/umcg-gd/${tmpDirectory}/logs/mailinglistDiagnostiekCrash.txt ] then - mailTo=$(cat /groups/umcg-gd/${tmpDirectory}/logs/mailinglistDiagnostiek.txt) + mailTo=$(cat /groups/umcg-gd/${tmpDirectory}/logs/mailinglistDiagnostiekCrash.txt) else - echo "mailingListDiagnostiek.txt bestaat niet!!" + echo "mailingListDiagnostiekCrash.txt bestaat niet!!" 
exit 0 fi fi diff --git a/pipelineFinished.sh b/pipelineFinished.sh index 2f6dfb51..fc4501ca 100755 --- a/pipelineFinished.sh +++ b/pipelineFinished.sh @@ -92,6 +92,17 @@ do exit 0 fi fi + cd ${PROJECTSDIR}/${projectName}/*/jobs/ + zip -gr ${PROJECTSDIR}/${projectName}/*/jobs//allJobs.zip *.err + zip -gr ${PROJECTSDIR}/${projectName}/*/jobs//allJobs.zip *.out + zip -gr ${PROJECTSDIR}/${projectName}/*/jobs//allJobs.zip *.sh.finished + zip -gr ${PROJECTSDIR}/${projectName}/*/jobs//allJobs.zip *.env + zip -gr ${PROJECTSDIR}/${projectName}/*/jobs//allJobs.zip *.sh + zip -gr ${PROJECTSDIR}/${projectName}/*/jobs//allJobs.zip molgenis.* + + echo "all files in the jobs directory are now zipped into one file" + + rm ${PROJECTSDIR}/${projectName}/*/jobs/*{err,out,sh.finished,env,sh,CORRECT} printf "The results can be found: ${PROJECTSDIR}/${projectName} \n\nCheers from the GCC :)"| mail -s "NGS_DNA pipeline is finished for project ${projectName} on `date +%d/%m/%Y` `date +%H:%M`" ${mailTo} touch ${LOGDIR}/${projectName}/${projectName}.pipeline.finished.mailed diff --git a/sharedConfig.cfg b/sharedConfig.cfg old mode 100644 new mode 100755 index 4a72b184..b13f7e13 --- a/sharedConfig.cfg +++ b/sharedConfig.cfg @@ -9,5 +9,3 @@ SAMPLESHEETSPRMDIR="/groups/${GROUP}/prm02/rawdata/Samplesheets" SAMPLESHEETSDIR="${WORKDIR}/Samplesheets" GATTACA="/groups/${GROUP}/scr01/" ONTVANGER="helpdesk.gcc.groningen@gmail.com" -DNA="NGS_DNA/3.2.2-Molgenis-Compute-v16.04.1-Java-1.8.0_45" -RNA="NGS_RNA/3.2.2-Molgenis-Compute-v15.12.4-Java-1.8.0_45" diff --git a/startPipeline.sh b/startPipeline.sh index cd79db37..c29e43dc 100755 --- a/startPipeline.sh +++ b/startPipeline.sh @@ -6,25 +6,20 @@ set -u groupname=$1 MYINSTALLATIONDIR=$( cd -P "$( dirname "$0" )" && pwd ) -##source config file (zinc-finger.gcc.rug.nl.cfg, leucine-zipper.gcc.rug.nl OR gattaca.cfg) +##source config file (zinc-finger.gcc.rug.nl.cfg, leucine-zipper.gcc.rug.nl, calculon.cfg OR gattaca.cfg) myhost=$(hostname) - +echo 
"MYINSTALLDIR ${MYINSTALLATIONDIR}" . ${MYINSTALLATIONDIR}/${groupname}.cfg . ${MYINSTALLATIONDIR}/${myhost}.cfg . ${MYINSTALLATIONDIR}/sharedConfig.cfg -pipeline="dna" NGS_DNA="3.2.6" - -if [ "${pipeline}" == "dna" ] -then - module load NGS_DNA/${NGS_DNA} -fi +NGS_RNA="3.2.4-Molgenis-Compute-v16.05.1-Java-1.8.0_45" count=0 echo "Logfiles will be written to $LOGDIR" - +echo "Samplesheets= ${SAMPLESHEETSDIR}" for i in $(ls ${SAMPLESHEETSDIR}/*.csv) do csvFile=$(basename $i) @@ -40,15 +35,42 @@ do array=($HEADER) IFS=$OLDIFS count=1 + + pipeline="DNA" + specie="homo_sapiens" for j in "${array[@]}" do if [ "${j}" == "project" ] then awk -F"," '{print $'$count'}' ${LOGDIR}/TMP/${filePrefix}.tmp > ${LOGDIR}/TMP/${filePrefix}.tmp2 - fi + elif [[ "${j}" == *"SampleType"* ]] + then + awk -F"," '{print $'$count'}' ${LOGDIR}/TMP/${filePrefix}.tmp > ${LOGDIR}/TMP/${filePrefix}.whichPipeline + pipeline=$(head -1 ${LOGDIR}/TMP/${filePrefix}.whichPipeline) + + elif [[ "${j}" == "specie" ]] + then + awk -F"," '{print $'$count'}' ${LOGDIR}/TMP/${filePrefix}.tmp > ${LOGDIR}/TMP/${filePrefix}.specie + specie=$(head -1 ${LOGDIR}/TMP/${filePrefix}.specie) + elif [ "${j}" == "capturingKit" ] + then + awk -F"," '{print $'$count'}' ${LOGDIR}/TMP/${filePrefix}.tmp > ${LOGDIR}/TMP/${filePrefix}.capturingKit + + fi count=$((count + 1)) done + cat ${LOGDIR}/TMP/${filePrefix}.tmp2 | sort -V | uniq > ${LOGDIR}/TMP/${filePrefix}.uniq.projects + + if [[ "${pipeline}" == *"RNA"* ]] + then + module load NGS_RNA/${NGS_RNA} + echo "RNA" + elif [ "${pipeline}" == "DNA" ] + then + module load NGS_DNA/${NGS_DNA} + echo "DNA" + fi PROJECTARRAY=() while read line @@ -58,21 +80,11 @@ do done<${LOGDIR}/TMP/${filePrefix}.uniq.projects count=1 - ## Know which capturing kits - for j in "${array[@]}" - do - if [ "${j}" == "capturingKit" ] - then - - awk -F"," '{print $'$count'}' ${LOGDIR}/TMP/${filePrefix}.tmp > ${LOGDIR}/TMP/${filePrefix}.capturingKit - fi - count=$((count + 1)) - done cat 
${LOGDIR}/TMP/${filePrefix}.capturingKit | sort -V | uniq > ${LOGDIR}/TMP/${filePrefix}.uniq.capturingKits miSeqRun="no" while read line do - if [[ "${line}" == *"CARDIO_v"* || "${line}" == *"DER_v"* || "${line}" == *"DYS_v"* || "${line}" == *"EPI_v"* || "${line}" == *"LEVER_v"* || "${line}" == *"NEURO_v"* || "${line}" == *"ONCO_v"* || "${line}" == *"PCS_v"* ]] + if [[ "${line}" == *"CARDIO_v"* || "${line}" == *"DER_v"* || "${line}" == *"DYS_v"* || "${line}" == *"EPI_v"* || "${line}" == *"FH_v"* || "${line}" == *"LEVER_v"* || "${line}" == *"MYO_v"* || "${line}" == *"NEURO_v"* || "${line}" == *"ONCO_v"* || "${line}" == *"PCS_v"* || "${line}" == *"TID_v"* ]] then miSeqRun="yes" break @@ -91,7 +103,16 @@ do ### Decide if the scripts should be created (per Samplesheet) ## # - if [[ -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToDiagnosticsCluster && ! -f $LOGDIR/${filePrefix}/${filePrefix}.scriptsGenerated ]] + function finish { + if [ -f ${LOGDIR}/${filePrefix}/${filePrefix}.pipeline.locked ] + then + echo "${filePrefix} TRAPPED" + rm ${LOGDIR}/${filePrefix}/${filePrefix}.pipeline.locked + fi + } + trap finish HUP INT QUIT TERM EXIT ERR + + if [[ -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToDiagnosticsCluster || -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToCalculonCluster ]] && [ ! -f $LOGDIR/${filePrefix}/${filePrefix}.scriptsGenerated ] then ### Step 4: Does the pipeline need to run? 
if [ "${pipeline}" == "RNA-Lexogen-reverse" ] @@ -102,7 +123,64 @@ do echo "RNA-Lexogen" >> ${LOGGER} elif [ "${pipeline}" == "RNA" ] then - echo "RNA" >> ${LOGGER} + + projectName="" + workflowRNA="hisat" + build="b37" + + for PROJECT in ${PROJECTARRAY[@]} + do + projectName=${PROJECT} + done + + echo "RNA" >> ${LOGGER} + echo "WE ARE IN" + + + # + ## CHANGE WHEN FINISHED TESTING + ### + EBROOTNGS_AUTOMATED=/home/umcg-rkanninga/github/NGS_Automated/ + ### + ## + # + + if [[ "${projectName}" == *"Lexogen"* ]] + then + workflowRNA="lexogen" + fi + # callithrix_jacchus, mus_musculus, homo_sapiens + if [ $specie != "homo_sapiens" ] + then + build="b38" + fi + + mkdir -p ${GENERATEDSCRIPTS}/${filePrefix}/ + echo "copying $EBROOTNGS_AUTOMATED/automated_RNA_generate_template.sh to ${GENERATEDSCRIPTS}/${filePrefix}/generate.sh" >> $LOGGER + + cp $EBROOTNGS_AUTOMATED/automated_RNA_generate_template.sh ${GENERATEDSCRIPTS}/${filePrefix}/generate.sh + + perl -pi -e "s|VERSIONFROMSTARTPIPELINESCRIPT|${NGS_RNA}|" ${GENERATEDSCRIPTS}/${filePrefix}/generate.sh + + if [ -f ${GENERATEDSCRIPTS}/${filePrefix}/${filePrefix}.csv ] + then + echo "${GENERATEDSCRIPTS}/${filePrefix}/${filePrefix}.csv already existed, will now be removed and will be replaced by a fresh copy" >> $LOGGER + rm ${GENERATEDSCRIPTS}/${filePrefix}/${filePrefix}.csv + fi + + cp ${SAMPLESHEETSDIR}/${csvFile} ${GENERATEDSCRIPTS}/${filePrefix}/${filePrefix}.csv + + cd ${GENERATEDSCRIPTS}/${filePrefix}/ + + echo "sh ${GENERATEDSCRIPTS}/${filePrefix}/generate.sh "${filePrefix}" ${build} ${specie} ${workflowRNA}" + + sh ${GENERATEDSCRIPTS}/${filePrefix}/generate.sh "${filePrefix}" ${build} ${specie} ${workflowRNA} > ${GENERATEDSCRIPTS}/${filePrefix}/generate.logger 2>&1 + cd scripts + touch ${LOGDIR}/${filePrefix}/${filePrefix}.pipeline.locked + sh submit.sh + rm ${LOGDIR}/${filePrefix}/${filePrefix}.pipeline.locked + touch $LOGDIR/${filePrefix}/${filePrefix}.scriptsGenerated + elif [ "${pipeline}" == "dna" ] then if 
pipelineVersion=$(module list | grep -o -P 'NGS_DNA(.+)') @@ -117,7 +195,7 @@ do pipelineVersion=$(module list | grep -o -P 'NGS_DNA(.+)') printf "The version which is now loaded is $pipelineVersion${normal}\n\n" fi - mkdir -p ${GENERATEDSCRIPTS}/${run}_${sequencer}/ + mkdir -p ${GENERATEDSCRIPTS}/${filePrefix}/ batching="_chr" @@ -126,27 +204,28 @@ do batching="_small" fi - echo "copying $EBROOTNGS_AUTOMATED/automated_generate_template.sh to ${GENERATEDSCRIPTS}/${run}_${sequencer}/generate.sh" >> $LOGGER - cp ${EBROOTNGS_AUTOMATED}/automated_generate_template.sh ${GENERATEDSCRIPTS}/${run}_${sequencer}/generate.sh + echo "copying $EBROOTNGS_AUTOMATED/automated_generate_template.sh to ${GENERATEDSCRIPTS}/${filePrefix}/generate.sh" >> $LOGGER + cp ${EBROOTNGS_AUTOMATED}/automated_generate_template.sh ${GENERATEDSCRIPTS}/${filePrefix}/generate.sh - perl -pi -e "s|VERSIONFROMSTARTPIPELINESCRIPT|${NGS_DNA}|" ${GENERATEDSCRIPTS}/${run}_${sequencer}/generate.sh + perl -pi -e "s|VERSIONFROMSTARTPIPELINESCRIPT|${NGS_DNA}|" ${GENERATEDSCRIPTS}/${filePrefix}/generate.sh - if [ -f ${GENERATEDSCRIPTS}/${run}_${sequencer}/${run}_${sequencer}.csv ] + if [ -f ${GENERATEDSCRIPTS}/${filePrefix}/${filePrefix}.csv ] then - echo "${GENERATEDSCRIPTS}/${run}_${sequencer}/${run}_${sequencer}.csv already existed, will now be removed and will be replaced by a fresh copy" >> $LOGGER - rm ${GENERATEDSCRIPTS}/${run}_${sequencer}/${run}_${sequencer}.csv + echo "${GENERATEDSCRIPTS}/${filePrefix}/${filePrefix}.csv already existed, will now be removed and will be replaced by a fresh copy" >> $LOGGER + rm ${GENERATEDSCRIPTS}/${filePrefix}/${filePrefix}.csv fi - cp ${SAMPLESHEETSDIR}/${csvFile} ${GENERATEDSCRIPTS}/${run}_${sequencer}/${run}_${sequencer}.csv + cp ${SAMPLESHEETSDIR}/${csvFile} ${GENERATEDSCRIPTS}/${filePrefix}/${filePrefix}.csv - cd ${GENERATEDSCRIPTS}/${run}_${sequencer}/ + cd ${GENERATEDSCRIPTS}/${filePrefix}/ - sh ${GENERATEDSCRIPTS}/${run}_${sequencer}/generate.sh 
"${run}_${sequencer}" ${batching} > ${GENERATEDSCRIPTS}/${run}_${sequencer}/generate.logger 2>&1 + sh ${GENERATEDSCRIPTS}/${filePrefix}/generate.sh "${filePrefix}" ${batching} > ${GENERATEDSCRIPTS}/${filePrefix}/generate.logger 2>&1 cd scripts - - sh submit.sh - touch $LOGDIR/${filePrefix}/${filePrefix}.scriptsGenerated + touch ${LOGDIR}/${filePrefix}/${filePrefix}.pipeline.locked + sh submit.sh + rm ${LOGDIR}/${filePrefix}/${filePrefix}.pipeline.locked + touch $LOGDIR/${filePrefix}/${filePrefix}.scriptsGenerated fi fi @@ -163,19 +242,19 @@ do mkdir ${LOGDIR}/${PROJECT} fi - function finish { + function finishProject { if [ -f ${LOGDIR}/${PROJECT}/${PROJECT}.pipeline.locked ] then - echo "TRAPPED" + echo "${PROJECT} TRAPPED" rm ${LOGDIR}/${PROJECT}/${PROJECT}.pipeline.locked fi } - trap finish HUP INT QUIT TERM EXIT ERR + trap finishProject HUP INT QUIT TERM EXIT ERR WHOAMI=$(whoami) HOSTN=$(hostname) LOGGER=${LOGDIR}/${PROJECT}/${PROJECT}.pipeline.logger - if [[ ! -f ${LOGDIR}/${PROJECT}/${PROJECT}.pipeline.started && ! -f ${LOGDIR}/${PROJECT}/${PROJECT}.pipeline.locked ]] + if [[ ! -f ${LOGDIR}/${PROJECT}/${PROJECT}.pipeline.started && ! -f ${LOGDIR}/${PROJECT}/${PROJECT}.pipeline.locked && ! -f ${LOGDIR}/${PROJECT}/${PROJECT}.pipeline.finished ]] then touch ${LOGDIR}/${PROJECT}/${PROJECT}.pipeline.locked cd ${PROJECTSDIR}/${PROJECT}/run01/jobs/ @@ -191,3 +270,6 @@ do done fi done + +trap - EXIT +exit 0 diff --git a/umcg-gaf.cfg b/umcg-gaf.cfg old mode 100644 new mode 100755 diff --git a/umcg-gd.cfg b/umcg-gd.cfg old mode 100644 new mode 100755 diff --git a/zinc-finger.gcc.rug.nl.cfg b/zinc-finger.gcc.rug.nl.cfg old mode 100644 new mode 100755