From eccbdfb6a285b0793cc485d5e79cc33a8bd8d275 Mon Sep 17 00:00:00 2001 From: RoanKanninga Date: Thu, 14 May 2020 14:00:07 +0200 Subject: [PATCH 1/5] small fixes everywhere --- bin/arrayConversion.sh | 14 ++++++++++++++ bin/copyRawDataToPrm.sh | 33 ++++++++++++++++----------------- bin/demultiplexing.sh | 4 ++-- bin/notifications.sh | 3 +-- etc/umcg-gsad.cfg | 2 +- lib/sharedFunctions.bash | 4 ++-- 6 files changed, 36 insertions(+), 24 deletions(-) diff --git a/bin/arrayConversion.sh b/bin/arrayConversion.sh index be09d50b..fba8f338 100755 --- a/bin/arrayConversion.sh +++ b/bin/arrayConversion.sh @@ -193,6 +193,20 @@ else mkdir "${SCR_ROOT_DIR}/logs/${project}/" fi export JOB_CONTROLE_FILE_BASE="${SCR_ROOT_DIR}/logs/${project}/run01.arrayConversion" + if [[ -f "${JOB_CONTROLE_FILE_BASE}.finished" ]] + then + log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "Found ${JOB_CONTROLE_FILE_BASE}.finished: Skipping finished ${project}." + continue + elif [[ -f "${JOB_CONTROLE_FILE_BASE}.started" ]] + then + log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "Found ${JOB_CONTROLE_FILE_BASE}.started: Skipping ${project}, which is already getting processed." + continue + elif [[ ! -f "${SCR_ROOT_DIR}/Samplesheets/${project}.csv" ]] + then + log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "No samplesheet found: skipping ${project}." + continue + fi + export TRACE_FAILED="${SCR_ROOT_DIR}/logs/${project}/trace.failed" log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "Processing project ${project} ..." diff --git a/bin/copyRawDataToPrm.sh b/bin/copyRawDataToPrm.sh index 356f75cc..b338fb5e 100755 --- a/bin/copyRawDataToPrm.sh +++ b/bin/copyRawDataToPrm.sh @@ -75,20 +75,7 @@ function rsyncRuns() { log4Bash 'TRACE' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "${_controlFileBaseForFunction}.finished not present -> Continue..." printf '' > "${_controlFileBaseForFunction}.started" fi - # - # Determine whether an rsync is required for this run, which is the case when - # raw data production has finished successfully and this copy script has not. - # - log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Checking if ${rawDataItem} is complete and ready to be copied to prm." - if ssh "${DATA_MANAGER}"@"${sourceServerFQDN}" test -e "${SCR_ROOT_DIR}/logs/${_filePrefix}/${STEPBEFOREFINISHEDFILE}" - then - log4Bash 'TRACE' "${LINENO}" "${FUNCNAME:-main}" '0' "${DATA_MANAGER}@${sourceServerFQDN}:${SCR_ROOT_DIR}/logs/${_filePrefix}/${STEPBEFOREFINISHEDFILE} present." - printf '' > "${_controlFileBaseForFunction}.started" - else - log4Bash 'TRACE' "${LINENO}" "${FUNCNAME:-main}" '0' "${DATA_MANAGER}@${sourceServerFQDN}:${SCR_ROOT_DIR}/logs/${_filePrefix}/${STEPBEFOREFINISHEDFILE} absent." - log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Skipping ${rawDataItem}, which is not ready for transfer to prm yet." - return - fi + # # Track and Trace: log that we will start rsyncing to prm. # @@ -552,7 +539,7 @@ else # # Process this sample sheet / run and find how out how many raw data items it contains. # - filePrefix="$(basename "${sampleSheet%."${SAMPLESHEET_EXT}"}")" + filePrefix="$(basename "${sampleSheet%."${SAMPLESHEET_EXT}"}")" controlFileBase="${PRM_ROOT_DIR}/logs/${filePrefix}/" runPrefix="run01" export JOB_CONTROLE_FILE_BASE="${controlFileBase}/${runPrefix}.${SCRIPT_NAME}" @@ -564,6 +551,18 @@ else log4Bash 'INFO' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "Processing run ${filePrefix} ..." fi + # + # Determine whether an rsync is required for this run, which is the case when + # raw data production has finished successfully and this copy script has not. + # + if ssh "${DATA_MANAGER}"@"${sourceServerFQDN}" test -e "${SCR_ROOT_DIR}/logs/${filePrefix}/${STEPBEFOREFINISHEDFILE}" + then + log4Bash 'TRACE' "${LINENO}" "${FUNCNAME:-main}" '0' "${DATA_MANAGER}@${sourceServerFQDN}:${SCR_ROOT_DIR}/logs/${filePrefix}/${STEPBEFOREFINISHEDFILE} present." + else + log4Bash 'TRACE' "${LINENO}" "${FUNCNAME:-main}" '0' "${DATA_MANAGER}@${sourceServerFQDN}:${SCR_ROOT_DIR}/logs/${filePrefix}/${STEPBEFOREFINISHEDFILE} absent." + continue + fi + # shellcheck disable=SC2174 mkdir -m 2770 -p "${PRM_ROOT_DIR}/logs/" # shellcheck disable=SC2174 @@ -627,11 +626,11 @@ else then log4Bash 'TRACE' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "${controlFileBase}/${runPrefix}.splitSamplesheetPerProject.finished present." rm -f "${JOB_CONTROLE_FILE_BASE}.failed" - log4Bash 'INFO' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "Finished processing ${runPrefix}." + log4Bash 'INFO' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "Finished processing ${filePrefix}." mv -v "${JOB_CONTROLE_FILE_BASE}."{started,finished} else log4Bash 'TRACE' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "${controlFileBase}/${runPrefix}.splitSamplesheetPerProject.finished absent -> splitSamplesheetPerProject failed." - log4Bash 'ERROR' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "Failed to process ${runPrefix}." + log4Bash 'ERROR' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "Failed to process ${filePrefix}." mv -v "${JOB_CONTROLE_FILE_BASE}."{started,failed} fi done diff --git a/bin/demultiplexing.sh b/bin/demultiplexing.sh index e2d49b11..5ec0f2c9 100755 --- a/bin/demultiplexing.sh +++ b/bin/demultiplexing.sh @@ -172,8 +172,8 @@ log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Log files will be written # Sequencer is writing to this location: ${SEQ_DIR} # Looping through sub dirs to see if all files. # -log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "ls -1 -d ${SEQ_DIR}/*/" -mapfile -t projects < <(find "${SEQ_DIR}/" -mindepth 1 -maxdepth 1 -type d) +log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "find "${SEQ_DIR}/" -mindepth 1 -maxdepth 1 -type d -o -type l" +mapfile -t projects < <(find "${SEQ_DIR}/" -mindepth 1 -maxdepth 1 -type d -o -type l) for i in "${projects[@]}" do diff --git a/bin/notifications.sh b/bin/notifications.sh index 874cc6c6..5e1706e9 100755 --- a/bin/notifications.sh +++ b/bin/notifications.sh @@ -209,7 +209,6 @@ function notification() { method="${traceArray[1]}" entity="status_${traceArray[2]}" field="${traceArray[3]}" - log4Bash 'TRACE' "${LINENO}" "${FUNCNAME:-main}" '0' "PROCESSING: ${_phase}:${_state} <${method}> <${entity}> <${field}>" if [ -e "${_tracingUploadFile}" ] then if grep -q "${_run}.${_phase}_${_state}" "${_tracingUploadFile}" @@ -247,7 +246,7 @@ function notification() { then log4Bash 'TRACE' "${LINENO}" "${FUNCNAME:-main}" '0' \ "adding ${_run}.${_phase}.${_state} to ${_tracingUploadFile}" - echo -e "${_run}.${_phase}.${_state}\t$(date +%FT%T%z)" >> "${_tracingUploadFile}" + echo -e "${_run}.${_phase}_${_state}\t$(date +%FT%T%z)" >> "${_tracingUploadFile}" else log4Bash 'TRACE' "${LINENO}" "${FUNCNAME:-main}" '0' "Failed in updating ${_run}.${_phase}.${_state} to ${MOLGENISSERVER}" fi diff --git a/etc/umcg-gsad.cfg b/etc/umcg-gsad.cfg index 3db7b337..f41770bc 100644 --- a/etc/umcg-gsad.cfg +++ b/etc/umcg-gsad.cfg @@ -2,7 +2,7 @@ GROUP='umcg-gsad' LAB='internal' PIPELINECOLUMN='pipeline' PROJECTCOLUMN='Project' -STEPBEFOREFINISHEDFILE='run01.AGCT.finished' +STEPBEFOREFINISHEDFILE='run01.arrayConversion.finished' PRMRAWDATA='array' declare -a RAWDATATYPES=( 'array/IDAT' diff --git a/lib/sharedFunctions.bash b/lib/sharedFunctions.bash index 84bec414..4a5fbe0c 100644 --- a/lib/sharedFunctions.bash +++ b/lib/sharedFunctions.bash @@ -204,7 +204,7 @@ function trackAndTracePostFromFile() { || { log4Bash 'ERROR' "${LINENO}" "${FUNCNAME:-main}" '0' "Failed to login at ${MOLGENISSERVER}." \ 2>&1 | tee -a "${TRACE_FAILED}" \ - && return + && return 1 } _token="${_curlResponse:10:32}" # @@ -274,7 +274,7 @@ function trackAndTracePut() { || { log4Bash 'ERROR' "${LINENO}" "${FUNCNAME:-main}" '0' "Failed to login at ${MOLGENISSERVER}." \ 2>&1 | tee -a "${TRACE_FAILED}" \ - && return + && return 1 } _token="${_curlResponse:10:32}" # From 5adc522157f32e0e1ce067becda90bf4fefa1854 Mon Sep 17 00:00:00 2001 From: RoanKanninga Date: Thu, 14 May 2020 14:04:04 +0200 Subject: [PATCH 2/5] removed indentation --- bin/copyRawDataToPrm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/copyRawDataToPrm.sh b/bin/copyRawDataToPrm.sh index b338fb5e..2478eb0e 100755 --- a/bin/copyRawDataToPrm.sh +++ b/bin/copyRawDataToPrm.sh @@ -539,7 +539,7 @@ else # # Process this sample sheet / run and find how out how many raw data items it contains. # - filePrefix="$(basename "${sampleSheet%."${SAMPLESHEET_EXT}"}")" + filePrefix="$(basename "${sampleSheet%."${SAMPLESHEET_EXT}"}")" controlFileBase="${PRM_ROOT_DIR}/logs/${filePrefix}/" runPrefix="run01" export JOB_CONTROLE_FILE_BASE="${controlFileBase}/${runPrefix}.${SCRIPT_NAME}" From 8e053743b8c840a2e561f35df25b31c1c419becd Mon Sep 17 00:00:00 2001 From: RoanKanninga Date: Wed, 20 May 2020 06:23:14 +0200 Subject: [PATCH 3/5] last fixes --- bin/startPipeline.sh | 7 +++---- etc/umcg-atd.cfg | 2 +- etc/umcg-gap.cfg | 2 +- etc/umcg-gd.cfg | 2 +- etc/umcg-gsad.cfg | 6 +++--- 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/bin/startPipeline.sh b/bin/startPipeline.sh index f671ed4f..ac48c698 100755 --- a/bin/startPipeline.sh +++ b/bin/startPipeline.sh @@ -195,7 +195,6 @@ function submitPipeline () { local _sequencingStartDateIndex local _sequencer local _sequencerIndex - local _run local _runIndex local _flowcell local _flowcellIndex @@ -247,8 +246,8 @@ function submitPipeline () { fi if [[ -n "${sampleSheetColumnOffsets['run']+isset}" ]] then - _runIndex=$((${sampleSheetColumnOffsets['run']} + 1)) - _run=$(tail -n +2 "${TMP_ROOT_DIR}/projects/${_project}/${_run}/jobs/${project}.${SAMPLESHEET_EXT}" | awk -v run="${_runIndex}" 'BEGIN {FS=","}{print $run}' | head -1) + _runIdIndex=$((${sampleSheetColumnOffsets['run']} + 1)) + _runId=$(tail -n +2 "${TMP_ROOT_DIR}/projects/${_project}/${_run}/jobs/${project}.${SAMPLESHEET_EXT}" | awk -v runId="${_runIdIndex}" 'BEGIN {FS=","}{print $runId}' | head -1) fi if [[ -n "${sampleSheetColumnOffsets['flowcell']+isset}" ]] then @@ -505,7 +504,7 @@ do # Generate scripts (per sample sheet). # generateScripts "${project}" "${pipelineRun}" "${sampleType}" - # + # # Submit generated job scripts (per project). # if [[ -e "${TMP_ROOT_DIR}/logs/${project}/${pipelineRun}.generateScripts.finished" ]] diff --git a/etc/umcg-atd.cfg b/etc/umcg-atd.cfg index 5f18cf6e..1456622d 100644 --- a/etc/umcg-atd.cfg +++ b/etc/umcg-atd.cfg @@ -17,8 +17,8 @@ declare -a NOTIFICATION_ORDER_PHASE_WITH_STATE=( 'demultiplexing:failed' 'demultiplexing:finished' 'demultiplexingTiming:failed' -'copyRawDataToPrm:trace_putFromFile_overview.csv' 'copyRawDataToPrm:trace_post_projects.csv' +'copyRawDataToPrm:trace_putFromFile_overview.csv' 'copyRawDataToPrm:started' 'copyRawDataToPrm:failed' 'copyRawDataToPrm:finished' diff --git a/etc/umcg-gap.cfg b/etc/umcg-gap.cfg index 0120a988..a9ed1006 100644 --- a/etc/umcg-gap.cfg +++ b/etc/umcg-gap.cfg @@ -16,8 +16,8 @@ declare -a NOTIFICATION_ORDER_PHASE_WITH_STATE=( 'arrayConversion:started' 'arrayConversion:failed' 'arrayConversion:finished' -'copyRawDataToPrm:trace_putFromFile_overview.csv' 'copyRawDataToPrm:trace_post_projects.csv' +'copyRawDataToPrm:trace_putFromFile_overview.csv' 'copyRawDataToPrm:started' 'copyRawDataToPrm:failed' 'copyRawDataToPrm:finished' diff --git a/etc/umcg-gd.cfg b/etc/umcg-gd.cfg index cb0b14ee..7a6d2a46 100644 --- a/etc/umcg-gd.cfg +++ b/etc/umcg-gd.cfg @@ -17,8 +17,8 @@ declare -a NOTIFICATION_ORDER_PHASE_WITH_STATE=( 'demultiplexing:failed' 'demultiplexing:finished' 'demultiplexingTiming:failed' -'copyRawDataToPrm:trace_putFromFile_overview.csv' 'copyRawDataToPrm:trace_post_projects.csv' +'copyRawDataToPrm:trace_putFromFile_overview.csv' 'copyRawDataToPrm:started' 'copyRawDataToPrm:failed' 'copyRawDataToPrm:finished' diff --git a/etc/umcg-gsad.cfg b/etc/umcg-gsad.cfg index f41770bc..641e99d1 100644 --- a/etc/umcg-gsad.cfg +++ b/etc/umcg-gsad.cfg @@ -63,9 +63,9 @@ declare -A NOTIFY_FOR_PHASE_WITH_STATE=( ['pipeline:rejectedsamples']='email' ['pipelineTiming:failed']='email' ['calculateProjectMd5s:failed']='email' - ['copyProjectDataToPrm:failed']='trace/put/overview/copy_results_prm|email' - ['copyProjectDataToPrm:started']='trace/put/overview/copy_results_prm' - ['copyProjectDataToPrm:finished']='trace/put/overview/copy_results_prm|email' + ['copyProjectDataToPrm:failed']='trace/put/projects/copy_results_prm|email' + ['copyProjectDataToPrm:started']='trace/put/projects/copy_results_prm' + ['copyProjectDataToPrm:finished']='trace/put/projects/copy_results_prm|email' ['copyProjectDataToPrm:trace_putFromFile_projects.csv']='trace/putFromFile/projects/finishedDate' ['copyProjectDataToPrmTiming:failed']='email' ) \ No newline at end of file From 09747779e5f2384aeb81c4c386e0cd7cc1374867 Mon Sep 17 00:00:00 2001 From: RoanKanninga Date: Wed, 20 May 2020 06:47:22 +0200 Subject: [PATCH 4/5] last fixes --- bin/copyRawDataToPrm.sh | 5 ++--- bin/demultiplexing.sh | 2 +- bin/startPipeline.sh | 5 +++-- etc/umcg-gap.cfg | 6 +++--- etc/umcg-gd.cfg | 6 +++--- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/bin/copyRawDataToPrm.sh b/bin/copyRawDataToPrm.sh index 2478eb0e..575b4947 100755 --- a/bin/copyRawDataToPrm.sh +++ b/bin/copyRawDataToPrm.sh @@ -59,8 +59,7 @@ function contains() { function rsyncRuns() { local _rawDataItem="${1}" - local _filePrefix="${2}" - local _controlFileBase="${3}" + local _controlFileBase="${2}" local _controlFileBaseForFunction="${_controlFileBase}.${FUNCNAME[0]}" log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "Processing ${_rawDataItem} ..." # @@ -602,7 +601,7 @@ else do log4Bash 'INFO' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "Processing ${rawDataItem} ..." log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Checking if ${rawDataItem} is complete and ready to be copied to prm." - rsyncRuns "${rawDataItem}" "${filePrefix}" "${controlFileBase}/${rawDataItem}" + rsyncRuns "${rawDataItem}" "${controlFileBase}/${rawDataItem}" if [[ -e "${controlFileBase}/${rawDataItem}.rsyncRuns.finished" ]] then log4Bash 'TRACE' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "${controlFileBase}/${rawDataItem}.rsyncRuns.finished present." diff --git a/bin/demultiplexing.sh b/bin/demultiplexing.sh index 5ec0f2c9..39c046a3 100755 --- a/bin/demultiplexing.sh +++ b/bin/demultiplexing.sh @@ -172,7 +172,7 @@ log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Log files will be written # Sequencer is writing to this location: ${SEQ_DIR} # Looping through sub dirs to see if all files. # -log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "find "${SEQ_DIR}/" -mindepth 1 -maxdepth 1 -type d -o -type l" +log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "find ${SEQ_DIR}/ -mindepth 1 -maxdepth 1 -type d -o -type l" mapfile -t projects < <(find "${SEQ_DIR}/" -mindepth 1 -maxdepth 1 -type d -o -type l) for i in "${projects[@]}" diff --git a/bin/startPipeline.sh b/bin/startPipeline.sh index ac48c698..3612a153 100755 --- a/bin/startPipeline.sh +++ b/bin/startPipeline.sh @@ -195,7 +195,8 @@ function submitPipeline () { local _sequencingStartDateIndex local _sequencer local _sequencerIndex - local _runIndex + local _runIdIndex + local _runId local _flowcell local _flowcellIndex local _capturingKit @@ -260,7 +261,7 @@ function submitPipeline () { _capturingKitIndex=$((${sampleSheetColumnOffsets['capturingKit']} + 1)) _capturingKit=$(tail -n +2 "${TMP_ROOT_DIR}/projects/${_project}/${_run}/jobs/${project}.${SAMPLESHEET_EXT}" | awk -v capt="${_capturingKitIndex}" 'BEGIN {FS=","}{print $capt}' | awk 'BEGIN{FS="/"}{print $2}' | head -1) fi - _filePrefix="${_sequencingStartDate}_${_sequencer}_${_run}_${_flowcell}" + _filePrefix="${_sequencingStartDate}_${_sequencer}_${_runId}_${_flowcell}" # # Track and Trace: log that we will start running jobs on the cluster. # diff --git a/etc/umcg-gap.cfg b/etc/umcg-gap.cfg index a9ed1006..41214cdd 100644 --- a/etc/umcg-gap.cfg +++ b/etc/umcg-gap.cfg @@ -63,9 +63,9 @@ declare -A NOTIFY_FOR_PHASE_WITH_STATE=( ['pipeline:rejectedsamples']='email' ['pipelineTiming:failed']='email' ['calculateProjectMd5s:failed']='email' - ['copyProjectDataToPrm:failed']='trace/put/overview/copy_results_prm|email' - ['copyProjectDataToPrm:started']='trace/put/overview/copy_results_prm' - ['copyProjectDataToPrm:finished']='trace/put/overview/copy_results_prm|email' + ['copyProjectDataToPrm:failed']='trace/put/projects/copy_results_prm|email' + ['copyProjectDataToPrm:started']='trace/put/projects/copy_results_prm' + ['copyProjectDataToPrm:finished']='trace/put/projects/copy_results_prm|email' ['copyProjectDataToPrm:trace_putFromFile_projects.csv']='trace/putFromFile/projects/finishedDate' ['copyProjectDataToPrmTiming:failed']='email' ) \ No newline at end of file diff --git a/etc/umcg-gd.cfg b/etc/umcg-gd.cfg index 7a6d2a46..224e8407 100644 --- a/etc/umcg-gd.cfg +++ b/etc/umcg-gd.cfg @@ -65,9 +65,9 @@ declare -A NOTIFY_FOR_PHASE_WITH_STATE=( ['pipeline:rejectedsamples']='email' ['pipelineTiming:failed']='email' ['calculateProjectMd5s:failed']='email' - ['copyProjectDataToPrm:failed']='trace/put/overview/copy_results_prm|email' - ['copyProjectDataToPrm:started']='trace/put/overview/copy_results_prm' - ['copyProjectDataToPrm:finished']='trace/put/overview/copy_results_prm|email' + ['copyProjectDataToPrm:failed']='trace/put/projects/copy_results_prm|email' + ['copyProjectDataToPrm:started']='trace/put/projects/copy_results_prm' + ['copyProjectDataToPrm:finished']='trace/put/projects/copy_results_prm|email' ['copyProjectDataToPrm:trace_putFromFile_projects.csv']='trace/putFromFile/projects/finishedDate' ['copyProjectDataToPrmTiming:failed']='email' ) From c4daec8902f3b977c2afa9969182ead347c59b79 Mon Sep 17 00:00:00 2001 From: RoanKanninga Date: Wed, 20 May 2020 13:45:26 +0200 Subject: [PATCH 5/5] removed unncessary tab --- bin/startPipeline.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/startPipeline.sh b/bin/startPipeline.sh index 3612a153..71106d0e 100755 --- a/bin/startPipeline.sh +++ b/bin/startPipeline.sh @@ -505,7 +505,7 @@ do # Generate scripts (per sample sheet). # generateScripts "${project}" "${pipelineRun}" "${sampleType}" - # + # # Submit generated job scripts (per project). # if [[ -e "${TMP_ROOT_DIR}/logs/${project}/${pipelineRun}.generateScripts.finished" ]]