diff --git a/bin/arrayConversion.sh b/bin/arrayConversion.sh index be09d50b..fba8f338 100755 --- a/bin/arrayConversion.sh +++ b/bin/arrayConversion.sh @@ -193,6 +193,20 @@ else mkdir "${SCR_ROOT_DIR}/logs/${project}/" fi export JOB_CONTROLE_FILE_BASE="${SCR_ROOT_DIR}/logs/${project}/run01.arrayConversion" + if [[ -f "${JOB_CONTROLE_FILE_BASE}.finished" ]] + then + log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "Found ${JOB_CONTROLE_FILE_BASE}.finished: Skipping finished ${project}." + continue + elif [[ -f "${JOB_CONTROLE_FILE_BASE}.started" ]] + then + log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "Found ${JOB_CONTROLE_FILE_BASE}.started: Skipping ${project}, which is already getting processed." + continue + elif [[ ! -f "${SCR_ROOT_DIR}/Samplesheets/${project}.csv" ]] + then + log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "No samplesheet found: skipping ${project}." + continue + fi + export TRACE_FAILED="${SCR_ROOT_DIR}/logs/${project}/trace.failed" log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "Processing project ${project} ..." diff --git a/bin/copyRawDataToPrm.sh b/bin/copyRawDataToPrm.sh index 356f75cc..575b4947 100755 --- a/bin/copyRawDataToPrm.sh +++ b/bin/copyRawDataToPrm.sh @@ -59,8 +59,7 @@ function contains() { function rsyncRuns() { local _rawDataItem="${1}" - local _filePrefix="${2}" - local _controlFileBase="${3}" + local _controlFileBase="${2}" local _controlFileBaseForFunction="${_controlFileBase}.${FUNCNAME[0]}" log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "Processing ${_rawDataItem} ..." # @@ -75,20 +74,7 @@ function rsyncRuns() { log4Bash 'TRACE' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "${_controlFileBaseForFunction}.finished not present -> Continue..." printf '' > "${_controlFileBaseForFunction}.started" fi - # - # Determine whether an rsync is required for this run, which is the case when - # raw data production has finished successfully and this copy script has not. - # - log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Checking if ${rawDataItem} is complete and ready to be copied to prm." - if ssh "${DATA_MANAGER}"@"${sourceServerFQDN}" test -e "${SCR_ROOT_DIR}/logs/${_filePrefix}/${STEPBEFOREFINISHEDFILE}" - then - log4Bash 'TRACE' "${LINENO}" "${FUNCNAME:-main}" '0' "${DATA_MANAGER}@${sourceServerFQDN}:${SCR_ROOT_DIR}/logs/${_filePrefix}/${STEPBEFOREFINISHEDFILE} present." - printf '' > "${_controlFileBaseForFunction}.started" - else - log4Bash 'TRACE' "${LINENO}" "${FUNCNAME:-main}" '0' "${DATA_MANAGER}@${sourceServerFQDN}:${SCR_ROOT_DIR}/logs/${_filePrefix}/${STEPBEFOREFINISHEDFILE} absent." - log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Skipping ${rawDataItem}, which is not ready for transfer to prm yet." - return - fi + # # Track and Trace: log that we will start rsyncing to prm. # @@ -564,6 +550,18 @@ else log4Bash 'INFO' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "Processing run ${filePrefix} ..." fi + # + # Determine whether an rsync is required for this run, which is the case when + # raw data production has finished successfully and this copy script has not. + # + if ssh "${DATA_MANAGER}"@"${sourceServerFQDN}" test -e "${SCR_ROOT_DIR}/logs/${filePrefix}/${STEPBEFOREFINISHEDFILE}" + then + log4Bash 'TRACE' "${LINENO}" "${FUNCNAME:-main}" '0' "${DATA_MANAGER}@${sourceServerFQDN}:${SCR_ROOT_DIR}/logs/${filePrefix}/${STEPBEFOREFINISHEDFILE} present." + else + log4Bash 'TRACE' "${LINENO}" "${FUNCNAME:-main}" '0' "${DATA_MANAGER}@${sourceServerFQDN}:${SCR_ROOT_DIR}/logs/${filePrefix}/${STEPBEFOREFINISHEDFILE} absent." + continue + fi + # shellcheck disable=SC2174 mkdir -m 2770 -p "${PRM_ROOT_DIR}/logs/" # shellcheck disable=SC2174 @@ -603,7 +601,7 @@ else do log4Bash 'INFO' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "Processing ${rawDataItem} ..." log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Checking if ${rawDataItem} is complete and ready to be copied to prm." - rsyncRuns "${rawDataItem}" "${filePrefix}" "${controlFileBase}/${rawDataItem}" + rsyncRuns "${rawDataItem}" "${controlFileBase}/${rawDataItem}" if [[ -e "${controlFileBase}/${rawDataItem}.rsyncRuns.finished" ]] then log4Bash 'TRACE' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "${controlFileBase}/${rawDataItem}.rsyncRuns.finished present." @@ -627,11 +625,11 @@ else then log4Bash 'TRACE' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "${controlFileBase}/${runPrefix}.splitSamplesheetPerProject.finished present." rm -f "${JOB_CONTROLE_FILE_BASE}.failed" - log4Bash 'INFO' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "Finished processing ${runPrefix}." + log4Bash 'INFO' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "Finished processing ${filePrefix}." mv -v "${JOB_CONTROLE_FILE_BASE}."{started,finished} else log4Bash 'TRACE' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "${controlFileBase}/${runPrefix}.splitSamplesheetPerProject.finished absent -> splitSamplesheetPerProject failed." - log4Bash 'ERROR' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "Failed to process ${runPrefix}." + log4Bash 'ERROR' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "Failed to process ${filePrefix}." mv -v "${JOB_CONTROLE_FILE_BASE}."{started,failed} fi done diff --git a/bin/demultiplexing.sh b/bin/demultiplexing.sh index e2d49b11..39c046a3 100755 --- a/bin/demultiplexing.sh +++ b/bin/demultiplexing.sh @@ -172,8 +172,8 @@ log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Log files will be written # Sequencer is writing to this location: ${SEQ_DIR} # Looping through sub dirs to see if all files. # -log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "ls -1 -d ${SEQ_DIR}/*/" -mapfile -t projects < <(find "${SEQ_DIR}/" -mindepth 1 -maxdepth 1 -type d) +log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "find ${SEQ_DIR}/ -mindepth 1 -maxdepth 1 -type d -o -type l" +mapfile -t projects < <(find "${SEQ_DIR}/" -mindepth 1 -maxdepth 1 -type d -o -type l) for i in "${projects[@]}" do diff --git a/bin/notifications.sh b/bin/notifications.sh index 874cc6c6..5e1706e9 100755 --- a/bin/notifications.sh +++ b/bin/notifications.sh @@ -209,7 +209,6 @@ function notification() { method="${traceArray[1]}" entity="status_${traceArray[2]}" field="${traceArray[3]}" - log4Bash 'TRACE' "${LINENO}" "${FUNCNAME:-main}" '0' "PROCESSING: ${_phase}:${_state} <${method}> <${entity}> <${field}>" if [ -e "${_tracingUploadFile}" ] then if grep -q "${_run}.${_phase}_${_state}" "${_tracingUploadFile}" @@ -247,7 +246,7 @@ function notification() { then log4Bash 'TRACE' "${LINENO}" "${FUNCNAME:-main}" '0' \ "adding ${_run}.${_phase}.${_state} to ${_tracingUploadFile}" - echo -e "${_run}.${_phase}.${_state}\t$(date +%FT%T%z)" >> "${_tracingUploadFile}" + echo -e "${_run}.${_phase}_${_state}\t$(date +%FT%T%z)" >> "${_tracingUploadFile}" else log4Bash 'TRACE' "${LINENO}" "${FUNCNAME:-main}" '0' "Failed in updating ${_run}.${_phase}.${_state} to ${MOLGENISSERVER}" fi diff --git a/bin/startPipeline.sh b/bin/startPipeline.sh index f671ed4f..71106d0e 100755 --- a/bin/startPipeline.sh +++ b/bin/startPipeline.sh @@ -195,8 +195,8 @@ function submitPipeline () { local _sequencingStartDateIndex local _sequencer local _sequencerIndex - local _run - local _runIndex + local _runIdIndex + local _runId local _flowcell local _flowcellIndex local _capturingKit @@ -247,8 +247,8 @@ function submitPipeline () { fi if [[ -n "${sampleSheetColumnOffsets['run']+isset}" ]] then - _runIndex=$((${sampleSheetColumnOffsets['run']} + 1)) - _run=$(tail -n +2 "${TMP_ROOT_DIR}/projects/${_project}/${_run}/jobs/${project}.${SAMPLESHEET_EXT}" | awk -v run="${_runIndex}" 'BEGIN {FS=","}{print $run}' | head -1) + _runIdIndex=$((${sampleSheetColumnOffsets['run']} + 1)) + _runId=$(tail -n +2 "${TMP_ROOT_DIR}/projects/${_project}/${_run}/jobs/${project}.${SAMPLESHEET_EXT}" | awk -v runId="${_runIdIndex}" 'BEGIN {FS=","}{print $runId}' | head -1) fi if [[ -n "${sampleSheetColumnOffsets['flowcell']+isset}" ]] then @@ -261,7 +261,7 @@ function submitPipeline () { _capturingKitIndex=$((${sampleSheetColumnOffsets['capturingKit']} + 1)) _capturingKit=$(tail -n +2 "${TMP_ROOT_DIR}/projects/${_project}/${_run}/jobs/${project}.${SAMPLESHEET_EXT}" | awk -v capt="${_capturingKitIndex}" 'BEGIN {FS=","}{print $capt}' | awk 'BEGIN{FS="/"}{print $2}' | head -1) fi - _filePrefix="${_sequencingStartDate}_${_sequencer}_${_run}_${_flowcell}" + _filePrefix="${_sequencingStartDate}_${_sequencer}_${_runId}_${_flowcell}" # # Track and Trace: log that we will start running jobs on the cluster. # diff --git a/etc/umcg-atd.cfg b/etc/umcg-atd.cfg index 5f18cf6e..1456622d 100644 --- a/etc/umcg-atd.cfg +++ b/etc/umcg-atd.cfg @@ -17,8 +17,8 @@ declare -a NOTIFICATION_ORDER_PHASE_WITH_STATE=( 'demultiplexing:failed' 'demultiplexing:finished' 'demultiplexingTiming:failed' -'copyRawDataToPrm:trace_putFromFile_overview.csv' 'copyRawDataToPrm:trace_post_projects.csv' +'copyRawDataToPrm:trace_putFromFile_overview.csv' 'copyRawDataToPrm:started' 'copyRawDataToPrm:failed' 'copyRawDataToPrm:finished' diff --git a/etc/umcg-gap.cfg b/etc/umcg-gap.cfg index 0120a988..41214cdd 100644 --- a/etc/umcg-gap.cfg +++ b/etc/umcg-gap.cfg @@ -16,8 +16,8 @@ declare -a NOTIFICATION_ORDER_PHASE_WITH_STATE=( 'arrayConversion:started' 'arrayConversion:failed' 'arrayConversion:finished' -'copyRawDataToPrm:trace_putFromFile_overview.csv' 'copyRawDataToPrm:trace_post_projects.csv' +'copyRawDataToPrm:trace_putFromFile_overview.csv' 'copyRawDataToPrm:started' 'copyRawDataToPrm:failed' 'copyRawDataToPrm:finished' @@ -63,9 +63,9 @@ declare -A NOTIFY_FOR_PHASE_WITH_STATE=( ['pipeline:rejectedsamples']='email' ['pipelineTiming:failed']='email' ['calculateProjectMd5s:failed']='email' - ['copyProjectDataToPrm:failed']='trace/put/overview/copy_results_prm|email' - ['copyProjectDataToPrm:started']='trace/put/overview/copy_results_prm' - ['copyProjectDataToPrm:finished']='trace/put/overview/copy_results_prm|email' + ['copyProjectDataToPrm:failed']='trace/put/projects/copy_results_prm|email' + ['copyProjectDataToPrm:started']='trace/put/projects/copy_results_prm' + ['copyProjectDataToPrm:finished']='trace/put/projects/copy_results_prm|email' ['copyProjectDataToPrm:trace_putFromFile_projects.csv']='trace/putFromFile/projects/finishedDate' ['copyProjectDataToPrmTiming:failed']='email' ) \ No newline at end of file diff --git a/etc/umcg-gd.cfg b/etc/umcg-gd.cfg index cb0b14ee..224e8407 100644 --- a/etc/umcg-gd.cfg +++ b/etc/umcg-gd.cfg @@ -17,8 +17,8 @@ declare -a NOTIFICATION_ORDER_PHASE_WITH_STATE=( 'demultiplexing:failed' 'demultiplexing:finished' 'demultiplexingTiming:failed' -'copyRawDataToPrm:trace_putFromFile_overview.csv' 'copyRawDataToPrm:trace_post_projects.csv' +'copyRawDataToPrm:trace_putFromFile_overview.csv' 'copyRawDataToPrm:started' 'copyRawDataToPrm:failed' 'copyRawDataToPrm:finished' @@ -65,9 +65,9 @@ declare -A NOTIFY_FOR_PHASE_WITH_STATE=( ['pipeline:rejectedsamples']='email' ['pipelineTiming:failed']='email' ['calculateProjectMd5s:failed']='email' - ['copyProjectDataToPrm:failed']='trace/put/overview/copy_results_prm|email' - ['copyProjectDataToPrm:started']='trace/put/overview/copy_results_prm' - ['copyProjectDataToPrm:finished']='trace/put/overview/copy_results_prm|email' + ['copyProjectDataToPrm:failed']='trace/put/projects/copy_results_prm|email' + ['copyProjectDataToPrm:started']='trace/put/projects/copy_results_prm' + ['copyProjectDataToPrm:finished']='trace/put/projects/copy_results_prm|email' ['copyProjectDataToPrm:trace_putFromFile_projects.csv']='trace/putFromFile/projects/finishedDate' ['copyProjectDataToPrmTiming:failed']='email' ) diff --git a/etc/umcg-gsad.cfg b/etc/umcg-gsad.cfg index 3db7b337..641e99d1 100644 --- a/etc/umcg-gsad.cfg +++ b/etc/umcg-gsad.cfg @@ -2,7 +2,7 @@ GROUP='umcg-gsad' LAB='internal' PIPELINECOLUMN='pipeline' PROJECTCOLUMN='Project' -STEPBEFOREFINISHEDFILE='run01.AGCT.finished' +STEPBEFOREFINISHEDFILE='run01.arrayConversion.finished' PRMRAWDATA='array' declare -a RAWDATATYPES=( 'array/IDAT' @@ -63,9 +63,9 @@ declare -A NOTIFY_FOR_PHASE_WITH_STATE=( ['pipeline:rejectedsamples']='email' ['pipelineTiming:failed']='email' ['calculateProjectMd5s:failed']='email' - ['copyProjectDataToPrm:failed']='trace/put/overview/copy_results_prm|email' - ['copyProjectDataToPrm:started']='trace/put/overview/copy_results_prm' - ['copyProjectDataToPrm:finished']='trace/put/overview/copy_results_prm|email' + ['copyProjectDataToPrm:failed']='trace/put/projects/copy_results_prm|email' + ['copyProjectDataToPrm:started']='trace/put/projects/copy_results_prm' + ['copyProjectDataToPrm:finished']='trace/put/projects/copy_results_prm|email' ['copyProjectDataToPrm:trace_putFromFile_projects.csv']='trace/putFromFile/projects/finishedDate' ['copyProjectDataToPrmTiming:failed']='email' ) \ No newline at end of file diff --git a/lib/sharedFunctions.bash b/lib/sharedFunctions.bash index 84bec414..4a5fbe0c 100644 --- a/lib/sharedFunctions.bash +++ b/lib/sharedFunctions.bash @@ -204,7 +204,7 @@ function trackAndTracePostFromFile() { || { log4Bash 'ERROR' "${LINENO}" "${FUNCNAME:-main}" '0' "Failed to login at ${MOLGENISSERVER}." \ 2>&1 | tee -a "${TRACE_FAILED}" \ - && return + && return 1 } _token="${_curlResponse:10:32}" # @@ -274,7 +274,7 @@ function trackAndTracePut() { || { log4Bash 'ERROR' "${LINENO}" "${FUNCNAME:-main}" '0' "Failed to login at ${MOLGENISSERVER}." \ 2>&1 | tee -a "${TRACE_FAILED}" \ - && return + && return 1 } _token="${_curlResponse:10:32}" #