-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #204 from pneerincx/master
Improved checkSamplesheet procedure
- Loading branch information
Showing
4 changed files
with
194 additions
and
105 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
#!/usr/bin/env python
"""Sanity check a samplesheet (CSV) against the meta-data in its filename.

The filename is expected to look like
``<sequencingStartDate>_<sequencer>_<run>_<flowcell>[.ext...]``; any extra
underscore separated components are considered part of the flowcell.

Usage: --input <samplesheet.csv> --log <logfile>

The log file receives the single word ``OK`` when all checks pass, or one
error message per line otherwise. The exit status is 0 on success and 1 when
one or more checks failed.
"""

import argparse
import os
import csv
import sys
import re
from collections import defaultdict
from os.path import basename

# Columns that must be present and non-empty in every row.
REQUIRED_COLUMNS = (
    'externalSampleID', 'project', 'sequencer', 'sequencingStartDate',
    'flowcell', 'run', 'lane', 'seqType', 'prepKit', 'capturingKit',
    'barcode', 'barcodeType',
)
# Columns for which the user must explicitly fill in "None" when not applicable.
NONE_ALLOWED_COLUMNS = ('capturingKit', 'barcode', 'barcodeType')
# Columns whose value must match the filename, in the order they are reported.
FILENAME_COLUMNS = ('sequencer', 'sequencingStartDate', 'run', 'flowcell')


def _parse_filename(path):
    """Return the meta-data encoded in the filename of *path*.

    Returns a dict keyed by the names in FILENAME_COLUMNS, or None when the
    filename has fewer than four underscore separated components.
    """
    # Strip everything from the first dot onwards (e.g. ".csv.converted").
    file_name_base = re.sub(r'\..*$', '', basename(path))
    components = file_name_base.split('_')
    if len(components) < 4:
        return None
    return {
        'sequencingStartDate': components[0],
        'sequencer': components[1],
        'run': components[2],
        # Flowcell IDs may contain underscores themselves.
        'flowcell': '_'.join(components[3:]),
    }


def _check_rows(reader, expected):
    """Validate all rows from *reader* and return a list of error messages.

    *reader* is an iterable of dict-like rows (csv.DictReader) and *expected*
    is the dict produced by _parse_filename(). Row numbers in the messages
    count data rows starting at 1 (the header is not counted).
    """
    errors = []
    missing_reported = set()
    has_rows = False
    for number, row in enumerate(reader, 1):
        has_rows = True
        prefix = 'ERROR on line ' + str(number) + ': '
        for column in REQUIRED_COLUMNS:
            if column not in row:
                # A missing column affects every row: report each one only once.
                if column not in missing_reported:
                    missing_reported.add(column)
                    errors.append('ERROR: Required column is missing (or has a trailing space): ' + column + '.')
            elif not row[column]:
                # Covers both "" and None (row shorter than the header).
                if column in NONE_ALLOWED_COLUMNS:
                    errors.append(prefix + 'Variable ' + column + ' is empty! Please fill in "None" (to make sure it is not missing).')
                else:
                    errors.append(prefix + 'Variable ' + column + ' is empty!')
        for column in FILENAME_COLUMNS:
            value = row.get(column)
            if value is None:
                # Column missing or row truncated: already reported above.
                continue
            if value != expected[column]:
                errors.append(prefix + column + ' value in samplesheet (' + value
                              + ') does not match ' + column + ' in filename ('
                              + expected[column] + ').')
    if not has_rows:
        errors.append("File is empty?!")
    return errors


def main(argv=None):
    """Run the sanity check; return the exit status (0 = OK, 1 = errors).

    *argv* is the list of commandline arguments; defaults to sys.argv[1:].
    """
    parser = argparse.ArgumentParser(description='Process commandline opts.')
    parser.add_argument("--input", required=True)
    parser.add_argument("--log", required=True)
    args = parser.parse_args(argv)
    print("INFO: input = " + args.input)
    print("INFO: log = " + args.log)

    expected = _parse_filename(args.input)
    if expected is None:
        errors = ['ERROR: Cannot parse meta-data from filename ' + basename(args.input)
                  + '; expected <sequencingStartDate>_<sequencer>_<run>_<flowcell>.csv.']
    else:
        with open(args.input, 'r') as samplesheet:
            errors = _check_rows(csv.DictReader(samplesheet), expected)

    # Always write the log, so the caller can report what went wrong.
    with open(args.log, 'w') as log:
        log.write('\n'.join(errors) if errors else 'OK')
    if errors:
        print('\n'.join(errors))
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main())
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
#!/bin/bash
#
# Processes all *.csv samplesheets found in <installation dir>/Samplesheets/new/:
#  * Normalizes line endings on a ".converted" copy of each samplesheet.
#  * Runs the Python sanity check script with the same base name as this script on that copy.
#  * Moves approved samplesheets to <installation dir>/Samplesheets/.
#  * Sends an email notification (once per samplesheet) when the sanity check failed.
#

set -eu

SCRIPT_NAME="$(basename "${0}")"
SCRIPT_NAME="${SCRIPT_NAME%.*sh}"
INSTALLATION_DIR="$(cd -P "$(dirname "${0}")/.." && pwd)/"
declare sampleSheetsDir="${INSTALLATION_DIR}"'Samplesheets/'

echo "INFO: Processing samplesheets from ${sampleSheetsDir}/new/..."

#
# Collect samplesheets NUL-separated, so paths containing whitespace stay intact.
#
declare -a sampleSheets=()
while IFS= read -r -d '' foundSampleSheet
do
    sampleSheets+=("${foundSampleSheet}")
done < <(find "${sampleSheetsDir}/new/" -name '*.csv' -print0)

if [[ "${#sampleSheets[@]}" -eq 0 ]]
then
    echo "WARN: No samplesheets found in ${sampleSheetsDir}/new/."
    exit 0
else
    echo "DEBUG: samplesheets found: ${sampleSheets[*]}."
fi

for sampleSheet in "${sampleSheets[@]}"
do
    #
    # Create a copy of the original, so we preserve the original owner
    # and can check who to notify when something is wrong with the samplesheet.
    #
    cp "${sampleSheet}"{,.converted}

    #
    # Make sure
    #  1. The last line ends with a line end character.
    #  2. We have the right line end character: convert any carriage return (\r) to newline (\n).
    #  3. We remove empty lines.
    #
    printf '\n' >> "${sampleSheet}.converted"
    sed -i 's/\r/\n/g' "${sampleSheet}.converted"
    sed -i '/^\s*$/d' "${sampleSheet}.converted"
    #
    # Parse content with Python sanity check script.
    #
    check='failed' # default.
    fileName=$(basename "${sampleSheet}")
    if "${sampleSheetsDir}"/"${SCRIPT_NAME}".py --input "${sampleSheet}.converted" --log "${sampleSheet}.converted.log"
    then
        check=$(cat "${sampleSheet}.converted.log")
    fi

    if [[ "${check}" == 'OK' ]]
    then
        echo "INFO: Samplesheet is OK, moving ${sampleSheet}.converted to ${sampleSheetsDir}/${fileName}..."
        mv "${sampleSheet}.converted" "${sampleSheetsDir}/${fileName}"
        #
        # Cleanup: list the files explicitly, so we never delete
        # unrelated files that merely share the same prefix.
        #
        rm -f "${sampleSheet}" "${sampleSheet}.converted.log" "${sampleSheet}.converted.log.mailed"
    else
        echo "ERROR: Samplesheet ${fileName} is not correct, see ${sampleSheet}.converted.log."
        if [[ -e "${sampleSheet}.converted.log.mailed" ]]
        then
            echo "INFO: Notification was already sent."
        else
            echo "INFO: Trying to send email notification ..."
            #
            # Get email addresses for list of users that should always receive mail.
            # Addresses are kept in an array, so each one is passed to mail as a separate argument.
            #
            declare -a recipients=()
            mailingList="${INSTALLATION_DIR}/logs/${SCRIPT_NAME}.mailinglist"
            if [[ -e "${mailingList}" ]]
            then
                read -r -a recipients <<< "$(tr '\n' ' ' < "${mailingList}")"
            else
                # NOTE(review): the address below was redacted in this copy of the script; restore the real one.
                printf '%s\n' "ERROR: ${mailingList} is missing on $(hostname -s)." \
                    | mail -s "Samplesheet is wrong, but we cannot send email to the relevant users." '[email protected]'
            fi
            #
            # Get email address for owner of the samplesheet.
            #
            fileOwner="$(stat -c '%U' "${sampleSheet}")"
            mailAddressOwner="$(getent passwd "${fileOwner}" | cut -d ':' -s -f 5)"
            if [[ -z "${mailAddressOwner:-}" ]]
            then
                if [[ "${#recipients[@]}" -gt 0 ]]
                then
                    printf '%s\n' "WARN: We do not have an email address for this user: ${fileOwner}." \
                        | mail -s "Samplesheet is wrong on $(hostname -s), but we cannot email the owner." "${recipients[@]}"
                fi
            else
                recipients+=("${mailAddressOwner}")
            fi
            #
            # Without any recipient there is nobody to notify: skip this samplesheet
            # without marking it as mailed, so a next run will try again.
            #
            if [[ "${#recipients[@]}" -eq 0 ]]
            then
                echo "ERROR: No email addresses available; cannot send notification for ${fileName}."
                continue
            fi
            #
            # Prepare message content.
            # The log may be missing when the sanity check script crashed before creating it.
            #
            if [[ -r "${sampleSheet}.converted.log" ]]
            then
                logContent="$(<"${sampleSheet}.converted.log")"
            else
                logContent='No log file was produced by the sanity check script.'
            fi
            header="Dear ${fileOwner},"
            body="${SCRIPT_NAME} detected an error when parsing ${sampleSheet} on $(hostname -s): ${logContent}"
            footer='Cheers from the GCC.'
            #
            # Send email to notify users.
            #
            printf '%s\n\n%s\n\n%s\n' "${header}" "${body}" "${footer}" \
                | mail -s "Samplesheet is wrong on $(hostname -s)." "${recipients[@]}"
            touch "${sampleSheet}.converted.log.mailed"
        fi
    fi
done

echo "INFO: finished processing samplesheets."
This file was deleted.
Oops, something went wrong.