Skip to content

Commit

Permalink
Merge pull request #6 from UMCUGenetics/develop
Browse files Browse the repository at this point in the history
v1.1.2
  • Loading branch information
rernst authored Sep 9, 2019
2 parents 70e5405 + 6947625 commit a9397af
Show file tree
Hide file tree
Showing 16 changed files with 483 additions and 271 deletions.
20 changes: 15 additions & 5 deletions clarity_epp.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,20 @@ def export_manual_pipetting(args):
elif args.type == 'multiplex_sequence_pool':
clarity_epp.export.manual_pipetting.samplesheet_multiplex_sequence_pool(lims, args.process_id, args.output_file)


def export_ped_file(args):
    """Write a ped file for the given Clarity LIMS process to the output file."""
    process_id, output = args.process_id, args.output_file
    clarity_epp.export.ped.create_file(lims, process_id, output)


def export_samplelist(args):
    """Write the samplelist (removed samples) to the requested output file."""
    output = args.output_file
    clarity_epp.export.samplelist.removed_samples(lims, output)
def export_merge_file(args):
    """Write a merge file for the given Clarity LIMS process to the output file."""
    process_id, output = args.process_id, args.output_file
    clarity_epp.export.merge.create_file(lims, process_id, output)


def export_removed_samples(args):
    """Export removed samples table."""  # fixed docstring typo: "sampels" -> "samples"
    clarity_epp.export.sample.removed_samples(lims, args.output_file)


def export_tapestation(args):
Expand Down Expand Up @@ -208,6 +214,10 @@ def placement_complete_step(args):
parser_export_ped.add_argument('process_id', help='Clarity lims process id')
parser_export_ped.set_defaults(func=export_ped_file)

# Use a dedicated variable for the merge sub-parser; the original reused
# `parser_export_ped` (copy-paste), which shadowed the ped sub-parser defined
# just above and made the code misleading.
parser_export_merge = subparser_export.add_parser('merge', help='Export merge file.', parents=[output_parser])
parser_export_merge.add_argument('process_id', help='Clarity lims process id')
parser_export_merge.set_defaults(func=export_merge_file)

parser_export_workflow = subparser_export.add_parser('workflow', help='Export workflow result file.', parents=[output_parser])
parser_export_workflow.add_argument('type', choices=['lab', 'data_analysis'], help='Workflow type')
parser_export_workflow.add_argument('process_id', help='Clarity lims process id')
Expand All @@ -218,8 +228,8 @@ def placement_complete_step(args):
parser_export_illumina.add_argument('artifact_id', help='Clarity lims samplesheet artifact id')
parser_export_illumina.set_defaults(func=export_illumina)

parser_export_samplelist = subparser_export.add_parser('samplelist', help='Export samplelist.', parents=[output_parser])
parser_export_samplelist.set_defaults(func=export_samplelist)
# Fixed user-facing help text typo: "sampels" -> "samples".
parser_export_removed_samples = subparser_export.add_parser('removed_samples', help='Export removed samples table.', parents=[output_parser])
parser_export_removed_samples.set_defaults(func=export_removed_samples)

# Sample upload
parser_upload = subparser.add_parser('upload', help='Upload samples or results to clarity lims')
Expand Down
16 changes: 9 additions & 7 deletions clarity_epp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@ def get_sequence_name(sample):
except KeyError: # None DX sample, use sample.name as sequence name.
sequence_name = sample.name
else:
sequence_name = '{familienummer}{fam_status}{sex}{monsternummer}'.format(
familienummer=sample.udf['Dx Familienummer'],
fam_status=fam_status,
sex=sex,
monsternummer=sample.udf['Dx Monsternummer']
)

if not sample.name.startswith(sample.udf['Dx Familienummer']):
sequence_name = '{familienummer}{fam_status}{sex}{monsternummer}'.format(
familienummer=sample.udf['Dx Familienummer'],
fam_status=fam_status,
sex=sex,
monsternummer=sample.udf['Dx Monsternummer']
)
else:
sequence_name = sample.name
return sequence_name


Expand Down
3 changes: 2 additions & 1 deletion clarity_epp/export/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
import illumina
import labels
import manual_pipetting
import merge
import ped
import samplelist
import sample
import tapestation
import tecan
import workflow
75 changes: 56 additions & 19 deletions clarity_epp/export/illumina.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""Illumina export functions."""
import re

from genologics.entities import Process, Artifact

from .. import get_sequence_name
import utils
import config


def update_samplesheet(lims, process_id, artifact_id, output_file):
Expand All @@ -22,16 +22,24 @@ def update_samplesheet(lims, process_id, artifact_id, output_file):
families[family] = {'samples': [], 'NICU': False, 'project_type': 'unknown_project', 'split_project_type': False}

# Update family information
if sample.udf['Dx NICU Spoed']:
families[family]['NICU'] = True
project_type = 'NICU_{0}'.format(sample.udf['Dx Familienummer'])
families[family]['project_type'] = project_type
families[family]['split_project_type'] = False

elif 'elidS30409818' in sample.udf['Dx Protocolomschrijving'] and not families[family]['NICU']:
project_type = 'CREv2'
families[family]['project_type'] = project_type
families[family]['split_project_type'] = True
if sample.udf['Dx Onderzoeksreden'] == 'Research': # Dx research sample
for onderzoeksindicatie in config.research_onderzoeksindicatie_project:
if sample.udf['Dx Onderzoeksindicatie'] == onderzoeksindicatie:
project_type = config.research_onderzoeksindicatie_project[onderzoeksindicatie]
families[family]['project_type'] = project_type
families[family]['split_project_type'] = False
break

else: # Dx clinic sample
if sample.udf['Dx NICU Spoed']:
families[family]['NICU'] = True
project_type = 'NICU_{0}'.format(sample.udf['Dx Familienummer'])
families[family]['project_type'] = project_type
families[family]['split_project_type'] = False
elif 'elidS30409818' in sample.udf['Dx Protocolomschrijving'] and not families[family]['NICU']:
project_type = 'CREv2'
families[family]['project_type'] = project_type
families[family]['split_project_type'] = True

else:
family = sample.project.name
Expand Down Expand Up @@ -72,21 +80,46 @@ def update_samplesheet(lims, process_id, artifact_id, output_file):
else:
family_project_type['index'] += 1

# Check sequencer type -> NextSeq runs need to reverse complement 'index2' for dual barcodes and 'index' for single barcodes.
if 'nextseq' in process.type.name.lower():
nextseq_run = True
else:
nextseq_run = False

# Edit clarity samplesheet
header = '' # empty until [data] section
sample_header = '' # empty until [data] section
settings_section = False
samplesheet_artifact = Artifact(lims, id=artifact_id)
file_id = samplesheet_artifact.files[0].id

for line in lims.get_file_contents(id=file_id).rstrip().split('\n'):
if line.startswith('Sample_ID'): # Samples header line
header = line.rstrip().split(',')
if line.startswith('[Settings]'):
output_file.write('{line}\n'.format(line=line))
output_file.write('Read1EndWithCycle,{value}\n'.format(value=process.udf['Read 1 Cycles']-1))
output_file.write('Read2EndWithCycle,{value}\n'.format(value=process.udf['Read 2 Cycles']-1))
settings_section = True

elif line.startswith('[Data]') and not settings_section:
output_file.write('[Settings]\n')
output_file.write('Read1EndWithCycle,{value}\n'.format(value=process.udf['Read 1 Cycles']-1))
output_file.write('Read2EndWithCycle,{value}\n'.format(value=process.udf['Read 2 Cycles']-1))
output_file.write('{line}\n'.format(line=line))

elif line.startswith('Sample_ID'): # Samples header line
sample_header = line.rstrip().split(',')
sample_id_index = sample_header.index('Sample_ID')
sample_name_index = sample_header.index('Sample_Name')
sample_project_index = sample_header.index('Sample_Project')

if 'index2' in sample_header:
index_index = sample_header.index('index2')
else:
index_index = sample_header.index('index')

output_file.write('{line}\n'.format(line=line))

elif header: # Samples header seen, so continue with samples.
elif sample_header: # Samples header seen, so continue with samples.
data = line.rstrip().split(',')
sample_id_index = header.index('Sample_ID')
sample_name_index = header.index('Sample_Name')
sample_project_index = header.index('Sample_Project')

# Set Sample_Project
try:
Expand All @@ -97,6 +130,10 @@ def update_samplesheet(lims, process_id, artifact_id, output_file):
# Overwrite Sample_ID with Sample_name to get correct conversion output folder structure
data[sample_id_index] = data[sample_name_index]

# Reverse complement index for NextSeq runs
if nextseq_run:
data[index_index] = utils.reverse_complement(data[index_index])

output_file.write('{line}\n'.format(line=','.join(data)))
else: # Leave other lines untouched.
output_file.write('{line}\n'.format(line=line))
Loading

0 comments on commit a9397af

Please sign in to comment.