Skip to content

Commit

Permalink
Merge pull request #217 from fls-bioinformatics-core/platforms-includ…
Browse files Browse the repository at this point in the history
…e-run-completion-files

Update 'platforms' module to include info on run completion files
  • Loading branch information
pjbriggs authored Feb 16, 2024
2 parents f1a9eb9 + b9b968a commit 07d073c
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 13 deletions.
22 changes: 21 additions & 1 deletion bcftbx/IlluminaData.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# IlluminaData.py: module for handling data about Illumina sequencer runs
# Copyright (C) University of Manchester 2012-2022 Peter Briggs
# Copyright (C) University of Manchester 2012-2024 Peter Briggs
#
########################################################################
#
Expand Down Expand Up @@ -224,6 +224,26 @@ def cycles(self):
return None
return ncycles

@property
def complete(self):
    """
    Check if run is complete

    A run is treated as complete when every run completion
    ("sentinel") file expected for its platform exists in the
    run directory. Platforms with no expected files are always
    reported as complete.

    Returns:
      Boolean: True if run is complete (i.e. all appropriate
        sentinel files are present), False if not (i.e.
        some sentinel files are missing).
    """
    # Acquire run completion files via the shared helper in
    # 'platforms' (falls back to the 'default' entry when the
    # platform has no entry of its own)
    files = platforms.get_run_completion_files(self.platform)
    # Check if all are present; a generator lets 'all' stop at
    # the first missing file instead of testing every path
    return all(os.path.exists(os.path.join(self.run_dir,f))
               for f in files)

class IlluminaRunInfo:
"""Class for examining Illumina RunInfo.xml file
Expand Down
24 changes: 13 additions & 11 deletions bcftbx/mock.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# mock.py: module providing mock Illumina data for testing
# Copyright (C) University of Manchester 2012-2022 Peter Briggs
# Copyright (C) University of Manchester 2012-2024 Peter Briggs
#
########################################################################

Expand Down Expand Up @@ -42,6 +42,7 @@
from .IlluminaData import IlluminaFastq
from .IlluminaData import SampleSheet
from .TabFile import TabFile
from .platforms import get_run_completion_files
from .utils import OrderedDictionary
from .utils import mkdir

Expand Down Expand Up @@ -754,7 +755,7 @@ class MockIlluminaRun:
def __init__(self,name,platform,top_dir=None,
ntiles=None,bases_mask=None,
sample_sheet_content=None,
flowcell_mode=None):
flowcell_mode=None,complete=True):
"""
Create a new MockIlluminaRun instance
Expand All @@ -777,6 +778,9 @@ def __init__(self,name,platform,top_dir=None,
be used to generate a sample sheet
flowcell_mode (str): optionally specify the flow cell
mode to be included in the run parameters
complete (bool): default is to include the appropriate
run completion files in the mock run; set to False to
omit these files
"""
self._created = False
self._name = name
Expand All @@ -785,6 +789,7 @@ def __init__(self,name,platform,top_dir=None,
else:
self._top_dir = os.getcwd()
self._platform = platform
self._complete = bool(complete)
# Set defaults for platform
if self._platform == "miniseq":
# MiniSeq
Expand All @@ -801,7 +806,7 @@ def __init__(self,name,platform,top_dir=None,
self._include_sample_sheet = False
self._flowcell_mode = None
self._rta_version = "2.11.4.0"
self._completion_files = ("RTAComplete.txt",)
self._completion_files = get_run_completion_files("miniseq")
elif self._platform == "miseq":
# MISeq
self._nlanes = 1
Expand All @@ -817,7 +822,7 @@ def __init__(self,name,platform,top_dir=None,
self._include_sample_sheet = True
self._flowcell_mode = None
self._rta_version = "2.11.4.0"
self._completion_files = ("RTAComplete.txt",)
self._completion_files = get_run_completion_files("miseq")
elif self._platform == "hiseq":
# HISeq
self._nlanes = 8
Expand All @@ -833,7 +838,7 @@ def __init__(self,name,platform,top_dir=None,
self._include_sample_sheet = True
self._flowcell_mode = None
self._rta_version = "2.11.4.0"
self._completion_files = None
self._completion_files = get_run_completion_files("hiseq")
elif self._platform == "nextseq":
# NextSeq
self._nlanes = 4
Expand All @@ -849,8 +854,7 @@ def __init__(self,name,platform,top_dir=None,
self._include_sample_sheet = False
self._flowcell_mode = None
self._rta_version = "2.11.4.0"
self._completion_files = ("CopyComplete.txt",
"RTAComplete.txt",)
self._completion_files = get_run_completion_files("nextseq")
elif self._platform == "novaseq":
# NovaSeq
self._nlanes = 2
Expand All @@ -866,9 +870,7 @@ def __init__(self,name,platform,top_dir=None,
self._include_sample_sheet = False
self._flowcell_mode = 'SP'
self._rta_version = "v3.4.4"
self._completion_files = ("CopyComplete.txt",
"RTAComplete.txt",
"SequenceComplete.txt")
self._completion_files = get_run_completion_files("novaseq6000")
else:
raise Exception("Unrecognised platform: %s" %
self._platform)
Expand Down Expand Up @@ -1059,7 +1061,7 @@ def create(self):
io.open(self._path('Data','Intensities','BaseCalls','config.xml'),
'wb+').close()
# Run completion files (e.g. 'RTAComplete.txt' etc)
if self._completion_files:
if self._complete and self._completion_files:
for f in self._completion_files:
io.open(self._path(f),'wb+').close()

Expand Down
34 changes: 34 additions & 0 deletions bcftbx/platforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,20 @@
PLATFORMS['illumina'] = "Unknown/Illumina"
PLATFORMS['other'] = "Unknown/external"

# Run completion ("sentinel") files expected for each platform,
# keyed by platform name. The 'default' entry is the fallback for
# platforms without an entry of their own; the SOLiD platforms
# have no completion files (empty tuples).
# NOTE(review): 'hiseq4000' uses "SequencingComplete.txt" while
# 'novaseq6000' uses "SequenceComplete.txt" - the names differ as
# written; confirm against real run directories before "fixing".
RUN_COMPLETION_FILES = {
    'default': ("RTAComplete.txt",),
    'solid4': (),
    'solid5500': (),
    'hiseq4000': ("RTAComplete.txt", "SequencingComplete.txt"),
    'nextseq': ("CopyComplete.txt", "RTAComplete.txt"),
    'novaseq6000': ("CopyComplete.txt",
                    "RTAComplete.txt",
                    "SequenceComplete.txt"),
}

# Dictionary matching sequencing platforms to regexp patterns
# for specific instruments
SEQUENCERS = {
Expand All @@ -50,6 +64,26 @@ def list_platforms():
"""
return [x for x in PLATFORMS]

def get_run_completion_files(platform):
    """
    Return the files indicating run completion for a platform

    Looks up the platform name in RUN_COMPLETION_FILES and
    returns the matching entry; names without an entry get
    the 'default' entry instead.

    Arguments:
      platform (str): name of the sequencing platform
        (e.g. 'novaseq6000')

    Returns:
      Tuple: names of the run completion files for the
        specified platform (may be empty).
    """
    # Single lookup with the 'default' entry as the fallback
    return RUN_COMPLETION_FILES.get(platform,
                                    RUN_COMPLETION_FILES['default'])

def get_sequencer_platform(sequencer_name):
"""Attempt to determine platform from sequencer name
Expand Down
26 changes: 25 additions & 1 deletion bcftbx/test/test_IlluminaData.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def test_illuminarun_miseq(self):
self.assertEqual(run.bcl_extension,".bcl")
self.assertEqual(run.lanes,[1,])
self.assertEqual(run.cycles,218)
self.assertEqual(run.complete,True)

def test_illuminarun_hiseq(self):
"""
Expand Down Expand Up @@ -114,6 +115,7 @@ def test_illuminarun_hiseq(self):
self.assertEqual(run.bcl_extension,".bcl.gz")
self.assertEqual(run.lanes,[1,2,3,4,5,6,7,8])
self.assertEqual(run.cycles,218)
self.assertEqual(run.complete,True)

def test_illuminarun_nextseq(self):
"""
Expand Down Expand Up @@ -153,12 +155,13 @@ def test_illuminarun_nextseq(self):
self.assertEqual(run.bcl_extension,".bcl.bgzf")
self.assertEqual(run.lanes,[1,2,3,4])
self.assertEqual(run.cycles,158)
self.assertEqual(run.complete,True)

def test_illuminarun_novaseq(self):
"""
IlluminaRun: test for NovaSeq run
"""
# Make a mock run directory for NextSeq format
# Make a mock run directory for NovaSeq format
self.mock_illumina_run = MockIlluminaRun(
'221125_A500968_0038_ABCDE1XX',
'novaseq',
Expand Down Expand Up @@ -194,6 +197,7 @@ def test_illuminarun_novaseq(self):
self.assertEqual(run.bcl_extension,".bcl.bgzf")
self.assertEqual(run.lanes,[1,2])
self.assertEqual(run.cycles,158)
self.assertEqual(run.complete,True)

def test_illuminarun_unknown_illumina_platform(self):
"""
Expand Down Expand Up @@ -236,6 +240,7 @@ def test_illuminarun_unknown_illumina_platform(self):
self.assertEqual(run.bcl_extension,".bcl")
self.assertEqual(run.lanes,[1,])
self.assertEqual(run.cycles,218)
self.assertEqual(run.complete,True)

def test_illuminarun_unknown_illumina_platform_generic_name(self):
"""
Expand Down Expand Up @@ -281,6 +286,7 @@ def test_illuminarun_unknown_illumina_platform_generic_name(self):
self.assertEqual(run.bcl_extension,".bcl")
self.assertEqual(run.lanes,[1,])
self.assertEqual(run.cycles,218)
self.assertEqual(run.complete,True)

def test_illuminarun_miseq_specify_platform(self):
"""
Expand Down Expand Up @@ -323,6 +329,24 @@ def test_illuminarun_miseq_specify_platform(self):
self.assertEqual(run.bcl_extension,".bcl")
self.assertEqual(run.lanes,[1,])
self.assertEqual(run.cycles,218)
self.assertEqual(run.complete,True)

def test_illuminarun_incomplete(self):
    """
    IlluminaRun: test 'complete' property when completion files missing
    """
    # Build a mock NovaSeq run with the run completion files
    # deliberately left out (complete=False)
    mock_run = MockIlluminaRun('221125_A500968_0038_ABCDE1XX',
                               'novaseq',
                               top_dir=self.top_dir,
                               complete=False)
    # Keep the reference on the test instance, as the other
    # tests in this class do
    self.mock_illumina_run = mock_run
    mock_run.create()
    # Wrap the mock run directory in an IlluminaRun object
    illumina_run = IlluminaRun(mock_run.dirn,platform="novaseq")
    # No completion files were written, so the run must report
    # itself as incomplete
    self.assertEqual(illumina_run.complete,False)

def test_illuminarun_miseq_missing_directory(self):
"""
Expand Down

0 comments on commit 07d073c

Please sign in to comment.