Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update 'platforms' module to include info on run completion files #217

Merged
merged 4 commits into from
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion bcftbx/IlluminaData.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# IlluminaData.py: module for handling data about Illumina sequencer runs
# Copyright (C) University of Manchester 2012-2022 Peter Briggs
# Copyright (C) University of Manchester 2012-2024 Peter Briggs
#
########################################################################
#
Expand Down Expand Up @@ -224,6 +224,26 @@ def cycles(self):
return None
return ncycles

@property
def complete(self):
    """
    Check if run is complete

    The run is treated as complete when every run completion
    (sentinel) file expected for its platform exists in the run
    directory. Platforms without a specific entry use the default
    set of completion files.

    Returns:
      Boolean: True if run is complete (i.e. all appropriate
        sentinel files are present), False if not (i.e.
        some sentinel files are missing).
    """
    # Use the shared helper so that the platform lookup and the
    # fallback to the default file set are defined in one place
    files = platforms.get_run_completion_files(self.platform)
    # Generator lets all() stop at the first missing file
    return all(os.path.exists(os.path.join(self.run_dir, f))
               for f in files)

class IlluminaRunInfo:
"""Class for examining Illumina RunInfo.xml file

Expand Down
24 changes: 13 additions & 11 deletions bcftbx/mock.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# mock.py: module providing mock Illumina data for testing
# Copyright (C) University of Manchester 2012-2022 Peter Briggs
# Copyright (C) University of Manchester 2012-2024 Peter Briggs
#
########################################################################

Expand Down Expand Up @@ -42,6 +42,7 @@
from .IlluminaData import IlluminaFastq
from .IlluminaData import SampleSheet
from .TabFile import TabFile
from .platforms import get_run_completion_files
from .utils import OrderedDictionary
from .utils import mkdir

Expand Down Expand Up @@ -754,7 +755,7 @@ class MockIlluminaRun:
def __init__(self,name,platform,top_dir=None,
ntiles=None,bases_mask=None,
sample_sheet_content=None,
flowcell_mode=None):
flowcell_mode=None,complete=True):
"""
Create a new MockIlluminaRun instance

Expand All @@ -777,6 +778,9 @@ def __init__(self,name,platform,top_dir=None,
be used to generate a sample sheet
flowcell_mode (str): optionally specify the flow cell
mode to be included in the run parameters
complete (bool): default is to include the appropriate
run completion files in the mock run; set to False to
omit these files
"""
self._created = False
self._name = name
Expand All @@ -785,6 +789,7 @@ def __init__(self,name,platform,top_dir=None,
else:
self._top_dir = os.getcwd()
self._platform = platform
self._complete = bool(complete)
# Set defaults for platform
if self._platform == "miniseq":
# MiniSeq
Expand All @@ -801,7 +806,7 @@ def __init__(self,name,platform,top_dir=None,
self._include_sample_sheet = False
self._flowcell_mode = None
self._rta_version = "2.11.4.0"
self._completion_files = ("RTAComplete.txt",)
self._completion_files = get_run_completion_files("miniseq")
elif self._platform == "miseq":
# MISeq
self._nlanes = 1
Expand All @@ -817,7 +822,7 @@ def __init__(self,name,platform,top_dir=None,
self._include_sample_sheet = True
self._flowcell_mode = None
self._rta_version = "2.11.4.0"
self._completion_files = ("RTAComplete.txt",)
self._completion_files = get_run_completion_files("miseq")
elif self._platform == "hiseq":
# HISeq
self._nlanes = 8
Expand All @@ -833,7 +838,7 @@ def __init__(self,name,platform,top_dir=None,
self._include_sample_sheet = True
self._flowcell_mode = None
self._rta_version = "2.11.4.0"
self._completion_files = None
self._completion_files = get_run_completion_files("hiseq")
elif self._platform == "nextseq":
# NextSeq
self._nlanes = 4
Expand All @@ -849,8 +854,7 @@ def __init__(self,name,platform,top_dir=None,
self._include_sample_sheet = False
self._flowcell_mode = None
self._rta_version = "2.11.4.0"
self._completion_files = ("CopyComplete.txt",
"RTAComplete.txt",)
self._completion_files = get_run_completion_files("nextseq")
elif self._platform == "novaseq":
# NovaSeq
self._nlanes = 2
Expand All @@ -866,9 +870,7 @@ def __init__(self,name,platform,top_dir=None,
self._include_sample_sheet = False
self._flowcell_mode = 'SP'
self._rta_version = "v3.4.4"
self._completion_files = ("CopyComplete.txt",
"RTAComplete.txt",
"SequenceComplete.txt")
self._completion_files = get_run_completion_files("novaseq6000")
else:
raise Exception("Unrecognised platform: %s" %
self._platform)
Expand Down Expand Up @@ -1059,7 +1061,7 @@ def create(self):
io.open(self._path('Data','Intensities','BaseCalls','config.xml'),
'wb+').close()
# Run completion files (e.g. 'RTAComplete.txt' etc)
if self._completion_files:
if self._complete and self._completion_files:
for f in self._completion_files:
io.open(self._path(f),'wb+').close()

Expand Down
34 changes: 34 additions & 0 deletions bcftbx/platforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,20 @@
PLATFORMS['illumina'] = "Unknown/Illumina"
PLATFORMS['other'] = "Unknown/external"

# Names of the files which indicate that a sequencing run has
# completed, keyed by platform name (the 'default' entry holds the
# files used for platforms that have no entry of their own)
RUN_COMPLETION_FILES = dict(
    default=("RTAComplete.txt",),
    # No completion files are listed for the SOLiD platforms
    solid4=(),
    solid5500=(),
    hiseq4000=(
        "RTAComplete.txt",
        "SequencingComplete.txt",
    ),
    nextseq=(
        "CopyComplete.txt",
        "RTAComplete.txt",
    ),
    novaseq6000=(
        "CopyComplete.txt",
        "RTAComplete.txt",
        "SequenceComplete.txt",
    ),
)

# Dictionary matching sequencing platforms to regexp patterns
# for specific instruments
SEQUENCERS = {
Expand All @@ -50,6 +64,26 @@ def list_platforms():
"""
return [x for x in PLATFORMS]

def get_run_completion_files(platform):
    """
    Return the files indicating run completion

    Looks up the names of the files that are used to
    indicate when a run on the given platform is complete.
    Platforms with no entry of their own are given the
    'default' set of files.

    Arguments:
      platform (str): name of the sequencing platform
        (e.g. 'novaseq6000')

    Returns:
      Tuple: names of the run completion files for the
        specified platform.
    """
    # Pick the platform's own entry if there is one, otherwise
    # use the default entry
    key = platform if platform in RUN_COMPLETION_FILES else 'default'
    return RUN_COMPLETION_FILES[key]

def get_sequencer_platform(sequencer_name):
"""Attempt to determine platform from sequencer name

Expand Down
26 changes: 25 additions & 1 deletion bcftbx/test/test_IlluminaData.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def test_illuminarun_miseq(self):
self.assertEqual(run.bcl_extension,".bcl")
self.assertEqual(run.lanes,[1,])
self.assertEqual(run.cycles,218)
self.assertEqual(run.complete,True)

def test_illuminarun_hiseq(self):
"""
Expand Down Expand Up @@ -114,6 +115,7 @@ def test_illuminarun_hiseq(self):
self.assertEqual(run.bcl_extension,".bcl.gz")
self.assertEqual(run.lanes,[1,2,3,4,5,6,7,8])
self.assertEqual(run.cycles,218)
self.assertEqual(run.complete,True)

def test_illuminarun_nextseq(self):
"""
Expand Down Expand Up @@ -153,12 +155,13 @@ def test_illuminarun_nextseq(self):
self.assertEqual(run.bcl_extension,".bcl.bgzf")
self.assertEqual(run.lanes,[1,2,3,4])
self.assertEqual(run.cycles,158)
self.assertEqual(run.complete,True)

def test_illuminarun_novaseq(self):
"""
IlluminaRun: test for NovaSeq run
"""
# Make a mock run directory for NextSeq format
# Make a mock run directory for NovaSeq format
self.mock_illumina_run = MockIlluminaRun(
'221125_A500968_0038_ABCDE1XX',
'novaseq',
Expand Down Expand Up @@ -194,6 +197,7 @@ def test_illuminarun_novaseq(self):
self.assertEqual(run.bcl_extension,".bcl.bgzf")
self.assertEqual(run.lanes,[1,2])
self.assertEqual(run.cycles,158)
self.assertEqual(run.complete,True)

def test_illuminarun_unknown_illumina_platform(self):
"""
Expand Down Expand Up @@ -236,6 +240,7 @@ def test_illuminarun_unknown_illumina_platform(self):
self.assertEqual(run.bcl_extension,".bcl")
self.assertEqual(run.lanes,[1,])
self.assertEqual(run.cycles,218)
self.assertEqual(run.complete,True)

def test_illuminarun_unknown_illumina_platform_generic_name(self):
"""
Expand Down Expand Up @@ -281,6 +286,7 @@ def test_illuminarun_unknown_illumina_platform_generic_name(self):
self.assertEqual(run.bcl_extension,".bcl")
self.assertEqual(run.lanes,[1,])
self.assertEqual(run.cycles,218)
self.assertEqual(run.complete,True)

def test_illuminarun_miseq_specify_platform(self):
"""
Expand Down Expand Up @@ -323,6 +329,24 @@ def test_illuminarun_miseq_specify_platform(self):
self.assertEqual(run.bcl_extension,".bcl")
self.assertEqual(run.lanes,[1,])
self.assertEqual(run.cycles,218)
self.assertEqual(run.complete,True)

def test_illuminarun_incomplete(self):
    """
    IlluminaRun: test 'complete' property when completion files missing
    """
    # Build a mock NovaSeq run which omits the run completion files
    mock_run = MockIlluminaRun(
        '221125_A500968_0038_ABCDE1XX',
        'novaseq',
        top_dir=self.top_dir,
        complete=False)
    # Keep the mock run on the test case, as the original test does
    self.mock_illumina_run = mock_run
    mock_run.create()
    # Wrap the mock run directory in an IlluminaRun object
    illumina_run = IlluminaRun(mock_run.dirn, platform="novaseq")
    # With no completion files the run must not be flagged complete
    self.assertEqual(illumina_run.complete, False)

def test_illuminarun_miseq_missing_directory(self):
"""
Expand Down
Loading