diff --git a/bcftbx/IlluminaData.py b/bcftbx/IlluminaData.py index 4da087e..afcb9ce 100644 --- a/bcftbx/IlluminaData.py +++ b/bcftbx/IlluminaData.py @@ -1,5 +1,5 @@ # IlluminaData.py: module for handling data about Illumina sequencer runs -# Copyright (C) University of Manchester 2012-2022 Peter Briggs +# Copyright (C) University of Manchester 2012-2024 Peter Briggs # ######################################################################## # @@ -224,6 +224,26 @@ def cycles(self): return None return ncycles + @property + def complete(self): + """ + Check if run is complete + + Returns: + Boolean: True if run is complete (i.e. all appropriate + sentinel files are present), False if not (i.e. + some sentinel files are missing). + """ + # Acquire run completion files + try: + files = platforms.RUN_COMPLETION_FILES[self.platform] + except KeyError: + # Fallback to default + files = platforms.RUN_COMPLETION_FILES['default'] + # Check if all are present + return all([os.path.exists(os.path.join(self.run_dir,f)) + for f in files]) + class IlluminaRunInfo: """Class for examining Illumina RunInfo.xml file diff --git a/bcftbx/mock.py b/bcftbx/mock.py index da84884..93dd467 100644 --- a/bcftbx/mock.py +++ b/bcftbx/mock.py @@ -1,5 +1,5 @@ # mock.py: module providing mock Illumina data for testing -# Copyright (C) University of Manchester 2012-2022 Peter Briggs +# Copyright (C) University of Manchester 2012-2024 Peter Briggs # ######################################################################## @@ -42,6 +42,7 @@ from .IlluminaData import IlluminaFastq from .IlluminaData import SampleSheet from .TabFile import TabFile +from .platforms import get_run_completion_files from .utils import OrderedDictionary from .utils import mkdir @@ -754,7 +755,7 @@ class MockIlluminaRun: def __init__(self,name,platform,top_dir=None, ntiles=None,bases_mask=None, sample_sheet_content=None, - flowcell_mode=None): + flowcell_mode=None,complete=True): """ Create a new MockIlluminaRun instance @@ 
-777,6 +778,9 @@ def __init__(self,name,platform,top_dir=None, be used to generate a sample sheet flowcell_mode (str): optionally specify the flow cell mode to be included in the run parameters + complete (bool): default is to include the appropriate + run completion files in the mock run; set to False to + omit these files """ self._created = False self._name = name @@ -785,6 +789,7 @@ def __init__(self,name,platform,top_dir=None, else: self._top_dir = os.getcwd() self._platform = platform + self._complete = bool(complete) # Set defaults for platform if self._platform == "miniseq": # MiniSeq @@ -801,7 +806,7 @@ def __init__(self,name,platform,top_dir=None, self._include_sample_sheet = False self._flowcell_mode = None self._rta_version = "2.11.4.0" - self._completion_files = ("RTAComplete.txt",) + self._completion_files = get_run_completion_files("miniseq") elif self._platform == "miseq": # MISeq self._nlanes = 1 @@ -817,7 +822,7 @@ def __init__(self,name,platform,top_dir=None, self._include_sample_sheet = True self._flowcell_mode = None self._rta_version = "2.11.4.0" - self._completion_files = ("RTAComplete.txt",) + self._completion_files = get_run_completion_files("miseq") elif self._platform == "hiseq": # HISeq self._nlanes = 8 @@ -833,7 +838,7 @@ def __init__(self,name,platform,top_dir=None, self._include_sample_sheet = True self._flowcell_mode = None self._rta_version = "2.11.4.0" - self._completion_files = None + self._completion_files = get_run_completion_files("hiseq") elif self._platform == "nextseq": # NextSeq self._nlanes = 4 @@ -849,8 +854,7 @@ def __init__(self,name,platform,top_dir=None, self._include_sample_sheet = False self._flowcell_mode = None self._rta_version = "2.11.4.0" - self._completion_files = ("CopyComplete.txt", - "RTAComplete.txt",) + self._completion_files = get_run_completion_files("nextseq") elif self._platform == "novaseq": # NovaSeq self._nlanes = 2 @@ -866,9 +870,7 @@ def __init__(self,name,platform,top_dir=None, 
self._include_sample_sheet = False self._flowcell_mode = 'SP' self._rta_version = "v3.4.4" - self._completion_files = ("CopyComplete.txt", - "RTAComplete.txt", - "SequenceComplete.txt") + self._completion_files = get_run_completion_files("novaseq6000") else: raise Exception("Unrecognised platform: %s" % self._platform) @@ -1059,7 +1061,7 @@ def create(self): io.open(self._path('Data','Intensities','BaseCalls','config.xml'), 'wb+').close() # Run completion files (e.g. 'RTAComplete.txt' etc) - if self._completion_files: + if self._complete and self._completion_files: for f in self._completion_files: io.open(self._path(f),'wb+').close() diff --git a/bcftbx/platforms.py b/bcftbx/platforms.py index a3fe863..c03159c 100644 --- a/bcftbx/platforms.py +++ b/bcftbx/platforms.py @@ -31,6 +31,20 @@ PLATFORMS['illumina'] = "Unknown/Illumina" PLATFORMS['other'] = "Unknown/external" +# Expected run completion files for different platforms +RUN_COMPLETION_FILES = { + 'default': ("RTAComplete.txt",), + 'solid4': tuple(), + 'solid5500': tuple(), + 'hiseq4000': ("RTAComplete.txt", + "SequencingComplete.txt"), + 'nextseq': ("CopyComplete.txt", + "RTAComplete.txt",), + 'novaseq6000': ("CopyComplete.txt", + "RTAComplete.txt", + "SequenceComplete.txt"), +} + # Dictionary matching sequencing platforms to regexp patterns # for specific instruments SEQUENCERS = { @@ -50,6 +64,26 @@ def list_platforms(): """ return [x for x in PLATFORMS] +def get_run_completion_files(platform): + """ + Return a list of files indicating run completion + + Given a platform name, return a list of the files + that are used to indicate when the run is complete. + + Arguments: + platform (str): name of the sequencing platform + (e.g. 'novaseq6000') + + Returns: + Tuple: list of run completion files for the + specified platform. 
+ """ + try: + return RUN_COMPLETION_FILES[platform] + except KeyError: + return RUN_COMPLETION_FILES['default'] + def get_sequencer_platform(sequencer_name): """Attempt to determine platform from sequencer name diff --git a/bcftbx/test/test_IlluminaData.py b/bcftbx/test/test_IlluminaData.py index 85a8155..9f5faaa 100644 --- a/bcftbx/test/test_IlluminaData.py +++ b/bcftbx/test/test_IlluminaData.py @@ -72,6 +72,7 @@ def test_illuminarun_miseq(self): self.assertEqual(run.bcl_extension,".bcl") self.assertEqual(run.lanes,[1,]) self.assertEqual(run.cycles,218) + self.assertEqual(run.complete,True) def test_illuminarun_hiseq(self): """ @@ -114,6 +115,7 @@ def test_illuminarun_hiseq(self): self.assertEqual(run.bcl_extension,".bcl.gz") self.assertEqual(run.lanes,[1,2,3,4,5,6,7,8]) self.assertEqual(run.cycles,218) + self.assertEqual(run.complete,True) def test_illuminarun_nextseq(self): """ @@ -153,12 +155,13 @@ def test_illuminarun_nextseq(self): self.assertEqual(run.bcl_extension,".bcl.bgzf") self.assertEqual(run.lanes,[1,2,3,4]) self.assertEqual(run.cycles,158) + self.assertEqual(run.complete,True) def test_illuminarun_novaseq(self): """ IlluminaRun: test for NovaSeq run """ - # Make a mock run directory for NextSeq format + # Make a mock run directory for NovaSeq format self.mock_illumina_run = MockIlluminaRun( '221125_A500968_0038_ABCDE1XX', 'novaseq', @@ -194,6 +197,7 @@ def test_illuminarun_novaseq(self): self.assertEqual(run.bcl_extension,".bcl.bgzf") self.assertEqual(run.lanes,[1,2]) self.assertEqual(run.cycles,158) + self.assertEqual(run.complete,True) def test_illuminarun_unknown_illumina_platform(self): """ @@ -236,6 +240,7 @@ def test_illuminarun_unknown_illumina_platform(self): self.assertEqual(run.bcl_extension,".bcl") self.assertEqual(run.lanes,[1,]) self.assertEqual(run.cycles,218) + self.assertEqual(run.complete,True) def test_illuminarun_unknown_illumina_platform_generic_name(self): """ @@ -281,6 +286,7 @@ def 
test_illuminarun_unknown_illumina_platform_generic_name(self): self.assertEqual(run.bcl_extension,".bcl") self.assertEqual(run.lanes,[1,]) self.assertEqual(run.cycles,218) + self.assertEqual(run.complete,True) def test_illuminarun_miseq_specify_platform(self): """ @@ -323,6 +329,24 @@ def test_illuminarun_miseq_specify_platform(self): self.assertEqual(run.bcl_extension,".bcl") self.assertEqual(run.lanes,[1,]) self.assertEqual(run.cycles,218) + self.assertEqual(run.complete,True) + + def test_illuminarun_incomplete(self): + """ + IlluminaRun: test 'complete' property when completion files missing + """ + # Make a mock run directory for NovaSeq format + self.mock_illumina_run = MockIlluminaRun( + '221125_A500968_0038_ABCDE1XX', + 'novaseq', + complete=False, + top_dir=self.top_dir) + self.mock_illumina_run.create() + # Load into an IlluminaRun object + run = IlluminaRun(self.mock_illumina_run.dirn, + platform="novaseq") + # Check that run is not complete + self.assertEqual(run.complete,False) def test_illuminarun_miseq_missing_directory(self): """