From fe99f74bc11986e81c8219744607e8229d624c90 Mon Sep 17 00:00:00 2001
From: Peter Briggs <peter.briggs@manchester.ac.uk>
Date: Fri, 6 Dec 2024 16:23:45 +0000
Subject: [PATCH] bcftbx: refactor 'Experiment' module to use code from
 'platforms.solid.experiment'.

---
 bcftbx/Experiment.py | 415 +++++--------------------------------------
 1 file changed, 41 insertions(+), 374 deletions(-)

diff --git a/bcftbx/Experiment.py b/bcftbx/Experiment.py
index a7d03ba0..ea9ea4ba 100644
--- a/bcftbx/Experiment.py
+++ b/bcftbx/Experiment.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 #
 #     Experiment.py: classes for defining SOLiD sequencing experiments
-#     Copyright (C) University of Manchester 2011-2019,2021 Peter Briggs
+#     Copyright (C) University of Manchester 2011-2024 Peter Briggs
 #
 ########################################################################
 #
@@ -9,420 +9,87 @@
 #
 #########################################################################
 
-"""Experiment.py
+"""
+Legacy module for handling SOLiD experiments (groups of primary data
+sets from a SOLiD sequencing run).
+
+The functionality of the module has been moved to the
+'platforms.solid.experiment' module, which supersedes this one. This
+module is now deprecated and will be removed in a future release.
+
+The legacy classes have been reimplemented as wrappers to the classes
+in the newer module, to preserve backwards compatibility.
 
-The Experiment module provides two classes: the Experiment class defines
-a single experiment (essentially a collection of one or more related
-primary data sets) from a SOLiD run; the ExperimentList class is a
-collection of experiments which are typically part of the same SOLiD run.
 """
 
 #######################################################################
 # Import modules that this module depends on
 #######################################################################
 
-import os
-import logging
 from .SolidData import SolidRun
-from .SolidData import is_paired_end
-from .utils import mkdir
-from .utils import mklink
+from .platforms.solid import experiment as expt
 
 #######################################################################
 # Class definitions
 #######################################################################
 
-class Experiment:
+class Experiment(expt.Experiment):
     """Class defining an experiment from a SOLiD run.
 
     An 'experiment' is a collection of related data.
     """
     def __init__(self):
-        """Create a new Experiment instance.
-        """
-        self.name = None
-        self.type = None
-        self.sample = None
-        self.library = None
+        expt.Experiment.__init__(self)
 
-    def dirname(self,top_dir=None):
-        """Return directory name for experiment
 
-        The directory name is the supplied name plus the experiment
-        type joined by an underscore, unless no type was specified (in
-        which case it is just the experiment name).
-
-        If top_dir is also supplied then this will be prepended to the
-        returned directory name.
-        """
-        if self.type:
-            dirname = '_'.join((self.name,self.type))
-        else:
-            dirname = self.name
-        if top_dir:
-            return os.path.join(top_dir,dirname)
-        else:
-            return dirname
-
-    def describe(self):
-        """Describe the experiment as a set of command line options
-        """
-        options = ["--name=%s" % self.name]
-        if self.type:
-            options.append("--type=%s" % self.type)
-        if self.sample:
-            sample = self.sample
-        else:
-            sample = '*'
-        if self.library:
-            library = self.library
-        else:
-            library = '*'
-        options.append("--source=%s/%s" % (sample,library))
-        return ' '.join(options)
-
-    def copy(self):
-        """Return a new Experiment instance which is a copy of this one.
-        """
-        expt_copy = Experiment()
-        expt_copy.name = self.name
-        expt_copy.type = self.type
-        expt_copy.sample = self.sample
-        expt_copy.library = self.library
-        return expt_copy
-
-class ExperimentList:
-    """Container for a collection of Experiments
-
-    Experiments are created and added to the ExperimentList by calling
-    the addExperiment method, which returns a new Experiment object.
-
-    The calling subprogram then populates the Experiment properties as
-    appropriate.
-
-    Once all Experiments are defined the analysis directory can be
-    constructed by calling the buildAnalysisDirs method, which creates
-    directories and symbolic links to primary data according to the
-    definition of each experiment.
+class ExperimentList(expt.ExperimentList):
+    """
+    Container for a collection of Experiments
     """
-
     def __init__(self,solid_run_dir=None):
-        """Create a new ExperimentList instance.
-
-        Arguments:
-          solid_run_dir: (optional) the path of the source SOLiD run
-            directory.
-        """
-        self.experiments = []
-        self.solid_run_dir = solid_run_dir
-        self.solid_runs = []
-        self.__getSolidRunData()
-
-    def __getSolidRunData(self):
-        """Get data about SOLiD runs
-
-        Internal function to construct SolidRun objects based on the
-        supplied SOLiD run directory.
-        """
-        if self.solid_run_dir is not None:
-            logging.debug("Acquiring run information")
-            for solid_dir in (self.solid_run_dir,self.solid_run_dir+"_2"):
-                logging.debug("Examining %s" % solid_dir)
-                run = SolidRun(solid_dir)
-                if not run:
-                    logging.debug("Unable to get run data for %s" % solid_dir)
-                else:
-                    self.solid_runs.append(run)
-            if len(self.solid_runs) == 0:
-                logging.warning("No run data found")
+        expt.ExperimentList.__init__(self,
+                                     solid_run_dir=solid_run_dir,
+                                     classes=dict(RunDir=SolidRun,
+                                                  Experiment=Experiment,
+                                                  LinkNames=LinkNames))
 
     def addExperiment(self,name):
-        """Create a new Experiment and add to the list
-
-        Arguments:
-          name: the name of the new experiment
-
-        Returns:
-          New Experiment object with name already set
         """
-        new_expt = Experiment()
-        new_expt.name = name
-        self.experiments.append(new_expt)
-        return new_expt
+        Create a new Experiment and add to the list
+        """
+        return expt.ExperimentList.add_experiment(self, name)
 
     def addDuplicateExperiment(self,expt):
-        """Duplicate an existing Experiment and add to the list
-
-        Arguments:
-          expt: an existing Experiment object
-
-        Returns:
-          New Experiment object with the same data as the input
         """
-        new_expt = expt.copy()
-        self.experiments.append(new_expt)
-        return new_expt
+        Duplicate an existing Experiment and add to the list
+        """
+        return expt.ExperimentList.add_duplicate_experiment(self, expt)
 
     def getLastExperiment(self):
-        """Return the last Experiment added to the list
         """
-        try:
-            return self.experiments[-1]
-        except IndexError:
-            return None
+        Return the last Experiment added to the list
+        """
+        return expt.ExperimentList.get_last_experiment(self)
 
     def buildAnalysisDirs(self,top_dir=None,dry_run=False,link_type="relative",
                           naming_scheme="partial"):
-        """Construct and populate analysis directories for the experiments
-
-        For each defined experiment, create the required analysis directories
-        and populate with links to the primary data files.
-
-        Arguments:
-          top_dir: if set then create the analysis directories as
-            subdirs of the specified directory; otherwise operate in cwd
-          dry_run: if True then only report the mkdir, ln etc operations that
-            would be performed. Default is False (do perform the operations).
-          link_type: type of link to use when linking to primary data, one of
-            'relative' or 'absolute'.
-          naming_scheme: naming scheme to use for links to primary data, one of
-            'full' (same names as primary data files), 'partial' (cut-down version
-            of the full name which excludes sample names - the default), or
-            'minimal' (just the library name).
         """
-        # Deal with top_dir
-        if top_dir:
-            if os.path.exists(top_dir):
-                print("Directory %s already exists" % top_dir)
-            else:
-                if not dry_run:
-                    # Create top directory
-                    print("Creating %s" % top_dir)
-                    mkdir(top_dir,mode=0o775)
-                else:
-                    # Report what would have been done
-                    print("mkdir %s" % top_dir)
-        # Type of link
-        if link_type == 'absolute':
-            use_relative_links = False
-        else:
-            use_relative_links = True
-        # For each experiment, make and populate directory
-        for expt in self.experiments:
-            print("Experiment: %s %s %s/%s" % (expt.name,
-                                               expt.type,
-                                               expt.sample,
-                                               expt.library))
-            expt_dir = expt.dirname(top_dir)
-            print("\tDir: %s" % expt_dir)
-            # Make directory
-            if os.path.exists(expt_dir):
-                logging.warning("Directory %s already exists" % expt_dir)
-            else:
-                if not dry_run:
-                    # Create directory
-                    mkdir(expt_dir,mode=0o775)
-                else:
-                    # Report what would have been done
-                    print("mkdir %s" % expt_dir)
-            # Locate the primary data
-            for run in self.solid_runs:
-                paired_end = is_paired_end(run)
-                libraries = run.fetchLibraries(expt.sample,expt.library)
-                for library in libraries:
-                    # Get names for links to primary data - F3
-                    ln_csfasta,ln_qual = LinkNames(naming_scheme).names(library)
-                    print("\t\t%s" % ln_csfasta)
-                    print("\t\t%s" % ln_qual)
-                    # Make links to primary data
-                    try:
-                        self.__linkToFile(library.csfasta,os.path.join(expt_dir,ln_csfasta),
-                                          relative=use_relative_links,dry_run=dry_run)
-                        self.__linkToFile(library.qual,os.path.join(expt_dir,ln_qual),
-                                          relative=use_relative_links,dry_run=dry_run)
-                    except Exception as ex:
-                        logging.error("Failed to link to some or all F3 primary data")
-                        logging.error("Exception: %s" % ex)
-                    # Get names for links to F5 reads (if paired-end run)
-                    if paired_end:
-                        ln_csfasta,ln_qual = LinkNames(naming_scheme).names(library,F5=True)
-                        print("\t\t%s" % ln_csfasta)
-                        print("\t\t%s" % ln_qual)
-                        # Make links to F5 read data
-                        try:
-                            self.__linkToFile(library.csfasta_f5,os.path.join(expt_dir,ln_csfasta),
-                                              relative=use_relative_links,dry_run=dry_run)
-                            self.__linkToFile(library.qual_f5,os.path.join(expt_dir,ln_qual),
-                                              relative=use_relative_links,dry_run=dry_run)
-                        except Exception as ex:
-                            logging.error("Failed to link to some or all F5 primary data")
-                            logging.error("Exception: %s" % ex)
-            # Make an empty ScriptCode directory
-            scriptcode_dir = os.path.join(expt_dir,"ScriptCode")
-            if os.path.exists(scriptcode_dir):
-                logging.warning("Directory %s already exists" % scriptcode_dir)
-            else:
-                if not dry_run:
-                    # Create directory
-                    mkdir(scriptcode_dir,mode=0o775)
-                else:
-                    # Report what would have been done
-                    print("mkdir %s" % scriptcode_dir)
-    
-    def __linkToFile(self,source,target,relative=True,dry_run=False):
-        """Create symbolic link to a file
-        
-        Internal function to make symbolic links to primary data. Checks that the
-        target links don't already exist, or if they do that the current source
-        file is the same as that specified in the method call.
-
-        Arguments:
-          source: the file to be linked to
-          target: the name of the link pointing to source
-          relative: if True then make a relative link (if possible); otherwise
-            link to the target as given (default)
-          dry_run: if True then only report the actions that would be performed
-            (default is False, perform the actions)
+        Construct and populate analysis directories for the experiments
         """
-        # Check if target file already exists
-        if os.path.exists(target):
-            logging.warning("Target file %s already exists" % target)
-            # Test if the sources match
-            if os.readlink(target) != source:
-                logging.error("Different sources for %s" % target)
-            return
-        if not dry_run:
-            # Make symbolic links
-            mklink(source,target,relative=relative)
-        else:
-            # Report what would have been done
-            print("ln -s %s %s" % (source,target))
+        return expt.ExperimentList.build_analysis_dirs(
+            self,
+            top_dir=top_dir,
+            dry_run=dry_run,
+            link_type=link_type,
+            naming_scheme=naming_scheme)
 
-    def __getitem__(self,key):
-        return self.experiments[key]
-
-    def __len__(self):
-        return len(self.experiments)
 
 class LinkNames:
-    """Class to construct names for links to primary data files
-
-    The LinkNames class encodes a set of naming schemes that are used to
-    construct names for the links in the analysis directories that point
-    to the primary CFASTA and QUAL data files.
-
-    The schemes are:
-
-      full:    link name is the same as the source file, e.g.
-               solid0123_20111014_FRAG_BC_AB_CD_EF_pool_F3_CD_PQ5.csfasta
-
-      partial: link name consists of the instrument name, datestamp and
-               library name, e.g.
-               solid0123_20111014_CD_PQ5.csfasta
-
-      minimal: link name consists of just the library name, e.g.
-               CD_PQ5.csfasta
-
-    For paired-end data, the 'partial' and 'minimal' names have '_F3' and
-    '_F5' appended as appropriate (full names already have this distinction).
-
-    Example usage:
-
-    To get the link names using the minimal scheme for the F3 reads ('library'
-    is a SolidLibrary object):
-
-    >>> csfasta_lnk,qual_lnk = LinkNames('minimal').names(library)
-
-    To get names for the F5 reads using the partial scheme:
-
-    >>> csfasta_lnk,qual_lnk = LinkNames('partial').names(library,F5=True)
     """
-    
+    Class to construct names for links to primary data files
+    """
     def __init__(self,scheme):
-        """Create a new LinkNames instance
-
-        Argments:
-          scheme: naming scheme, one of 'full', 'partial' or
-            'minimal'
-        """
-        # Default
-        self.__names = self.__full_names
-        # Assign according to requested scheme
-        if scheme == "minimal":
-            self.__names = self.__minimal_names
-        elif scheme == "partial":
-            self.__names = self.__partial_names
-        elif scheme == "full":
-            self.__names = self.__full_names
-
-    def names(self,library,F5=False):
-        """Get names for links to the primary data in a library
-
-        Returns a tuple of link names:
-
-        (csfasta_link_name,qual_link_name)
-
-        derived from the data in the library plus the naming scheme
-        specified when the LinkNames object was created.
-        
-        Arguments:
-          library: SolidLibrary object
-          F5: if True then indicates that names should be returned
-            for linking to the F5 reads (default is F3 reads)
-        """
-        return self.__names(library,F5)
-
-    def __minimal_names(self,library,F5):
-        """Internal: link names based on 'minimal' naming scheme
-        """
-        # Alternative naming schemes for primary data for links
-        run = library.parent_sample.parent_run
-        if not is_paired_end(run):
-            # Library names alone
-            return ("%s.csfasta" % library.name,
-                    "%s.qual" % library.name)
-        else:
-            # Add F3/F5 to distinguish the samples
-            if not F5:
-                return ("%s_F3.csfasta" % library.name,
-                        "%s_F3.qual" % library.name)
-            else:
-                return ("%s_F5.csfasta" % library.name,
-                        "%s_F5.qual" % library.name)
-
-    def __partial_names(self,library,F5):
-        """Internal: link names based on 'partial' naming scheme
-        """
-        run = library.parent_sample.parent_run
-        name = '_'.join([run.run_info.instrument,
-                         run.run_info.datestamp,
-                         library.name])
-        if not is_paired_end(run):
-            return ("%s.csfasta" % name,
-                    "%s_QV.qual" % name)
-        else:
-            # Add F3/F5 to distinguish the samples
-            if not F5:
-                return ("%s_F3.csfasta" % name,
-                        "%s_F3_QV.qual" % name)
-            else:
-                return ("%s_F5.csfasta" % name,
-                        "%s_F5_QV.qual" % name)
-
-    def __full_names(self,library,F5):
-        """Internal: link names based on 'full' naming scheme
-        """
-        run = library.parent_sample.parent_run
-        if not is_paired_end(run):
-            return (os.path.basename(library.csfasta),
-                    os.path.basename(library.qual))
-        else:
-            if not F5:
-                return (os.path.basename(library.csfasta),
-                        os.path.basename(library.qual))
-            else:
-                return (os.path.basename(library.csfasta_f5),
-                        os.path.basename(library.qual_f5))
+        expt.LinkNames.__init__(self, scheme)
 
 #######################################################################
 # Module functions