Commit

refactored __init__.py files to address import issues
Acribbs committed Oct 29, 2024
1 parent 4f36da4 commit a05824b
Showing 2 changed files with 35 additions and 155 deletions.
33 changes: 30 additions & 3 deletions cgatcore/__init__.py
@@ -1,5 +1,32 @@
# cgatcore/__init__.py
import importlib

pipeline = importlib.import_module('cgatcore.pipeline')
remote = importlib.import_module('cgatcore.remote')

class CgatCore:
    """Main class to encapsulate CGAT core functionality."""

    def __init__(self):
        self._pipeline = None
        self._remote = None

    @property
    def pipeline(self):
        """Lazy load the pipeline module."""
        if self._pipeline is None:
            from cgatcore import pipeline
            self._pipeline = pipeline
        return self._pipeline

    @property
    def remote(self):
        """Lazy load the remote module."""
        if self._remote is None:
            from cgatcore import remote
            self._remote = remote
        return self._remote


# Create a global instance of the CgatCore class
cgatcore = CgatCore()

# Expose the pipeline and remote attributes
pipeline = cgatcore.pipeline
remote = cgatcore.remote
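
The new `__init__.py` swaps the eager `importlib.import_module` calls for properties that import `cgatcore.pipeline` and `cgatcore.remote` only when first accessed. The snippet below is a minimal, self-contained sketch of that deferred-import pattern, not cgatcore code: the standard-library `json` module stands in for a submodule, and the class name `LazyNamespace` is made up for illustration.

```python
# Minimal sketch of the deferred-import pattern used by CgatCore above.
# "json" stands in for a submodule; LazyNamespace is purely illustrative.
import importlib
import sys


class LazyNamespace:
    """Import a module only when it is first accessed."""

    def __init__(self, module_name):
        self._module_name = module_name
        self._module = None

    @property
    def module(self):
        if self._module is None:
            # The import cost (and any circular-import hazard) is paid here,
            # on first access, not when the enclosing package is imported.
            self._module = importlib.import_module(self._module_name)
        return self._module


lazy = LazyNamespace("json")
print("imported yet?", "json" in sys.modules)  # may already be True in some interpreters
print(lazy.module.dumps({"lazy": True}))       # the import happens on this access
```

Note that the last two lines of the new `__init__.py` (`pipeline = cgatcore.pipeline`, `remote = cgatcore.remote`) access the properties at import time, so the laziness mainly serves to defer the submodule imports until after `CgatCore` is defined and the package is registered, which helps break circular imports during initialisation rather than avoiding the imports altogether.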
157 changes: 5 additions & 152 deletions cgatcore/pipeline/__init__.py
@@ -1,160 +1,13 @@
'''
pipeline.py - Tools for CGAT Ruffus Pipelines
# cgatcore/pipeline/__init__.py

"""pipeline.py - Tools for CGAT Ruffus Pipelines
=============================================
This module provides a comprehensive set of tools to facilitate the creation and management
of data processing pipelines using CGAT Ruffus. It includes functionalities for pipeline control,
logging, parameterization, task execution, database uploads, temporary file management, and
integration with AWS S3.
**Features:**
- **Pipeline Control:** Command-line interface for executing, showing, and managing pipeline tasks.
- **Logging:** Configures logging to files and RabbitMQ for real-time monitoring.
- **Parameterization:** Loads and manages configuration parameters from various files.
- **Task Execution:** Manages the execution of tasks, supporting both local and cluster environments.
- **Database Upload:** Utilities for uploading processed data to databases.
- **Temporary File Management:** Functions to handle temporary files and directories.
- **AWS S3 Integration:** Support for processing files stored in AWS S3.
**Example Usage:**
```python
from cgatcore import pipeline as P

@P.transform("input.txt", suffix(".txt"), ".processed.txt")
def process_data(infile, outfile):
    # Processing logic here
    pass

if __name__ == "__main__":
    P.main()
Logging
-------
Logging is set up by :func:`main`. Logging messages will be sent to
the file :file:`pipeline.log` in the current directory. Additionally,
messages are sent to a RabbitMQ_ message exchange to permit
monitoring of pipeline progress.
Running tasks
-------------
:mod:`pipeline` provides a :func:`pipeline.run` method to control
running commandline tools. The :func:`pipeline.run` method takes care
of distributing these tasks to the cluster. It takes into
consideration command line options such as ``--cluster-queue``. The
command line option ``--local`` will run jobs locally for testing
purposes.
For running Python code that is inside a module in a distributed
function, use the :func:`submit` function. The :func:`execute` method
runs a command locally.
Functions such as :func:`shellquote`, :func:`getCallerLocals`,
:func:`getCaller`, :func:`buildStatement`, :func:`expandStatement`,
:func:`joinStatements` support the parameter interpolation mechanism
used in :mod:`pipeline`.
Parameterisation
----------------
:mod:`pipeline` provides hooks for reading pipeline configuration
values from :file:`.ini` files and making them available inside ruffus_
tasks. The fundamental usage is a call to :func:`getParamaters` with
a list of configuration files, typically::
    # load options from the config file
    P.get_parameters(
        ["%s/pipeline.yml" % os.path.splitext(__file__)[0],
         "../pipeline.yml",
         "pipeline.yml"])
The :mod:`pipeline` module defines a global variable :data:`PARAMS`
that provides access to the configuration values. To get a handle to
this variable outside a pipeline script, call :func:`getParams`::

    my_cmd = "%(scripts_dir)s/bam2bam.py" % P.getParams()
Functions such as :func:`configToDictionary`, :func:`loadParameters`,
:func:`matchParameter` and :func:`substituteParameters` support this
functionality.
Functions such as :func:`asList` and :func:`isTrue` are useful to work
with parameters.
The method :func:`peekParameters` allows one to programmatically read the
parameters of another pipeline.
Temporary files
---------------
Tasks containing multiple steps often require temporary memory storage
locations. The functions :func:`getTempFilename`, :func:`getTempFile`
and :func:`getTempDir` provide these. These functions are aware of the
temporary storage locations either specified in configuration files or
on the command line and distinguish between the ``private`` locations
that are visible only within a particular compute node, and ``shared``
locations that are visible between compute nodes and typically on a
network mounted location.
Requirements
------------
The methods :func:`checkExecutables`, :func:`checkScripts` and
:func:`checkParameter` check for the presence of executables, scripts
or parameters. These methods are useful for performing pre-run checks
inside a pipeline to verify that a particular requirement is met. See also
the ``check`` command-line command.
Database upload
---------------
To assist with uploading data into a database, :mod:`pipeline` provides
several utility functions for conveniently uploading data. The :func:`load`
method uploads data in a tab-separated file::
@P.transform("*.tsv.gz", suffix(".tsv.gz"), ".load")
def loadData(infile, outfile):
P.load(infile, outfile)
The methods :func:`mergeAndLoad` and :func:`concatenateAndLoad` upload
multiple files into the same database by combining them first. The method
:func:`createView` creates a table or view derived from other tables
in the database. The function :func:`importFromIterator` uploads
data from a Python list or other iterable directly.
The functions :func:`tablequote` and :func:`toTable` translate track
names derived from filenames into names that are suitable for tables.
The method :func:`build_load_statement` can be used to create an
upload command that can be added to command line statements to
directly upload data without storing an intermediate file.
The method :func:`connect` returns a database handle for querying the
database.
Package layout
--------------
The module is arranged as a Python package with several submodules. Functions
within a submodule that are to be exported are all imported into the namespace
of :mod:`pipeline`.
.. toctree::

   cgatcore.pipeline.control
   cgatcore.pipeline.database
   cgatcore.pipeline.execution
   cgatcore.pipeline.files
   cgatcore.pipeline.parameters
   cgatcore.pipeline.utils
'''
# cgatcore/pipeline/__init__.py

"""

# Import existing pipeline functionality
from cgatcore.pipeline.control import *
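
Since the "Running tasks" and "Parameterisation" sections above describe `P.run`, `P.get_parameters` and statement interpolation without a combined example, here is a hedged sketch of a small pipeline script that ties them together. It assumes a working cgat-core and ruffus installation plus a `pipeline.yml`; the task name and the `wc -l` statement are invented for illustration, and the `transform`/`suffix` decorators are imported directly from ruffus to keep the sketch self-contained (the docstring's own examples write `@P.transform`).

```python
# Sketch only: assumes cgat-core and ruffus are installed and that a
# pipeline.yml (paths below mirror the docstring example) is available.
import os
import sys

from ruffus import transform, suffix
from cgatcore import pipeline as P

# load options from the config files, as in the Parameterisation section
PARAMS = P.get_parameters(
    ["%s/pipeline.yml" % os.path.splitext(__file__)[0],
     "../pipeline.yml",
     "pipeline.yml"])


@transform("*.txt", suffix(".txt"), ".counts")
def count_lines(infile, outfile):
    # %(infile)s / %(outfile)s are filled in by the statement interpolation
    # mechanism before P.run() dispatches the command locally or to the cluster
    statement = "wc -l < %(infile)s > %(outfile)s"
    P.run(statement)


if __name__ == "__main__":
    sys.exit(P.main(sys.argv))
```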
@@ -200,6 +53,7 @@ def loadData(infile, outfile):
    'S3Pipeline', 'S3Mapper', 's3_path_to_local', 'suffix',
    's3_mapper', 'configure_s3'
]

# Add a docstring for the module
__doc__ = """
This module provides pipeline functionality for cgat-core, including support for AWS S3.
@@ -226,4 +80,3 @@ def process_s3_file(infile, outfile):
# Configure S3 credentials if needed
P.configure_s3(aws_access_key_id="YOUR_KEY", aws_secret_access_key="YOUR_SECRET")
"""
