Release Candidate 2.0rc5
* Mikado pick now uses lightweight SQLite databases for interprocess data exchange (#218). This could still be improved by allowing more fragments to be removed; an illustrative sketch of the general pattern follows this list.
* Small corrections for the `daijin` pipelines.
* Fix #215 
* Fixed the recovery of lost loci.
* Amendment for #134: the default `min_cds_overlap` has been reduced to 50% (75% was probably too restrictive). A toy example of what this overlap fraction measures also follows this list.
* Solved a bug in `mikado compare` that led to incorrect statistics when using multiprocessing.
* Needed bug fixes for Mikado serialise.
* Mikado configure was embedding the scoring file within the configuration - now amended.
* Fix #217
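
The first point above refers to exchanging worker results through small on-disk SQLite databases rather than in-memory queues. The snippet below is only an illustrative sketch of that general pattern, not Mikado's actual implementation; every name in it is made up for the example.

```python
# Illustrative sketch only: each worker writes its results to its own SQLite file,
# and the parent process reads the partial databases back once the workers finish.
import multiprocessing as mp
import os
import sqlite3
import tempfile


def worker(db_path, items):
    conn = sqlite3.connect(db_path)
    conn.execute("CREATE TABLE loci (name TEXT, score REAL)")
    conn.executemany("INSERT INTO loci VALUES (?, ?)", items)
    conn.commit()
    conn.close()


def main():
    workdir = tempfile.mkdtemp()
    chunks = [[("locus_1", 10.0)], [("locus_2", 12.5)], [("locus_3", 7.2)]]
    databases, processes = [], []
    for idx, chunk in enumerate(chunks):
        db_path = os.path.join(workdir, "partial_{}.db".format(idx))
        databases.append(db_path)
        proc = mp.Process(target=worker, args=(db_path, chunk))
        proc.start()
        processes.append(proc)
    for proc in processes:
        proc.join()
    # Collect everything back in the parent process.
    results = []
    for db_path in databases:
        conn = sqlite3.connect(db_path)
        results.extend(conn.execute("SELECT name, score FROM loci"))
        conn.close()
    print(sorted(results))


if __name__ == "__main__":
    main()
```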
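
For the `min_cds_overlap` change, the threshold is a fraction of shared coding sequence. The toy function below is a hypothetical illustration, not Mikado's code: it uses one common convention (overlap taken over the shorter of the two CDS intervals) to show the kind of quantity such a cutoff compares, and why lowering it from 0.75 to 0.5 lets more transcript pairs pass.

```python
# Toy illustration: fraction of the shorter CDS interval covered by the overlap.
def cds_overlap_fraction(cds_a, cds_b):
    start = max(cds_a[0], cds_b[0])
    end = min(cds_a[1], cds_b[1])
    overlap = max(0, end - start)
    return overlap / min(cds_a[1] - cds_a[0], cds_b[1] - cds_b[0])


fraction = cds_overlap_fraction((100, 400), (220, 600))
print(round(fraction, 2))                 # 0.6
print(fraction >= 0.75, fraction >= 0.5)  # False True
```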
lucventurini authored Sep 26, 2019
1 parent c29a809 commit ab172f1
Showing 39 changed files with 297,368 additions and 11,811 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -15,6 +15,12 @@ sample_data/uniprot*fasta*
sample_data/chr5.fas
sample_data/mikado_prepared.fasta
sample_data/mikado_prepared.gtf
sample_data/slurm*
sample_data/Daijin
sample_data/daijin_test
sample_data/chr5.fas.gz.fai
sample_data/daijin.yaml
sample_data/daijin_exe.yaml
Chr*
*/*~
sample_data/*.ok
3 changes: 2 additions & 1 deletion .travis.yml
@@ -21,11 +21,12 @@ install:
- conda install -y -c conda-forge -- pytest-cov codecov;
- python setup.py develop;
script:
- python -c "import Mikado; Mikado.test(label='fast')";
- python setup.py test --addopts "-m slow Mikado/tests/test_system_calls.py::SerialiseChecker::test_subprocess_multi_empty_orfs"
- python setup.py test --addopts " --cov Mikado --cov-config=.coveragerc -m '(slow or not slow) and not triage'";
- cd sample_data; snakemake
- rm -rf .snakemake/conda/; # This causes weird bugs
- cd ..;
- python -c "import Mikado; Mikado.test(label='fast')";
# Check that the seed is set properly
- mikado pick --procs 2 --seed 20 --fasta Mikado/tests/chr5.fas.gz --json-conf Mikado/tests/check_seed.yaml -od 20a Mikado/tests/check_seed.gtf
- mikado pick --procs 2 --seed 20 --fasta Mikado/tests/chr5.fas.gz --json-conf Mikado/tests/check_seed.yaml -od 20b Mikado/tests/check_seed.gtf
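
The two seeded `mikado pick` runs above write their results to the `20a` and `20b` output directories; with the same seed they are expected to produce identical output. The snippet below is a hedged sketch of how such a reproducibility check could be performed; it is not part of the repository, and only the directory names are taken from the commands above.

```python
# Sketch: recursively compare the two output directories produced by the seeded runs.
import filecmp
import sys


def directories_match(dir_a, dir_b):
    comparison = filecmp.dircmp(dir_a, dir_b)
    if comparison.left_only or comparison.right_only or comparison.diff_files:
        return False
    return all(directories_match("{}/{}".format(dir_a, sub), "{}/{}".format(dir_b, sub))
               for sub in comparison.common_dirs)


if __name__ == "__main__":
    if not directories_match("20a", "20b"):
        sys.exit("Outputs differ: the seed is not reproducible.")
    print("Outputs are identical: the seed is set properly.")
```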
4 changes: 2 additions & 2 deletions Mikado/configuration/configuration_blueprint.json
@@ -346,7 +346,7 @@
"type": "number",
"minimum": 0,
"maximum": 1,
"default": 0.75
"default": 0.5
},
"min_cdna_overlap": {
"type": "number",
@@ -632,7 +632,7 @@
},
"consider_truncated_for_retained": {
"type": "boolean",
"default": false
"default": true
},
"only_reference_update": {
"type": "boolean",
19 changes: 18 additions & 1 deletion Mikado/configuration/configurator.py
@@ -23,13 +23,30 @@
from ..exceptions import InvalidJson, UnrecognizedRescaler
from ..utilities import merge_dictionaries
from ..utilities.log_utils import create_default_logger
import sys
import numpy


__author__ = "Luca Venturini"


def create_cluster_config(config, args, logger):
if (config["scheduler"] and config["scheduler"] != "local") or (not config["scheduler"] and args.cluster_config):
if not args.queue:
error = "A queue must be specified for the analysis when in HPC mode. Please relaunch."
logger.error(error)
exit(1)
if args.cluster_config is not None:
cluster_config = args.cluster_config
else:
cluster_config = "daijin_hpc.yaml"
with open(cluster_config, "wt") as out, \
resource_stream("Mikado", os.path.join("daijin", "hpc.yaml")) as original:
for pos, line in enumerate(original):
print(line.decode(), file=out, end="")
if pos == 0:
print(" queue:", args.queue, file=out)


def extend_with_default(validator_class, resolver=None, simple=False):
"""
Function to extend the normal validation classes for jsonschema
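
The `extend_with_default` helper shown above builds on the standard jsonschema pattern of filling in schema defaults during validation, which is how changed defaults such as `min_cds_overlap: 0.5` end up in generated configurations. The snippet below is a minimal, self-contained sketch of that generic pattern (adapted from the jsonschema documentation, not Mikado's own signature, which also accepts a resolver and a `simple` flag).

```python
# Generic jsonschema "fill in defaults while validating" pattern.
from jsonschema import Draft7Validator, validators


def extend_with_default(validator_class):
    validate_properties = validator_class.VALIDATORS["properties"]

    def set_defaults(validator, properties, instance, schema):
        # Insert defaults before running the normal "properties" validation.
        for prop, subschema in properties.items():
            if isinstance(instance, dict) and "default" in subschema:
                instance.setdefault(prop, subschema["default"])
        yield from validate_properties(validator, properties, instance, schema)

    return validators.extend(validator_class, {"properties": set_defaults})


DefaultFillingValidator = extend_with_default(Draft7Validator)
schema = {"properties": {"min_cds_overlap": {"type": "number", "default": 0.5}}}
config = {}
DefaultFillingValidator(schema).validate(config)
print(config)  # {'min_cds_overlap': 0.5}
```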
17 changes: 2 additions & 15 deletions Mikado/configuration/daijin_configurator.py
@@ -5,6 +5,7 @@
import jsonschema
from pkg_resources import resource_stream, resource_filename
from .configurator import extend_with_default, merge_dictionaries, check_all_requirements, check_scoring, to_json
from .configurator import create_cluster_config
from . import print_config, check_has_requirements
from ..exceptions import InvalidJson
from ..utilities.log_utils import create_default_logger
@@ -214,21 +215,7 @@ def create_daijin_config(args, level="ERROR", piped=False):
_parse_reads_from_cli(args, config, logger)

config["scheduler"] = args.scheduler
if config["scheduler"] or args.cluster_config:
if not args.queue:
error = "A queue must be specified for the analysis when in HPC mode. Please relaunch."
logger.error(error)
exit(1)
if args.cluster_config is not None:
cluster_config = args.cluster_config
else:
cluster_config = "daijin_hpc.yaml"
with open(cluster_config, "wt") as out, \
resource_stream("Mikado", os.path.join("daijin", "hpc.yaml")) as original:
for pos, line in enumerate(original):
print(line.decode(), file=out, end="")
if pos == 0:
print(" queue:", args.queue, file=out)
create_cluster_config(config, args, logger)

config["threads"] = args.threads

2 changes: 1 addition & 1 deletion Mikado/configuration/daijin_schema.json
@@ -130,7 +130,7 @@
"scheduler": {
"type": ["string"],
"default": "",
"enum": ["LSF", "SLURM", "PBS", ""],
"enum": ["LSF", "SLURM", "PBS", "", "local"],
"Comment": "Scheduler to be used for the project. Set to null if you plan to use DRMAA or are using a local machine.",
"required": true
},
24 changes: 20 additions & 4 deletions Mikado/daijin/__init__.py
@@ -139,7 +139,7 @@ def create_config_parser():
runtime.add_argument("-o", "--out", default=sys.stdout, type=argparse.FileType("w"),
help="Output file. If the file name ends in \"json\", the file will be in JSON format; \
otherwise, Daijin will print out a YAML file. Default: STDOUT.")
runtime.add_argument("--scheduler", default="", choices=["", "SLURM", "LSF", "PBS"],
runtime.add_argument("--scheduler", default="", choices=["local", "SLURM", "LSF", "PBS"],
help="Scheduler to use. Default: None - ie, either execute everything on the local machine or use DRMAA to submit and control jobs (recommended).")
runtime.add_argument("--exe", default="daijin_exe.yaml",
help="Configuration file for the executables.")
@@ -354,7 +354,11 @@ def assemble_transcripts_pipeline(args):
else:
drmaa_var = res_cmd

if drmaa_var or cluster_var:
if SCHEDULER == "local":
hpc_conf = None
drmaa_var = None
cluster_var = None
elif drmaa_var or cluster_var:
if os.path.exists(args.hpc_conf):
hpc_conf = args.hpc_conf
else:
@@ -370,7 +374,7 @@

if args.latency_wait is not None:
latency = abs(args.latency_wait)
elif SCHEDULER != '':
elif SCHEDULER not in ('', "local"):
latency = 60
else:
latency = 1
@@ -479,9 +483,21 @@ def mikado_pipeline(args):
yaml.dump(doc, yaml_file)
yaml_file.flush()

if SCHEDULER == "local":
hpc_conf = None
drmaa_var = None
cluster_var = None
elif drmaa_var or cluster_var:
if os.path.exists(args.hpc_conf):
hpc_conf = args.hpc_conf
else:
hpc_conf = system_hpc_yaml
else:
hpc_conf = None

if args.latency_wait:
latency = abs(args.latency_wait)
elif SCHEDULER != '':
elif SCHEDULER not in ('', "local"):
latency = 60
else:
latency = 1