Release Candidate 2.0rc5
* Mikado pick now uses lightweight SQLite databases for interprocess data exchange (#218). This could still be improved by allowing more fragments to be removed; an illustrative sketch of the general pattern follows this list.
* Small corrections for the `daijin` pipelines.
* Fix #215 
* Fixed the recovery of lost loci.
* Amendment for #134: the default `min_cds_overlap` has been reduced to 50% (75% was probably too restrictive). A toy example of what this overlap fraction measures also follows this list.
* Solved a bug in `mikado compare` that led to incorrect statistics when using multiprocessing.
* Needed bug fixes for Mikado serialise.
* Mikado configure was embedding the scoring file within the configuration - now amended.
* Fix #217
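
The first point above refers to exchanging worker results through small on-disk SQLite databases rather than in-memory queues. The snippet below is only an illustrative sketch of that general pattern, not Mikado's actual implementation; every name in it is made up for the example.

```python
# Illustrative sketch only: each worker writes its results to its own SQLite file,
# and the parent process reads the partial databases back once the workers finish.
import multiprocessing as mp
import os
import sqlite3
import tempfile


def worker(db_path, items):
    conn = sqlite3.connect(db_path)
    conn.execute("CREATE TABLE loci (name TEXT, score REAL)")
    conn.executemany("INSERT INTO loci VALUES (?, ?)", items)
    conn.commit()
    conn.close()


def main():
    workdir = tempfile.mkdtemp()
    chunks = [[("locus_1", 10.0)], [("locus_2", 12.5)], [("locus_3", 7.2)]]
    databases, processes = [], []
    for idx, chunk in enumerate(chunks):
        db_path = os.path.join(workdir, "partial_{}.db".format(idx))
        databases.append(db_path)
        proc = mp.Process(target=worker, args=(db_path, chunk))
        proc.start()
        processes.append(proc)
    for proc in processes:
        proc.join()
    # Collect everything back in the parent process.
    results = []
    for db_path in databases:
        conn = sqlite3.connect(db_path)
        results.extend(conn.execute("SELECT name, score FROM loci"))
        conn.close()
    print(sorted(results))


if __name__ == "__main__":
    main()
```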
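
For the `min_cds_overlap` change, the threshold is a fraction of shared coding sequence. The toy function below is a hypothetical illustration, not Mikado's code: it uses one common convention (overlap taken over the shorter of the two CDS intervals) to show the kind of quantity such a cutoff compares, and why lowering it from 0.75 to 0.5 lets more transcript pairs pass.

```python
# Toy illustration: fraction of the shorter CDS interval covered by the overlap.
def cds_overlap_fraction(cds_a, cds_b):
    start = max(cds_a[0], cds_b[0])
    end = min(cds_a[1], cds_b[1])
    overlap = max(0, end - start)
    return overlap / min(cds_a[1] - cds_a[0], cds_b[1] - cds_b[0])


fraction = cds_overlap_fraction((100, 400), (220, 600))
print(round(fraction, 2))                 # 0.6
print(fraction >= 0.75, fraction >= 0.5)  # False True
```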
lucventurini authored Sep 26, 2019
1 parent c29a809 commit ab172f1
Showing 39 changed files with 297,368 additions and 11,811 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -15,6 +15,12 @@ sample_data/uniprot*fasta*
sample_data/chr5.fas
sample_data/mikado_prepared.fasta
sample_data/mikado_prepared.gtf
sample_data/slurm*
sample_data/Daijin
sample_data/daijin_test
sample_data/chr5.fas.gz.fai
sample_data/daijin.yaml
sample_data/daijin_exe.yaml
Chr*
*/*~
sample_data/*.ok
3 changes: 2 additions & 1 deletion .travis.yml
@@ -21,11 +21,12 @@ install:
- conda install -y -c conda-forge -- pytest-cov codecov;
- python setup.py develop;
script:
- python -c "import Mikado; Mikado.test(label='fast')";
- python setup.py test --addopts "-m slow Mikado/tests/test_system_calls.py::SerialiseChecker::test_subprocess_multi_empty_orfs"
- python setup.py test --addopts " --cov Mikado --cov-config=.coveragerc -m '(slow or not slow) and not triage'";
- cd sample_data; snakemake
- rm -rf .snakemake/conda/; # This causes weird bugs
- cd ..;
- python -c "import Mikado; Mikado.test(label='fast')";
# Check that the seed is set properly
- mikado pick --procs 2 --seed 20 --fasta Mikado/tests/chr5.fas.gz --json-conf Mikado/tests/check_seed.yaml -od 20a Mikado/tests/check_seed.gtf
- mikado pick --procs 2 --seed 20 --fasta Mikado/tests/chr5.fas.gz --json-conf Mikado/tests/check_seed.yaml -od 20b Mikado/tests/check_seed.gtf
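
The two seeded `mikado pick` runs above write their results to the `20a` and `20b` output directories; with the same seed they are expected to produce identical output. The snippet below is a hedged sketch of how such a reproducibility check could be performed; it is not part of the repository, and only the directory names are taken from the commands above.

```python
# Sketch: recursively compare the two output directories produced by the seeded runs.
import filecmp
import sys


def directories_match(dir_a, dir_b):
    comparison = filecmp.dircmp(dir_a, dir_b)
    if comparison.left_only or comparison.right_only or comparison.diff_files:
        return False
    return all(directories_match("{}/{}".format(dir_a, sub), "{}/{}".format(dir_b, sub))
               for sub in comparison.common_dirs)


if __name__ == "__main__":
    if not directories_match("20a", "20b"):
        sys.exit("Outputs differ: the seed is not reproducible.")
    print("Outputs are identical: the seed is set properly.")
```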
4 changes: 2 additions & 2 deletions Mikado/configuration/configuration_blueprint.json
@@ -346,7 +346,7 @@
"type": "number",
"minimum": 0,
"maximum": 1,
"default": 0.75
"default": 0.5
},
"min_cdna_overlap": {
"type": "number",
@@ -632,7 +632,7 @@
},
"consider_truncated_for_retained": {
"type": "boolean",
"default": false
"default": true
},
"only_reference_update": {
"type": "boolean",
19 changes: 18 additions & 1 deletion Mikado/configuration/configurator.py
@@ -23,13 +23,30 @@
from ..exceptions import InvalidJson, UnrecognizedRescaler
from ..utilities import merge_dictionaries
from ..utilities.log_utils import create_default_logger
import sys
import numpy


__author__ = "Luca Venturini"


def create_cluster_config(config, args, logger):
if (config["scheduler"] and config["scheduler"] != "local") or (not config["scheduler"] and args.cluster_config):
if not args.queue:
error = "A queue must be specified for the analysis when in HPC mode. Please relaunch."
logger.error(error)
exit(1)
if args.cluster_config is not None:
cluster_config = args.cluster_config
else:
cluster_config = "daijin_hpc.yaml"
with open(cluster_config, "wt") as out, \
resource_stream("Mikado", os.path.join("daijin", "hpc.yaml")) as original:
for pos, line in enumerate(original):
print(line.decode(), file=out, end="")
if pos == 0:
print(" queue:", args.queue, file=out)


def extend_with_default(validator_class, resolver=None, simple=False):
"""
Function to extend the normal validation classes for jsonschema
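
The `extend_with_default` helper shown above builds on the standard jsonschema pattern of filling in schema defaults during validation, which is how changed defaults such as `min_cds_overlap: 0.5` end up in generated configurations. The snippet below is a minimal, self-contained sketch of that generic pattern (adapted from the jsonschema documentation, not Mikado's own signature, which also accepts a resolver and a `simple` flag).

```python
# Generic jsonschema "fill in defaults while validating" pattern.
from jsonschema import Draft7Validator, validators


def extend_with_default(validator_class):
    validate_properties = validator_class.VALIDATORS["properties"]

    def set_defaults(validator, properties, instance, schema):
        # Insert defaults before running the normal "properties" validation.
        for prop, subschema in properties.items():
            if isinstance(instance, dict) and "default" in subschema:
                instance.setdefault(prop, subschema["default"])
        yield from validate_properties(validator, properties, instance, schema)

    return validators.extend(validator_class, {"properties": set_defaults})


DefaultFillingValidator = extend_with_default(Draft7Validator)
schema = {"properties": {"min_cds_overlap": {"type": "number", "default": 0.5}}}
config = {}
DefaultFillingValidator(schema).validate(config)
print(config)  # {'min_cds_overlap': 0.5}
```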
17 changes: 2 additions & 15 deletions Mikado/configuration/daijin_configurator.py
@@ -5,6 +5,7 @@
import jsonschema
from pkg_resources import resource_stream, resource_filename
from .configurator import extend_with_default, merge_dictionaries, check_all_requirements, check_scoring, to_json
from .configurator import create_cluster_config
from . import print_config, check_has_requirements
from ..exceptions import InvalidJson
from ..utilities.log_utils import create_default_logger
@@ -214,21 +215,7 @@ def create_daijin_config(args, level="ERROR", piped=False):
_parse_reads_from_cli(args, config, logger)

config["scheduler"] = args.scheduler
if config["scheduler"] or args.cluster_config:
if not args.queue:
error = "A queue must be specified for the analysis when in HPC mode. Please relaunch."
logger.error(error)
exit(1)
if args.cluster_config is not None:
cluster_config = args.cluster_config
else:
cluster_config = "daijin_hpc.yaml"
with open(cluster_config, "wt") as out, \
resource_stream("Mikado", os.path.join("daijin", "hpc.yaml")) as original:
for pos, line in enumerate(original):
print(line.decode(), file=out, end="")
if pos == 0:
print(" queue:", args.queue, file=out)
create_cluster_config(config, args, logger)

config["threads"] = args.threads

2 changes: 1 addition & 1 deletion Mikado/configuration/daijin_schema.json
@@ -130,7 +130,7 @@
"scheduler": {
"type": ["string"],
"default": "",
"enum": ["LSF", "SLURM", "PBS", ""],
"enum": ["LSF", "SLURM", "PBS", "", "local"],
"Comment": "Scheduler to be used for the project. Set to null if you plan to use DRMAA or are using a local machine.",
"required": true
},
24 changes: 20 additions & 4 deletions Mikado/daijin/__init__.py
@@ -139,7 +139,7 @@ def create_config_parser():
runtime.add_argument("-o", "--out", default=sys.stdout, type=argparse.FileType("w"),
help="Output file. If the file name ends in \"json\", the file will be in JSON format; \
otherwise, Daijin will print out a YAML file. Default: STDOUT.")
runtime.add_argument("--scheduler", default="", choices=["", "SLURM", "LSF", "PBS"],
runtime.add_argument("--scheduler", default="", choices=["local", "SLURM", "LSF", "PBS"],
help="Scheduler to use. Default: None - ie, either execute everything on the local machine or use DRMAA to submit and control jobs (recommended).")
runtime.add_argument("--exe", default="daijin_exe.yaml",
help="Configuration file for the executables.")
@@ -354,7 +354,11 @@ def assemble_transcripts_pipeline(args):
else:
drmaa_var = res_cmd

if drmaa_var or cluster_var:
if SCHEDULER == "local":
hpc_conf = None
drmaa_var = None
cluster_var = None
elif drmaa_var or cluster_var:
if os.path.exists(args.hpc_conf):
hpc_conf = args.hpc_conf
else:
@@ -370,7 +374,7 @@

if args.latency_wait is not None:
latency = abs(args.latency_wait)
elif SCHEDULER != '':
elif SCHEDULER not in ('', "local"):
latency = 60
else:
latency = 1
@@ -479,9 +483,21 @@ def mikado_pipeline(args):
yaml.dump(doc, yaml_file)
yaml_file.flush()

if SCHEDULER == "local":
hpc_conf = None
drmaa_var = None
cluster_var = None
elif drmaa_var or cluster_var:
if os.path.exists(args.hpc_conf):
hpc_conf = args.hpc_conf
else:
hpc_conf = system_hpc_yaml
else:
hpc_conf = None

if args.latency_wait:
latency = abs(args.latency_wait)
elif SCHEDULER != '':
elif SCHEDULER not in ('', "local"):
latency = 60
else:
latency = 1