From 1fdb1fc95570a11dc3935fa7b7ddd149df907601 Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Tue, 23 Apr 2019 11:35:29 +0200 Subject: [PATCH 01/10] niceparse --- kipoi_veff/cli.py | 108 ++++-------------- .../example_files/variants.vcf.gz | Bin 209 -> 209 bytes .../example_files/variants.vcf.gz.tbi | Bin 204 -> 204 bytes .../models/rbp/example_files/variants.vcf.gz | Bin 216 -> 202 bytes .../rbp/example_files/variants.vcf.gz.tbi | Bin 207 -> 204 bytes .../example_files/variants.vcf.gz | Bin 209 -> 209 bytes .../example_files/variants.vcf.gz.tbi | Bin 204 -> 204 bytes tests/test_cli_mutation_map.py | 2 +- tests/test_cli_score_variants.py | 61 +++++----- 9 files changed, 58 insertions(+), 113 deletions(-) diff --git a/kipoi_veff/cli.py b/kipoi_veff/cli.py index 107fe8b..b3cc508 100644 --- a/kipoi_veff/cli.py +++ b/kipoi_veff/cli.py @@ -16,44 +16,10 @@ from kipoi_veff.utils.io import SyncBatchWriter from kipoi import writers from kipoi_utils.utils import cd -from kipoi_utils.utils import parse_json_file_str - +from kipoi_utils.utils import parse_json_file_str, parse_json_file_str_or_arglist logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) - -def _prepare_multi_model_args(args): - assert isinstance(args.model, list) - assert isinstance(args.source, list) - assert isinstance(args.seq_length, list) - assert isinstance(args.dataloader, list) - assert isinstance(args.dataloader_source, list) - assert isinstance(args.dataloader_args, list) - - def ensure_matching_args(ref_arg, query_arg, ref_label, query_label, allow_zero=True): - assert isinstance(ref_arg, list) - assert isinstance(query_arg, list) - n = len(ref_arg) - if allow_zero and (len(query_arg) == 0): - ret = [None] * n - elif len(query_arg) == 1: - ret = [query_arg[0]] * n - elif not len(query_arg) == n: - raise Exception("Either give one {q} for all {r} or one {q} for every {r} in the same order.".format( - q=query_label, r=ref_label)) - else: - ret = query_arg - return ret - - args.source = ensure_matching_args(args.model, args.source, "--model", "--source", allow_zero=False) - args.seq_length = ensure_matching_args(args.model, args.seq_length, "--model", "--seq_length") - args.dataloader = ensure_matching_args(args.model, args.dataloader, "--model", "--dataloader") - args.dataloader_source = ensure_matching_args(args.dataloader, args.dataloader_source, "--dataloader", - "--dataloader_source") - args.dataloader_args = ensure_matching_args(args.model, args.dataloader_args, "--model", - "--dataloader_args", allow_zero=False) - - def get_single(x, name): """Make sure only a single element is used """ @@ -80,18 +46,18 @@ def cli_score_variants(command, raw_args): assert command == "score_variants" parser = argparse.ArgumentParser('kipoi veff {}'.format(command), description='Predict effect of SNVs using ISM.') - parser.add_argument('model', help='Model name.', nargs="+") - parser.add_argument('--source', default=["kipoi"], nargs="+", + parser.add_argument('model', help='Model name.') + parser.add_argument('--source', default=["kipoi"], choices=list(kipoi.config.model_sources().keys()), help='Model source to use. Specified in ~/.kipoi/config.yaml' + " under model_sources. " + "'dir' is an additional source referring to the local folder.") - parser.add_argument('--dataloader', nargs="+", default=[], + parser.add_argument('--dataloader', default=None, help="Dataloader name. If not specified, the model's default" + "DataLoader will be used") - parser.add_argument('--dataloader_source', nargs="+", default=["kipoi"], + parser.add_argument('--dataloader_source', default="kipoi", help="Dataloader source") - parser.add_argument('--dataloader_args', nargs="+", default=[], + parser.add_argument('--dataloader_args', default=None, help="Dataloader arguments either as a json string:" + "'{\"arg1\": 1} or as a file path to a json file") parser.add_argument('-i', '--input_vcf', required=True, @@ -117,7 +83,7 @@ def cli_score_variants(command, raw_args): "individual JSONs are expected to be supplied in the same order as the labels defined in " "--scoring. If the defaults or no arguments should be used define '{}' for that respective " "scoring method.") - parser.add_argument('-l', "--seq_length", type=int, nargs="+", default=[], + parser.add_argument('-l', "--seq_length", type=int, default=None, help="Optional parameter: Model input sequence length - necessary if the model does not have a " "pre-defined input sequence length.") parser.add_argument('--std_var_id', action="store_true", help="If set then variant IDs in the annotated" @@ -137,8 +103,10 @@ def cli_score_variants(command, raw_args): "$SINGULARITY_CACHEDIR if set") args = parser.parse_args(raw_args) + + # OBSOLETE # Make sure all the multi-model arguments like source, dataloader etc. fit together - _prepare_multi_model_args(args) + #_prepare_multi_model_args(args) # Check that all the folders exist file_exists(args.input_vcf, logger) @@ -163,11 +131,6 @@ def cli_score_variants(command, raw_args): # Drop the singularity flag raw_args = [x for x in raw_args if x != '--singularity'] - # handle the list case - args.model = get_single(args.model, 'model') - args.dataloader_args = get_single(args.dataloader_args, 'dataloader_args') - args.source = get_single(args.source, 'source') - dataloader_kwargs = parse_json_file_str(args.dataloader_args) # create output files @@ -203,15 +166,16 @@ def cli_score_variants(command, raw_args): "path of a file containing them) must be given for every " "`--scores` function.") - n_models = len(args.model) - # TODO - remove the feature of running multiple models in parallel - for model_name, model_source, dataloader, dataloader_source, dataloader_args, seq_length in zip(args.model, - args.source, - args.dataloader, - args.dataloader_source, - args.dataloader_args, - args.seq_length): + if True: + model_name = args.model + model_source = args.source + dataloader = args.dataloader + dataloader_source = args.dataloader_source + dataloader_args = args.dataloader_args + seq_length = args.seq_length + + model_name_safe = model_name.replace("/", "_") # VCF writer @@ -219,20 +183,11 @@ def cli_score_variants(command, raw_args): if args.output_vcf is not None: dir_exists(os.path.dirname(args.output_vcf), logger) output_vcf_model = args.output_vcf - # If multiple models are to be analysed then vcfs need renaming. - if n_models > 1: - if output_vcf_model.endswith(".vcf"): - output_vcf_model = output_vcf_model[:-4] - output_vcf_model += model_name_safe + ".vcf" # Other writers if args.extra_output is not None: dir_exists(os.path.dirname(args.extra_output), logger) - if n_models > 1: - ending = args.extra_output.split('.')[-1] - extra_output = args.extra_output[:-len(ending)] + model_name_safe + "." + ending - else: - extra_output = args.extra_output + extra_output = args.extra_output writer = writers.get_writer(extra_output, metadata_schema=None) assert writer is not None extra_writers = [SyncBatchWriter(writer)] @@ -283,23 +238,6 @@ def cli_score_variants(command, raw_args): return_predictions=False, model_outputs=model_outputs) - # tabular files - # if keep_predictions: - # if file_format in ["tsv"]: - # for model_name in res: - # for i, k in enumerate(res[model_name]): - # # Remove an old file if it is still there... - # if i == 0: - # try: - # os.unlink(args.extra_output) - # except Exception: - # pass - # with open(args.extra_output, "w") as ofh: - # ofh.write("KPVEP_%s:%s\n" % (k.upper(), model_name)) - # res[model_name][k].to_csv(args.extra_output, sep="\t", mode="a") - - # if file_format in ["hdf5", "h5"]: - # deepdish.io.save(args.extra_output, res) logger.info('Successfully predicted samples') @@ -349,8 +287,9 @@ def cli_create_mutation_map(command, raw_args): args = parser.parse_args(raw_args) # extract args for kipoi.variant_effects.predict_snvs - - dataloader_arguments = parse_json_file_str(args.dataloader_args) + print("DL ARGS",args.dataloader_args) + dataloader_arguments = parse_json_file_str_or_arglist(args.dataloader_args) + #dataloader_arguments = parse_json_file_str(args.dataloader_args) if args.output is None: raise Exception("Output file `--output` has to be set!") @@ -367,6 +306,7 @@ def cli_create_mutation_map(command, raw_args): source=args.source, dry_run=False) return None + # -------------------------------------------- # install args if args.install_req: diff --git a/tests/models/non_bedinput_model/example_files/variants.vcf.gz b/tests/models/non_bedinput_model/example_files/variants.vcf.gz index 9e53897244b81e1e85ceb5853b6e43b59a20fcdd..25234629a4cc69a1cd23fc2e2d9468d79f6ff1bf 100644 GIT binary patch delta 144 zcmV;B0B`@%0nq`FCV$kX7OK@6-9;=4icrPhZzK*;#Jlajd+_3TIGqo}b-e8ElWl1p zX+lw4&iZbSWw%B%MPC`{)?&5MElktesz-r%wVBXp$fuy6K`*Uk8|{w+GgzopsDd!ugdb=)B(EKNp*T3h y_#L~XOcL`uFMYcD(~i fG`ak!55s#LCTYI(HQDnt)yuifUT3LZ&XIjjjoc?& diff --git a/tests/models/rbp/example_files/variants.vcf.gz b/tests/models/rbp/example_files/variants.vcf.gz index 89163614130cdfbb9e7e4d2d674de8de71a65892..3d45fcc1018bb7fdc0aef35226c22fe76a93bb73 100644 GIT binary patch literal 202 zcmb2|=3rp}f&Xj_PR>jWYZ+?K8FC*o5MX`q^OyOq>8F=wzI-5%6wG`vqvxHH7^|zs zqxa_5mQK~;&)d<@uJ3#M^w%r*cHe$zcw77L8DpQItsn9@v$wMvv&UyoFguW@uA7nU z{Py6QE#7_4`#3r}=X7!@GIJ{4o1qxm^vL@tPiOMMi7)PQUHGGN^UpzkpFp##S`)uY p@Au$%;wdWMGg-bFDN3GuZJU0s!B~Ph|iA delta 194 zcmV;z06qW80oVb5ABzYC000000RIL6LPG)oy8wmHO$&lR5C-7$>)-G$v$L+^p~GUX z36^DB_lTsR2rT;jZEOdNK4xz7yfZLi*d30$?%ba?*U>sRw>Y98%qML##k^Uf8lx=? zbZxO%=^Cc0ZPB8dnI_ooduFguu}~SIzX(^*u!tT#^h~ijcSi1d=#mmi%+G!4w#y&x w23D7UBc8tpqE8o65SC*E8slFZV(axdstPf}tbQ}4;Z?>gFGhGCJbA0h%<7XSbN diff --git a/tests/models/rbp/example_files/variants.vcf.gz.tbi b/tests/models/rbp/example_files/variants.vcf.gz.tbi index a99ba8b1a38eabc884afbb81df3e7025fafea112..7ec37910231b09723d2b63035116ca94f7ae5256 100644 GIT binary patch literal 204 zcmb2|=3rp}f&Xj_PR>jW>lxnO*~ojyK!D)@zYvEQ1EZ?+l4XmUXYpoCcwxlFn!JH2 zX`Km6j<#3#iyhP7Y`*{h8=Kv`Z=s@si}OV8Z#?O@tI*24RQ7S6Y4q|{l~em4|C`Bw z{mRbX#5%FVJ9L8TS7vY8o_cC}?d;k$LHpjE-hDo^p51ej3MTr0x(z5Ae{+)kmjBIa ZN4b~!i<+HsV_=X+bFVZ5GuZhc0sxQIO{f3> literal 207 zcmb2|=3rp}f&Xj_PR>jWn;72SG2~)06ks@D>v&PTBdXJW$1OMOwSOCX6VhwfZ}?!^ z^uySHazTuC*||e$lYBS*bmY4o8|r#w!P!UFVK)P3AHTeGUGj}{YTYGS!B5}rs6Y1C z^lps%e!I@PIm=(xSzb@c-yEX-r%wVBXp$fuy6K`*Uk8|{w+GgzopsDd!ugdb=)B(EKNp*T3h y_#L~XOcL`uFMYcD(~i fG`ak!55s#LCTYI(HQDnt)yuifUT3LZ&XIjjjoc?& diff --git a/tests/test_cli_mutation_map.py b/tests/test_cli_mutation_map.py index 8e49f80..d544492 100644 --- a/tests/test_cli_mutation_map.py +++ b/tests/test_cli_mutation_map.py @@ -36,7 +36,7 @@ def test_generate_mutation_maps_example(example, tmpdir): dataloader_kwargs = {k: example_dir + v for k, v in dataloader_kwargs.items()} import json dataloader_kwargs_str = json.dumps(dataloader_kwargs) - + print("THE KWARG STR",dataloader_kwargs_str) args = ["python", os.path.abspath("./kipoi_veff/cli.py"), "create_mutation_map", # "./", # directory diff --git a/tests/test_cli_score_variants.py b/tests/test_cli_score_variants.py index bc08114..0894f5e 100644 --- a/tests/test_cli_score_variants.py +++ b/tests/test_cli_score_variants.py @@ -27,35 +27,35 @@ class dummy_container(object): pass - -def test__prepare_multi_model_args(): - from kipoi_veff.cli import _prepare_multi_model_args - any_len = ["seq_length", "dataloader", "dataloader_source"] - keys = ["model", "source", "seq_length", "dataloader", "dataloader_source", "dataloader_args"] - for some_empty in [True, False]: - args = dummy_container() - for k in keys: - if k in any_len and some_empty: - setattr(args, k, []) - else: - setattr(args, k, ["a", "b"]) - _prepare_multi_model_args(args) - for k in keys: - assert len(getattr(args, k)) == len(getattr(args, "model")) - if k in any_len and some_empty: - assert all([el is None for el in getattr(args, k)]) - else: - assert all([el is not None for el in getattr(args, k)]) - args = dummy_container() - for k in keys: - setattr(args, k, ["a", "b"]) - args.model = ["a"] - with pytest.raises(Exception): - _prepare_multi_model_args(args) +# OBSOLETE +# def test__prepare_multi_model_args(): +# from kipoi_veff.cli import _prepare_multi_model_args +# any_len = ["seq_length", "dataloader", "dataloader_source"] +# keys = ["model", "source", "seq_length", "dataloader", "dataloader_source", "dataloader_args"] +# for some_empty in [True, False]: +# args = dummy_container() +# for k in keys: +# if k in any_len and some_empty: +# setattr(args, k, []) +# else: +# setattr(args, k, ["a", "b"]) +# _prepare_multi_model_args(args) +# for k in keys: +# assert len(getattr(args, k)) == len(getattr(args, "model")) +# if k in any_len and some_empty: +# assert all([el is None for el in getattr(args, k)]) +# else: +# assert all([el is not None for el in getattr(args, k)]) +# args = dummy_container() +# for k in keys: +# setattr(args, k, ["a", "b"]) +# args.model = ["a"] +# with pytest.raises(Exception): +# _prepare_multi_model_args(args) @pytest.mark.parametrize("file_format", ["tsv", "hdf5"]) -def test_predict_variants_example_multimodel(file_format, tmpdir): +def test_predict_variants_example_single_model(file_format, tmpdir): """kipoi predict ... """ if sys.version_info[0] == 2: @@ -83,7 +83,7 @@ def test_predict_variants_example_multimodel(file_format, tmpdir): args = ["python", os.path.abspath("./kipoi_veff/cli.py"), "score_variants", # "./", # directory - example_dirs[0], example_dirs[1], + example_dirs[1], "--source=dir", "--batch_size=4", "--dataloader_args='%s'" % dataloader_kwargs_str, @@ -98,10 +98,15 @@ def test_predict_variants_example_multimodel(file_format, tmpdir): # run the command kipoi_veff.cli.cli_score_variants('score_variants', args[3:]) - for example_dir in example_dirs: + for example_dir in [example_dirs[1]]: # assert filecmp.cmp(example_dir + "/example_files/variants_ref_out.vcf", vcf_tmpfile) model_name_safe = example_dir.replace("/", "_") + vcf_tmpfile_model = vcf_tmpfile[:-4] + model_name_safe + ".vcf" + + print("model_name_safe", model_name_safe ) + print("vcf_tmpfile", vcf_tmpfile ) + print("vcf_tmpfile_model", vcf_tmpfile_model ) assert os.path.exists(vcf_tmpfile_model) compare_vcfs(example_dir + "/example_files/variants_ref_out.vcf", vcf_tmpfile_model) ending = tmpfile.split('.')[-1] From 682a2814afa385f72cc79b6770ff0992ac16d1f6 Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Tue, 23 Apr 2019 11:46:29 +0200 Subject: [PATCH 02/10] stuff --- kipoi_veff/cli.py | 2 +- tests/test_cli_score_variants.py | 65 ++++++++++++++++++-------------- 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/kipoi_veff/cli.py b/kipoi_veff/cli.py index b3cc508..76dd24d 100644 --- a/kipoi_veff/cli.py +++ b/kipoi_veff/cli.py @@ -47,7 +47,7 @@ def cli_score_variants(command, raw_args): parser = argparse.ArgumentParser('kipoi veff {}'.format(command), description='Predict effect of SNVs using ISM.') parser.add_argument('model', help='Model name.') - parser.add_argument('--source', default=["kipoi"], + parser.add_argument('--source', default="kipoi", choices=list(kipoi.config.model_sources().keys()), help='Model source to use. Specified in ~/.kipoi/config.yaml' + " under model_sources. " + diff --git a/tests/test_cli_score_variants.py b/tests/test_cli_score_variants.py index 0894f5e..1212027 100644 --- a/tests/test_cli_score_variants.py +++ b/tests/test_cli_score_variants.py @@ -63,40 +63,49 @@ def test_predict_variants_example_single_model(file_format, tmpdir): examples = "rbp", "non_bedinput_model" example_dirs = ["tests/models/{0}/".format(ex) for ex in examples] - main_example_dir = example_dirs[1] tmpdir_here = tmpdir.mkdir("example") + for i in range(2): + main_example_dir = example_dirs[1] + + + + # non_bedinput_model is not compatible with restricted bed files as + # alterations in region generation have no influence on that model + tmpfile = str(tmpdir_here.join("out.{0}".format(file_format))) + vcf_tmpfile = str(tmpdir_here.join("out.{0}".format("vcf"))) + + dataloader_kwargs = {"fasta_file": "example_files/hg38_chr22.fa", + "preproc_transformer": "dataloader_files/encodeSplines.pkl", + "gtf_file": "example_files/gencode_v25_chr22.gtf.pkl.gz", + "intervals_file": "example_files/variant_intervals.tsv"} + dataloader_kwargs = {k: main_example_dir + v for k, v in dataloader_kwargs.items()} + import json + dataloader_kwargs_str = json.dumps(dataloader_kwargs) + + args = ["python", os.path.abspath("./kipoi_veff/cli.py"), + "score_variants", + # "./", # directory + example_dirs[i], + "--source=dir", + "--batch_size=4", + "--dataloader_args='%s'" % dataloader_kwargs_str, + "--input_vcf", main_example_dir + "/example_files/variants.vcf", + # this one was now gone in the master?! + "--output_vcf", vcf_tmpfile, + "--extra_output", tmpfile] + # run the + if INSTALL_FLAG: + args.append(INSTALL_FLAG) + + # run the command + kipoi_veff.cli.cli_score_variants('score_variants', args[3:]) + + - # non_bedinput_model is not compatible with restricted bed files as - # alterations in region generation have no influence on that model - tmpfile = str(tmpdir_here.join("out.{0}".format(file_format))) - vcf_tmpfile = str(tmpdir_here.join("out.{0}".format("vcf"))) - dataloader_kwargs = {"fasta_file": "example_files/hg38_chr22.fa", - "preproc_transformer": "dataloader_files/encodeSplines.pkl", - "gtf_file": "example_files/gencode_v25_chr22.gtf.pkl.gz", - "intervals_file": "example_files/variant_intervals.tsv"} - dataloader_kwargs = {k: main_example_dir + v for k, v in dataloader_kwargs.items()} - import json - dataloader_kwargs_str = json.dumps(dataloader_kwargs) - args = ["python", os.path.abspath("./kipoi_veff/cli.py"), - "score_variants", - # "./", # directory - example_dirs[1], - "--source=dir", - "--batch_size=4", - "--dataloader_args='%s'" % dataloader_kwargs_str, - "--input_vcf", main_example_dir + "/example_files/variants.vcf", - # this one was now gone in the master?! - "--output_vcf", vcf_tmpfile, - "--extra_output", tmpfile] - # run the - if INSTALL_FLAG: - args.append(INSTALL_FLAG) - # run the command - kipoi_veff.cli.cli_score_variants('score_variants', args[3:]) for example_dir in [example_dirs[1]]: # assert filecmp.cmp(example_dir + "/example_files/variants_ref_out.vcf", vcf_tmpfile) From 82000dc983e12028f58a4be5b015ba7650791a47 Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Tue, 23 Apr 2019 15:32:21 +0200 Subject: [PATCH 03/10] - removed support for using cli with multiple models - added new dataloader non-json parsing in cli --- kipoi_veff/cli.py | 129 ++++++++++++++----------------- setup.py | 2 +- tests/test_cli_mutation_map.py | 35 ++++++--- tests/test_cli_score_variants.py | 124 ++++++++++++++--------------- 4 files changed, 142 insertions(+), 148 deletions(-) diff --git a/kipoi_veff/cli.py b/kipoi_veff/cli.py index 76dd24d..63e45a7 100644 --- a/kipoi_veff/cli.py +++ b/kipoi_veff/cli.py @@ -52,14 +52,9 @@ def cli_score_variants(command, raw_args): help='Model source to use. Specified in ~/.kipoi/config.yaml' + " under model_sources. " + "'dir' is an additional source referring to the local folder.") - parser.add_argument('--dataloader', default=None, - help="Dataloader name. If not specified, the model's default" + - "DataLoader will be used") - parser.add_argument('--dataloader_source', default="kipoi", - help="Dataloader source") - parser.add_argument('--dataloader_args', default=None, - help="Dataloader arguments either as a json string:" + - "'{\"arg1\": 1} or as a file path to a json file") + + add_dataloader(parser=parser, with_args=True) + parser.add_argument('-i', '--input_vcf', required=True, help='Input VCF.') parser.add_argument('-o', '--output_vcf', @@ -131,7 +126,7 @@ def cli_score_variants(command, raw_args): # Drop the singularity flag raw_args = [x for x in raw_args if x != '--singularity'] - dataloader_kwargs = parse_json_file_str(args.dataloader_args) + dataloader_kwargs = parse_json_file_str_or_arglist(args.dataloader_args) # create output files output_files = [] @@ -166,77 +161,65 @@ def cli_score_variants(command, raw_args): "path of a file containing them) must be given for every " "`--scores` function.") + # VCF writer + output_vcf_model = None + if args.output_vcf is not None: + dir_exists(os.path.dirname(args.output_vcf), logger) + output_vcf_model = args.output_vcf - if True: - model_name = args.model - model_source = args.source - dataloader = args.dataloader - dataloader_source = args.dataloader_source - dataloader_args = args.dataloader_args - seq_length = args.seq_length - - - model_name_safe = model_name.replace("/", "_") - - # VCF writer - output_vcf_model = None - if args.output_vcf is not None: - dir_exists(os.path.dirname(args.output_vcf), logger) - output_vcf_model = args.output_vcf - - # Other writers - if args.extra_output is not None: - dir_exists(os.path.dirname(args.extra_output), logger) - extra_output = args.extra_output - writer = writers.get_writer(extra_output, metadata_schema=None) - assert writer is not None - extra_writers = [SyncBatchWriter(writer)] - else: - extra_writers = [] - - dataloader_arguments = parse_json_file_str(dataloader_args) - - # -------------------------------------------- - # load model & dataloader - model = kipoi.get_model(model_name, model_source) - - if dataloader is not None: - Dl = kipoi.get_dataloader_factory(dataloader, dataloader_source) - else: - Dl = model.default_dataloader + # Other writers + if args.extra_output is not None: + dir_exists(os.path.dirname(args.extra_output), logger) + extra_output = args.extra_output + writer = writers.get_writer(extra_output, metadata_schema=None) + assert writer is not None + extra_writers = [SyncBatchWriter(writer)] + else: + extra_writers = [] - # Load effect prediction related model info - model_info = kipoi_veff.ModelInfoExtractor(model, Dl) + dataloader_arguments = parse_json_file_str_or_arglist(args.dataloader_args) - if model_info.use_seq_only_rc: - logger.info('Model SUPPORTS simple reverse complementation of input DNA sequences.') - else: - logger.info('Model DOES NOT support simple reverse complementation of input DNA sequences.') + # -------------------------------------------- + # load model & dataloader + model = kipoi.get_model(args.model, args.source) - if output_vcf_model is not None: - logger.info('Annotated VCF will be written to %s.' % str(output_vcf_model)) + if args.dataloader is not None: + Dl = kipoi.get_dataloader_factory(args.dataloader, args.dataloader_source) + else: + Dl = model.default_dataloader - model_outputs = None - if args.model_outputs is not None: - model_outputs = args.model_outputs + # Load effect prediction related model info + model_info = kipoi_veff.ModelInfoExtractor(model, Dl) - elif args.model_outputs_i is not None: - model_outputs = args.model_outputs_i + if model_info.use_seq_only_rc: + logger.info('Model SUPPORTS simple reverse complementation of input DNA sequences.') + else: + logger.info('Model DOES NOT support simple reverse complementation of input DNA sequences.') - kipoi_veff.score_variants(model, - dataloader_arguments, - args.input_vcf, - output_vcf=output_vcf_model, - output_writers=extra_writers, - scores=args.scores, - score_kwargs=score_kwargs, - num_workers=args.num_workers, - batch_size=args.batch_size, - seq_length=seq_length, - std_var_id=args.std_var_id, - restriction_bed=args.restriction_bed, - return_predictions=False, - model_outputs=model_outputs) + if output_vcf_model is not None: + logger.info('Annotated VCF will be written to %s.' % str(output_vcf_model)) + + model_outputs = None + if args.model_outputs is not None: + model_outputs = args.model_outputs + + elif args.model_outputs_i is not None: + model_outputs = args.model_outputs_i + + kipoi_veff.score_variants(model, + dataloader_arguments, + args.input_vcf, + output_vcf=output_vcf_model, + output_writers=extra_writers, + scores=args.scores, + score_kwargs=score_kwargs, + num_workers=args.num_workers, + batch_size=args.batch_size, + seq_length=args.seq_length, + std_var_id=args.std_var_id, + restriction_bed=args.restriction_bed, + return_predictions=False, + model_outputs=model_outputs) logger.info('Successfully predicted samples') diff --git a/setup.py b/setup.py index 38ba9cb..e7a8577 100755 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ requirements = [ "kipoi>=0.6.1", - "kipoi-utils>=0.1.1", + "kipoi-utils>=0.1.12", # vep "pyvcf", "cyvcf2", diff --git a/tests/test_cli_mutation_map.py b/tests/test_cli_mutation_map.py index d544492..4b17b97 100644 --- a/tests/test_cli_mutation_map.py +++ b/tests/test_cli_mutation_map.py @@ -13,7 +13,8 @@ @pytest.mark.parametrize("example", EXAMPLES_TO_RUN) -def test_generate_mutation_maps_example(example, tmpdir): +@pytest.mark.parametrize("new_dataloader_kwargs_format", [False, True]) +def test_generate_mutation_maps_example(example, new_dataloader_kwargs_format, tmpdir): """kipoi predict ... """ if (example not in {"rbp"}) or (sys.version_info[0] == 2): @@ -24,20 +25,20 @@ def test_generate_mutation_maps_example(example, tmpdir): tmpdir_here = tmpdir.mkdir("example") # restricted_bed = False - print(example) - print("tmpdir: {0}".format(tmpdir)) mm_tmpfile = str(tmpdir_here.join("out_mm.hdf5")) plt_tmpfile = str(tmpdir_here.join("plot.png")) dataloader_kwargs = {"fasta_file": "example_files/hg38_chr22.fa", - "preproc_transformer": "dataloader_files/encodeSplines.pkl", - "gtf_file": "example_files/gencode_v25_chr22.gtf.pkl.gz", - "intervals_file": "example_files/variant_intervals.tsv"} - dataloader_kwargs = {k: example_dir + v for k, v in dataloader_kwargs.items()} - import json - dataloader_kwargs_str = json.dumps(dataloader_kwargs) - print("THE KWARG STR",dataloader_kwargs_str) - args = ["python", os.path.abspath("./kipoi_veff/cli.py"), + "preproc_transformer": "dataloader_files/encodeSplines.pkl", + "gtf_file": "example_files/gencode_v25_chr22.gtf.pkl.gz", + "intervals_file": "example_files/variant_intervals.tsv"} + dataloader_kwargs = {k: example_dir + v for k, v in dataloader_kwargs.items()} + + if not new_dataloader_kwargs_format: + + import json + dataloader_kwargs_str = json.dumps(dataloader_kwargs) + args = ["python", os.path.abspath("./kipoi_veff/cli.py"), "create_mutation_map", # "./", # directory example_dir, @@ -46,13 +47,23 @@ def test_generate_mutation_maps_example(example, tmpdir): "--dataloader_args='%s'" % dataloader_kwargs_str, "--regions_file", example_dir + "example_files/first_variant.vcf", "--output", mm_tmpfile] + else: + dataloader_kwargs_list = ["{0}={1}".format(key, val) for key,val in dataloader_kwargs.items()] + + args = ["python", os.path.abspath("./kipoi_veff/cli.py"), + "create_mutation_map", + # "./", # directory + example_dir, + "--source=dir", + "--batch_size=4", + "--dataloader_args"] + dataloader_kwargs_list + ["--regions_file", example_dir + "example_files/first_variant.vcf", + "--output", mm_tmpfile] # run the if INSTALL_FLAG: args.append(INSTALL_FLAG) returncode = subprocess.call(args=args, cwd=".") assert returncode == 0 - assert os.path.exists(mm_tmpfile) # make the plot diff --git a/tests/test_cli_score_variants.py b/tests/test_cli_score_variants.py index 1212027..8f484e8 100644 --- a/tests/test_cli_score_variants.py +++ b/tests/test_cli_score_variants.py @@ -27,7 +27,7 @@ class dummy_container(object): pass -# OBSOLETE + # def test__prepare_multi_model_args(): # from kipoi_veff.cli import _prepare_multi_model_args # any_len = ["seq_length", "dataloader", "dataloader_source"] @@ -63,63 +63,49 @@ def test_predict_variants_example_single_model(file_format, tmpdir): examples = "rbp", "non_bedinput_model" example_dirs = ["tests/models/{0}/".format(ex) for ex in examples] + main_example_dir = example_dirs[1] tmpdir_here = tmpdir.mkdir("example") - for i in range(2): - main_example_dir = example_dirs[1] - - - - # non_bedinput_model is not compatible with restricted bed files as - # alterations in region generation have no influence on that model - tmpfile = str(tmpdir_here.join("out.{0}".format(file_format))) - vcf_tmpfile = str(tmpdir_here.join("out.{0}".format("vcf"))) - - dataloader_kwargs = {"fasta_file": "example_files/hg38_chr22.fa", - "preproc_transformer": "dataloader_files/encodeSplines.pkl", - "gtf_file": "example_files/gencode_v25_chr22.gtf.pkl.gz", - "intervals_file": "example_files/variant_intervals.tsv"} - dataloader_kwargs = {k: main_example_dir + v for k, v in dataloader_kwargs.items()} - import json - dataloader_kwargs_str = json.dumps(dataloader_kwargs) - - args = ["python", os.path.abspath("./kipoi_veff/cli.py"), - "score_variants", - # "./", # directory - example_dirs[i], - "--source=dir", - "--batch_size=4", - "--dataloader_args='%s'" % dataloader_kwargs_str, - "--input_vcf", main_example_dir + "/example_files/variants.vcf", - # this one was now gone in the master?! - "--output_vcf", vcf_tmpfile, - "--extra_output", tmpfile] - # run the - if INSTALL_FLAG: - args.append(INSTALL_FLAG) - - # run the command - kipoi_veff.cli.cli_score_variants('score_variants', args[3:]) - - + # non_bedinput_model is not compatible with restricted bed files as + # alterations in region generation have no influence on that model + tmpfile = str(tmpdir_here.join("out.{0}".format(file_format))) + vcf_tmpfile = str(tmpdir_here.join("out.{0}".format("vcf"))) + dataloader_kwargs = {"fasta_file": "example_files/hg38_chr22.fa", + "preproc_transformer": "dataloader_files/encodeSplines.pkl", + "gtf_file": "example_files/gencode_v25_chr22.gtf.pkl.gz", + "intervals_file": "example_files/variant_intervals.tsv"} + dataloader_kwargs = {k: main_example_dir + v for k, v in dataloader_kwargs.items()} + import json + dataloader_kwargs_str = json.dumps(dataloader_kwargs) + args = ["python", os.path.abspath("./kipoi_veff/cli.py"), + "score_variants", + # "./", # directory + example_dirs[1], + "--source=dir", + "--batch_size=4", + "--dataloader_args='%s'" % dataloader_kwargs_str, + "--input_vcf", main_example_dir + "/example_files/variants.vcf", + # this one was now gone in the master?! + "--output_vcf", vcf_tmpfile, + "--extra_output", tmpfile] + # run the + if INSTALL_FLAG: + args.append(INSTALL_FLAG) + # run the command + kipoi_veff.cli.cli_score_variants('score_variants', args[3:]) - for example_dir in [example_dirs[1]]: + for example_dir in example_dirs[1:2]: # assert filecmp.cmp(example_dir + "/example_files/variants_ref_out.vcf", vcf_tmpfile) model_name_safe = example_dir.replace("/", "_") - - vcf_tmpfile_model = vcf_tmpfile[:-4] + model_name_safe + ".vcf" - - print("model_name_safe", model_name_safe ) - print("vcf_tmpfile", vcf_tmpfile ) - print("vcf_tmpfile_model", vcf_tmpfile_model ) + vcf_tmpfile_model = vcf_tmpfile assert os.path.exists(vcf_tmpfile_model) compare_vcfs(example_dir + "/example_files/variants_ref_out.vcf", vcf_tmpfile_model) ending = tmpfile.split('.')[-1] - extra_output = tmpfile[:-len(ending)] + model_name_safe + "." + ending + extra_output = tmpfile assert os.path.exists(extra_output) if file_format == "hdf5": @@ -131,7 +117,8 @@ def test_predict_variants_example_single_model(file_format, tmpdir): @pytest.mark.parametrize("example", ["rbp", "non_bedinput_model"]) @pytest.mark.parametrize("restricted_bed", [True, False]) @pytest.mark.parametrize("file_format", ["tsv", "hdf5"]) -def test_predict_variants_example(example, restricted_bed, file_format, tmpdir): +@pytest.mark.parametrize("new_dataloader_kwargs_format", [False, True]) +def test_predict_variants_example(example, restricted_bed, file_format, new_dataloader_kwargs_format, tmpdir): """kipoi predict ... """ if (example not in {"rbp", "non_bedinput_model"}) or (sys.version_info[0] == 2): @@ -145,8 +132,6 @@ def test_predict_variants_example(example, restricted_bed, file_format, tmpdir): # alterations in region generation have no influence on that model if restricted_bed and (example != "rbp"): pytest.skip("Resticted_bed only available for rbp_eclip") - print(example) - print("tmpdir: {0}".format(tmpdir)) tmpfile = str(tmpdir_here.join("out.{0}".format(file_format))) vcf_tmpfile = str(tmpdir_here.join("out.{0}".format("vcf"))) @@ -155,20 +140,35 @@ def test_predict_variants_example(example, restricted_bed, file_format, tmpdir): "gtf_file": "example_files/gencode_v25_chr22.gtf.pkl.gz", "intervals_file": "example_files/variant_intervals.tsv"} dataloader_kwargs = {k: example_dir + v for k, v in dataloader_kwargs.items()} - import json - dataloader_kwargs_str = json.dumps(dataloader_kwargs) + if not new_dataloader_kwargs_format: + import json + dataloader_kwargs_str = json.dumps(dataloader_kwargs) + + args = ["python", os.path.abspath("./kipoi_veff/cli.py"), + "score_variants", + # "./", # directory + example_dir, + "--source=dir", + "--batch_size=4", + "--dataloader_args='%s'" % dataloader_kwargs_str, + "--input_vcf", temp(example_dir + "/example_files/variants.vcf", tmpdir), + # this one was now gone in the master?! + "--output_vcf", vcf_tmpfile, + "--extra_output", tmpfile] + else: + dataloader_kwargs_list = ["{0}={1}".format(key, val) for key,val in dataloader_kwargs.items()] + args = ["python", os.path.abspath("./kipoi_veff/cli.py"), + "score_variants", + # "./", # directory + example_dir, + "--source=dir", + "--batch_size=4", + "--dataloader_args"] + dataloader_kwargs_list + [ + "--input_vcf", temp(example_dir + "/example_files/variants.vcf", tmpdir), + # this one was now gone in the master?! + "--output_vcf", vcf_tmpfile, + "--extra_output", tmpfile] - args = ["python", os.path.abspath("./kipoi_veff/cli.py"), - "score_variants", - # "./", # directory - example_dir, - "--source=dir", - "--batch_size=4", - "--dataloader_args='%s'" % dataloader_kwargs_str, - "--input_vcf", temp(example_dir + "/example_files/variants.vcf", tmpdir), - # this one was now gone in the master?! - "--output_vcf", vcf_tmpfile, - "--extra_output", tmpfile] # run the if INSTALL_FLAG: args.append(INSTALL_FLAG) From 0a4b1d26719443c45c6706271fad7851e5d90176 Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Tue, 23 Apr 2019 15:37:35 +0200 Subject: [PATCH 04/10] also deprecating py27 --- .circleci/config.yml | 49 -------------------------------------------- 1 file changed, 49 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 80649e4..2200303 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -88,64 +88,15 @@ jobs: - *store_test_results - *store_test_artifacts - test-py27: - docker: - - image: kipoi/miniconda:4.3.14 - working_directory: ~/repo - steps: - - checkout - - *update_conda - - *install_git_lfs - - *update_pytorch - - *install_conda_deps - - *install_pip_deps - - *install_kipoi - - *install_kipoi_veff - - *kipoi_ls - - *run_tests - - *store_test_results - - *store_test_artifacts - - build-deploy-docs: - docker: - # - image: continuumio/miniconda3:4.3.14 - - image: kipoi/miniconda3:4.3.14 - # - image: continuumio/anaconda3:5.0.1 - # - image: circleci/python:3.5 - working_directory: ~/repo - steps: - - add_ssh_keys: - fingerprints: - - 08:c1:46:ae:ea:06:99:b6:64:ee:3f:e0:98:ac:30:ce - - checkout - - run: - name: Install pip - Kipoi - command: pip install -e . - - run: - name: Install build deps - command: pip install nbconvert mkdocs pydoc-markdown - - run: - name: Build docs - command: | - cd docs/ - mkdir -p theme_dir/img/ipynb/ - ./render_ipynb.bash - pydocmd build - - run: - name: Deploy docs - command: .circleci/deploy_docs.bash - workflows: version: 2 test: jobs: - test-py36 - - test-py27 - build-deploy-docs: requires: - test-py36 - - test-py27 filters: branches: only: From ac7a0a97a4109082efacaa15f486a2595c87cd26 Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Tue, 23 Apr 2019 15:55:06 +0200 Subject: [PATCH 05/10] using special kipoi branch --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 2200303..d26330b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -32,7 +32,7 @@ variables: run: name: Install Kipoi command: | - git clone git@github.com:kipoi/kipoi.git kipoi_pkg + git clone git@github.com:DerThorsten/kipoi.git -b niceparse kipoi_pkg cd kipoi_pkg pip install '.' cd .. From e046f6d0ef255a770ef95f7de3d846cdc7912287 Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Tue, 23 Apr 2019 16:07:00 +0200 Subject: [PATCH 06/10] using special kipoi branch --- .circleci/config.yml | 2 +- setup.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index d26330b..70f0bce 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -80,8 +80,8 @@ jobs: - *update_pytorch - *install_conda_deps - *install_pip_deps - - *install_kipoi - *install_kipoi_veff + - *install_kipoi - *kipoi_ls - *run_tests - *run_coveralls diff --git a/setup.py b/setup.py index e7a8577..9201cf6 100755 --- a/setup.py +++ b/setup.py @@ -6,6 +6,7 @@ requirements = [ "kipoi>=0.6.1", + #$"git+ssh://git@github.com/DerThorsten/kipoi.git@niceparse",#egg=kipoi, "kipoi-utils>=0.1.12", # vep "pyvcf", @@ -29,7 +30,7 @@ "cookiecutter", # sometimes required "h5py", - "urllib3>=1.21.1", #,<1.23", + "urllib3>=1.21.1", #,<1.23", ] test_requirements = [ From 1f369a5dd52ee2b5b3fe019bf4f76b7b98156350 Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Tue, 23 Apr 2019 16:14:57 +0200 Subject: [PATCH 07/10] no deps --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 70f0bce..5e55ba6 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -34,7 +34,7 @@ variables: command: | git clone git@github.com:DerThorsten/kipoi.git -b niceparse kipoi_pkg cd kipoi_pkg - pip install '.' + pip install '.' --ignore-installed --no-deps cd .. install_kipoi_veff: &install_kipoi_veff run: From acc28be8e2441401fe22757f028840196e3b314a Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Tue, 23 Apr 2019 16:23:40 +0200 Subject: [PATCH 08/10] no deps --- .circleci/config.yml | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 5e55ba6..eddaffc 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -28,7 +28,7 @@ variables: name: Install conda dependencies command: | conda install genomelake pyfaidx -c bioconda - install_kipoi: &install_kipoi + install_kipoi: &install_kipoi_no_deps_and_ignore run: name: Install Kipoi command: | @@ -36,6 +36,15 @@ variables: cd kipoi_pkg pip install '.' --ignore-installed --no-deps cd .. + + install_kipoi: &install_kipoi + run: + name: Install Kipoi + command: | + git clone git@github.com:DerThorsten/kipoi.git -b niceparse kipoi_pkg + cd kipoi_pkg + pip install '.' --no-deps + cd .. install_kipoi_veff: &install_kipoi_veff run: name: Install Kipoi-veff @@ -80,8 +89,9 @@ jobs: - *update_pytorch - *install_conda_deps - *install_pip_deps - - *install_kipoi_veff - *install_kipoi + - *install_kipoi_veff + - *install_kipoi_no_deps_and_ignore - *kipoi_ls - *run_tests - *run_coveralls From 59841be493b8726d9b3bdeb95e5b1eaa501511bf Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Tue, 23 Apr 2019 17:22:42 +0200 Subject: [PATCH 09/10] removing ignore-installed flag --- .circleci/config.yml | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index eddaffc..d31a6b5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -28,14 +28,6 @@ variables: name: Install conda dependencies command: | conda install genomelake pyfaidx -c bioconda - install_kipoi: &install_kipoi_no_deps_and_ignore - run: - name: Install Kipoi - command: | - git clone git@github.com:DerThorsten/kipoi.git -b niceparse kipoi_pkg - cd kipoi_pkg - pip install '.' --ignore-installed --no-deps - cd .. install_kipoi: &install_kipoi run: @@ -91,7 +83,7 @@ jobs: - *install_pip_deps - *install_kipoi - *install_kipoi_veff - - *install_kipoi_no_deps_and_ignore + - *install_kipoi - *kipoi_ls - *run_tests - *run_coveralls From ca5a8bf918c84a299de7b5bd6fa94a454357ebbc Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Thu, 25 Apr 2019 15:41:21 +0200 Subject: [PATCH 10/10] using most recent kipoi version --- .circleci/config.yml | 18 ++++++++---------- setup.py | 2 +- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index d31a6b5..9d26d67 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -29,14 +29,14 @@ variables: command: | conda install genomelake pyfaidx -c bioconda - install_kipoi: &install_kipoi - run: - name: Install Kipoi - command: | - git clone git@github.com:DerThorsten/kipoi.git -b niceparse kipoi_pkg - cd kipoi_pkg - pip install '.' --no-deps - cd .. + # install_kipoi: &install_kipoi + # run: + # name: Install Kipoi + # command: | + # git clone git@github.com:DerThorsten/kipoi.git -b niceparse kipoi_pkg + # cd kipoi_pkg + # pip install '.' --no-deps + # cd .. install_kipoi_veff: &install_kipoi_veff run: name: Install Kipoi-veff @@ -81,9 +81,7 @@ jobs: - *update_pytorch - *install_conda_deps - *install_pip_deps - - *install_kipoi - *install_kipoi_veff - - *install_kipoi - *kipoi_ls - *run_tests - *run_coveralls diff --git a/setup.py b/setup.py index 9201cf6..6ab806d 100755 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ requirements = [ - "kipoi>=0.6.1", + "kipoi>=0.6.12", #$"git+ssh://git@github.com/DerThorsten/kipoi.git@niceparse",#egg=kipoi, "kipoi-utils>=0.1.12", # vep