diff --git a/Snakefile b/Snakefile index 9d2debe3..caee3428 100644 --- a/Snakefile +++ b/Snakefile @@ -42,7 +42,6 @@ def algo_has_mult_param_combos(algo): return len(algorithm_params.get(algo, {})) > 1 algorithms_mult_param_combos = [algo for algo in algorithms if algo_has_mult_param_combos(algo)] - # Get the parameter dictionary for the specified # algorithm and parameter combination hash def reconstruction_params(algorithm, params_hash): @@ -105,8 +104,17 @@ def make_final_input(wildcards): final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-ensemble-pathway.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms)) if _config.config.analysis_include_evaluation: - final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-evaluation.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_pairs,algorithm_params=algorithms_with_params)) - + final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}precision-recall-per-pathway.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_pairs,algorithm_params=algorithms_with_params)) + final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}precision-recall-per-pathway.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_pairs)) + final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}precision-recall-pca-chosen-pathway.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_pairs)) + final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}precision-recall-curve-ensemble-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_pairs,algorithm_params=algorithms_with_params)) + # TODO: should we provide the node ensemble frequencies + if _config.config.analysis_include_evaluation_aggregate_algo: + 
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}{algorithm}-precision-recall-per-pathway.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_pairs,algorithm=algorithms)) + final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}{algorithm}-precision-recall-per-pathway.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_pairs,algorithm=algorithms)) + final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}{algorithm}-precision-recall-pca-chosen-pathway.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_pairs,algorithm=algorithms_mult_param_combos)) + final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}{algorithm}-precision-recall-curve-ensemble-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_pairs,algorithm=algorithms)) + # TODO: should we provide the node ensemble frequencies per algorithm if len(final_input) == 0: # No analysis added yet, so add reconstruction output files if they exist. # (if analysis is specified, these should be implicitly run). 
@@ -372,15 +380,76 @@ def get_dataset_label(wildcards): dataset = parts[0] return dataset -# Run evaluation code for a specific dataset's pathway outputs against its paired gold standard + +# Run evaluation for all pathway outputs, ensemble.txt, and pca_coordinates.txt for a dataset against its paired gold standard rule evaluation: input: gold_standard_file = get_gold_standard_pickle_file, pathways = expand('{out_dir}{sep}{dataset_label}-{algorithm_params}{sep}pathway.txt', out_dir=out_dir, sep=SEP, algorithm_params=algorithms_with_params, dataset_label=get_dataset_label), - output: eval_file = SEP.join([out_dir, "{dataset_gold_standard_pairs}-evaluation.txt"]) + ensemble_file=lambda wildcards: f"{out_dir}{SEP}{get_dataset_label(wildcards)}-ml{SEP}ensemble-pathway.txt", + pca_coordinates_file =lambda wildcards: f"{out_dir}{SEP}{get_dataset_label(wildcards)}-ml{SEP}pca-coordinates.txt" + output: + pr_file = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', "precision-recall-per-pathway.txt"]), + pr_png = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', 'precision-recall-per-pathway.png']), + pr_curve_png = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', 'precision-recall-curve-ensemble-nodes.png']), + pca_chosen_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', "precision-recall-pca-chosen-pathway.txt"]), + run: + node_table = Evaluation.from_file(input.gold_standard_file).node_table + Evaluation.precision_and_recall(input.pathways, node_table, algorithms, output.pr_file, output.pr_png) + node_ensemble = Evaluation.edge_frequency_node_ensemble(input.ensemble_file) + Evaluation.precision_recall_curve_node_ensemble(node_ensemble, node_table, output.pr_curve_png) + pca_chosen_pathway = Evaluation.pca_chosen_pathway(input.pca_coordinates_file, out_dir) + Evaluation.precision_and_recall(pca_chosen_pathway, node_table, algorithms, output.pca_chosen_pr_file) + +# Returns all pathways for a specific algorithm and dataset +def 
collect_pathways_per_algo_per_dataset(wildcards): + dataset_label = get_dataset_label(wildcards) + filtered_algo_params = [algo_param for algo_param in algorithms_with_params if wildcards.algorithm in algo_param] + return expand('{out_dir}{sep}{dataset_label}-{algorithm_params}{sep}pathway.txt', out_dir=out_dir, sep=SEP, algorithm_params=filtered_algo_params, dataset_label= dataset_label) + +# Returns ensemble file for a specific algorithm and dataset +def collect_ensemble_per_algo_per_dataset(wildcards): + dataset_label = get_dataset_label(wildcards) + return f"{out_dir}{SEP}{dataset_label}-ml{SEP}{wildcards.algorithm}-ensemble-pathway.txt" + +# Returns pca coordinates for a specific algorithm and dataset +def collect_pca_coordinates_per_algo_per_dataset(wildcards): + dataset_label = get_dataset_label(wildcards) + return f"{out_dir}{SEP}{dataset_label}-ml{SEP}{wildcards.algorithm}-pca-coordinates.txt" + +# Run evaluation per algorithm for all associated pathway outputs, ensemble.txt, and pca_coordinates.txt for a dataset against its paired gold standard +rule evaluation_per_algo_pathways: + input: + gold_standard_file = get_gold_standard_pickle_file, + pathways = collect_pathways_per_algo_per_dataset, + output: + pr_file = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', "{algorithm}-precision-recall-per-pathway.txt"]), + pr_png = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', '{algorithm}-precision-recall-per-pathway.png']), + run: + node_table = Evaluation.from_file(input.gold_standard_file).node_table + Evaluation.precision_and_recall(input.pathways, node_table, algorithms, output.pr_file, output.pr_png) + +rule evaluation_per_algo_ensemble_pr_curve: + input: + gold_standard_file = get_gold_standard_pickle_file, + ensemble_file = collect_ensemble_per_algo_per_dataset, + output: + pr_curve_png = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', '{algorithm}-precision-recall-curve-ensemble-nodes.png']), + run: + node_table = 
Evaluation.from_file(input.gold_standard_file).node_table + node_ensemble = Evaluation.edge_frequency_node_ensemble(input.ensemble_file) + Evaluation.precision_recall_curve_node_ensemble(node_ensemble, node_table, output.pr_curve_png) + +rule evaluation_per_algo_pca_chosen: + input: + gold_standard_file = get_gold_standard_pickle_file, + pca_coordinates_file = collect_pca_coordinates_per_algo_per_dataset + output: + pca_chosen_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', "{algorithm}-precision-recall-pca-chosen-pathway.txt"]), run: node_table = Evaluation.from_file(input.gold_standard_file).node_table - Evaluation.precision(input.pathways, node_table, output.eval_file) + pca_chosen_pathway = Evaluation.pca_chosen_pathway(input.pca_coordinates_file, out_dir) + Evaluation.precision_and_recall(pca_chosen_pathway, node_table, algorithms, output.pca_chosen_pr_file) # Remove the output directory rule clean: diff --git a/config/config.yaml b/config/config.yaml index b87bcd45..76231276 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -173,4 +173,14 @@ analysis: # 'euclidean', 'manhattan', 'cosine' metric: 'euclidean' evaluation: + # evaluation per dataset-goldstandard pair + # evaluation will not run unless ml include is set to true include: true + # adds evaluation per algorithm per dataset-goldstandard pair + # evaluation per algorithm will not run unless ml include and ml aggregate_per_algorithm are set to true + aggregate_per_algorithm: true + # TODO: should we decouple parts of eval that involve ml + # it will be good to separate them otherwise if ml doesn't work then eval won't work at all + # pca_chosen + # ensemble + # precision and recall diff --git a/config/egfr-param-tuning.yaml b/config/egfr-param-tuning.yaml new file mode 100644 index 00000000..a0a965b7 --- /dev/null +++ b/config/egfr-param-tuning.yaml @@ -0,0 +1,3460 @@ +hash_length: 7 +container_framework: docker +unpack_singularity: false +container_registry: + base_url: 
docker.io + owner: reedcompbio +algorithms: + - name: omicsintegrator2 + params: + include: true + run1: + b: + - 1 + g: + - 3 + w: + - 5 + run2: + b: + - 10 + g: + - 7 + w: + - 6 + run3: + b: + - 1 + g: + - 5 + w: + - 8 + run4: + b: + - 9 + g: + - 7 + w: + - 8 + run5: + b: + - 7 + g: + - 7 + w: + - 10 + run6: + b: + - 6 + g: + - 7 + w: + - 4 + run7: + b: + - 9 + g: + - 7 + w: + - 3 + run8: + b: + - 1 + g: + - 6 + w: + - 2 + run9: + b: + - 9 + g: + - 7 + w: + - 4 + run10: + b: + - 3 + g: + - 6 + w: + - 3 + run11: + b: + - 4 + g: + - 6 + w: + - 2 + run12: + b: + - 6 + g: + - 7 + w: + - 6 + run13: + b: + - 5 + g: + - 7 + w: + - 5 + run14: + b: + - 7 + g: + - 6 + w: + - 1 + run15: + b: + - 1 + g: + - 4 + w: + - 8 + run16: + b: + - 1 + g: + - 4 + w: + - 1 + run17: + b: + - 1 + g: + - 3 + w: + - 3 + run18: + b: + - 2 + g: + - 6 + w: + - 2 + run19: + b: + - 1 + g: + - 6 + w: + - 7 + run20: + b: + - 1 + g: + - 4 + w: + - 4 + run21: + b: + - 8 + g: + - 7 + w: + - 9 + run22: + b: + - 1 + g: + - 2 + w: + - 4 + run23: + b: + - 1 + g: + - 6 + w: + - 3 + run24: + b: + - 6 + g: + - 7 + w: + - 9 + run25: + b: + - 10 + g: + - 7 + w: + - 8 + run26: + b: + - 5 + g: + - 5 + w: + - 1 + run27: + b: + - 9 + g: + - 7 + w: + - 7 + run28: + b: + - 7 + g: + - 7 + w: + - 4 + run29: + b: + - 1 + g: + - 3 + w: + - 9 + run30: + b: + - 8 + g: + - 7 + w: + - 4 + run31: + b: + - 10 + g: + - 7 + w: + - 5 + run32: + b: + - 7 + g: + - 7 + w: + - 9 + run33: + b: + - 4 + g: + - 6 + w: + - 1 + run34: + b: + - 9 + g: + - 6 + w: + - 2 + run35: + b: + - 8 + g: + - 6 + w: + - 2 + run36: + b: + - 8 + g: + - 7 + w: + - 10 + run37: + b: + - 7 + g: + - 7 + w: + - 8 + run38: + b: + - 2 + g: + - 6 + w: + - 10 + run39: + b: + - 6 + g: + - 7 + w: + - 10 + run40: + b: + - 1 + g: + - 5 + w: + - 4 + run41: + b: + - 8 + g: + - 7 + w: + - 5 + run42: + b: + - 1 + g: + - 3 + w: + - 1 + run43: + b: + - 1 + g: + - 5 + w: + - 1 + run44: + b: + - 3 + g: + - 6 + w: + - 2 + run45: + b: + - 3 + g: + - 6 + w: + - 4 + run46: + b: 
+ - 6 + g: + - 7 + w: + - 7 + run47: + b: + - 6 + g: + - 5 + w: + - 1 + run48: + b: + - 10 + g: + - 5 + w: + - 1 + run49: + b: + - 1 + g: + - 2 + w: + - 2 + run50: + b: + - 2 + g: + - 6 + w: + - 6 + run51: + b: + - 10 + g: + - 7 + w: + - 10 + run52: + b: + - 10 + g: + - 7 + w: + - 9 + run53: + b: + - 8 + g: + - 5 + w: + - 1 + run54: + b: + - 6 + g: + - 6 + w: + - 2 + run55: + b: + - 6 + g: + - 7 + w: + - 5 + run56: + b: + - 2 + g: + - 5 + w: + - 1 + run57: + b: + - 2 + g: + - 6 + w: + - 5 + run58: + b: + - 9 + g: + - 7 + w: + - 10 + run59: + b: + - 7 + g: + - 7 + w: + - 6 + run60: + b: + - 5 + g: + - 6 + w: + - 1 + run61: + b: + - 4 + g: + - 5 + w: + - 1 + run62: + b: + - 8 + g: + - 7 + w: + - 8 + run63: + b: + - 10 + g: + - 6 + w: + - 2 + run64: + b: + - 4 + g: + - 6 + w: + - 3 + run65: + b: + - 7 + g: + - 6 + w: + - 2 + run66: + b: + - 2 + g: + - 6 + w: + - 3 + run67: + b: + - 2 + g: + - 6 + w: + - 1 + run68: + b: + - 5 + g: + - 6 + w: + - 2 + run69: + b: + - 8 + g: + - 7 + w: + - 6 + run70: + b: + - 10 + g: + - 7 + w: + - 7 + run71: + b: + - 1 + g: + - 5 + w: + - 6 + run72: + b: + - 1 + g: + - 5 + w: + - 7 + run73: + b: + - 2 + g: + - 6 + w: + - 4 + - name: domino + params: + include: true + run1: + module_threshold: + - 0.001 + slice_threshold: + - 0.1 + run2: + module_threshold: + - 0.001 + slice_threshold: + - 0.001 + run3: + module_threshold: + - 0.02 + slice_threshold: + - 0.1 + run4: + module_threshold: + - 0.01 + slice_threshold: + - 0.001 + run5: + module_threshold: + - 0.01 + slice_threshold: + - 0.1 + run6: + module_threshold: + - 0.02 + slice_threshold: + - 0.001 + run7: + module_threshold: + - 0.001 + slice_threshold: + - 0.9 + run8: + module_threshold: + - 0.001 + slice_threshold: + - 0.3 + run9: + module_threshold: + - 0.001 + slice_threshold: + - 1 + - name: mincostflow + params: + include: false + run1: + capacity: + - 15 + flow: + - 80 + run2: + capacity: + - 1 + flow: + - 6 + run3: + capacity: + - 5 + flow: + - 60 + run4: + capacity: + - 1 + 
flow: + - 8 + run5: + capacity: + - 5 + flow: + - 50 + run6: + capacity: + - 10 + flow: + - 150 + run7: + capacity: + - 1 + flow: + - 20 + run8: + capacity: + - 5 + flow: + - 150 + run9: + capacity: + - 5 + flow: + - 90 + run10: + capacity: + - 5 + flow: + - 70 + - name: pathlinker + params: + include: true + run1: + k: + - 200 + run2: + k: + - 10 + run3: + k: + - 50 + run4: + k: + - 30 + run5: + k: + - 40 + run6: + k: + - 500 + run7: + k: + - 20 + run8: + k: + - 60 + run9: + k: + - 100 + - name: allpairs + params: + include: true + - name: meo + params: + include: true + run1: + local_search: + - 'No' + max_path_length: + - 2 + rand_restarts: + - 10 + - name: omicsintegrator1 + params: + include: true + run1: + b: + - 5 + d: + - 20 + g: + - 0.0001 + mu: + - 0.03 + r: + - 1 + w: + - 0.5 + run2: + b: + - 2 + d: + - 20 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.01 + w: + - 8 + run3: + b: + - 5 + d: + - 30 + g: + - 0.0001 + mu: + - 0.03 + r: + - 1 + w: + - 0.1 + run4: + b: + - 2 + d: + - 20 + g: + - 0.0001 + mu: + - 0.02 + r: + - 0.1 + w: + - 0.1 + run5: + b: + - 2 + d: + - 20 + g: + - 0.0001 + mu: + - 0.001 + r: + - 0.1 + w: + - 0.001 + run6: + b: + - 2 + d: + - 30 + g: + - 0.001 + mu: + - 0.03 + r: + - 1 + w: + - 0.5 + run7: + b: + - 5 + d: + - 30 + g: + - 0.0001 + mu: + - 0.03 + r: + - 1 + w: + - 0.001 + run8: + b: + - 5 + d: + - 40 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.1 + w: + - 0.1 + run9: + b: + - 0.55 + d: + - 10 + g: + - 0.0001 + mu: + - 0.02 + r: + - 1 + w: + - 0.5 + run10: + b: + - 0.55 + d: + - 20 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.01 + w: + - 8 + run11: + b: + - 10 + d: + - 40 + g: + - 0.001 + mu: + - 0.005 + r: + - 0.1 + w: + - 0.5 + run12: + b: + - 0.55 + d: + - 10 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.1 + w: + - 8 + run13: + b: + - 2 + d: + - 10 + g: + - 0.0001 + mu: + - 0.02 + r: + - 0.01 + w: + - 0.5 + run14: + b: + - 5 + d: + - 40 + g: + - 0.0001 + mu: + - 0.008 + r: + - 0.01 + w: + - 0.5 + run15: + b: + - 10 + d: + - 40 + g: + - 0.0001 
+ mu: + - 0.001 + r: + - 0.01 + w: + - 0.1 + run16: + b: + - 0.55 + d: + - 30 + g: + - 0.0001 + mu: + - 0.008 + r: + - 0.1 + w: + - 0.1 + run17: + b: + - 5 + d: + - 10 + g: + - 0.0001 + mu: + - 0.008 + r: + - 0.01 + w: + - 0.1 + run18: + b: + - 2 + d: + - 20 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.01 + w: + - 0.5 + run19: + b: + - 5 + d: + - 30 + g: + - 0.001 + mu: + - 0.02 + r: + - 1 + w: + - 0.001 + run20: + b: + - 0.55 + d: + - 30 + g: + - 0.0001 + mu: + - 0.001 + r: + - 0.01 + w: + - 0.5 + run21: + b: + - 2 + d: + - 30 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.01 + w: + - 8 + run22: + b: + - 5 + d: + - 30 + g: + - 0.001 + mu: + - 0.03 + r: + - 1 + w: + - 0.5 + run23: + b: + - 2 + d: + - 20 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.1 + w: + - 0.5 + run24: + b: + - 0.55 + d: + - 40 + g: + - 0.0001 + mu: + - 0.02 + r: + - 0.1 + w: + - 8 + run25: + b: + - 10 + d: + - 20 + g: + - 0.001 + mu: + - 0.005 + r: + - 0.1 + w: + - 0.5 + run26: + b: + - 0.55 + d: + - 40 + g: + - 0.0001 + mu: + - 0.02 + r: + - 0.1 + w: + - 0.5 + run27: + b: + - 10 + d: + - 30 + g: + - 0.001 + mu: + - 0.005 + r: + - 0.1 + w: + - 0.1 + run28: + b: + - 5 + d: + - 20 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.1 + w: + - 0.5 + run29: + b: + - 5 + d: + - 30 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.01 + w: + - 0.1 + run30: + b: + - 0.55 + d: + - 20 + g: + - 0.001 + mu: + - 0.005 + r: + - 0.01 + w: + - 0.1 + run31: + b: + - 0.55 + d: + - 10 + g: + - 0.0001 + mu: + - 0.005 + r: + - 0.1 + w: + - 0.5 + run32: + b: + - 0.55 + d: + - 20 + g: + - 0.001 + mu: + - 0.008 + r: + - 0.01 + w: + - 0.5 + run33: + b: + - 0.55 + d: + - 30 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.1 + w: + - 2 + run34: + b: + - 0.55 + d: + - 30 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.1 + w: + - 0.5 + run35: + b: + - 10 + d: + - 10 + g: + - 0.0001 + mu: + - 0.008 + r: + - 0.01 + w: + - 0.1 + run36: + b: + - 0.55 + d: + - 10 + g: + - 0.0001 + mu: + - 0.03 + r: + - 1 + w: + - 2 + run37: + b: + - 0.55 + d: + - 40 + g: + - 0.0001 + mu: + 
- 0.03 + r: + - 1 + w: + - 0.1 + run38: + b: + - 0.55 + d: + - 10 + g: + - 0.0001 + mu: + - 0.02 + r: + - 0.01 + w: + - 0.5 + run39: + b: + - 5 + d: + - 10 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.1 + w: + - 0.5 + run40: + b: + - 0.55 + d: + - 20 + g: + - 0.0001 + mu: + - 0.001 + r: + - 0.1 + w: + - 0.5 + run41: + b: + - 2 + d: + - 10 + g: + - 0.001 + mu: + - 0.008 + r: + - 0.01 + w: + - 0.1 + run42: + b: + - 2 + d: + - 40 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.01 + w: + - 0.5 + run43: + b: + - 0.55 + d: + - 10 + g: + - 0.001 + mu: + - 0.02 + r: + - 1 + w: + - 8 + run44: + b: + - 10 + d: + - 30 + g: + - 0.0001 + mu: + - 0.02 + r: + - 0.1 + w: + - 0.1 + run45: + b: + - 0.55 + d: + - 20 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.01 + w: + - 8 + run46: + b: + - 5 + d: + - 40 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.1 + w: + - 0.1 + run47: + b: + - 0.55 + d: + - 40 + g: + - 0.001 + mu: + - 0.008 + r: + - 0.01 + w: + - 8 + run48: + b: + - 0.55 + d: + - 20 + g: + - 0.001 + mu: + - 0.02 + r: + - 1 + w: + - 0.1 + run49: + b: + - 5 + d: + - 40 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.01 + w: + - 0.5 + run50: + b: + - 0.55 + d: + - 10 + g: + - 0.001 + mu: + - 0.008 + r: + - 0.1 + w: + - 0.5 + run51: + b: + - 0.55 + d: + - 10 + g: + - 0.0001 + mu: + - 0.02 + r: + - 0.01 + w: + - 2 + run52: + b: + - 5 + d: + - 20 + g: + - 0.001 + mu: + - 0.005 + r: + - 0.1 + w: + - 0.1 + run53: + b: + - 0.55 + d: + - 10 + g: + - 0.001 + mu: + - 0.005 + r: + - 0.01 + w: + - 2 + run54: + b: + - 0.55 + d: + - 20 + g: + - 0.0001 + mu: + - 0.005 + r: + - 0.1 + w: + - 2 + run55: + b: + - 2 + d: + - 20 + g: + - 0.001 + mu: + - 0.005 + r: + - 0.01 + w: + - 0.5 + run56: + b: + - 10 + d: + - 40 + g: + - 0.001 + mu: + - 0.001 + r: + - 0.1 + w: + - 0.1 + run57: + b: + - 0.01 + d: + - 40 + g: + - 0.001 + mu: + - 0.02 + r: + - 1 + w: + - 0.1 + run58: + b: + - 2 + d: + - 40 + g: + - 0.0001 + mu: + - 0.02 + r: + - 1 + w: + - 0.001 + run59: + b: + - 2 + d: + - 40 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.1 + 
w: + - 0.5 + run60: + b: + - 5 + d: + - 10 + g: + - 0.001 + mu: + - 0.02 + r: + - 1 + w: + - 0.1 + run61: + b: + - 5 + d: + - 10 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.01 + w: + - 0.5 + run62: + b: + - 2 + d: + - 20 + g: + - 0.001 + mu: + - 0.008 + r: + - 0.01 + w: + - 0.5 + run63: + b: + - 2 + d: + - 20 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.1 + w: + - 2 + run64: + b: + - 0.55 + d: + - 10 + g: + - 0.0001 + mu: + - 0.008 + r: + - 0.01 + w: + - 8 + run65: + b: + - 0.55 + d: + - 30 + g: + - 0.001 + mu: + - 0.008 + r: + - 0.01 + w: + - 2 + run66: + b: + - 2 + d: + - 40 + g: + - 0.0001 + mu: + - 0.03 + r: + - 1 + w: + - 0.001 + run67: + b: + - 5 + d: + - 20 + g: + - 0.0001 + mu: + - 0.008 + r: + - 0.1 + w: + - 0.5 + run68: + b: + - 0.55 + d: + - 20 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.01 + w: + - 8 + run69: + b: + - 10 + d: + - 40 + g: + - 0.0001 + mu: + - 0.005 + r: + - 0.01 + w: + - 0.5 + run70: + b: + - 0.55 + d: + - 10 + g: + - 0.001 + mu: + - 0.005 + r: + - 0.01 + w: + - 8 + run71: + b: + - 0.55 + d: + - 20 + g: + - 0.0001 + mu: + - 0.02 + r: + - 1 + w: + - 2 + run72: + b: + - 2 + d: + - 40 + g: + - 0.001 + mu: + - 0.005 + r: + - 0.01 + w: + - 0.5 + run73: + b: + - 2 + d: + - 10 + g: + - 0.0001 + mu: + - 0.005 + r: + - 0.1 + w: + - 0.5 + run74: + b: + - 0.55 + d: + - 20 + g: + - 0.001 + mu: + - 0.008 + r: + - 0.1 + w: + - 8 + run75: + b: + - 10 + d: + - 20 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.1 + w: + - 0.5 + run76: + b: + - 2 + d: + - 10 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.01 + w: + - 2 + run77: + b: + - 2 + d: + - 30 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.01 + w: + - 0.5 + run78: + b: + - 0.55 + d: + - 20 + g: + - 0.001 + mu: + - 0.001 + r: + - 0.1 + w: + - 0.5 + run79: + b: + - 2 + d: + - 10 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.01 + w: + - 2 + run80: + b: + - 5 + d: + - 20 + g: + - 0.001 + mu: + - 0.02 + r: + - 1 + w: + - 0.1 + run81: + b: + - 0.55 + d: + - 40 + g: + - 0.0001 + mu: + - 0.008 + r: + - 0.1 + w: + - 0.5 + run82: + b: + - 5 
+ d: + - 40 + g: + - 0.001 + mu: + - 0.005 + r: + - 0.1 + w: + - 0.5 + run83: + b: + - 0.55 + d: + - 30 + g: + - 0.001 + mu: + - 0.03 + r: + - 1 + w: + - 0.5 + run84: + b: + - 5 + d: + - 30 + g: + - 0.001 + mu: + - 0.02 + r: + - 1 + w: + - 0.1 + run85: + b: + - 2 + d: + - 10 + g: + - 0.001 + mu: + - 0.008 + r: + - 0.01 + w: + - 0.5 + run86: + b: + - 0.55 + d: + - 20 + g: + - 0.001 + mu: + - 0.03 + r: + - 1 + w: + - 2 + run87: + b: + - 10 + d: + - 40 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.01 + w: + - 0.5 + run88: + b: + - 0.55 + d: + - 10 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.1 + w: + - 2 + run89: + b: + - 2 + d: + - 40 + g: + - 0.0001 + mu: + - 0.008 + r: + - 0.1 + w: + - 0.5 + run90: + b: + - 2 + d: + - 40 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.1 + w: + - 8 + run91: + b: + - 0.55 + d: + - 10 + g: + - 0.001 + mu: + - 0.03 + r: + - 1 + w: + - 8 + run92: + b: + - 2 + d: + - 20 + g: + - 0.001 + mu: + - 0.03 + r: + - 1 + w: + - 0.1 + run93: + b: + - 0.55 + d: + - 40 + g: + - 0.0001 + mu: + - 0.03 + r: + - 1 + w: + - 8 + run94: + b: + - 0.55 + d: + - 40 + g: + - 0.0001 + mu: + - 0.001 + r: + - 0.1 + w: + - 0.1 + run95: + b: + - 5 + d: + - 10 + g: + - 0.0001 + mu: + - 0.008 + r: + - 0.1 + w: + - 0.5 + run96: + b: + - 0.55 + d: + - 10 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.1 + w: + - 8 + run97: + b: + - 0.55 + d: + - 40 + g: + - 0.0001 + mu: + - 0.02 + r: + - 1 + w: + - 0.001 + run98: + b: + - 0.55 + d: + - 20 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.01 + w: + - 2 + run99: + b: + - 0.55 + d: + - 40 + g: + - 0.0001 + mu: + - 0.001 + r: + - 0.1 + w: + - 0.001 + run100: + b: + - 10 + d: + - 30 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.01 + w: + - 0.1 + run101: + b: + - 10 + d: + - 10 + g: + - 0.001 + mu: + - 0.008 + r: + - 0.01 + w: + - 0.5 + run102: + b: + - 0.55 + d: + - 40 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.1 + w: + - 8 + run103: + b: + - 0.55 + d: + - 40 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.01 + w: + - 2 + run104: + b: + - 0.55 + d: + - 40 + g: + - 
0.0001 + mu: + - 0.02 + r: + - 0.01 + w: + - 8 + run105: + b: + - 10 + d: + - 20 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.1 + w: + - 0.5 + run106: + b: + - 0.55 + d: + - 40 + g: + - 0.001 + mu: + - 0.02 + r: + - 1 + w: + - 2 + run107: + b: + - 10 + d: + - 10 + g: + - 0.001 + mu: + - 0.03 + r: + - 1 + w: + - 0.001 + run108: + b: + - 2 + d: + - 30 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.1 + w: + - 2 + run109: + b: + - 0.55 + d: + - 30 + g: + - 0.001 + mu: + - 0.005 + r: + - 0.1 + w: + - 8 + run110: + b: + - 0.55 + d: + - 10 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.01 + w: + - 2 + run111: + b: + - 0.55 + d: + - 20 + g: + - 0.0001 + mu: + - 0.008 + r: + - 0.1 + w: + - 2 + run112: + b: + - 0.55 + d: + - 20 + g: + - 0.001 + mu: + - 0.005 + r: + - 0.1 + w: + - 8 + run113: + b: + - 2 + d: + - 30 + g: + - 0.001 + mu: + - 0.005 + r: + - 0.01 + w: + - 0.5 + run114: + b: + - 0.01 + d: + - 10 + g: + - 0.001 + mu: + - 0.008 + r: + - 1 + w: + - 8 + run115: + b: + - 10 + d: + - 30 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.1 + w: + - 0.5 + run116: + b: + - 0.55 + d: + - 30 + g: + - 0.001 + mu: + - 0.005 + r: + - 0.01 + w: + - 8 + run117: + b: + - 10 + d: + - 40 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.01 + w: + - 0.5 + run118: + b: + - 10 + d: + - 20 + g: + - 0.0001 + mu: + - 0.03 + r: + - 1 + w: + - 0.1 + run119: + b: + - 0.55 + d: + - 20 + g: + - 0.001 + mu: + - 0.008 + r: + - 0.1 + w: + - 2 + run120: + b: + - 2 + d: + - 10 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.1 + w: + - 2 + run121: + b: + - 2 + d: + - 30 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.01 + w: + - 2 + run122: + b: + - 2 + d: + - 10 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.01 + w: + - 8 + run123: + b: + - 0.55 + d: + - 30 + g: + - 0.0001 + mu: + - 0.02 + r: + - 1 + w: + - 8 + run124: + b: + - 2 + d: + - 10 + g: + - 0.001 + mu: + - 0.02 + r: + - 1 + w: + - 0.1 + run125: + b: + - 2 + d: + - 10 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.1 + w: + - 8 + run126: + b: + - 0.55 + d: + - 30 + g: + - 0.0001 + mu: + - 0.005 
+ r: + - 0.1 + w: + - 0.5 + run127: + b: + - 0.55 + d: + - 30 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.01 + w: + - 2 + run128: + b: + - 0.55 + d: + - 30 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.1 + w: + - 2 + run129: + b: + - 0.55 + d: + - 10 + g: + - 0.001 + mu: + - 0.02 + r: + - 1 + w: + - 2 + run130: + b: + - 2 + d: + - 20 + g: + - 0.0001 + mu: + - 0.005 + r: + - 0.1 + w: + - 0.5 + run131: + b: + - 0.55 + d: + - 20 + g: + - 0.0001 + mu: + - 0.03 + r: + - 1 + w: + - 8 + run132: + b: + - 0.55 + d: + - 40 + g: + - 0.0001 + mu: + - 0.005 + r: + - 0.1 + w: + - 2 + run133: + b: + - 0.55 + d: + - 30 + g: + - 0.001 + mu: + - 0.005 + r: + - 0.01 + w: + - 2 + run134: + b: + - 5 + d: + - 30 + g: + - 0.0001 + mu: + - 0.03 + r: + - 1 + w: + - 0.5 + run135: + b: + - 0.55 + d: + - 30 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.1 + w: + - 8 + run136: + b: + - 0.55 + d: + - 30 + g: + - 0.0001 + mu: + - 0.005 + r: + - 0.01 + w: + - 0.5 + run137: + b: + - 0.55 + d: + - 10 + g: + - 0.0001 + mu: + - 0.02 + r: + - 0.1 + w: + - 2 + run138: + b: + - 5 + d: + - 10 + g: + - 0.001 + mu: + - 0.008 + r: + - 0.1 + w: + - 0.5 + run139: + b: + - 0.55 + d: + - 20 + g: + - 0.0001 + mu: + - 0.03 + r: + - 1 + w: + - 2 + run140: + b: + - 2 + d: + - 20 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.01 + w: + - 8 + run141: + b: + - 0.55 + d: + - 40 + g: + - 0.001 + mu: + - 0.001 + r: + - 0.1 + w: + - 0.5 + run142: + b: + - 5 + d: + - 20 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.01 + w: + - 0.5 + run143: + b: + - 2 + d: + - 30 + g: + - 0.0001 + mu: + - 0.005 + r: + - 0.01 + w: + - 0.5 + run144: + b: + - 0.55 + d: + - 30 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.1 + w: + - 2 + run145: + b: + - 0.55 + d: + - 40 + g: + - 0.0001 + mu: + - 0.001 + r: + - 0.01 + w: + - 0.5 + run146: + b: + - 0.55 + d: + - 30 + g: + - 0.001 + mu: + - 0.008 + r: + - 0.1 + w: + - 2 + run147: + b: + - 0.55 + d: + - 40 + g: + - 0.0001 + mu: + - 0.03 + r: + - 1 + w: + - 2 + run148: + b: + - 0.55 + d: + - 30 + g: + - 0.001 + mu: + - 0.001 
+ r: + - 0.1 + w: + - 0.5 + run149: + b: + - 0.55 + d: + - 30 + g: + - 0.001 + mu: + - 0.008 + r: + - 0.01 + w: + - 0.5 + run150: + b: + - 0.55 + d: + - 20 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.01 + w: + - 2 + run151: + b: + - 5 + d: + - 20 + g: + - 0.0001 + mu: + - 0.02 + r: + - 1 + w: + - 0.001 + run152: + b: + - 0.55 + d: + - 10 + g: + - 0.0001 + mu: + - 0.02 + r: + - 0.01 + w: + - 8 + run153: + b: + - 10 + d: + - 10 + g: + - 0.0001 + mu: + - 0.005 + r: + - 0.1 + w: + - 0.5 + run154: + b: + - 0.55 + d: + - 30 + g: + - 0.0001 + mu: + - 0.02 + r: + - 0.01 + w: + - 8 + run155: + b: + - 2 + d: + - 30 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.1 + w: + - 8 + run156: + b: + - 5 + d: + - 20 + g: + - 0.0001 + mu: + - 0.005 + r: + - 0.1 + w: + - 0.5 + run157: + b: + - 10 + d: + - 30 + g: + - 0.0001 + mu: + - 0.008 + r: + - 0.1 + w: + - 0.5 + run158: + b: + - 5 + d: + - 30 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.1 + w: + - 0.5 + run159: + b: + - 0.55 + d: + - 20 + g: + - 0.0001 + mu: + - 0.005 + r: + - 0.01 + w: + - 8 + run160: + b: + - 10 + d: + - 20 + g: + - 0.001 + mu: + - 0.008 + r: + - 0.1 + w: + - 0.5 + run161: + b: + - 5 + d: + - 20 + g: + - 0.001 + mu: + - 0.005 + r: + - 0.01 + w: + - 0.1 + run162: + b: + - 5 + d: + - 30 + g: + - 0.0001 + mu: + - 0.008 + r: + - 0.1 + w: + - 0.5 + run163: + b: + - 0.55 + d: + - 20 + g: + - 0.001 + mu: + - 0.005 + r: + - 0.01 + w: + - 2 + run164: + b: + - 2 + d: + - 10 + g: + - 0.001 + mu: + - 0.005 + r: + - 0.1 + w: + - 0.5 + run165: + b: + - 0.55 + d: + - 10 + g: + - 0.0001 + mu: + - 0.005 + r: + - 0.01 + w: + - 0.5 + run166: + b: + - 0.55 + d: + - 30 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.01 + w: + - 2 + run167: + b: + - 0.55 + d: + - 10 + g: + - 0.001 + mu: + - 0.001 + r: + - 0.1 + w: + - 0.5 + run168: + b: + - 0.55 + d: + - 20 + g: + - 0.001 + mu: + - 0.008 + r: + - 0.01 + w: + - 8 + run169: + b: + - 0.55 + d: + - 30 + g: + - 0.0001 + mu: + - 0.02 + r: + - 0.1 + w: + - 8 + run170: + b: + - 0.55 + d: + - 10 + g: + - 0.0001 
+ mu: + - 0.03 + r: + - 0.1 + w: + - 8 + run171: + b: + - 0.55 + d: + - 30 + g: + - 0.0001 + mu: + - 0.005 + r: + - 0.1 + w: + - 8 + run172: + b: + - 5 + d: + - 10 + g: + - 0.0001 + mu: + - 0.005 + r: + - 0.1 + w: + - 0.5 + run173: + b: + - 0.55 + d: + - 20 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.01 + w: + - 2 + run174: + b: + - 0.55 + d: + - 30 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.1 + w: + - 8 + run175: + b: + - 5 + d: + - 40 + g: + - 0.0001 + mu: + - 0.03 + r: + - 1 + w: + - 0.5 + run176: + b: + - 5 + d: + - 30 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.01 + w: + - 0.5 + run177: + b: + - 0.55 + d: + - 30 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.01 + w: + - 8 + run178: + b: + - 0.55 + d: + - 40 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.1 + w: + - 8 + run179: + b: + - 10 + d: + - 10 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.01 + w: + - 0.5 + run180: + b: + - 2 + d: + - 40 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.01 + w: + - 2 + run181: + b: + - 0.55 + d: + - 10 + g: + - 0.0001 + mu: + - 0.02 + r: + - 1 + w: + - 8 + run182: + b: + - 0.55 + d: + - 30 + g: + - 0.0001 + mu: + - 0.005 + r: + - 0.01 + w: + - 8 + run183: + b: + - 10 + d: + - 40 + g: + - 0.001 + mu: + - 0.008 + r: + - 0.1 + w: + - 0.5 + run184: + b: + - 2 + d: + - 40 + g: + - 0.001 + mu: + - 0.03 + r: + - 1 + w: + - 0.5 + run185: + b: + - 0.55 + d: + - 20 + g: + - 0.0001 + mu: + - 0.008 + r: + - 0.1 + w: + - 8 + run186: + b: + - 2 + d: + - 40 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.01 + w: + - 8 + run187: + b: + - 2 + d: + - 10 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.1 + w: + - 2 + run188: + b: + - 2 + d: + - 10 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.01 + w: + - 8 + run189: + b: + - 0.55 + d: + - 10 + g: + - 0.0001 + mu: + - 0.02 + r: + - 1 + w: + - 0.1 + run190: + b: + - 0.55 + d: + - 30 + g: + - 0.0001 + mu: + - 0.001 + r: + - 0.1 + w: + - 0.5 + run191: + b: + - 0.55 + d: + - 30 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.01 + w: + - 8 + run192: + b: + - 2 + d: + - 20 + g: + - 0.0001 + mu: + - 0.02 
+ r: + - 1 + w: + - 0.001 + run193: + b: + - 0.55 + d: + - 10 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.1 + w: + - 2 + run194: + b: + - 2 + d: + - 30 + g: + - 0.0001 + mu: + - 0.005 + r: + - 0.1 + w: + - 0.5 + run195: + b: + - 2 + d: + - 40 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.1 + w: + - 2 + run196: + b: + - 0.55 + d: + - 30 + g: + - 0.001 + mu: + - 0.02 + r: + - 0.1 + w: + - 8 + run197: + b: + - 0.55 + d: + - 30 + g: + - 0.0001 + mu: + - 0.02 + r: + - 0.1 + w: + - 2 + run198: + b: + - 0.55 + d: + - 20 + g: + - 0.0001 + mu: + - 0.005 + r: + - 0.01 + w: + - 2 + run199: + b: + - 0.55 + d: + - 20 + g: + - 0.0001 + mu: + - 0.005 + r: + - 0.1 + w: + - 8 + run200: + b: + - 0.55 + d: + - 40 + g: + - 0.0001 + mu: + - 0.02 + r: + - 1 + w: + - 8 + run201: + b: + - 5 + d: + - 40 + g: + - 0.001 + mu: + - 0.03 + r: + - 1 + w: + - 0.5 + run202: + b: + - 0.55 + d: + - 40 + g: + - 0.0001 + mu: + - 0.001 + r: + - 0.1 + w: + - 0.5 + run203: + b: + - 0.55 + d: + - 30 + g: + - 0.0001 + mu: + - 0.008 + r: + - 0.01 + w: + - 0.5 + run204: + b: + - 5 + d: + - 20 + g: + - 0.0001 + mu: + - 0.008 + r: + - 0.01 + w: + - 0.5 + run205: + b: + - 0.55 + d: + - 30 + g: + - 0.001 + mu: + - 0.008 + r: + - 0.1 + w: + - 8 + run206: + b: + - 2 + d: + - 10 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.1 + w: + - 8 + run207: + b: + - 2 + d: + - 30 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.01 + w: + - 8 + run208: + b: + - 0.55 + d: + - 40 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.01 + w: + - 2 + run209: + b: + - 0.55 + d: + - 30 + g: + - 0.0001 + mu: + - 0.008 + r: + - 0.01 + w: + - 2 + run210: + b: + - 0.55 + d: + - 30 + g: + - 0.0001 + mu: + - 0.008 + r: + - 0.1 + w: + - 2 + run211: + b: + - 10 + d: + - 30 + g: + - 0.001 + mu: + - 0.03 + r: + - 0.01 + w: + - 0.5 + run212: + b: + - 2 + d: + - 20 + g: + - 0.0001 + mu: + - 0.03 + r: + - 0.1 + w: + - 8 +datasets: + - label: tps_egfr + node_files: + - tps-egfr-prizes.txt + edge_files: + - phosphosite-irefindex13.0-uniprot.txt + other_files: [] + data_dir: 
input +gold_standards: + - label: gs_egfr + node_files: + - gs-egfr.txt + data_dir: input + dataset_labels: + - tps_egfr +reconstruction_settings: + locations: + reconstruction_dir: output/tps_egfr + run: true +analysis: + summary: + include: true + graphspace: + include: false + cytoscape: + include: false + ml: + include: true + aggregate_per_algorithm: true + components: 4 + labels: false + linkage: ward + metric: euclidean + evaluation: + include: true + aggregate_per_algorithm: true diff --git a/config/egfr.yaml b/config/egfr.yaml index 0b41f0a5..93cbccec 100644 --- a/config/egfr.yaml +++ b/config/egfr.yaml @@ -90,4 +90,4 @@ analysis: ml: include: false evaluation: - include: false + include: false diff --git a/input/gs-egfr.txt b/input/gs-egfr.txt new file mode 100644 index 00000000..4b880cd4 --- /dev/null +++ b/input/gs-egfr.txt @@ -0,0 +1,324 @@ +1433B_HUMAN +1433E_HUMAN +1433T_HUMAN +4EBP1_HUMAN +ABI1_HUMAN +ABL1_HUMAN +ACK1_HUMAN +ACTS_HUMAN +AHSA1_HUMAN +AIMP2_HUMAN +AKT1_HUMAN +AKT2_HUMAN +AKT3_HUMAN +ANDR_HUMAN +AP2A1_HUMAN +AP2B1_HUMAN +AP2M1_HUMAN +AP2S1_HUMAN +ARAF_HUMAN +AREG_HUMAN +ARF4_HUMAN +ARF6_HUMAN +ARHG2_HUMAN +ARHG7_HUMAN +ARRB1_HUMAN +ASAP1_HUMAN +ASAP2_HUMAN +ATF1_HUMAN +ATF2_HUMAN +ATX1_HUMAN +B2CL1_HUMAN +BAD_HUMAN +BCAR1_HUMAN +BCL2_HUMAN +BDNF_HUMAN +BRAF_HUMAN +BTC_HUMAN +Ca++_PSEUDONODE +CASP3_HUMAN +CASP9_HUMAN +CAV1_HUMAN +CAV2_HUMAN +CBL_HUMAN +CBLB_HUMAN +CBLC_HUMAN +CCND1_HUMAN +CDC42_HUMAN +CDN1A_HUMAN +CEAM1_HUMAN +CEBPA_HUMAN +CEBPB_HUMAN +CLCA_HUMAN +CREB1_HUMAN +CRK_HUMAN +CRKL_HUMAN +CSK_HUMAN +CTND1_HUMAN +CXA1_HUMAN +CYH3_HUMAN +DAG_PSEUDONODE +DAXX_HUMAN +DDIT3_HUMAN +DOK2_HUMAN +DP13A_HUMAN +DP13B_HUMAN +DYN1_HUMAN +ECSIT_HUMAN +EF1A1_HUMAN +EF1A2_HUMAN +EF2K_HUMAN +EGF_HUMAN +EGFR_HUMAN +ELF3_HUMAN +ELK1_HUMAN +ELK4_HUMAN +EP15R_HUMAN +EPHB2_HUMAN +EPIPL_HUMAN +EPN1_HUMAN +EPS15_HUMAN +EPS8_HUMAN +ERBB2_HUMAN +ERBB3_HUMAN +ERBB4_HUMAN +EREG_HUMAN +ESR1_HUMAN +FAK1_HUMAN +FAK2_HUMAN +FGF1_HUMAN +FGFR1_HUMAN 
+FLNA_HUMAN +FLNB_HUMAN +FLNC_HUMAN +FOS_HUMAN +FOXO1_HUMAN +GA45G_HUMAN +GAB1_HUMAN +GAB2_HUMAN +GELS_HUMAN +GIT1_HUMAN +GNA12_HUMAN +GNAI1_HUMAN +GNAI3_HUMAN +GNDS_HUMAN +GRAP2_HUMAN +GRB10_HUMAN +GRB14_HUMAN +GRB2_HUMAN +GRB7_HUMAN +GSK3B_HUMAN +H31T_HUMAN +HAT1_HUMAN +HBEGF_HUMAN +HD_HUMAN +HDAC1_HUMAN +HDAC2_HUMAN +HDAC3_HUMAN +HGS_HUMAN +HIP1_HUMAN +HSPB1_HUMAN +ICEF1_HUMAN +IFIT3_HUMAN +IKKA_HUMAN +IL1A_HUMAN +IL1R1_HUMAN +ITCH_HUMAN +JAK1_HUMAN +JAK2_HUMAN +JIP2_HUMAN +JIP3_HUMAN +JUN_HUMAN +JUNB_HUMAN +JUND_HUMAN +K1C17_HUMAN +K1C18_HUMAN +K2C7_HUMAN +K2C8_HUMAN +KAP1_HUMAN +KAP2_HUMAN +KAP3_HUMAN +KAPCA_HUMAN +KAPCB_HUMAN +KCC2G_HUMAN +KLF11_HUMAN +KPCA_HUMAN +KPCD1_HUMAN +KPCG_HUMAN +KPCI_HUMAN +KPCZ_HUMAN +KS6A1_HUMAN +KS6A2_HUMAN +KS6A3_HUMAN +KS6A4_HUMAN +KS6A5_HUMAN +KS6B1_HUMAN +LTOR3_HUMAN +M3K1_HUMAN +M3K11_HUMAN +M3K12_HUMAN +M3K13_HUMAN +M3K14_HUMAN +M3K2_HUMAN +M3K3_HUMAN +M3K4_HUMAN +M3K5_HUMAN +M3K7_HUMAN +M3K8_HUMAN +M4K1_HUMAN +M4K2_HUMAN +M4K4_HUMAN +MAPK3_HUMAN +MAPK5_HUMAN +MAX_HUMAN +MCF2_HUMAN +MED1_HUMAN +MEF2C_HUMAN +MK01_HUMAN +MK03_HUMAN +MK07_HUMAN +MK08_HUMAN +MK10_HUMAN +MK14_HUMAN +MKNK2_HUMAN +MLTK_HUMAN +MP2K1_HUMAN +MP2K2_HUMAN +MP2K3_HUMAN +MP2K4_HUMAN +MP2K5_HUMAN +MP2K6_HUMAN +MP2K7_HUMAN +MTA2_HUMAN +MTOR_HUMAN +MYC_HUMAN +NCK1_HUMAN +NCK2_HUMAN +NCOA1_HUMAN +NF1_HUMAN +NFAC4_HUMAN +NGF_HUMAN +NLK_HUMAN +NRG1_HUMAN +NRG2_HUMAN +NRG3_HUMAN +NRG4_HUMAN +NTF3_HUMAN +NTRK1_HUMAN +P53_HUMAN +P55G_HUMAN +P63_HUMAN +P85A_HUMAN +P85B_HUMAN +PAK1_HUMAN +PAXI_HUMAN +PDGFA_HUMAN +PDPK1_HUMAN +PEBP1_HUMAN +PGFRA_HUMAN +PI3,4,5P3_PSEUDONODE +PI51C_HUMAN +PIPNA_HUMAN +PK3CA_HUMAN +PK3CB_HUMAN +PK3CD_HUMAN +PK3CG_HUMAN +PKD1_HUMAN +PKN2_HUMAN +PLCG1_HUMAN +PLCG2_HUMAN +PLD1_HUMAN +PLD2_HUMAN +PLEC_HUMAN +PLS1_HUMAN +PPM1B_HUMAN +PPP5_HUMAN +PRS6A_HUMAN +PTK6_HUMAN +PTN1_HUMAN +PTN11_HUMAN +PTN12_HUMAN +PTN5_HUMAN +PTN6_HUMAN +PTN7_HUMAN +PTPRH_HUMAN +PTPRR_HUMAN +RAB5A_HUMAN +RAC2_HUMAN +RAF1_HUMAN +RALB_HUMAN +RAP1A_HUMAN +RASA1_HUMAN 
+RASA2_HUMAN +RASH_HUMAN +RASK_HUMAN +RASN_HUMAN +RBBP7_HUMAN +RBP1_HUMAN +REPS1_HUMAN +REPS2_HUMAN +RGS16_HUMAN +RHEB_HUMAN +RHG01_HUMAN +RIPK1_HUMAN +RRAS2_HUMAN +RSSA_HUMAN +SH2D3_HUMAN +SH3G2_HUMAN +SH3G3_HUMAN +SH3K1_HUMAN +SH3L1_HUMAN +SHC1_HUMAN +SHC2_HUMAN +SHIP2_HUMAN +SHOC2_HUMAN +SIN3A_HUMAN +SMAD2_HUMAN +SMAD3_HUMAN +SMD2_HUMAN +SOCS1_HUMAN +SOCS3_HUMAN +SOS1_HUMAN +SOS2_HUMAN +SP1_HUMAN +SPY1_HUMAN +SPY2_HUMAN +SRC_HUMAN +SRF_HUMAN +STA5A_HUMAN +STA5B_HUMAN +STAM1_HUMAN +STAT1_HUMAN +STAT3_HUMAN +STK3_HUMAN +STXB1_HUMAN +SYGP1_HUMAN +SYHC_HUMAN +SYUA_HUMAN +TAB1_HUMAN +TAB2_HUMAN +TAU_HUMAN +TE2IP_HUMAN +TGFA_HUMAN +TGFB1_HUMAN +TGFR1_HUMAN +TGIF1_HUMAN +TLN1_HUMAN +TNFA_HUMAN +TNFL6_HUMAN +TNR1A_HUMAN +TNR6_HUMAN +TRAF2_HUMAN +TRAF6_HUMAN +TSC1_HUMAN +TSC2_HUMAN +UBB_HUMAN +UBC_HUMAN +US6NL_HUMAN +VAV_HUMAN +VAV2_HUMAN +VAV3_HUMAN +WASL_HUMAN +WNK1_HUMAN +ZHX2_HUMAN +ZPR1_HUMAN \ No newline at end of file diff --git a/spras/analysis/ml.py b/spras/analysis/ml.py index 3dad8775..7d45e091 100644 --- a/spras/analysis/ml.py +++ b/spras/analysis/ml.py @@ -10,7 +10,7 @@ from scipy.cluster.hierarchy import dendrogram, fcluster from sklearn.cluster import AgglomerativeClustering from sklearn.decomposition import PCA -from sklearn.preprocessing import StandardScaler +from sklearn.preprocessing import MinMaxScaler, StandardScaler from spras.util import make_required_dirs @@ -142,8 +142,14 @@ def pca(dataframe: pd.DataFrame, output_png: str, output_var: str, output_coord: if not isinstance(labels, bool): raise ValueError(f"labels={labels} must be True or False") - scaler = StandardScaler() + #TODO: MinMaxScaler changes nothing about the data + # scaler = MinMaxScaler() + # scaler.fit(X) # calc mean and standard deviation + # X_scaled = scaler.transform(X) + + scaler = StandardScaler() # TODO: StandardScalar doesn't make sense on binary data because the mean and variance lead to values outside the binary range scaler.fit(X) # calc mean and standard deviation + 
scaler.transform(X) X_scaled = scaler.transform(X) # choosing the PCA @@ -152,20 +158,28 @@ def pca(dataframe: pd.DataFrame, output_png: str, output_var: str, output_coord: X_pca = pca_instance.transform(X_scaled) variance = pca_instance.explained_variance_ratio_ * 100 + # calculating the centroid + centroid = np.mean(X_pca, axis=0) # mean of each principal component across all samples + # making the plot label_color_map = create_palette(column_names) plt.figure(figsize=(10, 7)) - sns.scatterplot(x=X_pca[:, 0], y=X_pca[:, 1], s=70, hue=column_names, legend=True, palette=label_color_map) + sns.scatterplot(x=X_pca[:, 0], y=X_pca[:, 1], s=70, hue=column_names, palette=label_color_map) + plt.scatter(centroid[0], centroid[1], color='red', marker='X', s=100, label='Centroid') plt.title("PCA") + plt.legend() plt.xlabel(f"PC1 ({variance[0]:.1f}% variance)") plt.ylabel(f"PC2 ({variance[1]:.1f}% variance)") # saving the coordinates of each algorithm make_required_dirs(output_coord) coordinates_df = pd.DataFrame(X_pca, columns=['PC' + str(i) for i in range(1, components+1)]) - coordinates_df.insert(0, 'algorithm', columns.tolist()) + coordinates_df.insert(0, 'datapoint_labels', columns.tolist()) + centroid_row = ['centroid'] + centroid.tolist() + coordinates_df.loc[len(coordinates_df)] = centroid_row coordinates_df.to_csv(output_coord, sep='\t', index=False) + # saving the principal components make_required_dirs(output_var) with open(output_var, "w") as f: diff --git a/spras/config.py b/spras/config.py index 14f1a926..cd8c228b 100644 --- a/spras/config.py +++ b/spras/config.py @@ -101,6 +101,10 @@ def __init__(self, raw_config): self.analysis_include_ml = None # A Boolean specifying whether to run the Evaluation analysis self.analysis_include_evaluation = None + # A Boolean specifying whether to run the ML per algorithm analysis + self.analysis_include_ml_aggregate_algo = None + # A Boolean specifying whether to run the Evaluation per algorithm aanalysis + 
self.analysis_include_evaluation_aggregate_algo = None _raw_config = copy.deepcopy(raw_config) self.process_config(_raw_config) @@ -233,6 +237,7 @@ def process_config(self, raw_config): self.analysis_params = raw_config["analysis"] if "analysis" in raw_config else {} self.ml_params = self.analysis_params["ml"] if "ml" in self.analysis_params else {} + self.evaluation_params = self.analysis_params["evaluation"] if "evaluation" in self.analysis_params else {} self.pca_params = {} if "components" in self.ml_params: @@ -252,11 +257,27 @@ def process_config(self, raw_config): self.analysis_include_ml = raw_config["analysis"]["ml"]["include"] self.analysis_include_evaluation = raw_config["analysis"]["evaluation"]["include"] + # Only run ML aggregate per algorithm if analysis include ML is set to True + if 'aggregate_per_algorithm' in self.ml_params and self.analysis_include_ml: + self.analysis_include_ml_aggregate_algo = raw_config["analysis"]["ml"]["aggregate_per_algorithm"] + else: + self.analysis_include_ml_aggregate_algo = False + + # Raises an error if Evaluation is enabled but no gold standard data is provided if self.gold_standards == {} and self.analysis_include_evaluation: raise ValueError("Evaluation analysis cannot run as gold standard data not provided. 
" "Please set evaluation include to false or provide gold standard data.") - if 'aggregate_per_algorithm' in self.ml_params and self.analysis_include_ml: - self.analysis_include_ml_aggregate_algo = raw_config["analysis"]["ml"]["aggregate_per_algorithm"] + # Only run Evaluation if ML is set to True + if not self.analysis_include_ml and self.analysis_include_evaluation: + self.analysis_include_evaluation = False + + # Only run Evaluation aggregate per algorithm if analysis include ML is set to True + if 'aggregate_per_algorithm' in self.evaluation_params and self.analysis_include_evaluation: + self.analysis_include_evaluation_aggregate_algo = raw_config["analysis"]["evaluation"]["aggregate_per_algorithm"] else: - self.analysis_include_ml_aggregate_algo = False + self.analysis_include_evaluation_aggregate_algo = False + + # Only run Evaluation per algorithm if ML per algorithm is set to True + if not self.analysis_include_ml_aggregate_algo and self.analysis_include_evaluation_aggregate_algo: + self.analysis_include_evaluation_aggregate_algo = False diff --git a/spras/evaluation.py b/spras/evaluation.py index 5d00e7d4..e6f60c0b 100644 --- a/spras/evaluation.py +++ b/spras/evaluation.py @@ -3,8 +3,15 @@ from pathlib import Path from typing import Dict, Iterable +import matplotlib.pyplot as plt +import numpy as np import pandas as pd -from sklearn.metrics import precision_score +from sklearn.metrics import ( + average_precision_score, + precision_recall_curve, + precision_score, + recall_score, +) class Evaluation: @@ -72,29 +79,158 @@ def load_files_from_dict(self, gold_standard_dict: Dict): # TODO: later iteration - chose between node and edge file, or allow both @staticmethod - def precision(file_paths: Iterable[Path], node_table: pd.DataFrame, output_file: str): + def precision_and_recall(file_paths: Iterable[Path], node_table: pd.DataFrame, algorithms: list, output_file: str, output_png:str=None): """ Takes in file paths for a specific dataset and an associated gold 
standard node table. - Calculates precision for each pathway file + Calculates precision and recall for each pathway file Returns output back to output_file @param file_paths: file paths of pathway reconstruction algorithm outputs @param node_table: the gold standard nodes - @param output_file: the filename to save the precision of each pathway + @param algorithms: list of algorithms used in current run of SPRAS + @param output_file: the filename to save the precision and recall of each pathway + @param output_png (optional): the filename to plot the precision and recall of each pathway (not a PRC) """ y_true = set(node_table['NODEID']) results = [] - for file in file_paths: df = pd.read_table(file, sep="\t", header=0, usecols=["Node1", "Node2"]) + # TODO: do we want to include the pathways that are empty for evaluation / in the pr_df? y_pred = set(df['Node1']).union(set(df['Node2'])) all_nodes = y_true.union(y_pred) y_true_binary = [1 if node in y_true else 0 for node in all_nodes] y_pred_binary = [1 if node in y_pred else 0 for node in all_nodes] - # default to 0.0 if there is a divide by 0 error + # not using precision_recall_curve because thresholds are binary (0 or 1); rather we are directly calculating precision and recall per pathway precision = precision_score(y_true_binary, y_pred_binary, zero_division=0.0) + recall = recall_score(y_true_binary, y_pred_binary, zero_division=0.0) + results.append({"Pathway": file, "Precision": precision, "Recall": recall}) + + pr_df = pd.DataFrame(results) + pr_df.sort_values(by=["Recall", "Pathway"], axis=0, ascending=True, inplace=True) + pr_df.to_csv(output_file, sep="\t", index=False) + + if output_png is not None: + if not pr_df.empty: + plt.figure(figsize=(8, 6)) + # plot a line per algorithm + for algorithm in algorithms: + subset = pr_df[pr_df["Pathway"].str.contains(algorithm)] + if not subset.empty: + plt.plot( + subset["Recall"], + subset["Precision"], + marker='o', + linestyle='', + label=f"{algorithm}" + ) + + 
+ plt.xlabel("Recall") + plt.ylabel("Precision") + plt.title(f"Precision and Recall Plot") + plt.legend() + plt.grid(True) + plt.savefig(output_png) + else: + plt.figure() + plt.plot([], []) + plt.title("Empty Pathway Files") + plt.savefig(output_png) + + + def select_max_freq_and_node(row: pd.Series): + """ + Selects the node and frequency with the highest frequency value from two potential nodes in a row. + Handles cases where one of the nodes or frequencies may be missing and returns the node associated with the maximum frequency. + """ + max_freq = 0 + node = "" + if pd.isna(row['Node2']) and pd.isna(row['Freq2']): + max_freq = row['Freq1'] + node = row['Node1'] + elif pd.isna(row['Node1']) and pd.isna(row['Freq1']): + max_freq = row['Freq2'] + node = row['Node2'] + else: + max_freq = max(row['Freq1'], row['Freq2']) + node = row['Node1'] + return node, max_freq + + def edge_frequency_node_ensemble(ensemble_file: str): + """ + Processes an ensemble of edge frequencies to identify the highest frequency associated with each node + Reads ensemble_file, separates frequencies by node, and then calculates the maximum frequency for each node. + Returns a DataFrame of nodes with their respective maximum frequencies, or an empty DataFrame if ensemble_file is empty. 
+ @param ensemble_file: the pre-computed node_ensemble + """ + ensemble_df = pd.read_table(ensemble_file, sep="\t", header=0) + + if not ensemble_df.empty: + node1_freq = ensemble_df.drop(columns = ['Node2', 'Direction']) + node2_freq = ensemble_df.drop(columns = ['Node1', 'Direction']) + + max_node1_freq = node1_freq.groupby(['Node1']).max().reset_index() + max_node1_freq.rename(columns = {'Frequency': 'Freq1'}, inplace = True) + max_node2_freq = node2_freq.groupby(['Node2']).max().reset_index() + max_node2_freq.rename(columns = {'Frequency': 'Freq2'}, inplace = True) + + node_ensemble = max_node1_freq.merge(max_node2_freq, left_on='Node1', right_on='Node2', how='outer') + node_ensemble[['Node', 'max_freq']] = node_ensemble.apply(Evaluation.select_max_freq_and_node, axis=1, result_type='expand') + node_ensemble.drop(columns = ['Node1', 'Node2', 'Freq1', 'Freq2'], inplace = True) + node_ensemble.sort_values('max_freq', ascending= False, inplace = True) + return node_ensemble + else: + return pd.DataFrame(columns = ['Node', 'max_freq']) + + def precision_recall_curve_node_ensemble(node_ensemble:pd.DataFrame, node_table:pd.DataFrame, output_png: str): + """ + Takes in an node ensemble for specific dataset or specific algorithm in a dataset, and an associated gold standard node table. 
+ Plots a precision and recall curve for the node ensemble against its associated gold standard node table + Returns output back to output_png + @param node_ensemble: the pre-computed node_ensemble + @param node_table: the gold standard nodes + @param output_file: the filename to save the precision and recall curves + """ + gold_standard_nodes = set(node_table['NODEID']) + + if not node_ensemble.empty: + y_true = [1 if node in gold_standard_nodes else 0 for node in node_ensemble['Node']] + y_scores = node_ensemble['max_freq'].tolist() + precision, recall, thresholds = precision_recall_curve(y_true, y_scores) + auc_precision_recall = average_precision_score(y_true, y_scores) + + plt.figure() + plt.plot(recall, precision, marker='o', label='Precision-Recall curve') + plt.axhline(y=auc_precision_recall, color='r', linestyle='--', label=f'Avg Precision: {auc_precision_recall:.4f}') + plt.xlabel('Recall') + plt.ylabel('Precision') + plt.title('Precision-Recall Curve') + plt.legend() + plt.grid(True) + plt.savefig(output_png) + else: + plt.figure() + plt.plot([], []) + plt.title("Empty Ensemble File") + plt.savefig(output_png) + + def pca_chosen_pathway(coordinates_file: str, output_dir:str): + """ + Identifies the pathway closest to a specified centroid based on PCA coordinates + Calculates the Euclidean distance from each data point to the centroid, then selects the closest pathway. + Returns the file path for the representative pathway associated with the closest data point. 
+ @param coordinates_file: the pca coordinates file for a dataset or specific algorithm in a datset + @param output_dir: the main reconstruction directory + """ + coord_df = pd.read_csv(coordinates_file, delimiter="\t", header=0) + + centroid_row = coord_df[coord_df['datapoint_labels'] == 'centroid'] + centroid = centroid_row.iloc[0, 1:].tolist() + coord_df = coord_df[coord_df['datapoint_labels'] != 'centroid'] - results.append({"Pathway": file, "Precision": precision}) + pc_columns = [col for col in coord_df.columns if col.startswith('PC')] + coord_df['Distance To Centroid'] = np.sqrt(sum((coord_df[pc] - centroid[i]) ** 2 for i, pc in enumerate(pc_columns))) + closest_to_centroid = coord_df.sort_values(by='Distance To Centroid').iloc[0] + rep_pathway = [os.path.join(output_dir, f"{closest_to_centroid['datapoint_labels']}", "pathway.txt")] - precision_df = pd.DataFrame(results) - precision_df.to_csv(output_file, sep="\t", index=False) + return rep_pathway diff --git a/test/evaluate/expected/expected-node-ensemble.csv b/test/evaluate/expected/expected-node-ensemble.csv new file mode 100644 index 00000000..ba467d55 --- /dev/null +++ b/test/evaluate/expected/expected-node-ensemble.csv @@ -0,0 +1,13 @@ +Node max_freq +C 0.75 +E 0.75 +D 0.75 +F 0.75 +A 0.5 +B 0.5 +L 0.5 +M 0.5 +O 0.25 +P 0.25 +N 0.25 +Q 0.25 diff --git a/test/evaluate/expected/expected-precision-recall-per-pathway-empty.txt b/test/evaluate/expected/expected-precision-recall-per-pathway-empty.txt new file mode 100644 index 00000000..6c97ff7e --- /dev/null +++ b/test/evaluate/expected/expected-precision-recall-per-pathway-empty.txt @@ -0,0 +1,2 @@ +Pathway Precision Recall +test/evaluate/input/data-test-params-empty/pathway.txt 0.0 0.0 diff --git a/test/evaluate/expected/expected-precision-recall-per-pathway-pca-chosen.txt b/test/evaluate/expected/expected-precision-recall-per-pathway-pca-chosen.txt new file mode 100644 index 00000000..6c97ff7e --- /dev/null +++ 
b/test/evaluate/expected/expected-precision-recall-per-pathway-pca-chosen.txt @@ -0,0 +1,2 @@ +Pathway Precision Recall +test/evaluate/input/data-test-params-empty/pathway.txt 0.0 0.0 diff --git a/test/evaluate/expected/expected-precision-recall-per-pathway.txt b/test/evaluate/expected/expected-precision-recall-per-pathway.txt new file mode 100644 index 00000000..02e17a7c --- /dev/null +++ b/test/evaluate/expected/expected-precision-recall-per-pathway.txt @@ -0,0 +1,5 @@ +Pathway Precision Recall +test/evaluate/input/data-test-params-456/pathway.txt 0.0 0.0 +test/evaluate/input/data-test-params-empty/pathway.txt 0.0 0.0 +test/evaluate/input/data-test-params-123/pathway.txt 0.6666666666666666 0.6666666666666666 +test/evaluate/input/data-test-params-789/pathway.txt 1.0 1.0 diff --git a/test/evaluate/input/data-test-params-123/pathway.txt b/test/evaluate/input/data-test-params-123/pathway.txt new file mode 100644 index 00000000..21768464 --- /dev/null +++ b/test/evaluate/input/data-test-params-123/pathway.txt @@ -0,0 +1,3 @@ +Node1 Node2 Rank Direction +A B 1 U +B C 1 U diff --git a/test/evaluate/input/data-test-params-456/pathway.txt b/test/evaluate/input/data-test-params-456/pathway.txt new file mode 100644 index 00000000..d445d80f --- /dev/null +++ b/test/evaluate/input/data-test-params-456/pathway.txt @@ -0,0 +1,2 @@ +Node1 Node2 Rank Direction +F L 1 U diff --git a/test/evaluate/input/data-test-params-789/pathway.txt b/test/evaluate/input/data-test-params-789/pathway.txt new file mode 100644 index 00000000..352698a0 --- /dev/null +++ b/test/evaluate/input/data-test-params-789/pathway.txt @@ -0,0 +1,3 @@ +Node1 Node2 Rank Direction +A B 1 U +B Q 1 U diff --git a/test/evaluate/input/data-test-params-empty/pathway.txt b/test/evaluate/input/data-test-params-empty/pathway.txt new file mode 100644 index 00000000..63fda2b1 --- /dev/null +++ b/test/evaluate/input/data-test-params-empty/pathway.txt @@ -0,0 +1 @@ +Node1 Node2 Rank Direction \ No newline at end of file diff 
--git a/test/evaluate/input/ensemble-network.tsv b/test/evaluate/input/ensemble-network.tsv new file mode 100644 index 00000000..293ec3f5 --- /dev/null +++ b/test/evaluate/input/ensemble-network.tsv @@ -0,0 +1,10 @@ +Node1 Node2 Frequency Direction +A B 0.5 U +C D 0.75 U +E F 0.75 U +L M 0.5 U +M N 0.25 U +O P 0.25 U +P Q 0.25 U +A B 0.25 D +B A 0.25 D \ No newline at end of file diff --git a/test/evaluate/input/node-ensemble-empty.csv b/test/evaluate/input/node-ensemble-empty.csv new file mode 100644 index 00000000..e488f56a --- /dev/null +++ b/test/evaluate/input/node-ensemble-empty.csv @@ -0,0 +1,2 @@ +Node max_freq + diff --git a/test/evaluate/input/node-ensemble.csv b/test/evaluate/input/node-ensemble.csv new file mode 100644 index 00000000..ba467d55 --- /dev/null +++ b/test/evaluate/input/node-ensemble.csv @@ -0,0 +1,13 @@ +Node max_freq +C 0.75 +E 0.75 +D 0.75 +F 0.75 +A 0.5 +B 0.5 +L 0.5 +M 0.5 +O 0.25 +P 0.25 +N 0.25 +Q 0.25 diff --git a/test/evaluate/input/node_table.csv b/test/evaluate/input/node_table.csv new file mode 100644 index 00000000..5b9cd41b --- /dev/null +++ b/test/evaluate/input/node_table.csv @@ -0,0 +1,4 @@ +NODEID +A +B +Q \ No newline at end of file diff --git a/test/evaluate/input/pca-coordinates.tsv b/test/evaluate/input/pca-coordinates.tsv new file mode 100644 index 00000000..92fc6b3d --- /dev/null +++ b/test/evaluate/input/pca-coordinates.tsv @@ -0,0 +1,6 @@ +datapoint_labels PC1 PC2 +data-test-params-123 -1.3973472526239425 -1.632993161855452 +data-test-params-456 2.025440509784659 1.9566080710032526e-16 +data-test-params-789 -1.3973472526239425 1.632993161855452 +data-test-params-empty 0.7692539954632259 -4.1496185644351084e-16 +centroid -2.7755575615628914e-17 -4.822931287961988e-17 diff --git a/test/evaluate/test_evaluate.py b/test/evaluate/test_evaluate.py new file mode 100644 index 00000000..5dc0b8f3 --- /dev/null +++ b/test/evaluate/test_evaluate.py @@ -0,0 +1,73 @@ +import filecmp +from pathlib import Path + +import pandas as 
pd +import pytest + +import spras.analysis.ml as ml +from spras.evaluation import Evaluation + +INPUT_DIR = 'test/evaluate/input/' +OUT_DIR = 'test/evaluate/output/' +EXPECT_DIR = 'test/evaluate/expected/' +NODE_TABLE = pd.read_csv(INPUT_DIR + "node_table.csv", header=0) +class TestEvaluate: + @classmethod + def setup_class(cls): + """ + Create the expected output directory + """ + Path(OUT_DIR).mkdir(parents=True, exist_ok=True) + + def test_node_ensemble(self): + ensemble_file = INPUT_DIR + 'ensemble-network.tsv' + edge_freq = Evaluation.edge_frequency_node_ensemble(ensemble_file) + edge_freq.to_csv(OUT_DIR + 'node-ensemble.csv', sep="\t", index=False) + assert filecmp.cmp(OUT_DIR + 'node-ensemble.csv', EXPECT_DIR + 'expected-node-ensemble.csv', shallow=False) + + def test_precision_recal_curve_ensemble_nodes(self): + out_path = Path(OUT_DIR+"test-precision-recall-curve-ensemble-nodes.png") + out_path.unlink(missing_ok=True) + ensemble_file = pd.read_csv(INPUT_DIR + 'node-ensemble.csv', sep="\t", header=0) + Evaluation.precision_recall_curve_node_ensemble(ensemble_file, NODE_TABLE, out_path) + assert out_path.exists() + + def test_precision_recal_curve_ensemble_nodes_empty(self): + out_path = Path(OUT_DIR+"test-precision-recall-curve-ensemble-nodes-empty.png") + out_path.unlink(missing_ok=True) + ensemble_file = pd.read_csv(INPUT_DIR + 'node-ensemble-empty.csv', sep="\t", header=0) + Evaluation.precision_recall_curve_node_ensemble(ensemble_file, NODE_TABLE, out_path) + assert out_path.exists() + + def test_precision_recall_per_pathway(self): + file_paths = [INPUT_DIR + "data-test-params-123/pathway.txt", INPUT_DIR + "data-test-params-456/pathway.txt", INPUT_DIR + "data-test-params-789/pathway.txt", INPUT_DIR + "data-test-params-empty/pathway.txt"] + algorithms = ["test"] + output_file = OUT_DIR + "test-precision-recall-per-pathway.txt" + output_png = OUT_DIR + "test-precision-recall-per-pathway.png" + + Evaluation.precision_and_recall(file_paths, NODE_TABLE, 
algorithms, output_file, output_png) + assert filecmp.cmp(output_file, EXPECT_DIR + 'expected-precision-recall-per-pathway.txt', shallow=False) + + def test_precision_recall_per_pathway_empty(self): + + file_paths = [INPUT_DIR + "data-test-params-empty/pathway.txt"] + algorithms = ["test"] + output_file = OUT_DIR +"test-precision-recall-per-pathway-empty.txt" + output_png = OUT_DIR + "test-precision-recall-per-pathway-empty.png" + + Evaluation.precision_and_recall(file_paths, NODE_TABLE, algorithms, output_file, output_png) + assert filecmp.cmp(output_file, EXPECT_DIR + 'expected-precision-recall-per-pathway-empty.txt', shallow=False) + + + def test_precision_recall_pca_chosen_pathway(self): + file_paths = [INPUT_DIR + "data-test-params-123/pathway.txt", INPUT_DIR + "data-test-params-456/pathway.txt", INPUT_DIR + "data-test-params-789/pathway.txt", INPUT_DIR + "data-test-params-empty/pathway.txt"] + algorithms = ["test"] + output_file = OUT_DIR +"test-precision-recall-per-pathway-pca-chosen.txt" + output_png = OUT_DIR + "test-precision-recall-per-pathway-pca-chosen.png" + + dataframe = ml.summarize_networks(file_paths) + ml.pca(dataframe, OUT_DIR + 'pca.png', OUT_DIR + 'pca-variance.txt', OUT_DIR + 'pca-coordinates.tsv') + + pathway = Evaluation.pca_chosen_pathway(OUT_DIR + 'pca-coordinates.tsv', INPUT_DIR) + Evaluation.precision_and_recall(pathway, NODE_TABLE, algorithms, output_file, output_png) + assert filecmp.cmp(output_file, EXPECT_DIR + 'expected-precision-recall-per-pathway-pca-chosen.txt', shallow=False) diff --git a/test/ml/expected/expected-pca-coordinates.tsv b/test/ml/expected/expected-pca-coordinates.tsv index b6371c84..ac10f2db 100644 --- a/test/ml/expected/expected-pca-coordinates.tsv +++ b/test/ml/expected/expected-pca-coordinates.tsv @@ -1,4 +1,5 @@ -algorithm PC1 PC2 -test-data-s1 -2.006650210482033 -0.9865875190637743 -test-data-s2 -1.5276508866841987 1.0799457247533237 -test-data-s3 3.534301097166232 -0.0933582056895495 \ No newline at end of 
file +datapoint_labels PC1 PC2 +test-data-s1 -2.0066502104820323 -0.9865875190637746 +test-data-s2 -1.5276508866841985 1.0799457247533233 +test-data-s3 3.5343010971662308 -0.09335820568954915 +centroid 0.0 -1.6190752442450199e-16 diff --git a/test/ml/test_ml.py b/test/ml/test_ml.py index 2b5720ae..b9ca69ca 100644 --- a/test/ml/test_ml.py +++ b/test/ml/test_ml.py @@ -77,14 +77,15 @@ def test_pca_robustness(self): dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-s1/s1.txt', INPUT_DIR + 'test-data-s2/s2.txt', INPUT_DIR + 'test-data-s3/s3.txt']) expected = pd.read_table(EXPECT_DIR + 'expected-pca-coordinates.tsv') expected = expected.round(5) + expected.sort_values(by='datapoint_labels', ignore_index=True, inplace=True) + for _ in range(5): dataframe_shuffled = dataframe.sample(frac=1, axis=1) # permute the columns ml.pca(dataframe_shuffled, OUT_DIR + 'pca-shuffled-columns.png', OUT_DIR + 'pca-shuffled-columns-variance.txt', OUT_DIR + 'pca-shuffled-columns-coordinates.tsv') coord = pd.read_table(OUT_DIR + 'pca-shuffled-columns-coordinates.tsv') coord = coord.round(5) # round values to 5 digits to account for numeric differences across machines - coord.sort_values(by='algorithm', ignore_index=True, inplace=True) - + coord.sort_values(by='datapoint_labels', ignore_index=True, inplace=True) assert coord.equals(expected) for _ in range(5): @@ -93,7 +94,7 @@ def test_pca_robustness(self): OUT_DIR + 'pca-shuffled-rows-coordinates.tsv') coord = pd.read_table(OUT_DIR + 'pca-shuffled-rows-coordinates.tsv') coord = coord.round(5) # round values to 5 digits to account for numeric differences across machines - coord.sort_values(by='algorithm', ignore_index=True, inplace=True) + coord.sort_values(by='datapoint_labels', ignore_index=True, inplace=True) assert coord.equals(expected) diff --git a/test/test_config.py b/test/test_config.py index bf13cd6e..c89d7123 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -27,7 +27,8 @@ def get_test_config(): "include": 
False }, "ml": { - "include": False + "include": False, + "aggregate_per_algorithm": False }, "graphspace": { "include": False @@ -36,7 +37,8 @@ def get_test_config(): "include": False }, "evaluation": { - "include": False + "include": False, + "aggregate_per_algorithm": False }, }, } @@ -142,3 +144,132 @@ def test_error_gs_dataset_mismatch(self): with pytest.raises(ValueError): config.init_global(test_config) + + def test_eval_ml_coupling(self): + test_config = get_test_config() + include_combos = [(True, True), (True, False), (False, True), (False, False)] + + # ml: True evaluation: True + test_config["analysis"]["ml"]["include"] = include_combos[0][0] + test_config["analysis"]["evaluation"]["include"] = include_combos[0][1] + config.init_global(test_config) + assert config.config.analysis_include_ml == True and config.config.analysis_include_evaluation == True + + # ml: True evaluation: False + test_config["analysis"]["ml"]["include"] = include_combos[1][0] + test_config["analysis"]["evaluation"]["include"] = include_combos[1][1] + config.init_global(test_config) + assert config.config.analysis_include_ml == True and config.config.analysis_include_evaluation == False + + # ml: False evaluation: True + test_config["analysis"]["ml"]["include"] = include_combos[2][0] + test_config["analysis"]["evaluation"]["include"] = include_combos[2][1] + config.init_global(test_config) + assert config.config.analysis_include_ml == False and config.config.analysis_include_evaluation == False + + # ml: False evaluation: False + test_config["analysis"]["ml"]["include"] = include_combos[3][0] + test_config["analysis"]["evaluation"]["include"] = include_combos[3][1] + config.init_global(test_config) + assert config.config.analysis_include_ml == False and config.config.analysis_include_evaluation == False + + + def test_ml_agg_algo_coupling(self): + + test_config = get_test_config() + include_combos = [(True, True), (True, False), (False, True), (False, False)] + + 
test_config["analysis"]["ml"]["include"] = include_combos[0][0] + test_config["analysis"]["ml"]["aggregate_per_algorithm"] = include_combos[0][1] + config.init_global(test_config) + assert config.config.analysis_include_ml == True and config.config.analysis_include_ml_aggregate_algo == True + + + test_config["analysis"]["ml"]["include"] = include_combos[1][0] + test_config["analysis"]["ml"]["aggregate_per_algorithm"] = include_combos[1][1] + config.init_global(test_config) + assert config.config.analysis_include_ml == True and config.config.analysis_include_ml_aggregate_algo == False + + + test_config["analysis"]["ml"]["include"] = include_combos[2][0] + test_config["analysis"]["ml"]["aggregate_per_algorithm"] = include_combos[2][1] + config.init_global(test_config) + assert config.config.analysis_include_ml == False and config.config.analysis_include_ml_aggregate_algo == False + + + test_config["analysis"]["ml"]["include"] = include_combos[3][0] + test_config["analysis"]["ml"]["aggregate_per_algorithm"] = include_combos[3][1] + config.init_global(test_config) + assert config.config.analysis_include_ml == False and config.config.analysis_include_ml_aggregate_algo == False + + def test_eval_agg_algo_coupling(self): + + test_config = get_test_config() + test_config["analysis"]["ml"]["include"] = True + test_config["analysis"]["ml"]["aggregate_per_algorithm"] = True + + include_combos = [(True, True), (True, False), (False, True), (False, False)] + + test_config["analysis"]["evaluation"]["include"] = include_combos[0][0] + test_config["analysis"]["evaluation"]["aggregate_per_algorithm"] = include_combos[0][1] + config.init_global(test_config) + assert config.config.analysis_include_evaluation == True and config.config.analysis_include_evaluation_aggregate_algo == True + + + test_config["analysis"]["evaluation"]["include"] = include_combos[1][0] + test_config["analysis"]["evaluation"]["aggregate_per_algorithm"] = include_combos[1][1] + config.init_global(test_config) + 
assert config.config.analysis_include_evaluation == True and config.config.analysis_include_evaluation_aggregate_algo == False + + + test_config["analysis"]["evaluation"]["include"] = include_combos[2][0] + test_config["analysis"]["evaluation"]["aggregate_per_algorithm"] = include_combos[2][1] + config.init_global(test_config) + assert config.config.analysis_include_evaluation == False and config.config.analysis_include_evaluation_aggregate_algo == False + + + test_config["analysis"]["evaluation"]["include"] = include_combos[3][0] + test_config["analysis"]["evaluation"]["aggregate_per_algorithm"] = include_combos[3][1] + config.init_global(test_config) + assert config.config.analysis_include_evaluation == False and config.config.analysis_include_evaluation_aggregate_algo == False + + def test_eval_ml_agg_algo_coupling(self): + + # evaluation settings depend on the ml settings: evaluation include is forced to False when ml include is False, and evaluation aggregate_per_algorithm is forced to False unless ml include and ml aggregate_per_algorithm are both True + + test_config = get_test_config() + + test_config["analysis"]["ml"]["include"] = False + test_config["analysis"]["ml"]["aggregate_per_algorithm"] = True + test_config["analysis"]["evaluation"]["include"] = True + test_config["analysis"]["evaluation"]["aggregate_per_algorithm"] = True + config.init_global(test_config) + assert config.config.analysis_include_evaluation == False and config.config.analysis_include_evaluation_aggregate_algo == False and config.config.analysis_include_ml == False and config.config.analysis_include_ml_aggregate_algo == False + + test_config["analysis"]["ml"]["include"] = True + test_config["analysis"]["ml"]["aggregate_per_algorithm"] = False + test_config["analysis"]["evaluation"]["include"] = True + test_config["analysis"]["evaluation"]["aggregate_per_algorithm"] = True + config.init_global(test_config) + assert config.config.analysis_include_evaluation == True and config.config.analysis_include_evaluation_aggregate_algo == False and config.config.analysis_include_ml == True 
and config.config.analysis_include_ml_aggregate_algo == False + + test_config["analysis"]["ml"]["include"] = False + test_config["analysis"]["ml"]["aggregate_per_algorithm"] = False + test_config["analysis"]["evaluation"]["include"] = True + test_config["analysis"]["evaluation"]["aggregate_per_algorithm"] = True + config.init_global(test_config) + assert config.config.analysis_include_evaluation == False and config.config.analysis_include_evaluation_aggregate_algo == False and config.config.analysis_include_ml == False and config.config.analysis_include_ml_aggregate_algo == False + + test_config["analysis"]["ml"]["include"] = True + test_config["analysis"]["ml"]["aggregate_per_algorithm"] = True + test_config["analysis"]["evaluation"]["include"] = True + test_config["analysis"]["evaluation"]["aggregate_per_algorithm"] = True + config.init_global(test_config) + assert config.config.analysis_include_evaluation == True and config.config.analysis_include_evaluation_aggregate_algo == True and config.config.analysis_include_ml == True and config.config.analysis_include_ml_aggregate_algo == True + + test_config["analysis"]["ml"]["include"] = True + test_config["analysis"]["ml"]["aggregate_per_algorithm"] = False + test_config["analysis"]["evaluation"]["include"] = False + test_config["analysis"]["evaluation"]["aggregate_per_algorithm"] = False + config.init_global(test_config) + assert config.config.analysis_include_evaluation == False and config.config.analysis_include_evaluation_aggregate_algo == False and config.config.analysis_include_ml == True and config.config.analysis_include_ml_aggregate_algo == False