From a80bdc9af30ce6a168eb8b0b2b196f48f9d0aa7c Mon Sep 17 00:00:00 2001 From: "Jackson M. Tsuji" Date: Mon, 2 Mar 2020 01:17:20 -0500 Subject: [PATCH 01/10] Updated conda env requirements and added data dump to heatmap script --- envs/conda_requirements.yaml | 30 +++---- scripts/combine_tables.R | 3 +- scripts/generate_heatmap.R | 43 ++++------ snakemake/Snakefile | 2 +- .../blast_tables_combined.csv | 76 +++++++++--------- .../heatmap/BackBLAST_heatmap.pdf | Bin 6164 -> 6169 bytes .../heatmap/BackBLAST_heatmap.tsv | 36 +++++++++ 7 files changed, 108 insertions(+), 82 deletions(-) create mode 100644 testing/outputs_expected/heatmap/BackBLAST_heatmap.tsv diff --git a/envs/conda_requirements.yaml b/envs/conda_requirements.yaml index bad6a1e..db5ee2f 100644 --- a/envs/conda_requirements.yaml +++ b/envs/conda_requirements.yaml @@ -4,26 +4,26 @@ channels: - bioconda - defaults dependencies: - - python=3.7 - - snakemake=5.5.4 - - biopython=1.74 - - blast=2.9 - - r=3.5.1 - - r-getopt=1.20.2 + - python=3.7.6 + - snakemake=5.10.0 + - biopython=1.76 + - blast=2.9.0 + - r-base=3.6.2 + - r-getopt=1.20.3 - r-argparser=0.4 - r-futile.logger=1.4.3 - - r-glue=1.3.0 + - r-glue=1.3.1 - r-plyr=1.8.4 - - r-dplyr=0.7.6 - - r-tibble=2.1.3 + - r-dplyr=0.8.0.1 + - r-tibble=2.1.1 - r-reshape2=1.4.3 - - r-rcolorbrewer=1.1 + - r-rcolorbrewer=1.1_2 - r-ggplot2=3.1.1 - - r-ape=5.2 + - r-ape=5.3 - r-maps=3.3.0 - - r-phytools=0.6 - - r-tidytree=0.2.5 - - bioconductor-treeio=1.6.1 - - bioconductor-ggtree=1.14.4 + - r-phytools=0.6_99 + - r-tidytree=0.3.1 + - bioconductor-treeio=1.10.0 + - bioconductor-ggtree=2.0.0 - r-gridextra=2.3 - r-egg=0.4.5 diff --git a/scripts/combine_tables.R b/scripts/combine_tables.R index 43f570a..6f89746 100755 --- a/scripts/combine_tables.R +++ b/scripts/combine_tables.R @@ -83,7 +83,8 @@ main <- function() { output_table <- dplyr::bind_rows(blast_tables) flog.info("Writing combining BLAST table to file (**with headers**)") - write.table(output_table, file = params$output_filename, sep = ",", row.names = FALSE, col.names = TRUE) + write.table(output_table, file = params$output_filename, sep = ",", row.names = FALSE, + col.names = TRUE, quote = FALSE) flog.info("combine_tables.R: Done.") } diff --git a/scripts/generate_heatmap.R b/scripts/generate_heatmap.R index 8a4a824..8695764 100755 --- a/scripts/generate_heatmap.R +++ b/scripts/generate_heatmap.R @@ -12,6 +12,7 @@ # can be used. library(argparser) library(futile.logger) +library(tools) library(glue, warn.conflicts = FALSE) library(plyr, warn.conflicts = FALSE) library(dplyr, warn.conflicts = FALSE) @@ -23,6 +24,7 @@ library(ape, warn.conflicts = FALSE) library(maps, warn.conflicts = FALSE) library(phytools, warn.conflicts = FALSE) library(tidytree, warn.conflicts = FALSE) +library(treeio, warn.conflicts = FALSE) suppressPackageStartupMessages(library(ggtree)) library(gridExtra, warn.conflicts = FALSE) library(egg, warn.conflicts = FALSE) @@ -41,31 +43,6 @@ read_tibble <- function(file, sep = "\t", header = TRUE, stringsAsFactors = FALS return(data_table) } -#' Chooses a nice discrete colour scale of the desired length -#' -#' @param length Numeric; length of the desired colour scale vector -#' @return Character vector of HTML colour codes -#' @export -choose_discrete_colour_scale <- function(length) { - - # Choose the best colour scale based on the number of entries to be plotted - if (length == 2) { - colour_palette <- RColorBrewer::brewer.pal(n = 3, name = "Dark2")[c(1, 2)] - } else if (length <= 8) { - colour_palette <- RColorBrewer::brewer.pal(n = length, name = "Dark2") - } else if (length <= 12) { - colour_palette <- RColorBrewer::brewer.pal(n = length, name = "Set3") - } else if (length > 12) { - colour_palette <- scales::hue_pal(h = c(20, 290))(length) - } else { - futile.logger::flog.error(glue::glue("Something is wrong with the provided length ('", length, - "'). Is it non-numeric? Exiting...")) - quit(save = "no", status = 1) - } - - return(colour_palette) -} - #' Convert character "NA" (from command line) into constant NA #' #' @param entry single-length vector of any type @@ -111,7 +88,7 @@ reroot_ggtree <- function(phylo_tree, root_name) { } # Re-root - tree_rooted <- ggtree::reroot(phylo_tree, node = tip_label_index) + tree_rooted <- treeio::root(phylo_tree, outgroup = root_name) return(tree_rooted) } @@ -497,6 +474,7 @@ main <- function(params) { params$gene_metadata_filepath)) futile.logger::flog.info(glue::glue("Plot width (mm): ", params$plot_width)) futile.logger::flog.info(glue::glue("Plot height (mm): ", params$plot_height)) + futile.logger::flog.info(glue::glue("Write data table: ", params$write_data)) futile.logger::flog.info("############################") # Convert character "NA" (from command line) into true NA @@ -521,7 +499,15 @@ main <- function(params) { blast_results_list <- load_and_plot_blast_results(params$input_blast_table_filepath, tip_order, params$gene_metadata_filepath, params$genome_metadata_filepath) - + + # Save the heatmap data + if (params$write_data == TRUE) { + futile.logger::flog.info("Saving raw heatmap data to file") + output_table_filepath = paste(tools::file_path_sans_ext(params$output_pdf_filepath), ".tsv", sep = "") + write.table(blast_results_list[[1]], file = output_table_filepath, sep = "\t", + col.names = TRUE, row.names = FALSE, quote = FALSE) + } + # Save the plot if (!is.na(params$input_phylogenetic_tree_filepath)) { # Combine the tree and heatmap @@ -584,6 +570,9 @@ if ( !interactive() ) { parser <- argparser::add_argument(parser = parser, arg = "--plot_height", short = "-z", help = "Plot height (mm)", type = "numeric", default = 200) + parser <- argparser::add_argument(parser = parser, arg = "--write_data", short = "-d", + help = "Write raw plotting data to disk (same basepath as the PDF, but as a .tsv file)", + flag = TRUE) params <- argparser::parse_args(parser) diff --git a/snakemake/Snakefile b/snakemake/Snakefile index 122315a..65e9fbc 100644 --- a/snakemake/Snakefile +++ b/snakemake/Snakefile @@ -167,7 +167,7 @@ rule generate_heatmap: plot_height = config.get("plot_height_mm", 200) shell: "backblast generate_heatmap -m {params.genome_metadata} -g {params.gene_metadata} " - "-b {params.bootstrap_cutoff} -r {params.root_name} -w {params.plot_width} -z {params.plot_height} " + "-b {params.bootstrap_cutoff} -r {params.root_name} -w {params.plot_width} -z {params.plot_height} -d " "{input.tree_file} {input.blast_table} {output} 2>&1 | tee {log} && " "if [[ -f Rplots.pdf ]]; then rm Rplots.pdf; fi" diff --git a/testing/outputs_expected/blast/combine_blast_tables/blast_tables_combined.csv b/testing/outputs_expected/blast/combine_blast_tables/blast_tables_combined.csv index 81b299d..25a2bd5 100644 --- a/testing/outputs_expected/blast/combine_blast_tables/blast_tables_combined.csv +++ b/testing/outputs_expected/blast/combine_blast_tables/blast_tables_combined.csv @@ -1,38 +1,38 @@ -"subject_name","qseqid","sseqid","pident","evalue","qcovhsp","bitscore" -"Chl_ferrooxidans_KoFox","WP_006365810.1","WP_006365810.1",100,5.58e-115,100,313 -"Chl_ferrooxidans_KoFox","WP_006367305.1","WP_006367305.1",100,0,100,602 -"Chl_ferrooxidans_KoFox","WP_006367307.1","WP_006367307.1",100,0,100,493 -"Chl_ferrooxidans_KoFox","WP_006367308.1","WP_006367308.1",100,0,100,611 -"Chl_ferrooxidans_KoFox","WP_006367309.1","WP_006367309.1",100,0,100,1232 -"Chl_ferrooxidans_KoFox","WP_006367310.1","WP_006367310.1",100,1.25e-114,100,312 -"Chl_ferrooxidans_KoFox","WP_006367320.1","WP_006367320.1",100,0,100,732 -"Chl_ferrooxidans_KoFox","WP_006367321.1","WP_006367321.1",100,0,100,572 -"Chl_ferrooxidans_KoFox","WP_006367322.1","WP_006367322.1",100,0,100,555 -"Chl_ferrooxidans_KoFox","WP_006367323.1","WP_006367323.1",100,0,100,690 -"Chl_ferrooxidans_KoFox","WP_006367388.1","WP_006367388.1",100,0,100,839 -"Chl_ferrooxidans_KoFox","WP_006367389.1","WP_006367389.1",100,3.82e-90,100,248 -"Chl_luteolum_DSM_273","WP_006365810.1","WP_011356871.1",71.429,4.53e-83,99,233 -"Chl_luteolum_DSM_273","WP_006367305.1","WP_011358288.1",53.004,3.83e-113,96,321 -"Chl_luteolum_DSM_273","WP_006367307.1","WP_011358287.1",67.257,1.85e-116,95,324 -"Chl_luteolum_DSM_273","WP_006367308.1","WP_011358286.1",79.522,0,100,505 -"Chl_luteolum_DSM_273","WP_006367309.1","WP_011358285.1",71.953,0,100,887 -"Chl_luteolum_DSM_273","WP_006367310.1","WP_041463880.1",53.147,8.67e-59,95,171 -"Chl_luteolum_DSM_273","WP_006367320.1","WP_011358284.1",75.35,0,100,560 -"Chl_luteolum_DSM_273","WP_006367321.1","WP_011358283.1",71.831,5.83e-144,98,398 -"Chl_luteolum_DSM_273","WP_006367322.1","WP_011358282.1",77.256,3.78e-157,100,430 -"Chl_luteolum_DSM_273","WP_006367323.1","WP_011358281.1",76.347,0,98,528 -"Chl_luteolum_DSM_273","WP_006367388.1","WP_011358173.1",53.364,2.54e-149,100,421 -"Chl_phaeoferrooxidans_KB01","WP_006365810.1","WP_076790319.1",98.71,6.57e-114,100,311 -"Chl_phaeoferrooxidans_KB01","WP_006367305.1","WP_076789648.1",93.151,0,100,571 -"Chl_phaeoferrooxidans_KB01","WP_006367307.1","WP_076790657.1",95.359,3.02e-175,100,473 -"Chl_phaeoferrooxidans_KB01","WP_006367308.1","WP_076790659.1",97.952,0,100,603 -"Chl_phaeoferrooxidans_KB01","WP_006367309.1","WP_076790661.1",97.496,0,100,1201 -"Chl_phaeoferrooxidans_KB01","WP_006367310.1","WP_076790662.1",82.667,6.27e-95,100,263 -"Chl_phaeoferrooxidans_KB01","WP_006367320.1","WP_076790666.1",96.078,0,100,704 -"Chl_phaeoferrooxidans_KB01","WP_006367321.1","WP_076790668.1",93.426,0,100,543 -"Chl_phaeoferrooxidans_KB01","WP_006367322.1","WP_076790669.1",98.917,0,100,552 -"Chl_phaeoferrooxidans_KB01","WP_006367323.1","WP_076790671.1",97.345,0,100,674 -"Chl_phaeoferrooxidans_KB01","WP_006367388.1","WP_076792910.1",89.538,0,100,744 -"Chl_phaeoferrooxidans_KB01","WP_006367389.1","WP_076793039.1",94.915,7.5e-87,100,239 -"Chl_sp_N1","WP_006367388.1","WP_131354685.1",57.178,5.41e-158,98,442 -"Ignavibacterium_album_JCM_16511_outgroup","WP_006367320.1","WP_014560057.1",44.558,2.72e-82,79,246 +subject_name,qseqid,sseqid,pident,evalue,qcovhsp,bitscore +Chl_ferrooxidans_KoFox,WP_006365810.1,WP_006365810.1,100,5.58e-115,100,313 +Chl_ferrooxidans_KoFox,WP_006367305.1,WP_006367305.1,100,0,100,602 +Chl_ferrooxidans_KoFox,WP_006367307.1,WP_006367307.1,100,0,100,493 +Chl_ferrooxidans_KoFox,WP_006367308.1,WP_006367308.1,100,0,100,611 +Chl_ferrooxidans_KoFox,WP_006367309.1,WP_006367309.1,100,0,100,1232 +Chl_ferrooxidans_KoFox,WP_006367310.1,WP_006367310.1,100,1.25e-114,100,312 +Chl_ferrooxidans_KoFox,WP_006367320.1,WP_006367320.1,100,0,100,732 +Chl_ferrooxidans_KoFox,WP_006367321.1,WP_006367321.1,100,0,100,572 +Chl_ferrooxidans_KoFox,WP_006367322.1,WP_006367322.1,100,0,100,555 +Chl_ferrooxidans_KoFox,WP_006367323.1,WP_006367323.1,100,0,100,690 +Chl_ferrooxidans_KoFox,WP_006367388.1,WP_006367388.1,100,0,100,839 +Chl_ferrooxidans_KoFox,WP_006367389.1,WP_006367389.1,100,3.82e-90,100,248 +Chl_luteolum_DSM_273,WP_006365810.1,WP_011356871.1,71.429,4.53e-83,99,233 +Chl_luteolum_DSM_273,WP_006367305.1,WP_011358288.1,53.004,3.83e-113,96,321 +Chl_luteolum_DSM_273,WP_006367307.1,WP_011358287.1,67.257,1.85e-116,95,324 +Chl_luteolum_DSM_273,WP_006367308.1,WP_011358286.1,79.522,0,100,505 +Chl_luteolum_DSM_273,WP_006367309.1,WP_011358285.1,71.953,0,100,887 +Chl_luteolum_DSM_273,WP_006367310.1,WP_041463880.1,53.147,8.67e-59,95,171 +Chl_luteolum_DSM_273,WP_006367320.1,WP_011358284.1,75.35,0,100,560 +Chl_luteolum_DSM_273,WP_006367321.1,WP_011358283.1,71.831,5.83e-144,98,398 +Chl_luteolum_DSM_273,WP_006367322.1,WP_011358282.1,77.256,3.78e-157,100,430 +Chl_luteolum_DSM_273,WP_006367323.1,WP_011358281.1,76.347,0,98,528 +Chl_luteolum_DSM_273,WP_006367388.1,WP_011358173.1,53.364,2.54e-149,100,421 +Chl_phaeoferrooxidans_KB01,WP_006365810.1,WP_076790319.1,98.71,6.57e-114,100,311 +Chl_phaeoferrooxidans_KB01,WP_006367305.1,WP_076789648.1,93.151,0,100,571 +Chl_phaeoferrooxidans_KB01,WP_006367307.1,WP_076790657.1,95.359,3.02e-175,100,473 +Chl_phaeoferrooxidans_KB01,WP_006367308.1,WP_076790659.1,97.952,0,100,603 +Chl_phaeoferrooxidans_KB01,WP_006367309.1,WP_076790661.1,97.496,0,100,1201 +Chl_phaeoferrooxidans_KB01,WP_006367310.1,WP_076790662.1,82.667,6.27e-95,100,263 +Chl_phaeoferrooxidans_KB01,WP_006367320.1,WP_076790666.1,96.078,0,100,704 +Chl_phaeoferrooxidans_KB01,WP_006367321.1,WP_076790668.1,93.426,0,100,543 +Chl_phaeoferrooxidans_KB01,WP_006367322.1,WP_076790669.1,98.917,0,100,552 +Chl_phaeoferrooxidans_KB01,WP_006367323.1,WP_076790671.1,97.345,0,100,674 +Chl_phaeoferrooxidans_KB01,WP_006367388.1,WP_076792910.1,89.538,0,100,744 +Chl_phaeoferrooxidans_KB01,WP_006367389.1,WP_076793039.1,94.915,7.5e-87,100,239 +Chl_sp_N1,WP_006367388.1,WP_131354685.1,57.178,5.41e-158,98,442 +Ignavibacterium_album_JCM_16511_outgroup,WP_006367320.1,WP_014560057.1,44.558,2.72e-82,79,246 diff --git a/testing/outputs_expected/heatmap/BackBLAST_heatmap.pdf b/testing/outputs_expected/heatmap/BackBLAST_heatmap.pdf index 1582c5f8cc1a1738cbec7d9c0a0425348ccb8fcd..f4a68cb8a3a81fa37d446b013859017e1cca939b 100644 GIT binary patch delta 2159 zcmZXNc{mde1ILqd!yGTkk+~)@nayTMa^#wGQjFv%_Z%A$YPmLya{v5@Xtf$Y@*_+n zR~Ydt^m@YN%6;GDc6Uj%Bb zD*{f6g7oid3HA1kr>W^gR*oJqwk^u9mvNt`WEF8pk;YU))l@c}twT)9!Pf-dIa`z` zd-mynKgqq$5b;a(OLel`dgkUiv>HIcP0Y?OgEe+pEuT;Gvds7Obt9|Cvz<3~C)v@h zUHfqvZQRhwI9}7&68(S$S2!`Jz%TZqgjsp`n1M}4Ul4C9yXdAyQIqhvVkg*Q>HU5? zsxH@`gn9DZEqzLDBWU%@%f$hRNb1s2%|S@i-H6a?C0nL)blDFzLe#!|_xNsG{LrXx zAB#11aJ(|mdvLtSBJ3B*fG<9{3{sTVR(HS5Z6%KwWcdCV--JNGf0)E2QVR;Ub&n7J zSvcG}$US_k1jH>$tiN_fcRsHg#Iz=MvhY@<2S+hce^-pKePYgNP_tMy#T_LXzEc~= zE1vfOn>&YdHI)R5NAIl`ZGn`3wU^V$;_*#KKw3h0$+R`1kwZePq+v$y1GnV&6m*E- z7}GuQ8LzSq?|leAIBgkgYzZ_D)beZ?{6q%2__p6=cz96vjTCG3^gIq$4l~$p$%do> zJSspc&e25RvD$&%QlOm%x#6r3b0WK^<>3+7u}2kj6T;x?zMq5d7%s6_u~GkFpo+5e zz-D42u*wC{OHgaKZ^XL!K#;gBD04#^lp}NVVDdrOMByCVJNiB0Ht^cWmI}0{AcS}o z*NY*%5hj-^zY$K_=?+rBDB8wCNL|~?6%U`d$hZPYAvj-E0-~ArPHcoP!Dl+%XoRP) zzGPIcDew%Tg|XSY=2e|8y?6c-m6q0PR3R4LIB$TWu7@A2THC%n8QTh*X3x*BEn3@l z9xlz#fBFe#V+BPvC%s$E$qf+4jb$&iR*9Z~r7$hrJbR}RtlLT{&L9Qh9#1fB($plk=*o3ER8he129d=-ivsj1~^SqEJX zYT&*C@1Umh@b}?J7xL&bp6Zg4DID0_PH)R8X4*%~aSxqRoDs3fQ}v*0 z7G{b9q)RNV3M;XO>?xei-a9TwKA&~D8d_o;o-=}eZ%~0mZ^}K*U{sJupsXb;=3Yp|D%$~KU=IJ8IQ}Ih+P23+pvHVkAIq-qcZT@St+;*gYu7jU9mYVn0 zc=#c}3z_+A0g&e7i*ohn=XlBW0`5szt2wKK*u+GaA|wx=#3leUUwW%@0? zohJR7B<9Zmh31?GMd_B-39x6J$bhh9NooPTZV3L$NSzumsB~Kk(PrNI5m%ES8QLYx zwua3PlS>CPnWok*f8~6nozYZmxwRWd`g!7T`4t7t8ZMTqQL3Y*o5Ay1Xn;R3eD9<4vbTLBKYFyPTaa!o>m zYpkSmj~!EU0^P}Wic?LfW*RZGmg&*Uk+P9m`Vf5Ps8eu?&WfF&Z{?T)pkA%KiTpW) z@3FrC&JvN;UG3o+RjWL=)VKnY=e%KnYPhpYalD~FTIG(qf^m07AhpRPF<07nbJxab z;IwFMa_}_cYnNbK=CNr}ews`Yf!*amR2;H;w(D~*2t{B*y1irSzjo!coU6}x>8+@J z(RI4&5>mRJk_=C~gw!Uq=`1azUDA%`M^53Nzp6F3kh0cKS$s{{H;vj^wh~|;s6|5d z=+n{*JIlM-lONsE zhntd-z0iMa;r6tNYax2;=Ctt>0>};FDt#yPjOcNlP9*fJOS?iH^XS7 zU@h2ZTyW@G*UsKkaY&Yq{oSD`W8Iwmo$JI=E}^eGd4-;Lu|!fzRW~hSDd@-I`pOpl z-FfRbL%IbDy=GV$GgqVhhA00--Z4qbmw?XpekV#61FfJJbR#up|Jz945|Jq>ZpEqt$u z;kQJ!m$PX2Q&Pnfq0;k{!g8kL!fWd7JWVKk=TW_h+;{gVQ4#4B9UFH*+r2tH1sK-~ z*vh1tZ&^t}bu=6ABmxawNlj`Fe*hCP@6Uc}VnP=GY?DaH96l#@5CLDKXqc^Q?zLz3=UW3cx9|=cU3PdwwFu%z}ao9(xK^39i1?c*ZX6$o`n%bVgR$ zEUS!FZ`a-G8RvHNO_$M7EFLX=aS+Gz+LzK9JUOfb9Q z+23@H{rD#^ZtC*AafPQ5Fd)uj{a|Fz6JPz>K+~341W=^ZWwB*5zdAUPY-&~9M4oxr z*CrEQo?bf4zECF_k{Xig?yyznLm2&dneI8ivi3m*F5WvIN4*)Kaabolt7{aPz5guc zOEDty`EQHC0495mI`ocq^@d5#fcJSW$~@;z)a7FFyUfR|Nq~P#(CD2CxIhhKfv&ML zn?NY=!ZuN0&Ukq5rfh6W!uHzURcirge;Y={d2sz?_=QE#Xh3Ugn|ei#N}0%YGm`{v z+Hzr`Zp)YGlJMp)zPJn3Ulw*e??aZ%W9nRF<77&&PUE9|C8f7ih*bIN;dtx5K9b@^Ui_;`98G#tG`f4^fP zl1~X|Js1stmBSgDi3T_kFcQzB1oLeCxV~6LO9J=Q5_Bzx^-@u&5*MenIT>B5` zDQS3;1OBZ^U_&2~23y(wb^T<~p3D5aYG2#DOH~=xcZz=Usr1EeqKeF2M`~ykU5>A; z@`ZQ^8r~NN0JtZcG)mltPK+@AWK^_EQ5K^L&5eOLmd?H;KMQ~4MdICMd@xjq6%|lD zRH($6|3$O*ILDvjO?sPv+#4HM?8YmNb#-)K;}O}&nQiAk zdW8xJ_`$iW8)AY`o$|HP zT9M+$Y!-xx;r-HUzOoUuyPeWG@zL#lPmIK9sD@rybPL zT5}c_YVTbpI4|<1>r|+u>w|7mLi!HW<{iZK!2{X4uU;FV>YNL}L9zy^A6 zhzv~&kn6%K?evndBaL0^ecwwWrS_xXyP5++=%t%+x0yAf|E*`2_gmKBeK~z-hZ$sX zAh5&C&t}IHk-6h(GOFW}>?tK*+0mKJrktkemK@hJ5WyYzbpnrcf?SKSD%n+{3$NKX|J=3{2!AU0~s3gsCT4vZOpf?U@TOeR4mq)m6>b9$(`w3^Uj;(&=hw>@v zK?&-?2Fvq_B*#IXK+!Y#Nj2}efu2h(yo34-ov8{)41c2l&%l+1DL3+F#p*rEr=)pz z%5A2dLMyfUx%WI;sv7U5YI&<%jt%8Say9Dq9eloQu-jrIEFpw@w(frggQEa z*PhrlByW(An|9EJ!$>h7WT8^vkTB?Q5^{>SruU|{a-)a5;nTS-_<&*YreNbr(46R~ zTkpWhsW7Yqqu(QESnNx+J_ZeQ>*7;_2@Q zNow?-y_FjqHi>|`k4z~^Qfb{lX{v_Pn7vg%uH*+dEm+kzNvfLGfs1;&ON?1EP+V3> zHhh=O+39+bxAAZTp}3o55LO|6k@m%$-)7DRvnQ_6N4rQ$-aLj?so|KTl)#L5!%B;{ ziGtjPqcX3}rgc9o91Rd-`lD8ryGmDfjih~0DqTJU7?Z)$>4)8!14-_3#R|5}>b_PN zPAB1p&%S)o$i-JF&qU=z Date: Mon, 2 Mar 2020 01:32:27 -0500 Subject: [PATCH 02/10] Added midpoint rooting --- README.md | 4 +++- scripts/generate_heatmap.R | 46 +++++++++++++++++--------------------- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index d7ac81b..10d8729 100644 --- a/README.md +++ b/README.md @@ -92,9 +92,11 @@ mkdir -p testing/outputs # Make sure backblast is added to your PATH before running the test backblast run testing/inputs/config.yaml testing/outputs --notemp -# See if the output file looks as expected +# See if the output files looks as expected cmp testing/outputs/blast/combine_blast_tables/blast_tables_combined.csv \ testing/outputs_expected/blast/combine_blast_tables/blast_tables_combined.csv +cmp testing/outputs/heatmap/BackBLAST_heatmap.tsv \ + testing/outputs_expected/heatmap/BackBLAST_heatmap.tsv # Clean up test if everything looks good rm -r testing/outputs diff --git a/scripts/generate_heatmap.R b/scripts/generate_heatmap.R index 8695764..34fc59a 100755 --- a/scripts/generate_heatmap.R +++ b/scripts/generate_heatmap.R @@ -66,30 +66,27 @@ convert_to_constant_NA <- function(entry) { #' @return ggtree-format tree, rooted #' @export reroot_ggtree <- function(phylo_tree, root_name) { - - # Extract the data from the tree in tabular format - tree_data <- ggtree::ggtree(phylo_tree)$data - - # Check that the root_name exists - if ( !(root_name %in% tree_data$label) ) { - futile.logger::flog.error(glue::glue("Could not find the root_name '", root_name, - "' in the provided tree. Cannot re-root. Exiting...")) - quit(save = "no", status = 1) - } - - # Get the ggtree ID # of the to-be root - tip_label_index <- match(x = root_name, table = phylo_tree$tip.label) - - # Check that only one matching parent node exists - if (length(tip_label_index) != 1) { - futile.logger::flog.error(glue::glue("The provided root_name '", root_name, - "' appears to have more than one associated node. Cannot re-root. Exiting...")) - quit(save = "no", status = 1) + + # Midpoint root the tree if requested; otherwise root to a name + if (root_name == "midpoint") { + futile.logger::flog.info("Midpoint rooting the tree") + tree_rooted <- phytools::midpoint.root(phylo_tree) + } else { + futile.logger::flog.info(glue::glue("Re-rooting tree to '", root_name, "'")) + # Extract the data from the tree in tabular format + tree_data <- ggtree::ggtree(phylo_tree)$data + + # Check that the root_name exists + if ( !(root_name %in% tree_data$label) ) { + futile.logger::flog.error(glue::glue("Could not find the root_name '", root_name, + "' in the provided tree. Cannot re-root. Exiting...")) + quit(save = "no", status = 1) + } + + # Re-root + tree_rooted <- treeio::root(phylo_tree, outgroup = root_name) } - - # Re-root - tree_rooted <- treeio::root(phylo_tree, outgroup = root_name) - + return(tree_rooted) } @@ -174,7 +171,6 @@ load_and_plot_phylogenetic_tree <- function(input_phylogenetic_tree_filepath, ro # Optionally re-root tree if ( !is.na(root_name) ) { - futile.logger::flog.info(glue::glue("Re-rooting tree to '", root_name, "'")) phylo_tree <- reroot_ggtree(phylo_tree, root_name) } @@ -562,7 +558,7 @@ if ( !interactive() ) { help = "Bootstrap cutoff value", type = "numeric", default = NA) parser <- argparser::add_argument(parser = parser, arg = "--root_name", short = "-r", - help = "Root name", + help = "Root name ('midpoint' to midpoint root or NA to keep the existing root; default NA)", type = "character", default = NA) parser <- argparser::add_argument(parser = parser, arg = "--plot_width", short = "-w", help = "Plot width (mm)", From dee5dff13089a50c9d8647a8f76a38dc708cc6c6 Mon Sep 17 00:00:00 2001 From: "Jackson M. Tsuji" Date: Mon, 2 Mar 2020 01:33:43 -0500 Subject: [PATCH 03/10] Improved docs --- snakemake/template_config.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/snakemake/template_config.yaml b/snakemake/template_config.yaml index d75d5a8..e78291d 100644 --- a/snakemake/template_config.yaml +++ b/snakemake/template_config.yaml @@ -13,7 +13,8 @@ threads: 1 phylogenetic_tree_newick: NA # Optional: set the cutoff for displayed bootstrap values. Anything greater than to equal to the cutoff will be shown. bootstrap_cutoff: 80 -# Optional: if you want to re-root your tree, then give the exact name of the tip you want to re-root by. Otherwise, 'NA'. +# Optional: if you want to re-root your tree, then give the exact name of the tip you want to re-root by. + # 'midpoint' will midpoint root the tree. 'NA' will keep the current tree topology. root_name: NA From bafc5551e82d4c60a8324e1d2d1aaabea5962d77 Mon Sep 17 00:00:00 2001 From: "Jackson M. Tsuji" Date: Wed, 18 Aug 2021 16:10:13 +0900 Subject: [PATCH 04/10] Simplified library loading --- scripts/generate_heatmap.R | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/scripts/generate_heatmap.R b/scripts/generate_heatmap.R index 34fc59a..0504443 100755 --- a/scripts/generate_heatmap.R +++ b/scripts/generate_heatmap.R @@ -1,33 +1,29 @@ #!/usr/bin/env Rscript # generate_heatmap.R -# Copyright Lee H. Bergstrand and Jackson M. Tsuji, 2019 +# Copyright Lee H. Bergstrand and Jackson M. Tsuji, 2021 # Plots a newick treefile and BLAST table together as a phylogenetic tree and heatmap # Part of the BackBLAST pipeline # Load libraries -# Note: warn.conflicts: Normally, when a library is loaded that has a function with identical - # name to another function (e.g., setdiff() in dplyr), a warning is given during package - # load. The warnings are disabled here to prevent excessive messages when the script is run. - # Long-term, once possible to switch to R 3.6.0, options(conflicts.policy(list(warn = FALSE))) - # can be used. +library(conflicted) library(argparser) library(futile.logger) library(tools) -library(glue, warn.conflicts = FALSE) -library(plyr, warn.conflicts = FALSE) -library(dplyr, warn.conflicts = FALSE) -library(tibble, warn.conflicts = FALSE) -library(reshape2, warn.conflicts = FALSE) -library(RColorBrewer, warn.conflicts = FALSE) -library(ggplot2, warn.conflicts = FALSE) -library(ape, warn.conflicts = FALSE) -library(maps, warn.conflicts = FALSE) -library(phytools, warn.conflicts = FALSE) -library(tidytree, warn.conflicts = FALSE) -library(treeio, warn.conflicts = FALSE) +library(glue) +library(plyr) +library(dplyr) +library(tibble) +library(reshape2) +library(RColorBrewer) +library(ggplot2) +library(ape) +library(maps) +library(phytools) +library(tidytree) +suppressPackageStartupMessages(library(treeio)) suppressPackageStartupMessages(library(ggtree)) -library(gridExtra, warn.conflicts = FALSE) -library(egg, warn.conflicts = FALSE) +library(gridExtra) +library(egg) #' Reads a data table as a tibble with several default parameters. All parameters below are the same as read.table() #' From ca443a2a6b314b321550c760741894d8c95bc565 Mon Sep 17 00:00:00 2001 From: "Jackson M. Tsuji" Date: Wed, 18 Aug 2021 16:50:53 +0900 Subject: [PATCH 05/10] Updated R package versions --- README.md | 2 +- envs/conda_requirements.yaml | 47 ++++++++++++++++++------------------ envs/gtotree.yaml | 2 +- scripts/generate_heatmap.R | 2 +- 4 files changed, 27 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 10d8729..2947751 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ BackBLAST_Reciprocal_BLAST ========================== [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3465955.svg)](https://doi.org/10.5281/zenodo.3465955) -Copyright Lee H. Bergstrand and Jackson M. Tsuji, 2019 +Copyright Lee H. Bergstrand and Jackson M. Tsuji, 2021 This repository contains a reciprocal BLAST program for filtering down BLAST results to best bidirectional hits. It also contains a toolkit for finding and visualizing BLAST hits for gene clusters within multiple bacterial genomes. diff --git a/envs/conda_requirements.yaml b/envs/conda_requirements.yaml index db5ee2f..3cb6a9b 100644 --- a/envs/conda_requirements.yaml +++ b/envs/conda_requirements.yaml @@ -4,26 +4,27 @@ channels: - bioconda - defaults dependencies: - - python=3.7.6 - - snakemake=5.10.0 - - biopython=1.76 - - blast=2.9.0 - - r-base=3.6.2 - - r-getopt=1.20.3 - - r-argparser=0.4 - - r-futile.logger=1.4.3 - - r-glue=1.3.1 - - r-plyr=1.8.4 - - r-dplyr=0.8.0.1 - - r-tibble=2.1.1 - - r-reshape2=1.4.3 - - r-rcolorbrewer=1.1_2 - - r-ggplot2=3.1.1 - - r-ape=5.3 - - r-maps=3.3.0 - - r-phytools=0.6_99 - - r-tidytree=0.3.1 - - bioconductor-treeio=1.10.0 - - bioconductor-ggtree=2.0.0 - - r-gridextra=2.3 - - r-egg=0.4.5 + - python>=3.8 + - snakemake>=6 + - biopython>=1.76 + - blast>=2.9.0 + - r-base>=4 + - r-conflicted + - r-getopt>=1.20.3 + - r-argparser>=0.4 + - r-futile.logger>=1.4.3 + - r-glue>=1.3.1 + - r-plyr>=1.8.4 + - r-dplyr<=1.0.5 + - r-tibble>=2.1.1 + - r-reshape2>=1.4.3 + - r-rcolorbrewer>=1.1_2 + - r-ggplot2>=3.1.1 + - r-ape>=5.3 + - r-maps>=3.3.0 + - r-phytools>=0.6_99 + - r-tidytree>=0.3.1 + - bioconductor-treeio>=1.10.0 + - bioconductor-ggtree>=3.0.0 + - r-gridextra>=2.3 + - r-egg>=0.4.5 diff --git a/envs/gtotree.yaml b/envs/gtotree.yaml index 0635022..f9fe5ab 100644 --- a/envs/gtotree.yaml +++ b/envs/gtotree.yaml @@ -4,4 +4,4 @@ channels: - astrobiomike - defaults dependencies: - - gtotree=1.4.3 + - gtotree>=1.6 diff --git a/scripts/generate_heatmap.R b/scripts/generate_heatmap.R index 0504443..d0eae72 100755 --- a/scripts/generate_heatmap.R +++ b/scripts/generate_heatmap.R @@ -163,7 +163,7 @@ plot_ggtree <- function(phylo_tree, bootstrap_label_data) { load_and_plot_phylogenetic_tree <- function(input_phylogenetic_tree_filepath, root_name, bootstrap_cutoff) { # Read tree futile.logger::flog.info("Reading input phylogenetic tree") - phylo_tree <- ape::read.tree(input_phylogenetic_tree_filepath) + phylo_tree <- treeio::read.tree(input_phylogenetic_tree_filepath) # Optionally re-root tree if ( !is.na(root_name) ) { From f9cbcb9792c044aec6a3df49d32600b9c7d870ae Mon Sep 17 00:00:00 2001 From: "Jackson M. Tsuji" Date: Wed, 18 Aug 2021 16:52:56 +0900 Subject: [PATCH 06/10] Recommend mamba --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2947751..aa46531 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,8 @@ cd BackBLAST_Reciprocal_BLAST git checkout develop # optionally go to a specific branch # Create the conda env based on the YAML file in the repo -conda env create -n backblast --file=envs/conda_requirements.yaml +# It is recommended that you run this command using mamba instead of conda to save time - but conda should also work. +mamba env create -n backblast --file=envs/conda_requirements.yaml # Copy the key repo contents into a conda share folder conda activate backblast From 9606c0f8290d08c2857654c87ceb41e6ba2af0aa Mon Sep 17 00:00:00 2001 From: "Jackson M. Tsuji" Date: Wed, 18 Aug 2021 09:02:10 -0400 Subject: [PATCH 07/10] Updated dplyr version requirements --- envs/conda_requirements.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/envs/conda_requirements.yaml b/envs/conda_requirements.yaml index 3cb6a9b..f997af1 100644 --- a/envs/conda_requirements.yaml +++ b/envs/conda_requirements.yaml @@ -15,7 +15,7 @@ dependencies: - r-futile.logger>=1.4.3 - r-glue>=1.3.1 - r-plyr>=1.8.4 - - r-dplyr<=1.0.5 + - r-dplyr>=1.0.5 - r-tibble>=2.1.1 - r-reshape2>=1.4.3 - r-rcolorbrewer>=1.1_2 From 3625e1d6a2b71429bf9f26b71dadcb25d227bdb0 Mon Sep 17 00:00:00 2001 From: "Jackson M. Tsuji" Date: Thu, 19 Aug 2021 10:50:33 +0900 Subject: [PATCH 08/10] Fix versions rather than using open-ended versioning --- envs/conda_requirements.yaml | 47 ++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/envs/conda_requirements.yaml b/envs/conda_requirements.yaml index 3cb6a9b..9e791ce 100644 --- a/envs/conda_requirements.yaml +++ b/envs/conda_requirements.yaml @@ -4,27 +4,28 @@ channels: - bioconda - defaults dependencies: - - python>=3.8 - - snakemake>=6 - - biopython>=1.76 - - blast>=2.9.0 - - r-base>=4 + - python=3.9 + - snakemake=6 + - biopython>=1.76,<=1.79 + - blast>=2.9,<=2.12 + - r-base>=4.0,<=4.2 + - r-codetools - r-conflicted - - r-getopt>=1.20.3 - - r-argparser>=0.4 - - r-futile.logger>=1.4.3 - - r-glue>=1.3.1 - - r-plyr>=1.8.4 - - r-dplyr<=1.0.5 - - r-tibble>=2.1.1 - - r-reshape2>=1.4.3 - - r-rcolorbrewer>=1.1_2 - - r-ggplot2>=3.1.1 - - r-ape>=5.3 - - r-maps>=3.3.0 - - r-phytools>=0.6_99 - - r-tidytree>=0.3.1 - - bioconductor-treeio>=1.10.0 - - bioconductor-ggtree>=3.0.0 - - r-gridextra>=2.3 - - r-egg>=0.4.5 + - r-getopt=1.20 + - r-argparser=0.7 + - r-futile.logger=1.4 + - r-glue=1 + - r-plyr=1.8 + - r-dplyr>=1.0.5,<=1.0.7 + - r-tibble=3.1 + - r-reshape2=1.4 + - r-rcolorbrewer=1.1_2 + - r-ggplot2=3 + - r-ape>=5.3,<=5.5 + - r-maps=3.3 + - r-phytools>=0.6_99,<=0.7_80 + - r-tidytree=0.3 + - bioconductor-treeio>=1.10,<=1.17 + - bioconductor-ggtree=3.0 + - r-gridextra=2.3 + - r-egg=0.4 From 30e14fa7957f4917bb9cbf91cbea09a6ed46337f Mon Sep 17 00:00:00 2001 From: "Jackson M. Tsuji" Date: Thu, 19 Aug 2021 10:50:43 +0900 Subject: [PATCH 09/10] Bump version --- README.md | 2 +- backblast | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index aa46531..d96ce67 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ cd BackBLAST_Reciprocal_BLAST git checkout develop # optionally go to a specific branch # Create the conda env based on the YAML file in the repo -# It is recommended that you run this command using mamba instead of conda to save time - but conda should also work. +# It is recommended that you run this command using mamba instead of conda - conda might fail during install. mamba env create -n backblast --file=envs/conda_requirements.yaml # Copy the key repo contents into a conda share folder diff --git a/backblast b/backblast index 3423d3e..c1aa38f 100755 --- a/backblast +++ b/backblast @@ -1,11 +1,11 @@ #!/usr/bin/env bash set -euo pipefail # BackBLAST -# Copyright Lee H. Bergstrand and Jackson M. Tsuji, 2019 +# Copyright Lee H. Bergstrand and Jackson M. Tsuji, 2021 # The entry script for running BackBLAST via command line # GLOBAL variables -readonly VERSION="2.0.0-alpha" +readonly VERSION="2.0.0-alpha5" readonly SCRIPT_NAME="${0##*/}" readonly SCRIPT_DIR="$(realpath ${0%/*})" readonly UTILS_DIR="${SCRIPT_DIR}/scripts" From 9ab5ce37086e39ecb8fc0ad8240c2031c3948e25 Mon Sep 17 00:00:00 2001 From: "Jackson M. Tsuji" Date: Thu, 19 Aug 2021 10:55:19 +0900 Subject: [PATCH 10/10] Using fixed version for gtotree --- envs/gtotree.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/envs/gtotree.yaml b/envs/gtotree.yaml index f9fe5ab..84c6d88 100644 --- a/envs/gtotree.yaml +++ b/envs/gtotree.yaml @@ -4,4 +4,4 @@ channels: - astrobiomike - defaults dependencies: - - gtotree>=1.6 + - gtotree=1.6