Skip to content

Commit

Permalink
Merge pull request #54 from LeeBergstrand/update_R
Browse files Browse the repository at this point in the history
Update R code and conda install
  • Loading branch information
Jackson M. Tsuji authored Sep 2, 2021
2 parents dd2ddb6 + 9ab5ce3 commit 8bc563f
Show file tree
Hide file tree
Showing 11 changed files with 164 additions and 140 deletions.
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ BackBLAST_Reciprocal_BLAST
==========================
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3465955.svg)](https://doi.org/10.5281/zenodo.3465955)

Copyright Lee H. Bergstrand and Jackson M. Tsuji, 2020
Copyright Lee H. Bergstrand and Jackson M. Tsuji, 2021

# Software overview
(To be updated once BackBLAST2 is complete)
Expand Down Expand Up @@ -34,7 +34,8 @@ cd BackBLAST_Reciprocal_BLAST
git checkout develop # optionally go to a specific branch or version tag

# Create the conda env based on the YAML file in the repo
conda env create -n backblast --file=envs/conda_requirements.yaml
# It is recommended that you run this command using mamba instead of conda - conda might fail during install.
mamba env create -n backblast --file=envs/conda_requirements.yaml

# Copy the key repo contents into a conda share folder
conda activate backblast
Expand Down Expand Up @@ -89,9 +90,11 @@ mkdir -p testing/outputs
# Make sure backblast is added to your PATH before running the test
backblast run testing/inputs/config.yaml testing/outputs --notemp

# See if the output file looks as expected
# See if the output files looks as expected
cmp testing/outputs/blast/combine_blast_tables/blast_tables_combined.csv \
testing/outputs_expected/blast/combine_blast_tables/blast_tables_combined.csv
cmp testing/outputs/heatmap/BackBLAST_heatmap.tsv \
testing/outputs_expected/heatmap/BackBLAST_heatmap.tsv

# Clean up test if everything looks good
rm -r testing/outputs
Expand Down
4 changes: 2 additions & 2 deletions backblast
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#!/usr/bin/env bash
set -euo pipefail
# BackBLAST
# Copyright Lee H. Bergstrand and Jackson M. Tsuji, 2019
# Copyright Lee H. Bergstrand and Jackson M. Tsuji, 2021
# The entry script for running BackBLAST via command line

# GLOBAL variables
readonly VERSION="2.0.0-alpha4"
readonly VERSION="2.0.0-alpha5"
readonly SCRIPT_NAME="${0##*/}"
readonly SCRIPT_DIR="$(realpath ${0%/*})"
readonly UTILS_DIR="${SCRIPT_DIR}/scripts"
Expand Down
46 changes: 24 additions & 22 deletions envs/conda_requirements.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,28 @@ channels:
- bioconda
- defaults
dependencies:
- python=3.7
- snakemake=5.5.4
- biopython=1.74
- blast=2.9
- r=3.5.1
- r-getopt=1.20.2
- r-argparser=0.4
- r-futile.logger=1.4.3
- r-glue=1.3.0
- r-plyr=1.8.4
- r-dplyr=0.7.6
- r-tibble=2.1.3
- r-reshape2=1.4.3
- r-rcolorbrewer=1.1
- r-ggplot2=3.1.1
- r-ape=5.2
- r-maps=3.3.0
- r-phytools=0.6
- r-tidytree=0.2.5
- bioconductor-treeio=1.6.1
- bioconductor-ggtree=1.14.4
- python=3.9
- snakemake=6
- biopython>=1.76,<=1.79
- blast>=2.9,<=2.12
- r-base>=4.0,<=4.2
- r-codetools
- r-conflicted
- r-getopt=1.20
- r-argparser=0.7
- r-futile.logger=1.4
- r-glue=1
- r-plyr=1.8
- r-dplyr>=1.0.5,<=1.0.7
- r-tibble=3.1
- r-reshape2=1.4
- r-rcolorbrewer=1.1_2
- r-ggplot2=3
- r-ape>=5.3,<=5.5
- r-maps=3.3
- r-phytools>=0.6_99,<=0.7_80
- r-tidytree=0.3
- bioconductor-treeio>=1.10,<=1.17
- bioconductor-ggtree=3.0
- r-gridextra=2.3
- r-egg=0.4.5
- r-egg=0.4
2 changes: 1 addition & 1 deletion envs/gtotree.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ channels:
- astrobiomike
- defaults
dependencies:
- gtotree=1.4.3
- gtotree=1.6
3 changes: 2 additions & 1 deletion scripts/combine_tables.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ main <- function() {
output_table <- dplyr::bind_rows(blast_tables)

flog.info("Writing combining BLAST table to file (**with headers**)")
write.table(output_table, file = params$output_filename, sep = ",", row.names = FALSE, col.names = TRUE)
write.table(output_table, file = params$output_filename, sep = ",", row.names = FALSE,
col.names = TRUE, quote = FALSE)

flog.info("combine_tables.R: Done.")
}
Expand Down
123 changes: 52 additions & 71 deletions scripts/generate_heatmap.R
Original file line number Diff line number Diff line change
@@ -1,31 +1,29 @@
#!/usr/bin/env Rscript
# generate_heatmap.R
# Copyright Lee H. Bergstrand and Jackson M. Tsuji, 2019
# Copyright Lee H. Bergstrand and Jackson M. Tsuji, 2021
# Plots a newick treefile and BLAST table together as a phylogenetic tree and heatmap
# Part of the BackBLAST pipeline

# Load libraries
# Note: warn.conflicts: Normally, when a library is loaded that has a function with identical
# name to another function (e.g., setdiff() in dplyr), a warning is given during package
# load. The warnings are disabled here to prevent excessive messages when the script is run.
# Long-term, once possible to switch to R 3.6.0, options(conflicts.policy(list(warn = FALSE)))
# can be used.
library(conflicted)
library(argparser)
library(futile.logger)
library(glue, warn.conflicts = FALSE)
library(plyr, warn.conflicts = FALSE)
library(dplyr, warn.conflicts = FALSE)
library(tibble, warn.conflicts = FALSE)
library(reshape2, warn.conflicts = FALSE)
library(RColorBrewer, warn.conflicts = FALSE)
library(ggplot2, warn.conflicts = FALSE)
library(ape, warn.conflicts = FALSE)
library(maps, warn.conflicts = FALSE)
library(phytools, warn.conflicts = FALSE)
library(tidytree, warn.conflicts = FALSE)
library(tools)
library(glue)
library(plyr)
library(dplyr)
library(tibble)
library(reshape2)
library(RColorBrewer)
library(ggplot2)
library(ape)
library(maps)
library(phytools)
library(tidytree)
suppressPackageStartupMessages(library(treeio))
suppressPackageStartupMessages(library(ggtree))
library(gridExtra, warn.conflicts = FALSE)
library(egg, warn.conflicts = FALSE)
library(gridExtra)
library(egg)

#' Reads a data table as a tibble with several default parameters. All parameters below are the same as read.table()
#'
Expand All @@ -41,31 +39,6 @@ read_tibble <- function(file, sep = "\t", header = TRUE, stringsAsFactors = FALS
return(data_table)
}

#' Chooses a nice discrete colour scale of the desired length
#'
#' @param length Numeric; length of the desired colour scale vector
#' @return Character vector of HTML colour codes
#' @export
choose_discrete_colour_scale <- function(length) {

# Choose the best colour scale based on the number of entries to be plotted
if (length == 2) {
colour_palette <- RColorBrewer::brewer.pal(n = 3, name = "Dark2")[c(1, 2)]
} else if (length <= 8) {
colour_palette <- RColorBrewer::brewer.pal(n = length, name = "Dark2")
} else if (length <= 12) {
colour_palette <- RColorBrewer::brewer.pal(n = length, name = "Set3")
} else if (length > 12) {
colour_palette <- scales::hue_pal(h = c(20, 290))(length)
} else {
futile.logger::flog.error(glue::glue("Something is wrong with the provided length ('", length,
"'). Is it non-numeric? Exiting..."))
quit(save = "no", status = 1)
}

return(colour_palette)
}

#' Convert character "NA" (from command line) into constant NA
#'
#' @param entry single-length vector of any type
Expand All @@ -89,30 +62,27 @@ convert_to_constant_NA <- function(entry) {
#' @return ggtree-format tree, rooted
#' @export
reroot_ggtree <- function(phylo_tree, root_name) {
# Extract the data from the tree in tabular format
tree_data <- ggtree::ggtree(phylo_tree)$data

# Check that the root_name exists
if ( !(root_name %in% tree_data$label) ) {
futile.logger::flog.error(glue::glue("Could not find the root_name '", root_name,
"' in the provided tree. Cannot re-root. Exiting..."))
quit(save = "no", status = 1)
}

# Get the ggtree ID # of the to-be root
tip_label_index <- match(x = root_name, table = phylo_tree$tip.label)

# Check that only one matching parent node exists
if (length(tip_label_index) != 1) {
futile.logger::flog.error(glue::glue("The provided root_name '", root_name,
"' appears to have more than one associated node. Cannot re-root. Exiting..."))
quit(save = "no", status = 1)

# Midpoint root the tree if requested; otherwise root to a name
if (root_name == "midpoint") {
futile.logger::flog.info("Midpoint rooting the tree")
tree_rooted <- phytools::midpoint.root(phylo_tree)
} else {
futile.logger::flog.info(glue::glue("Re-rooting tree to '", root_name, "'"))
# Extract the data from the tree in tabular format
tree_data <- ggtree::ggtree(phylo_tree)$data

# Check that the root_name exists
if ( !(root_name %in% tree_data$label) ) {
futile.logger::flog.error(glue::glue("Could not find the root_name '", root_name,
"' in the provided tree. Cannot re-root. Exiting..."))
quit(save = "no", status = 1)
}

# Re-root
tree_rooted <- treeio::root(phylo_tree, outgroup = root_name)
}

# Re-root
tree_rooted <- ggtree::reroot(phylo_tree, node = tip_label_index)


return(tree_rooted)
}

Expand Down Expand Up @@ -193,11 +163,10 @@ plot_ggtree <- function(phylo_tree, bootstrap_label_data) {
load_and_plot_phylogenetic_tree <- function(input_phylogenetic_tree_filepath, root_name, bootstrap_cutoff) {
# Read tree
futile.logger::flog.info("Reading input phylogenetic tree")
phylo_tree <- ape::read.tree(input_phylogenetic_tree_filepath)
phylo_tree <- treeio::read.tree(input_phylogenetic_tree_filepath)

# Optionally re-root tree
if ( !is.na(root_name) ) {
futile.logger::flog.info(glue::glue("Re-rooting tree to '", root_name, "'"))
phylo_tree <- reroot_ggtree(phylo_tree, root_name)
}

Expand Down Expand Up @@ -497,6 +466,7 @@ main <- function(params) {
params$gene_metadata_filepath))
futile.logger::flog.info(glue::glue("Plot width (mm): ", params$plot_width))
futile.logger::flog.info(glue::glue("Plot height (mm): ", params$plot_height))
futile.logger::flog.info(glue::glue("Write data table: ", params$write_data))
futile.logger::flog.info("############################")

# Convert character "NA" (from command line) into true NA
Expand All @@ -521,7 +491,15 @@ main <- function(params) {
blast_results_list <- load_and_plot_blast_results(params$input_blast_table_filepath,
tip_order, params$gene_metadata_filepath,
params$genome_metadata_filepath)


# Save the heatmap data
if (params$write_data == TRUE) {
futile.logger::flog.info("Saving raw heatmap data to file")
output_table_filepath = paste(tools::file_path_sans_ext(params$output_pdf_filepath), ".tsv", sep = "")
write.table(blast_results_list[[1]], file = output_table_filepath, sep = "\t",
col.names = TRUE, row.names = FALSE, quote = FALSE)
}

# Save the plot
if (!is.na(params$input_phylogenetic_tree_filepath)) {
# Combine the tree and heatmap
Expand Down Expand Up @@ -576,14 +554,17 @@ if ( !interactive() ) {
help = "Bootstrap cutoff value",
type = "numeric", default = NA)
parser <- argparser::add_argument(parser = parser, arg = "--root_name", short = "-r",
help = "Root name",
help = "Root name ('midpoint' to midpoint root or NA to keep the existing root; default NA)",
type = "character", default = NA)
parser <- argparser::add_argument(parser = parser, arg = "--plot_width", short = "-w",
help = "Plot width (mm)",
type = "numeric", default = 400)
parser <- argparser::add_argument(parser = parser, arg = "--plot_height", short = "-z",
help = "Plot height (mm)",
type = "numeric", default = 200)
parser <- argparser::add_argument(parser = parser, arg = "--write_data", short = "-d",
help = "Write raw plotting data to disk (same basepath as the PDF, but as a .tsv file)",
flag = TRUE)

params <- argparser::parse_args(parser)

Expand Down
2 changes: 1 addition & 1 deletion snakemake/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ rule generate_heatmap:
plot_height = config.get("plot_height_mm", 200)
shell:
"backblast generate_heatmap -m {params.genome_metadata} -g {params.gene_metadata} "
"-b {params.bootstrap_cutoff} -r {params.root_name} -w {params.plot_width} -z {params.plot_height} "
"-b {params.bootstrap_cutoff} -r {params.root_name} -w {params.plot_width} -z {params.plot_height} -d "
"{input.tree_file} {input.blast_table} {output} 2>&1 | tee {log} && "
"if [[ -f Rplots.pdf ]]; then rm Rplots.pdf; fi"

Expand Down
3 changes: 2 additions & 1 deletion snakemake/template_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ threads: 1
phylogenetic_tree_newick: NA
# Optional: set the cutoff for displayed bootstrap values. Anything greater than to equal to the cutoff will be shown.
bootstrap_cutoff: 80
# Optional: if you want to re-root your tree, then give the exact name of the tip you want to re-root by. Otherwise, 'NA'.
# Optional: if you want to re-root your tree, then give the exact name of the tip you want to re-root by.
# 'midpoint' will midpoint root the tree. 'NA' will keep the current tree topology.
root_name: NA


Expand Down
Original file line number Diff line number Diff line change
@@ -1,38 +1,38 @@
"subject_name","qseqid","sseqid","pident","evalue","qcovhsp","bitscore"
"Chl_ferrooxidans_KoFox","WP_006365810.1","WP_006365810.1",100,5.58e-115,100,313
"Chl_ferrooxidans_KoFox","WP_006367305.1","WP_006367305.1",100,0,100,602
"Chl_ferrooxidans_KoFox","WP_006367307.1","WP_006367307.1",100,0,100,493
"Chl_ferrooxidans_KoFox","WP_006367308.1","WP_006367308.1",100,0,100,611
"Chl_ferrooxidans_KoFox","WP_006367309.1","WP_006367309.1",100,0,100,1232
"Chl_ferrooxidans_KoFox","WP_006367310.1","WP_006367310.1",100,1.25e-114,100,312
"Chl_ferrooxidans_KoFox","WP_006367320.1","WP_006367320.1",100,0,100,732
"Chl_ferrooxidans_KoFox","WP_006367321.1","WP_006367321.1",100,0,100,572
"Chl_ferrooxidans_KoFox","WP_006367322.1","WP_006367322.1",100,0,100,555
"Chl_ferrooxidans_KoFox","WP_006367323.1","WP_006367323.1",100,0,100,690
"Chl_ferrooxidans_KoFox","WP_006367388.1","WP_006367388.1",100,0,100,839
"Chl_ferrooxidans_KoFox","WP_006367389.1","WP_006367389.1",100,3.82e-90,100,248
"Chl_luteolum_DSM_273","WP_006365810.1","WP_011356871.1",71.429,4.53e-83,99,233
"Chl_luteolum_DSM_273","WP_006367305.1","WP_011358288.1",53.004,3.83e-113,96,321
"Chl_luteolum_DSM_273","WP_006367307.1","WP_011358287.1",67.257,1.85e-116,95,324
"Chl_luteolum_DSM_273","WP_006367308.1","WP_011358286.1",79.522,0,100,505
"Chl_luteolum_DSM_273","WP_006367309.1","WP_011358285.1",71.953,0,100,887
"Chl_luteolum_DSM_273","WP_006367310.1","WP_041463880.1",53.147,8.67e-59,95,171
"Chl_luteolum_DSM_273","WP_006367320.1","WP_011358284.1",75.35,0,100,560
"Chl_luteolum_DSM_273","WP_006367321.1","WP_011358283.1",71.831,5.83e-144,98,398
"Chl_luteolum_DSM_273","WP_006367322.1","WP_011358282.1",77.256,3.78e-157,100,430
"Chl_luteolum_DSM_273","WP_006367323.1","WP_011358281.1",76.347,0,98,528
"Chl_luteolum_DSM_273","WP_006367388.1","WP_011358173.1",53.364,2.54e-149,100,421
"Chl_phaeoferrooxidans_KB01","WP_006365810.1","WP_076790319.1",98.71,6.57e-114,100,311
"Chl_phaeoferrooxidans_KB01","WP_006367305.1","WP_076789648.1",93.151,0,100,571
"Chl_phaeoferrooxidans_KB01","WP_006367307.1","WP_076790657.1",95.359,3.02e-175,100,473
"Chl_phaeoferrooxidans_KB01","WP_006367308.1","WP_076790659.1",97.952,0,100,603
"Chl_phaeoferrooxidans_KB01","WP_006367309.1","WP_076790661.1",97.496,0,100,1201
"Chl_phaeoferrooxidans_KB01","WP_006367310.1","WP_076790662.1",82.667,6.27e-95,100,263
"Chl_phaeoferrooxidans_KB01","WP_006367320.1","WP_076790666.1",96.078,0,100,704
"Chl_phaeoferrooxidans_KB01","WP_006367321.1","WP_076790668.1",93.426,0,100,543
"Chl_phaeoferrooxidans_KB01","WP_006367322.1","WP_076790669.1",98.917,0,100,552
"Chl_phaeoferrooxidans_KB01","WP_006367323.1","WP_076790671.1",97.345,0,100,674
"Chl_phaeoferrooxidans_KB01","WP_006367388.1","WP_076792910.1",89.538,0,100,744
"Chl_phaeoferrooxidans_KB01","WP_006367389.1","WP_076793039.1",94.915,7.5e-87,100,239
"Chl_sp_N1","WP_006367388.1","WP_131354685.1",57.178,5.41e-158,98,442
"Ignavibacterium_album_JCM_16511_outgroup","WP_006367320.1","WP_014560057.1",44.558,2.72e-82,79,246
subject_name,qseqid,sseqid,pident,evalue,qcovhsp,bitscore
Chl_ferrooxidans_KoFox,WP_006365810.1,WP_006365810.1,100,5.58e-115,100,313
Chl_ferrooxidans_KoFox,WP_006367305.1,WP_006367305.1,100,0,100,602
Chl_ferrooxidans_KoFox,WP_006367307.1,WP_006367307.1,100,0,100,493
Chl_ferrooxidans_KoFox,WP_006367308.1,WP_006367308.1,100,0,100,611
Chl_ferrooxidans_KoFox,WP_006367309.1,WP_006367309.1,100,0,100,1232
Chl_ferrooxidans_KoFox,WP_006367310.1,WP_006367310.1,100,1.25e-114,100,312
Chl_ferrooxidans_KoFox,WP_006367320.1,WP_006367320.1,100,0,100,732
Chl_ferrooxidans_KoFox,WP_006367321.1,WP_006367321.1,100,0,100,572
Chl_ferrooxidans_KoFox,WP_006367322.1,WP_006367322.1,100,0,100,555
Chl_ferrooxidans_KoFox,WP_006367323.1,WP_006367323.1,100,0,100,690
Chl_ferrooxidans_KoFox,WP_006367388.1,WP_006367388.1,100,0,100,839
Chl_ferrooxidans_KoFox,WP_006367389.1,WP_006367389.1,100,3.82e-90,100,248
Chl_luteolum_DSM_273,WP_006365810.1,WP_011356871.1,71.429,4.53e-83,99,233
Chl_luteolum_DSM_273,WP_006367305.1,WP_011358288.1,53.004,3.83e-113,96,321
Chl_luteolum_DSM_273,WP_006367307.1,WP_011358287.1,67.257,1.85e-116,95,324
Chl_luteolum_DSM_273,WP_006367308.1,WP_011358286.1,79.522,0,100,505
Chl_luteolum_DSM_273,WP_006367309.1,WP_011358285.1,71.953,0,100,887
Chl_luteolum_DSM_273,WP_006367310.1,WP_041463880.1,53.147,8.67e-59,95,171
Chl_luteolum_DSM_273,WP_006367320.1,WP_011358284.1,75.35,0,100,560
Chl_luteolum_DSM_273,WP_006367321.1,WP_011358283.1,71.831,5.83e-144,98,398
Chl_luteolum_DSM_273,WP_006367322.1,WP_011358282.1,77.256,3.78e-157,100,430
Chl_luteolum_DSM_273,WP_006367323.1,WP_011358281.1,76.347,0,98,528
Chl_luteolum_DSM_273,WP_006367388.1,WP_011358173.1,53.364,2.54e-149,100,421
Chl_phaeoferrooxidans_KB01,WP_006365810.1,WP_076790319.1,98.71,6.57e-114,100,311
Chl_phaeoferrooxidans_KB01,WP_006367305.1,WP_076789648.1,93.151,0,100,571
Chl_phaeoferrooxidans_KB01,WP_006367307.1,WP_076790657.1,95.359,3.02e-175,100,473
Chl_phaeoferrooxidans_KB01,WP_006367308.1,WP_076790659.1,97.952,0,100,603
Chl_phaeoferrooxidans_KB01,WP_006367309.1,WP_076790661.1,97.496,0,100,1201
Chl_phaeoferrooxidans_KB01,WP_006367310.1,WP_076790662.1,82.667,6.27e-95,100,263
Chl_phaeoferrooxidans_KB01,WP_006367320.1,WP_076790666.1,96.078,0,100,704
Chl_phaeoferrooxidans_KB01,WP_006367321.1,WP_076790668.1,93.426,0,100,543
Chl_phaeoferrooxidans_KB01,WP_006367322.1,WP_076790669.1,98.917,0,100,552
Chl_phaeoferrooxidans_KB01,WP_006367323.1,WP_076790671.1,97.345,0,100,674
Chl_phaeoferrooxidans_KB01,WP_006367388.1,WP_076792910.1,89.538,0,100,744
Chl_phaeoferrooxidans_KB01,WP_006367389.1,WP_076793039.1,94.915,7.5e-87,100,239
Chl_sp_N1,WP_006367388.1,WP_131354685.1,57.178,5.41e-158,98,442
Ignavibacterium_album_JCM_16511_outgroup,WP_006367320.1,WP_014560057.1,44.558,2.72e-82,79,246
Binary file modified testing/outputs_expected/heatmap/BackBLAST_heatmap.pdf
Binary file not shown.
Loading

0 comments on commit 8bc563f

Please sign in to comment.