Skip to content

Commit

Permalink
Add hex. Add clustermap and networkmap funcs
Browse files Browse the repository at this point in the history
  • Loading branch information
bschilder committed Mar 3, 2023
1 parent c5fa944 commit bf71a55
Show file tree
Hide file tree
Showing 22 changed files with 803 additions and 39 deletions.
10 changes: 8 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ Depends: R (>= 4.0.0)
biocViews:
Imports:
MASS,
methods
methods,
data.table
Suggests:
rworkflows,
markdown,
Expand All @@ -27,7 +28,12 @@ Suggests:
knitr,
covr,
testthat (>= 3.0.0),
utils
utils,
heatmaply,
ggplot2,
tidygraph,
ggnetwork,
pals
Remotes:
github::baolinwu/MSKAT
RoxygenNote: 7.2.3
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,14 @@ export(T_3)
export(T_eta)
export(approximate_distribution_coefficient_estimate_T3)
export(chisq_test)
export(clustermap)
export(coefficient_estimate)
export(gen_autoregressive)
export(generate_null_distribution_T3)
export(metaCCA)
export(networkmap)
export(postprocess_data)
import(data.table)
importFrom(MASS,ginv)
importFrom(MASS,mvrnorm)
importFrom(stats,pchisq)
8 changes: 7 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
## New features

* Add hex sticker!
* Add Docker vignette.
* Add link to Docker vignette in README.
* New functions:
- `clustermap`
- `postprocess_data`
- `networkmap`

## Bug fixes

Expand All @@ -12,7 +18,7 @@
* Recompress built-in data with `tools::resaveRdaFiles`
* Fix `Dependence on R version ‘4.0.2’ not with patchlevel 0`
* Rename vignette *getting_started.Rmd* to *ThreeWayTest.Rmd* convention.

* Organize *getting_started.Rmd* chunks.


# ThreeWayTest 0.99.0
Expand Down
69 changes: 69 additions & 0 deletions R/clustermap.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#' Cluster map
#'
#' Generate an interactive clustered heatmap of summary statistics data.
#' @param dat Summary statistics data.
#' @param cols Numeric columns to plot in the heatmap.
#' @param i Indices of rows to include. Set to \code{NULL} to include all rows,
#' but be warned that this can become very computationally expensive.
#' @param agg_var Variable to aggregate data by.
#' Set to \code{NULL} to skip this step.
#' @param agg_fun Function to aggregate \code{cols} with.
#' @param as_cor Show the heatmap as a correlation matrix
#' instead of a feature x sample matrix.
#' @param annot_vars Variables in \code{dat} to include as row-wise annotations.
#' @param show_plot Print the plot.
#' @inheritParams heatmaply::heatmaply
#' @inheritDotParams heatmaply::heatmaply
#' @returns A named list containing
#' an interactive heatmaply object and the data used to create it.
#'
#' @export
#' @import data.table
#' @examples
#' dat <- ThreeWayTest::data_matrix_final
#' cm <- clustermap(dat = dat)
clustermap <- function(dat,
cols = grep("^w",names(dat), value = TRUE),
i = seq_len(50),
agg_var = NULL,
agg_fun = mean,
as_cor = FALSE,
k_row = 3,
annot_vars = c("TYPE","GENE"),
show_plot = TRUE,
...){

requireNamespace("heatmaply")

if(is.null(agg_var)) agg_var <- "SNP"
dat <- postprocess_data(dat = dat,
cols = cols,
agg_var = agg_var,
agg_fun = agg_fun)
#### Subset data ####
if(!is.null(i)) dat <- dat[i,]
#### Create matrix ####
X <- as.matrix(dat[,cols,with=FALSE]) |> `rownames<-`(
dat[[agg_var]]
)
#### Convert to correlation matrix ####
if(isTRUE(as_cor)) X <- stats::cor(X)
#### Create row annotations ####
annot_vars <- annot_vars[annot_vars %in% names(dat) &
(!annot_vars %in% agg_var)]
row_side_colors <- if(length(annot_vars)>0) {
dat[,annot_vars,with=FALSE]
} else {
NULL
}
#### Create heatmap ####
cm <- heatmaply::heatmaply(X,
row_side_colors = row_side_colors,
k_row = min(nrow(X),k_row),
...)
#### Show plot ####
if(isTRUE(show_plot)) methods::show(cm)
return(list(plot=cm,
data=list(X=X,
row_side_colors=row_side_colors)))
}
68 changes: 68 additions & 0 deletions R/networkmap.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#' Network map
#'
#' Generate an interactive network map of the summary statistics data.
#' @source
#' \href{https://ggraph.data-imaginist.com/articles/Layouts.html}{
#' ggraph layouts}
#' @source \href{https://github.com/thomasp85/ggraph/issues/75}{
#' ggraph issue with scale functions}
#' @param node_vars Columns within \code{dat} to set as nodes.
#' Each node variable will link to the next node in the character vector.
#' You can repeat column names to create more connections between nodes.
#' @param snps A character vector of SNP RSIDs to subset \code{dat} by.
#' @param node_size_range The minimum / maximum size of each node.
#' @inheritParams clustermap
#' @inheritParams ggraph::ggraph
#' @returns A named list containing
#' a network plot and the data used to create it.
#'
#' @export
#' @import data.table
#' @examples
#' dat <- ThreeWayTest::data_matrix_final
#' nm <- networkmap(dat = dat)
networkmap <- function(dat,
cols = grep("^w",names(dat), value = TRUE),
node_vars = c("dataset","GENE","SNP"),
i = seq_len(50),
snps = unique(dat$SNP)[i],
agg_var = NULL,
agg_fun = mean,
as_cor = FALSE,
k_row = 3,
annot_vars = c("TYPE","GENE"),
show_plot = TRUE,
layout = "nicely",
node_size_range = NULL){
# templateR:::args2vars(networkmap)
SNP <- NULL;

if(is.null(agg_var)) agg_var <- "SNP"
dat <- postprocess_data(dat = dat,
cols = cols,
agg_var = agg_var,
agg_fun = agg_fun)
#### Subset data ####
if(length(snps)>0) dat <- dat[SNP %in% snps,]
if(length(i)>0) dat <- dat[i,]
#### Make graph data ####
tg <- networkmap_tidygraph(dat = dat,
cols = cols,
node_vars = node_vars)
#### Estimate a reasonable point size range ####
if(is.null(node_size_range)){
node_size_range <- c(3,20)*(69/length(tg$graph))
}
#### Make plot ####
# gg <- networkmap_ggraph(tg = tg,
# layout = layout,
# node_size_range = node_size_range)
gg <- networkmap_ggnetwork(tg = tg,
layout = layout,
node_size_range = node_size_range)
#### Show ####
if(isTRUE(show_plot)) methods::show(gg)
#### Return ####
return(list(plot=gg,
data=tg))
}
125 changes: 125 additions & 0 deletions R/networkmap_ggnetwork.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#' Network map: make ggraph
#'
#' Subfunction of \link[ThreeWayTest]{networkmap}.
#' @inheritParams networkmap
#' @returns \link[ggnetwork]{ggnetwork}
#' @keywords internal
networkmap_ggnetwork <- function(tg,
layout = "nicely",
node_size_range = NULL,
edge_alpha=.25,
seed=2023){
requireNamespace("ggnetwork")
requireNamespace("ggplot2")
requireNamespace("pals")
mean_value <- xend <- yend <- name <-x <- y<- node_type <- NULL;

set.seed(seed)
ig <- tidygraph::as.igraph(tg$graph) #|> igraph::simplify()
# ggnet <- tg$graph |> tidygraph::activate("edges") |>tidygraph::as_tibble()
ggnet <- suppressWarnings(
ggnetwork::fortify(ig, list(layout="graphopt"))
)

( ggplot2::ggplot(ggnet,
ggplot2::aes(x=x,y=y,
xend=xend,
yend=yend,
label=name)) +
ggplot2::geom_density2d_filled(adjust=.75) +
ggnetwork::geom_edges(
ggplot2::aes(color=mean_value),
alpha=.5,
arrow = ggplot2::arrow(length = ggplot2::unit(5, "pt"),
type = "closed"),
curvature = 0.2) +
ggplot2::scale_color_gradientn(colors = pals::plasma(20)) +
ggnetwork::geom_nodes(ggplot2::aes(color=mean_value,
shape=node_type,
size=as.numeric(node_type))) +
ggnetwork::geom_nodelabel(ggplot2::aes(color=as.numeric(node_type)),
fill=ggplot2::alpha("white",.7)) +
ggplot2::scale_size_continuous(range = node_size_range) +
ggnetwork::theme_blank() )#|>

# plotly::ggplotly()
}



#### GGRAPH APPROACH ####
## IMmplemented this originally but then abandoned when I realized it can't be
## used without importing it.
# networkmap_ggraph <- function(tg,
# layout = "nicely",
# node_size_range = NULL,
# edge_alpha=.25){
# requireNamespace("ggraph")
# requireNamespace("ggplot2")
# requireNamespace("pals")
# mean_value <- node_type <- name <- value <- NULL;
# #### Get to/from for edge bundling ####
# con_data <- ggraph::get_con(match(tg$edges$from, tg$nodes$name),
# match(tg$edges$to, tg$nodes$name),
# value=tg$edges$value)
# # root <- which((tidygraph::activate(tg$graph,"nodes")|>
# # data.frame())$node_type=="dataset")
# ggraph::ggraph(graph = tg$graph,
# layout = layout
# # charge = 0.1,
# # spring.length = 1,
# ) + # "tree" "stress"
# ggraph::geom_node_voronoi(ggplot2::aes(color=mean_value,
# fill=mean_value),
# show.legend = FALSE) +
# ggplot2::scale_fill_gradientn(colors=pals::ocean.thermal(1000)[1:200]) +
# # ggraph::geom_node_tile(ggplot2::aes(width=as.numeric(node_type)/10,
# # height=as.numeric(node_type)/10,
# # fill=mean_value)) +
# # ggraph::geom_edge_density(ggplot2::aes(fill=value)) +
# # ggraph::geom_edge_diagonal(alpha=.25,
# # strength = .5,
# # ggplot2::aes(color=value)) +
# ggraph::geom_conn_bundle(
# ggplot2::aes(colour = value),#ggplot2::after_stat(index)),
# data = con_data,
# tension = 1,
# edge_alpha = edge_alpha
# ) +
# # ggraph::scale_edge_size(range = c(3,10)) +
# # ggraph::scale_edge_colour_distiller(palette = "RdPu") +
# ggraph::scale_edge_colour_gradientn(
# colours = ggplot2::alpha(pals::parula(100),alpha = edge_alpha)) +
# ggraph::geom_node_point(ggplot2::aes(fill=mean_value,
# color=as.numeric(node_type),
# size=as.numeric(node_type)*mean_value,
# # size=mean_value,
# shape=node_type),
# # fill="white",
# stroke=5,
# alpha=0.8) +
# ggplot2::scale_size_continuous(range = node_size_range) +
# ggplot2::scale_shape_manual(
# values = seq(21,21+length(unique(tg$nodes$node_type)))
# ) +
# # ggraph::geom_node_label(ggplot2::aes(label = name),
# # fill=ggplot2::alpha("white",alpha = .5)
# # ) +
# ggraph::geom_node_text(ggplot2::aes(label = name,
# color=as.numeric(node_type)),
# alpha=1,
# family = "mono", fontface = "bold",
# # color=ggplot2::alpha("white",alpha = .9)
# ) +
# ggraph::geom_node_text(ggplot2::aes(label = name),
# family = "mono",
# color="white",
# alpha=.85
# ) +
# ggraph::scale_color_viridis() +
# ggplot2::scale_color_gradientn(colors=pals::gnuplot(100)[1:80]) +
# # ggraph::scale_fill_viridis() +
# ggraph::theme_graph()
# # plotly::ggplotly(gg)
# }

41 changes: 41 additions & 0 deletions R/networkmap_tidygraph.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#' Network map: make tidygraph
#'
#' Subfunction of \link[ThreeWayTest]{networkmap}.
#' @inheritParams networkmap
#' @returns \link[tidygraph]{tbl_graph}
#' @keywords internal
networkmap_tidygraph <- function(dat,
cols,
node_vars){

requireNamespace("tidygraph")
value <- node_type <- name <- NULL;

dat_melt <- data.table::melt.data.table(dat,
measure.vars = cols,
variable.name = "dataset",
na.rm = TRUE)
edge_meta_vars <- names(dat_melt)
edges <- lapply(seq_len(length(node_vars)-1), function(i){
dat_melt[,c(node_vars[i],node_vars[i+1],
unique(c("value", edge_meta_vars))),
with=FALSE] |>
`colnames<-`(c("from","to",unique(c("value",edge_meta_vars))))
}) |> data.table::rbindlist()

nodes <- data.table::melt.data.table(
dat_melt,
measure.vars = node_vars,
variable.name = 'node_type',
value.name = "name")[,list(mean_value=mean(value,na.rm=TRUE),
node_type=unique(node_type)),
by=c("name")]|> data.table::setkeyv("name")
# tg <- tidygraph::tbl_graph(nodes = nodes, edges = edges)
g <- tidygraph::as_tbl_graph(x = edges)
g <- g |> tidygraph::activate("nodes") |>
tidygraph::mutate(mean_value=nodes[name,]$mean_value,
node_type=nodes[name,]$node_type)
return(list(graph=g,
nodes=nodes,
edges=edges))
}
Loading

0 comments on commit bf71a55

Please sign in to comment.