Skip to content

Commit

Permalink
Add figure generation script to .circleCI
Browse files Browse the repository at this point in the history
- convert final png figure to pdf
- add figure generation script to `run-figures.sh`
- use color palette generated in PR AlexsLemonade#510 for figure color scheme
- format treemap to show fewer redundant values (the treemap in the final figure 1 panel shows no redundant values and represents the `short_histology` and `integrated_diagnosis` values, which I believe is fine in this case)
- redundant text does show up on the treemap plot in `analyses/sample-distribution-analysis/plots` directory (still looking into this)
  • Loading branch information
cbethell committed Mar 18, 2020
1 parent a570b07 commit f813130
Show file tree
Hide file tree
Showing 8 changed files with 175 additions and 119 deletions.
4 changes: 4 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ jobs:
name: Sample Distribution Analyses
command: ./scripts/run_in_ci.sh bash "analyses/sample-distribution-analysis/run-sample-distribution.sh"

- run:
name: Sample Distribution Figure
command: ./scripts/run_in_ci.sh Rscript figures/scripts/fig1-sample-distribution.R

- run:
name: Transcriptome dimensionality reduction
command: ./scripts/run_in_ci.sh ./analyses/transcriptomic-dimension-reduction/ci-dimension-reduction-plots.sh
Expand Down
100 changes: 58 additions & 42 deletions analyses/sample-distribution-analysis/02-multilayer-plots.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,50 +67,66 @@ final_df <- histologies_df %>%
readr::write_tsv(final_df, file.path(results_dir, "plots_df.tsv"))

# Create and save treemap using ggplot2
# Number of palettes needed
n <- length(unique(final_df$level3))

# Now calculate the colors for each data point
# Load the histology color palette: build the path to the TSV under
# `figures/palettes`, then read it (columns `color_names` and `hex_codes`)
color_palette <- file.path(
  root_dir,
  "figures",
  "palettes",
  "histology_color_palette.tsv"
) %>%
  readr::read_tsv()

# Join the color palette for the colors for each short histology value --
# palette is generated in `figures/scripts/color_palettes.R`
final_df2 <- final_df %>%
dplyr::mutate(index = as.numeric(factor(final_df$level3))- 1) %>%
dplyr::group_by(index) %>%
dplyr::mutate(
max_size = max(size),
color = gradient_n_pal(
sequential_hcl(
6,
h = 360 * index[1]/n,
c = c(45, 20),
l = c(30, 80),
power = .5)
)(size/max_size)
)

# Now plot using `geom_treemap` and save
ggsave(
ggplot(final_df2, aes(area = size, fill = color, label=level3, subgroup=level2, subgroup2=level1)) +
dplyr::left_join(color_palette, by = c("level2" = "color_names"))

# Plot the treemap
treemap <-
ggplot(
final_df2,
aes(
area = size,
fill = hex_codes,
label = level1,
subgroup = level2,
subgroup2 = level3
)
) +
geom_treemap() +
geom_treemap_subgroup_border(colour="white") +
geom_treemap_text(fontface = "italic",
colour = "white",
place = "centre",
grow = F,
reflow=T) +
geom_treemap_subgroup_text(place = "top",
grow = T,
reflow = T,
alpha = 0.6,
colour = "#FAFAFA",
min.size = 0) +
geom_treemap_subgroup2_text(place = "centre",
grow = T,
alpha = 0.8,
colour = "#FAFAFA",
min.size = 0) +
scale_fill_identity(),
file = file.path(plots_dir, "distribution_across_cancer_types_treemap.pdf"),
width = 22,
height = 10
geom_treemap_subgroup_border(colour = "white") +
geom_treemap_text(
fontface = "italic",
colour = "white",
place = "center",
alpha = 0.4,
grow = T,
reflow = T
) +
geom_treemap_subgroup_text(
place = "top",
grow = T,
reflow = T,
alpha = 0.6,
colour = "#FAFAFA",
min.size = 0
) +
geom_treemap_subgroup2_text(
place = "bottom",
grow = T,
reflow = T,
alpha = 0.5,
colour = "#FAFAFA",
min.size = 0
) +
theme(legend.position = "none")

# Save treemap as a PDF in the plots directory.
# `filename` and `plot` are spelled out in full so the call does not depend on
# partial matching of `file =` or on positional argument order.
ggsave(
  filename = file.path(plots_dir, "distribution_across_cancer_types_treemap.pdf"),
  plot = treemap,
  width = 22,
  height = 10
)

# Create a treemap (for interactive treemap)
Expand Down
Binary file not shown.
38 changes: 38 additions & 0 deletions figures/palettes/histology_color_palette.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
color_names hex_codes
Adenoma #f23d3d
ATRT #731d1d
Central neurocytoma #b38686
Chondrosarcoma #cc5c33
Chordoma #331c0d
Choroid plexus tumor #ffb380
CNS EFT-CIC #b25f00
CNS lymphoma #f2d6b6
CNS neuroblastoma #736556
CNS Rhabdomyosarcoma #ffaa00
CNS sarcoma #4c3d00
Craniopharyngioma #e2f200
DNET #919926
Dysplasia #d6f2b6
Embryonal Tumor #304d26
Ependymoma #00f241
ETMR #009929
Ganglioglioma #698c7c
Germinoma #39e6c3
Glial-neuronal tumor NOS #005359
Gliosis #263233
Hemangioblastoma #00c2f2
Hemangioma #40a6ff
HGAT #406280
Langerhans Cell histiocytosis #0044ff
LGAT #00144d
LGMT #acbbe6
Medulloblastoma #7373e6
Meningioma #3d0099
MPNST #c200f2
Neurofibroma #917399
none #f1f1f1
Oligodendroglioma #f279da
Other #cc0052
Pineoblastoma #994d6b
Schwannoma #4d2636
Teratoma #ffbfd9
Binary file added figures/pngs/fig1-openpbta-distribution.pdf
Binary file not shown.
Binary file removed figures/pngs/fig1-openpbta-distribution.png
Binary file not shown.
7 changes: 6 additions & 1 deletion figures/run-figures.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@ cd "$(dirname "${BASH_SOURCE[0]}")"
# Make an output folder for all directories
mkdir -p pngs

## Sample distribution
################ Sample distribution figure
# Run sample distribution analysis
bash ../analyses/sample-distribution-analysis/run-sample-distribution.sh

# Run the figure assembly
Rscript scripts/fig1-sample-distribution.R

################ Mutational landscape figure
# Run both SNV caller consensus scripts
Expand Down
145 changes: 69 additions & 76 deletions figures/scripts/fig1-sample-distribution.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@

# Load in libraries
library(dplyr)
library(ggplot2)
library(colorspace)
library(scales)
library(treemapify)
library(patchwork)

# Magrittr pipe
`%>%` <- dplyr::`%>%`
Expand Down Expand Up @@ -38,74 +40,72 @@ sample_distribution_dir <- file.path(
disease_expression <-
readr::read_tsv(file.path(sample_distribution_dir, "results", "disease_expression.tsv"))

# Read in plots data.frame file from `02-multilaye-plots.R`
# Read in plots data.frame file from `02-multilayer-plots.R`
plots_df <-
readr::read_tsv(file.path(sample_distribution_dir, "results", "plots_df.tsv"))

# Reorder the columns to be displayed in descending order by count on the plot
disease_expression$integrated_diagnosis <- with(disease_expression,
reorder(integrated_diagnosis, -count))

#### Re-run the individual plots ----------------------------------------------
# Load the histology color palette: build the path to the TSV under
# `figures/palettes`, then read it (columns `color_names` and `hex_codes`)
color_palette <- file.path(
  root_dir,
  "figures",
  "palettes",
  "histology_color_palette.tsv"
) %>%
  readr::read_tsv()

# Create a bar plot of sample distribution across cancer types
gg_types_bar <- disease_expression %>%
ggplot2::ggplot(ggplot2::aes(x = integrated_diagnosis, y = count, fill = count)) +
ggplot2::geom_col() +
ggplot2::theme_bw() +
ggplot2::labs(x = "Cancer Types", y = "Count",
title = "Sample Distribution Across Cancer Types") +
ggplot2::scale_y_continuous(breaks = seq(0, 500, by = 100)) +
ggplot2::theme(axis.text.x = ggplot2::element_text(
angle = 75,
hjust = 1,
size = 8
),
panel.grid = ggplot2::element_blank()) +
ggplot2::geom_text(nudge_y = 5.5, size = 1,
ggplot2::aes(label = paste0(disease_expression$percent)))
#### Re-run the individual plots ----------------------------------------------

# Create a treemap of broad histology, short histology, and integrated diagnosis
# Number of palettes needed
n <- length(unique(plots_df$level3))

# Now calculate the colors for each data point
# Join the color palette for the colors for each short histology value --
# palette is generated in `figures/scripts/color_palettes.R`
plots_df2 <- plots_df %>%
mutate(index = as.numeric(factor(plots_df$level3))- 1) %>%
group_by(index) %>%
mutate(
max_size = max(size),
color = gradient_n_pal(
sequential_hcl(
6,
h = 360 * index[1]/n,
c = c(45, 20),
l = c(30, 80),
power = .5)
)(size/max_size)
)

# Plot the treemap
treemap <- ggplot(plots_df2, aes(area = size, fill = color, label=level3, subgroup=level2, subgroup2=level1)) +
geom_treemap() +
geom_treemap_subgroup_border(colour="white") +
geom_treemap_text(fontface = "italic",
colour = "white",
place = "centre",
grow = F,
reflow=T) +
geom_treemap_subgroup_text(place = "top",
grow = T,
reflow = T,
alpha = 0.6,
colour = "#FAFAFA",
min.size = 0) +
geom_treemap_subgroup2_text(place = "centre",
grow = T,
alpha = 0.8,
colour = "#FAFAFA",
min.size = 0) +
scale_fill_identity()
left_join(color_palette, by = c("level2" = "color_names"))

# Plot the treemap where level1 is `broad_histology`,
# level2 is `short_histology`, and level3 is `integrated_diagnosis`.
# Tile area comes from `size`; tile fill comes from the `hex_codes` column
# joined onto `plots_df2` from the histology color palette.
treemap <-
  ggplot(
    plots_df2,
    aes(
      area = size,
      fill = hex_codes,
      label = level1,
      subgroup = level2,
      subgroup2 = level3
    )
  ) +
  geom_treemap() +
  geom_treemap_subgroup_border(colour = "white") +
  # Tile labels (level1)
  geom_treemap_text(
    fontface = "italic",
    colour = "white",
    place = "center",
    alpha = 0.4,
    grow = TRUE,
    reflow = TRUE
  ) +
  # Subgroup labels (level2)
  geom_treemap_subgroup_text(
    place = "top",
    grow = TRUE,
    reflow = TRUE,
    alpha = 0.6,
    colour = "#FAFAFA",
    min.size = 0
  ) +
  # Nested subgroup labels (level3)
  geom_treemap_subgroup2_text(
    place = "bottom",
    grow = TRUE,
    reflow = TRUE,
    alpha = 0.5,
    colour = "#FAFAFA",
    min.size = 0
  ) +
  # `hex_codes` holds literal colors. Without an identity scale ggplot2 treats
  # the strings as categories and draws its default discrete palette instead.
  scale_fill_identity() +
  theme(legend.position = "none")

## TODO: Re-run Github Contributions plot/table here -- for now we will define
## this plot as NULL
Expand All @@ -117,26 +117,19 @@ project_assays_plot <- NULL
project_features_plot <- NULL

#### Assemble multipanel plot -------------------------------------------------
ggpubr::ggarrange(ggpubr::ggarrange(treemap,
project_features_plot,
ncol = 2,
labels = c("A", "C"),
widths = c(2, 1.6),
font.label = list(size = 20)
),
ggpubr::ggarrange(project_assays_plot,
github_contributions_plot,
ncol = 2,
labels = c("B", "D"),
widths = c(1.6, 2),
font.label = list(size = 20)
),
nrow = 2,
heights = c(2, 2)
)

# Save to PNG
ggplot2::ggsave(file.path(output_dir, "fig1-openpbta-distribution.png"),
width = 16.5, height = 15,
# Combine plots with patchwork
# Layout of the four plots will be two over the other two
# (2 columns and 2 rows)
# NOTE(review): the non-treemap panels are assigned NULL earlier in this
# script, so confirm the assembled layout once the real plots exist.
combined_plot <- treemap + project_features_plot +
  project_assays_plot + github_contributions_plot +
  plot_layout(ncol = 2, nrow = 2) &
  theme(
    # add uniform labels
    axis.text.x = element_text(size = 9),
    axis.text.y = element_text(size = 9)
  )

# Save to PDF
# Pass the assembled figure explicitly rather than relying on `last_plot()`,
# so the file written is always `combined_plot`.
ggplot2::ggsave(
  filename = file.path(output_dir, "fig1-openpbta-distribution.pdf"),
  plot = combined_plot,
  width = 10,
  height = 8,
  units = "in"
)

0 comments on commit f813130

Please sign in to comment.