diff --git a/DESCRIPTION b/DESCRIPTION
index e4e908a..c7c4965 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -2,7 +2,7 @@ Package: oncoEnrichR
Type: Package
Title: Cancer-dedicated gene set interpretation
Version: 1.3.0
-Date: 2022-09-12
+Date: 2022-09-23
Authors@R: person(given = "Sigve", family = "Nakken", role = c("aut", "cre"),
email = "sigven@ifi.uio.no",
comment = c(ORCID = "0000-0001-8468-2050"))
diff --git a/R/disease_drug.R b/R/disease_drug.R
index 8696e21..2095369 100644
--- a/R/disease_drug.R
+++ b/R/disease_drug.R
@@ -329,7 +329,11 @@ target_drug_associations <- function(qgenes,
.data$targeted_cancer_drugs_ep,
.data$approved_drugs) |>
dplyr::filter(!is.na(.data$targeted_cancer_drugs_lp) |
- !is.na(.data$targeted_cancer_drugs_ep))
+ !is.na(.data$targeted_cancer_drugs_ep)) |>
+ dplyr::rename(
+ drugs_late_phase = .data$targeted_cancer_drugs_lp,
+ drugs_early_phase = .data$targeted_cancer_drugs_ep
+ )
lgr::lgr$info( paste0("Open Targets Platform: annotation of target tractabilities (druggability)"))
diff --git a/R/onco_enrichr.R b/R/onco_enrichr.R
index 9f6ea8a..4c8eec1 100644
--- a/R/onco_enrichr.R
+++ b/R/onco_enrichr.R
@@ -1610,25 +1610,36 @@ onco_enrich <- function(query = NULL,
PROTEIN_DOMAIN = dplyr::if_else(
!is.na(.data$PFAM_ID),
paste0(
- "",
.data$PFAM_DOMAIN_NAME,
""),
as.character(NA)
)
) |>
- dplyr::select(-c(.data$PFAM_DOMAIN_NAME, .data$PFAM_ID)) |>
- dplyr::left_join(dplyr::select(oeDB[['genedb']][['all']],
- .data$symbol, .data$ensembl_gene_id),
- by = c("SYMBOL" = "symbol")) |>
- dplyr::rename(ENSEMBL_GENE_ID = .data$ensembl_gene_id) |>
- dplyr::mutate(ENSEMBL_TRANSCRIPT_ID =
- paste0("",
- .data$ENSEMBL_TRANSCRIPT_ID,"")) |>
+ dplyr::select(
+ -c(.data$PFAM_DOMAIN_NAME, .data$PFAM_ID)) |>
+ dplyr::left_join(
+ dplyr::select(oeDB[['genedb']][['all']],
+ .data$symbol, .data$ensembl_gene_id),
+ by = c("SYMBOL" = "symbol")) |>
+ dplyr::mutate(
+ ENSEMBL_GENE_ID =
+ paste0(
+ "",
+ .data$ensembl_gene_id,"")) |>
+ dplyr::mutate(
+ ENSEMBL_TRANSCRIPT_ID =
+ paste0(
+ "",
+ .data$ENSEMBL_TRANSCRIPT_ID,"")) |>
dplyr::select(-.data$VAR_ID) |>
+ dplyr::rename(CONSEQUENCE_ALTERNATE = .data$VEP_ALL_CSQ) |>
dplyr::select(.data$SYMBOL,
.data$CONSEQUENCE,
.data$PROTEIN_CHANGE,
@@ -1637,7 +1648,11 @@ onco_enrich <- function(query = NULL,
.data$LOSS_OF_FUNCTION,
.data$ENSEMBL_GENE_ID,
.data$ENSEMBL_TRANSCRIPT_ID,
- dplyr::everything())
+ .data$PRIMARY_SITE,
+ .data$SITE_RECURRENCE,
+ .data$TOTAL_RECURRENCE,
+ .data$COSMIC_MUTATION_ID,
+ .data$CONSEQUENCE_ALTERNATE)
}
for(psite in names(onc_rep[["data"]][["tcga"]][["aberration"]][["table"]][["snv_indel"]])){
@@ -2106,6 +2121,7 @@ write <- function(report,
"subcellcomp",
"cell_tissue",
"aberration",
+ "recurrent_variants",
"coexpression",
"prognostic_association_I",
"prognostic_association_II"
@@ -2115,6 +2131,10 @@ write <- function(report,
if(elem == "cancer_association"){
show_elem <- "disease"
}
+
+ if(elem == "recurrent_variants"){
+ show_elem <- "aberration"
+ }
if(elem == "prognostic_association_I"){
show_elem <- "cancer_prognosis"
}
diff --git a/R/sysdata.rda b/R/sysdata.rda
index 57410bc..2f56bd2 100644
Binary files a/R/sysdata.rda and b/R/sysdata.rda differ
diff --git a/R/utils.R b/R/utils.R
index 0aeeb9d..339a110 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -1077,11 +1077,11 @@ add_excel_sheet <- function(
)
) |>
dplyr::mutate(
- targeted_cancer_drugs_lp =
+ drugs_late_phase =
stringr::str_replace_all(
stringr::str_squish(
stringr::str_trim(
- textclean::replace_html(.data$targeted_cancer_drugs_lp)
+ textclean::replace_html(.data$drugs_late_phase)
)
),
" , ",
@@ -1089,11 +1089,11 @@ add_excel_sheet <- function(
)
) |>
dplyr::mutate(
- targeted_cancer_drugs_ep =
+ drugs_early_phase =
stringr::str_replace_all(
stringr::str_squish(
stringr::str_trim(
- textclean::replace_html(.data$targeted_cancer_drugs_ep)
+ textclean::replace_html(.data$drugs_early_phase)
)
),
" , ",
@@ -1420,6 +1420,78 @@ add_excel_sheet <- function(
}
+ if(analysis_output == "recurrent_variants"){
+
+ if(is.data.frame(report$data$tcga$recurrent_variants)){
+ if(NROW(report$data$tcga$recurrent_variants) > 0){
+ df <-
+ report$data$tcga$recurrent_variants
+
+ colnames(df) <- tolower(colnames(df))
+ df <- as.data.frame(
+ df |>
+ dplyr::mutate(
+ site_recurrence = as.numeric(.data$site_recurrence)
+ ) |>
+ dplyr::arrange(
+ dplyr::desc(.data$total_recurrence),
+ dplyr::desc(.data$site_recurrence)) |>
+ dplyr::mutate(
+ annotation_source = report$config$resources$tcga$name,
+ version = report$config$resources$tcga$version) |>
+ dplyr::mutate(
+ ensembl_gene_id =
+ stringr::str_trim(
+ textclean::replace_html(.data$ensembl_gene_id)
+ )
+ ) |>
+ dplyr::mutate(
+ ensembl_transcript_id =
+ stringr::str_trim(
+ textclean::replace_html(.data$ensembl_transcript_id)
+ )
+ ) |>
+ dplyr::mutate(
+ protein_domain =
+ stringr::str_trim(
+ textclean::replace_html(.data$protein_domain)
+ )
+ ) |>
+ dplyr::mutate(
+ cosmic_mutation_id =
+ stringr::str_trim(
+ textclean::replace_html(.data$cosmic_mutation_id)
+ )
+ ) |>
+ dplyr::mutate(
+ site_recurrence = paste(.data$primary_site,
+ .data$site_recurrence, sep=":")
+ ) |>
+ dplyr::group_by(
+ .data$symbol, .data$consequence,
+ .data$protein_change, .data$protein_domain,
+ .data$mutation_hotspot,
+ .data$loss_of_function,
+ .data$ensembl_gene_id,
+ .data$ensembl_transcript_id,
+ .data$total_recurrence,
+ .data$cosmic_mutation_id
+ ) |>
+ dplyr::summarise(site_recurrence = paste(
+ .data$site_recurrence, collapse=", "
+ ), .groups = "drop") |>
+ dplyr::arrange(
+ dplyr::desc(.data$total_recurrence)
+ )
+
+ )
+
+ target_df <- target_df |>
+ dplyr::bind_rows(df)
+ }
+ }
+ }
+
if(analysis_output == "aberration"){
## cna aberrations
diff --git a/README.md b/README.md
index d2df8b6..99fd25d 100644
--- a/README.md
+++ b/README.md
@@ -20,13 +20,14 @@ Web-based access to **oncoEnrichR** is available at [**https://oncotools.elixir.
## News
+- September 23rd 2022: [**1.3.1 release**](https://sigven.github.io/oncoEnrichR/articles/CHANGELOG.html#version-1-3-1)
- September 12th 2022: [**1.3.0 release**](https://sigven.github.io/oncoEnrichR/articles/CHANGELOG.html#version-1-3-0)
- September 2nd 2022: [**1.2.2 release**](https://sigven.github.io/oncoEnrichR/articles/CHANGELOG.html#version-1-2-2)
- July 13th 2022: [**1.2.1 release**](https://sigven.github.io/oncoEnrichR/articles/CHANGELOG.html#version-1-2-1)
## Example report
-
+
### Contact
@@ -34,7 +35,7 @@ sigven AT ifi.uio.no
### Funding and Collaboration
-OncoEnrichR is supported by the [Centre for Cancer Cell Reprogramming](https://www.med.uio.no/cancell/english/) at the [University of Oslo](https://www.uio.no)/[Oslo University Hospital](https://radium.no), and [Elixir Norway (Oslo node)](https://elixir.no/organization/organisation/elixir-uio).
+oncoEnrichR is supported by the [Centre for Cancer Cell Reprogramming](https://www.med.uio.no/cancell/english/) at the [University of Oslo](https://www.uio.no)/[Oslo University Hospital](https://radium.no), and [Elixir Norway (Oslo node)](https://elixir.no/organization/organisation/elixir-uio).
diff --git a/data_processing_code/RELEASE_NOTES.txt b/data_processing_code/RELEASE_NOTES.txt
index c6d2c81..956cd94 100644
--- a/data_processing_code/RELEASE_NOTES.txt
+++ b/data_processing_code/RELEASE_NOTES.txt
@@ -1,26 +1,26 @@
-##ONCOENRICHR_DB_VERSION = 20220910
-oncoEnrichR https://gihtub.com/sigven/oncoEnrichR R package for functional interrogation of genesets in the context of cancer v1.3.0 oncoEnrichR software
+##ONCOENRICHR_DB_VERSION = 20220921
+oncoEnrichR https://gihtub.com/sigven/oncoEnrichR R package for functional interrogation of genesets in the context of cancer v1.3.1 oncoEnrichR software
Omnipath https://omnipathdb.org/ Database of molecular biology prior knowledge: gene regulatory interactions, enzyme-PTM relationships, protein complexes, protein annotations etc. v3.4.0/OmnipathR omnipath db
hu.MAP http://humap2.proteincomplexes.org/ Human Protein Complex Map v2.0 humap2 db
dorothea https://saezlab.github.io/dorothea/ Gene set resource containing signed transcription factor (TF) - target interactions v1.8.0 dorothea db
tissueEnrich https://www.bioconductor.org/packages/release/bioc/vignettes/TissueEnrich/inst/doc/TissueEnrich.html R package used to calculate enrichment of tissue-specific genes in a set of input genes v1.16.0 tissueenrich software
-oncoPhenoMap https://github.com/sigven/oncoPhenoMap Crossmapped phenotype ontologies for the oncology domain v0.3.8 oncophenomap software
+oncoPhenoMap https://github.com/sigven/oncoPhenoMap Crossmapped phenotype ontologies for the oncology domain v0.4.0 oncophenomap software
STRING https://string-db.org Protein-protein interaction database v11.5 string db
GENCODE https://www.gencodegenes.org/ High quality reference gene annotation and experimental validation v41 gencode db
-TCGA https://cancergenome.nih.gov The Cancer Genome Atlas - Tumor gene expression and somatic DNA aberrations v32.0 (March 29th 2022) tcga db
+TCGA https://cancergenome.nih.gov The Cancer Genome Atlas - Tumor gene expression and somatic DNA aberrations v34.0 (July 27th 2022) tcga db
UniProtKB http://www.uniprot.org Comprehensive resource of protein sequence and functional information v2022_03 uniprot db
NetPath http://www.netpath.org Manually curated resource of signal transduction pathways in humans v1 (2010) netpath db
-EFO https://github.com/EBISPOT/efo Experimental Factor Ontology v3.43.0 efo db
-DiseaseOntology https://github.com/DiseaseOntology Human Disease Ontology 2022-06-07 do db
+EFO https://github.com/EBISPOT/efo Experimental Factor Ontology v3.46.0 efo db
+DiseaseOntology https://github.com/DiseaseOntology Human Disease Ontology 2022-08-29 do db
COMPPI https://comppi.linkgroup.hu/ Compartmentalized protein-protein interaction database v2.1.1 (October 2018) comppi db
-WikiPathways https://www.wikipathways.org A database of biological pathways maintained by and for the scientific community 20220810 wikipathway db
+WikiPathways https://www.wikipathways.org A database of biological pathways maintained by and for the scientific community 20220910 wikipathway db
MSigDB http://software.broadinstitute.org/gsea/msigdb/index.jsp Molecular Signatures Database - collection of annotated gene sets August 2022 (MSigDB v2022.1) msigdb db
REACTOME https://reactome.org Manually curated and peer-reviewed pathway database v81 (MSigDB v2022.1) reactome db
CellChatDB http://www.cellchat.org/cellchatdb/ Multimeric ligand-receptor complexes v1 (2021) cellchatdb db
CellTalkDB http://tcm.zju.edu.cn/celltalkdb/ A manually curated database of literature-supported ligand-receptor interactions in human and mouse Nov 2020 celltalkdb db
GeneOntology https://geneontology.org Knowledgebase that contains the largest structural source of information on the functions of genes August 2022 (MSigDB v2022.1) go db
KEGG https://www.genome.jp/kegg/pathway.html Collection of manually drawn pathway maps representing our knowledge on the molecular interaction, reaction and relation networks 20220809 kegg db
-CancerMine http://bionlp.bcgsc.ca/cancermine/ Literature-mined database of tumor suppressor genes/proto-oncogenes v47 - 20220708 cancermine db
+CancerMine http://bionlp.bcgsc.ca/cancermine/ Literature-mined database of tumor suppressor genes/proto-oncogenes v48 - 20220920 cancermine db
NCG http://ncg.kcl.ac.uk/index.php Network of cancer genes - a web resource to analyze duplicability, orthology and network properties of cancer genes v7.0 ncg db
CGC https://cancer.sanger.ac.uk/census Cancer Gene Census v96 cgc db
Pfam http://pfam.xfam.org Collection of protein families/domains 2021_11 (v35.0) pfam db
diff --git a/data_processing_code/data_raw.R b/data_processing_code/data_raw.R
index 7e1899c..91f54b3 100644
--- a/data_processing_code/data_raw.R
+++ b/data_processing_code/data_raw.R
@@ -4,7 +4,7 @@ library(gganatogram)
source('data_processing_code/data_utility_functions.R')
msigdb_version <- '2022.1'
-wikipathways_version <- "20220810"
+wikipathways_version <- "20220910"
netpath_version <- "2010"
opentargets_version <- "2022.06"
kegg_version <- "20220809"
@@ -14,17 +14,17 @@ uniprot_release <- "2022_03"
## Which databases to update or retrieve from last updated state
update_omnipathdb <- F
update_hpa <- F
-update_ncbi_gene_summary <- T
+update_ncbi_gene_summary <- F
update_project_score <- F
update_project_survival <- F
-update_tcga <- F
+update_tcga <- T
update_cancer_hallmarks <- F
update_omnipath_regulatory <- F
update_omnipath_complexdb <- F
update_gencode <- F
update_ligand_receptor_db <- T
-oe_version <- "1.3.0"
+oe_version <- "1.3.1"
data_raw_dir <- "/Users/sigven/project_data/package__oncoEnrichR/db/raw"
data_output_dir <- "/Users/sigven/project_data/package__oncoEnrichR/db/output"
@@ -99,7 +99,7 @@ ts_oncogene_annotations <-
get_ts_oncogene_annotations(
raw_db_dir = data_raw_dir,
gene_info = gene_info,
- version = "47") |>
+ version = "48") |>
dplyr::select(
entrezgene, tumor_suppressor,
oncogene, citation_links_oncogene,
diff --git a/data_processing_code/data_utility_functions.R b/data_processing_code/data_utility_functions.R
index 49a6ae0..dc5f5bf 100644
--- a/data_processing_code/data_utility_functions.R
+++ b/data_processing_code/data_utility_functions.R
@@ -3868,6 +3868,8 @@ get_tcga_db <- function(
gene_xref = NULL,
update = F){
+ tcga_release <- 'release34_20220727'
+
rds_fname <- file.path(
raw_db_dir,
"tcga",
@@ -3876,7 +3878,10 @@ get_tcga_db <- function(
coexpression_tsv <- file.path(
raw_db_dir,
"tcga",
- "co_expression_strong_moderate.release32_20220329.tsv.gz")
+ paste0(
+ "co_expression_strong_moderate.",
+ tcga_release,
+ ".tsv.gz"))
tcga_clinical_rds <- file.path(
raw_db_dir,
@@ -3970,16 +3975,22 @@ get_tcga_db <- function(
primary_site <- maf_codes[i,]$primary_site
maf_code <- maf_codes[i,]$code
maf_file <- file.path(
- maf_path, paste0("tcga_mutation_grch38_release32_20220329.",maf_code,"_0.maf.gz"))
+ maf_path, paste0(
+ "tcga_mutation_grch38_",
+ tcga_release,
+ ".",
+ maf_code,"_0.maf.gz"))
if(file.exists(maf_file)){
tmp <- read.table(gzfile(maf_file), quote="",
header = T, stringsAsFactors = F,
sep="\t", comment.char="#")
tmp$primary_site <- NULL
tmp$site_diagnosis_code <- NULL
- tmp$Tumor_Sample_Barcode <- stringr::str_replace(tmp$Tumor_Sample_Barcode,"-[0-9][0-9][A-Z]$","")
+ tmp$Tumor_Sample_Barcode <- stringr::str_replace(
+ tmp$Tumor_Sample_Barcode,"-[0-9][0-9][A-Z]$","")
- clinical <- tcga_clinical |> dplyr::filter(primary_site == primary_site) |>
+ clinical <- tcga_clinical |>
+ dplyr::filter(primary_site == primary_site) |>
dplyr::select(bcr_patient_barcode, primary_diagnosis_very_simplified,
MSI_status, Gleason_score, ER_status,
PR_status, HER2_status,
@@ -4029,7 +4040,9 @@ get_tcga_db <- function(
ENSEMBL_TRANSCRIPT_ID,
SYMBOL,
COSMIC_MUTATION_ID,
- Consequence) |>
+ Consequence,
+ AMINO_ACID_START,
+ VEP_ALL_CSQ) |>
dplyr::mutate(VAR_ID = paste(
CHROM, POS, REF, ALT, sep = "_")
) |>
@@ -4047,13 +4060,15 @@ get_tcga_db <- function(
VAR_ID,
CONSEQUENCE,
PROTEIN_CHANGE,
+ AMINO_ACID_START,
PFAM_ID,
MUTATION_HOTSPOT,
LOSS_OF_FUNCTION,
ENSEMBL_TRANSCRIPT_ID,
COSMIC_MUTATION_ID,
TCGA_SITE_RECURRENCE,
- TOTAL_RECURRENCE) |>
+ TOTAL_RECURRENCE,
+ VEP_ALL_CSQ) |>
tidyr::separate_rows(TCGA_SITE_RECURRENCE, sep=",") |>
tidyr::separate(TCGA_SITE_RECURRENCE, into =
c("PRIMARY_SITE","SITE_RECURRENCE", "TCGA_SAMPLES"),
@@ -4077,6 +4092,81 @@ get_tcga_db <- function(
CONSEQUENCE,"^(intron|intergenic|mature|non_coding|synonymous|upstream|downstream|3_prime|5_prime)"))
)
+ csq_all_fixed <- as.data.frame(
+ recurrent_tcga_variants |>
+ dplyr::select(VAR_ID, VEP_ALL_CSQ) |>
+ tidyr::separate_rows(VEP_ALL_CSQ, sep=",") |>
+ tidyr::separate(
+ VEP_ALL_CSQ, c("V1","V2","V3","V4",
+ "V5","V6","V7","V8","V9"),
+ sep = ":") |>
+ dplyr::mutate(VEP_ALL_CSQ = paste(
+ V1,V2,V5,V6, sep=":"
+ )) |>
+ dplyr::group_by(VAR_ID) |>
+ dplyr::summarise(VEP_ALL_CSQ = paste(
+ unique(VEP_ALL_CSQ), collapse=", "
+ ), .groups = "drop")
+ )
+
+
+ hotspots_fixed <- recurrent_tcga_variants |>
+ dplyr::filter(!is.na(MUTATION_HOTSPOT)) |>
+ tidyr::separate_rows(MUTATION_HOTSPOT, sep="&") |>
+ dplyr::select(SYMBOL, PROTEIN_CHANGE, VAR_ID, MUTATION_HOTSPOT) |>
+ tidyr::separate(MUTATION_HOTSPOT, c("genesym","aapos","altaa","qvalue"),
+ sep = "\\|", remove = F) |>
+ dplyr::mutate(hs_hgvsp = paste0(
+ "p.",aapos,altaa
+ )) |>
+ dplyr::mutate(mismatch = dplyr::if_else(
+ PROTEIN_CHANGE != hs_hgvsp &
+ nchar(altaa) > 0 &
+ !(stringr::str_detect(
+ PROTEIN_CHANGE,"\\?")),
+ TRUE,
+ FALSE
+ )) |>
+ dplyr::filter(mismatch == T) |>
+ dplyr::mutate(
+ MUTATION_HOTSPOT_FIXED = paste0(
+ genesym, "|",
+ aapos,"/",
+ stringr::str_replace_all(
+ PROTEIN_CHANGE,"p\\.|[A-Z]$",""
+ ), "|",
+ altaa, "|", qvalue
+ )
+ ) |>
+ dplyr::select(
+ SYMBOL, PROTEIN_CHANGE,
+ VAR_ID, MUTATION_HOTSPOT_FIXED
+ ) |>
+ dplyr::distinct()
+
+ recurrent_tcga_variants$VEP_ALL_CSQ <- NULL
+ recurrent_tcga_variants <- recurrent_tcga_variants |>
+ dplyr::left_join(csq_all_fixed, by = "VAR_ID") |>
+ dplyr::left_join(
+ hotspots_fixed,
+ by = c("SYMBOL","PROTEIN_CHANGE","VAR_ID")) |>
+ dplyr::mutate(MUTATION_HOTSPOT = dplyr::if_else(
+ !is.na(MUTATION_HOTSPOT_FIXED),
+ as.character(MUTATION_HOTSPOT_FIXED),
+ as.character(MUTATION_HOTSPOT)
+ )) |>
+ dplyr::select(-MUTATION_HOTSPOT_FIXED) |>
+ dplyr::select(
+ SYMBOL, VAR_ID, CONSEQUENCE,
+ PROTEIN_CHANGE, MUTATION_HOTSPOT,
+ AMINO_ACID_START, PFAM_ID,
+ LOSS_OF_FUNCTION, ENSEMBL_TRANSCRIPT_ID,
+ COSMIC_MUTATION_ID, PRIMARY_SITE,
+ SITE_RECURRENCE, TOTAL_RECURRENCE,
+ VEP_ALL_CSQ
+ )
+
+
##TCGA co-expression data
raw_coexpression <-
readr::read_tsv(coexpression_tsv,
@@ -4115,7 +4205,6 @@ get_tcga_db <- function(
gdc_projects <- as.data.frame(TCGAbiolinks::getGDCprojects()) |>
dplyr::filter(is.na(dbgap_accession_number) & startsWith(id,"TCGA"))
- tcga_release <- 'release32_20220329'
all_tcga_mean_median_tpm <- data.frame()
@@ -4123,8 +4212,8 @@ get_tcga_db <- function(
tumor_code <- gdc_projects[i,]$tumor
rnaseq_rds_fname <-
file.path("","Users","sigven", "project_data","analysis__tcga",
- "tcga", "data", "rnaseq",
- paste0("tcga_rnaseq_",tumor_code,"_",
+ "tcga", "output", "rnaseq",
+ paste0("rnaseq_",tumor_code,"_",
tcga_release,".rds"))
cat(tumor_code, '\n')
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 7fd1590..782ed75 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -14,7 +14,7 @@ RUN sudo apt update -qq
# install two helper packages we need
RUN sudo apt-get update && sudo apt-get -y install --no-install-recommends software-properties-common dirmngr
# add the signing key (by Michael Rutter) for these repos
-# To verify key, run gpg --show-keys /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc
+# To verify key, run gpg --show-keys /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc
# Fingerprint: E298A3A825C0D65DFD57CBB651716619E084DAB9
RUN wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | sudo tee -a /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc
# add the R 4.0 repo from CRAN -- adjust 'focal' to 'groovy' or 'bionic' as needed
@@ -45,7 +45,7 @@ RUN cd cmake-3.20.0 && ./bootstrap && make && make install
## PINNED versions of all packages can be found in /renv.lock (JSON format)
RUN R -e "install.packages(repos = NULL, 'https://cloud.r-project.org/src/contrib/renv_0.15.5.tar.gz', lib = '/usr/lib/R/library/')"
COPY renv.lock /
-COPY oncoEnrichR_1.3.0.tar.gz /
+COPY oncoEnrichR_1.3.1.tar.gz /
#RUN R -e "library(renv)"
@@ -53,7 +53,7 @@ RUN R -e "library(renv); renv::restore(prompt = F, library = '/usr/lib/R/library
WORKDIR /
## PINNED version - oncoEnrichR
-RUN R -e "install.packages(repos = NULL, 'oncoEnrichR_1.3.0.tar.gz', lib = '/usr/lib/R/library/')"
+RUN R -e "install.packages(repos = NULL, 'oncoEnrichR_1.3.1.tar.gz', lib = '/usr/lib/R/library/')"
## PINNED VERSION: pandoc (for HTML report generation)
RUN wget https://github.com/jgm/pandoc/releases/download/2.13/pandoc-2.13-1-amd64.deb && \
@@ -62,5 +62,5 @@ RUN wget https://github.com/jgm/pandoc/releases/download/2.13/pandoc-2.13-1-amd6
apt-get clean
RUN rm -rf /root/.cache
-RUN rm -rf /oncoEnrichR_1.3.0.tar.gz
+RUN rm -rf /oncoEnrichR_1.3.1.tar.gz
RUN rm -rf /cmake-3.20.0*
diff --git a/docker/oncoenrichr_wrapper.xml b/docker/oncoenrichr_wrapper.xml
index ff3dc3f..89938a7 100755
--- a/docker/oncoenrichr_wrapper.xml
+++ b/docker/oncoenrichr_wrapper.xml
@@ -1,7 +1,7 @@
-
+
Cancer-dedicated gene set interpretation
- sigven/oncoenrichr:1.2.2
+ sigven/oncoenrichr:1.3.1
')
* Browse recurrent, protein-coding somatic SNVs/InDels from TCGA in the query set
* Variants are listed as one record _per tissue/site_, in effect making the same variant occurring in multiple rows
* Only variants with a site-specific frequency >= 2 are shown
- * Variants can be filtered based on various properties, e.g. __Site/tissue variant recurrence__, as well as overall recurrence across all tumor sites (column __Pancancer variant recurrence__)
+ * Variants can be filtered based on various properties, e.g. __Site/tissue variant recurrence__, as well as overall recurrence across all tumor sites (column __Pan-cancer variant recurrence__)
* Notably, each variant have been annotated/classified with a
1. _loss-of-function_ status, based on the [LOFTEE plugin in VEP](https://github.com/konradjk/loftee)
- 2. Status as somatic mutation hotspots in cancer, according to [cancerhotspots.org](https://cancerhotspots.org). Format: _\|\|\_
+ 2. Status as somatic mutation hotspots in cancer, according to [cancerhotspots.org](https://cancerhotspots.org). Format: _\|\|\|\_
* Top 2,500 recurrent variants are listed here (all variants are listed in the Excel output of *oncoEnrichR*)
@@ -711,7 +711,9 @@ mutation_hotspot_levels <- levels(as.factor(
vars <- vars |>
- dplyr::arrange(dplyr::desc(SITE_RECURRENCE), dplyr::desc(LOSS_OF_FUNCTION)) |>
+ dplyr::arrange(
+ dplyr::desc(SITE_RECURRENCE),
+ dplyr::desc(LOSS_OF_FUNCTION)) |>
dplyr::mutate(CONSEQUENCE = stringr::str_replace_all(
CONSEQUENCE, "&", ", "
)) |>
@@ -784,7 +786,7 @@ cat('
')
1. sCNA - amplifications
2. sCNA - homozygous deletions
* The values in the heatmaps reflect the percent of all tumor samples pr. primary site with the gene amplified/lost (**percent_mutated**)
- * Genes in the heatmap are ranked according to alteration frequency across all sites (i.e. _pancancer_), limited to the top 75 genes in the query set
+ * Genes in the heatmap are ranked according to alteration frequency across all sites (i.e. _pan-cancer_), limited to the top 75 genes in the query set
* Frequencies across all subtypes per primary site are listed in an interactive table
* Only including genes that are aberrant in >= 1 percent of samples for a given tumor type/subtype
* Limited here to top 2,500 gene-subtype frequencies (all aberration frequencies are listed in the Excel output of *oncoEnrichR*)
diff --git a/inst/templates/_documentation_workflow.Rmd b/inst/templates/_documentation_workflow.Rmd
index d5d0307..666f568 100644
--- a/inst/templates/_documentation_workflow.Rmd
+++ b/inst/templates/_documentation_workflow.Rmd
@@ -30,7 +30,7 @@ The configurations set for this oncoEnrichR report is outlined below, in additio
* `show_aberration` = __`r onc_enrich_report[['config']][['show']][['aberration']]`__
* `show_prognostic` = __`r onc_enrich_report[['config']][['show']][['cancer_prognosis']]`__
-* Cancer assocations:
+* Cancer associations:
* `show_top_diseases_only` = __`r onc_enrich_report[['config']][['disease']][['show_top_diseases']]`__
* Regulatory interactions:
diff --git a/inst/templates/_drug_target_association.Rmd b/inst/templates/_drug_target_association.Rmd
index 25b3c9a..9441ed3 100644
--- a/inst/templates/_drug_target_association.Rmd
+++ b/inst/templates/_drug_target_association.Rmd
@@ -2,7 +2,10 @@
* Each protein/protein in the query set is annotated with:
* Targeted drugs (inhibitors/antagonists), as found through the [Open Targets Platform](https://targetvalidation.org), limited to compounds indicated for a cancer condition/phenotype
- * We distinguish between drugs in early clinical development/phase (ep), and drugs already in late clinical development/phase (lp)
+ * Drugs are organized into the following buckets:
+ * Targeted cancer drugs in early clinical development/phase (phase 1-2) (column drugs_early_phase)
+ * Targeted cancer drugs in late clinical development/phase (phase 3-4) (column drugs_late_phase)
+ * Approved drugs - here also showing the (cancer type) indications for which the drugs are approved
diff --git a/inst/templates/_fitness_lof.Rmd b/inst/templates/_fitness_lof.Rmd
index 597ae15..395b4ed 100644
--- a/inst/templates/_fitness_lof.Rmd
+++ b/inst/templates/_fitness_lof.Rmd
@@ -121,7 +121,7 @@ cat('
')
#### Target priority scores
-* Promising candidate therapeutic targets are indicated through __[target priority scores](https://score.depmap.sanger.ac.uk/documentation#scores)__. Target priority scores are based on integration of CRISPR knockout gene fitness effects with genomic biomarker and patient data ([Behan et al., Nature, 2019](https://pubmed.ncbi.nlm.nih.gov/30971826/)). All genes are assigned a target priority score between 0 – 100 from lowest to highest priority. In the heatmap shown below, genes in the query set are ranked according to their respective priority scores across all cancers (i.e. _Pancancer_), limited to the top 100 candidates.
+* Promising candidate therapeutic targets are indicated through __[target priority scores](https://score.depmap.sanger.ac.uk/documentation#scores)__. Target priority scores are based on integration of CRISPR knockout gene fitness effects with genomic biomarker and patient data ([Behan et al., Nature, 2019](https://pubmed.ncbi.nlm.nih.gov/30971826/)). All genes are assigned a target priority score between 0 – 100 from lowest to highest priority. In the heatmap shown below, genes in the query set are ranked according to their respective priority scores across all cancers (i.e. _Pan-cancer_), limited to the top 100 candidates.
diff --git a/inst/templates/_functional_enrichment.Rmd b/inst/templates/_functional_enrichment.Rmd
index eb2ecdc..7f6001c 100644
--- a/inst/templates/_functional_enrichment.Rmd
+++ b/inst/templates/_functional_enrichment.Rmd
@@ -1,6 +1,6 @@
### Function and pathway enrichment
-* The query set is analyzed with [clusterProfiler](https://bioconductor.org/packages/release/bioc/vignettes/clusterProfiler/inst/doc/clusterProfiler.html) for functional enrichment/overrepresentation with respect to:
+* The query set is analyzed with [clusterProfiler](https://bioconductor.org/packages/release/bioc/vignettes/clusterProfiler/inst/doc/clusterProfiler.html) for functional enrichment/over-representation with respect to:
* [Gene Ontology terms](https://geneontology.org). All three subontologies: _Molecular Function_ (GO_MF), _Cellular Component_ (GO_CC) & _Biological Process_ (GO_BP)
* Molecular signalling networks from [KEGG](https://www.genome.jp/kegg/pathway.html)
* Cellular pathways from [Reactome](https://reactome.org/), and other curated gene signature sets from the [Molecular Signatures Database (MSiGDB)](http://software.broadinstitute.org/gsea/msigdb/index.jsp)
@@ -9,7 +9,7 @@
-* Enrichment/overrepresentation test settings (clusterProfiler)
+* Enrichment/over-representation test settings (_clusterProfiler_)
* P-value cutoff: __`r onc_enrich_report[['config']][['enrichment']][['p_value_cutoff']]`__
* Q-value cutoff: __`r onc_enrich_report[['config']][['enrichment']][['q_value_cutoff']]`__
* Correction for multiple testing: __`r onc_enrich_report[['config']][['enrichment']][['p_adjust_method']]`__
@@ -481,15 +481,11 @@ plot_data <- onc_enrich_report[['data']][['enrichment']][['go']] |>
plot_data$description <-
factor(plot_data$description, levels = plot_data$description)
-#max_y <- max(plyr::round_any(-log10(plot_data$qvalue), 10, f = ceiling))
max_y <- max(plyr::round_any(plot_data$enrichment_factor, 10, f = ceiling))
p <- ggplot2::ggplot(
plot_data,
- #ggplot2::aes( x = description, y = -log10(qvalue), fill = Subontology) ) +
ggplot2::aes( x = description, y = enrichment_factor, fill = qvalue) ) +
- #ggplot2::scale_fill_gradient(low = "yellow", high = "red", midpoint = midpoint_qvalue, na.value = NA) +
- #ggplot2::scale_fill_brewer(palette = "Dark2") +
ggplot2::geom_bar( stat = "identity" ) +
ggplot2::xlab("") +
ggplot2::ylab("Enrichment") +
@@ -497,7 +493,6 @@ p <- ggplot2::ggplot(
ggplot2::theme_classic() +
ggplot2::coord_flip() +
ggplot2::theme(
- #legend.position = "none",
legend.title = ggplot2::element_blank(),
axis.text.x = ggplot2::element_text(size = plot_fontsize, vjust = 0.5),
axis.text.y = ggplot2::element_text(family = "Helvetica", size = plot_fontsize),
diff --git a/inst/templates/_ligand_receptor.Rmd b/inst/templates/_ligand_receptor.Rmd
index 7b7f087..1cbdc07 100644
--- a/inst/templates/_ligand_receptor.Rmd
+++ b/inst/templates/_ligand_receptor.Rmd
@@ -2,7 +2,7 @@
-* Using data from the [CellChatDB](http://www.cellchat.org/) resource, we are here interrogating ligand-receptor interactions for members of the query set. Putative interactions are displayed along three different axes with respect to cell-cell comunication:
+* Using data from the [CellChatDB](http://www.cellchat.org/) resource, we are here interrogating ligand-receptor interactions for members of the query set. Putative interactions are displayed along three different axes with respect to cell-cell communication:
1) Secreted Signaling (Paracrine/autocrine signaling)
2) ECM-Receptor (extracellular matrix-receptor interactions)
diff --git a/pkgdown/index.md b/pkgdown/index.md
index 0e14eaf..de4a959 100644
--- a/pkgdown/index.md
+++ b/pkgdown/index.md
@@ -29,7 +29,7 @@ Web-based access to **oncoEnrichR** is available at
-## Questions adressed by oncoEnrichR
+## Questions addressed by oncoEnrichR
The contents of the analysis report provided by oncoEnrichR address the
following scientific questions for a given gene list:
@@ -38,7 +38,7 @@ following scientific questions for a given gene list:
the query set, and to what extent?
- Which genes in the query set are attributed with cancer hallmark
evidence?
-- Which proteins in the query sets are druggable in diffferent cancer
+- Which proteins in the query sets are druggable in different cancer
conditions (early and late clinical development phases)? For other
proteins in the query set, what is their likelihood of being
druggable?
@@ -85,6 +85,8 @@ See also the [output views](articles/output.html) that addresses each of the que
## News
+- September 23rd 2022: [**1.3.1
+ release**](articles/CHANGELOG.html#version-1-3-1)
- September 12th 2022: [**1.3.0
release**](articles/CHANGELOG.html#version-1-3-0)
- September 2nd 2022: [**1.2.2
@@ -94,7 +96,7 @@ See also the [output views](articles/output.html) that addresses each of the que
## Example report
-
+
diff --git a/tests/testthat/test_disease_drug.R b/tests/testthat/test_disease_drug.R
index 0cee065..f022ed9 100644
--- a/tests/testthat/test_disease_drug.R
+++ b/tests/testthat/test_disease_drug.R
@@ -36,8 +36,8 @@ test_that("Target drug associations", {
qgenes = c("EGFR","BRAF"),
genedb = oedb$genedb$all)$target_drugs),
c("symbol","genename",
- "targeted_cancer_drugs_lp",
- "targeted_cancer_drugs_ep",
+ "drugs_late_phase",
+ "drugs_early_phase",
"approved_drugs")
)
diff --git a/vignettes/CHANGELOG.Rmd b/vignettes/CHANGELOG.Rmd
index e477cda..46df9c9 100644
--- a/vignettes/CHANGELOG.Rmd
+++ b/vignettes/CHANGELOG.Rmd
@@ -7,6 +7,33 @@ vignette: >
%\usepackage[UTF-8]{inputenc}
---
+## Version 1.3.1
+
+* Date: 2022-09-23
+
+### Added
+
+* Data updates:
+ * CancerMine (20220920 - v48)
+ * TCGA (20220727 - v34.0)
+ * WikiPathways (20220910)
+ * Upgraded [sigven/oncoPhenoMap](https://github.com/sigven/oncoPhenoMap) to
+ v0.4.0:
+ * Experimental Factor Ontology (v3.46.0)
+ * Human Disease Ontology (2022-08-29)
+
+* Recurrent somatic variants (SNVs/InDels, as found in TCGA) are appended to
+Excel output worksheet, tab `RECURRENT_VARIANTS`
+
+### Fixed
+
+* A few erroneous mutation hotspots in `Tumor aberration frequencies` section
+
+### Changed
+
+* Slight modification to column names in `Drug associations` section
+* Added links to `ENSEMBL_GENE_ID` in `Tumor aberration frequencies` section
+
## Version 1.3.0
* Date: 2022-09-12
diff --git a/vignettes/running.Rmd b/vignettes/running.Rmd
index 97159c7..518b75c 100644
--- a/vignettes/running.Rmd
+++ b/vignettes/running.Rmd
@@ -102,7 +102,7 @@ A target list of *n = 134* high-confidence interacting proteins with the c-MYC o
- `project_title = "cMYC_BioID_screen"`
- `project_owner = "Raught et al."`
-and produced the [following HTML report with results](https://doi.org/10.5281/zenodo.7042674).
+and produced the [following HTML report with results](https://doi.org/10.5281/zenodo.7104355).
Below are R commands provided to reproduce the example output. **NOTE**: Replace "LOCAL_FOLDER" with a directory on your local computer: