diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..19582aa
Binary files /dev/null and b/.DS_Store differ
diff --git a/DESCRIPTION b/DESCRIPTION
index 263761f..604e0f8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: HPOExplorer
Title: Analysis and Visualisation of the Human Phenotype Ontology
-Version: 0.99.12
+Version: 1.0.0
Authors@R:
c(
person(given = "Brian",
@@ -18,25 +18,21 @@ Authors@R:
role = c("aut"),
email = "nathan.skene@gmail.com",
comment = c(ORCID = "0000-0002-6807-3180")))
-Description: HPOExplorer contains useful functions for working with
- the Human Phenotype Ontology (HPO).
- It allows you to create interactive phenotype network plots,
- which can be useful for making web applications.
- It also has functions for finding ontology level
- (n generations of terms below the selected term),
- and getting term definitions from HPO API.
+Description: Import, annotate and visualise the 18k+ hierarchically structured
+ clinical phenotypes across the Human Phenotype Ontology.
License: GPL-3
URL: https://github.com/neurogenomics/HPOExplorer
BugReports: https://github.com/neurogenomics/HPOExplorer/issues
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.0
Depends:
R (>= 4.1.0)
Imports:
+ KGExplorer,
+ simona,
ggplot2,
- plotly,
- ontologyIndex,
+ plotly,
utils,
stats,
methods,
@@ -45,31 +41,27 @@ Imports:
tools,
data.table,
stringr,
- EnsDb.Hsapiens.v75,
- ensembldb,
- AnnotationFilter,
- GenomicRanges,
- AnnotationHub
+ GenomicRanges
Suggests:
rmarkdown,
knitr,
devtools,
- BiocStyle,
rworkflows,
here,
httr,
jsonlite,
testthat (>= 3.0.0),
- igraph,
pals,
Matrix,
MASS,
scales,
htmlwidgets,
- R.utils,
piggyback,
patchwork,
- BiocParallel
+ igraph,
+ tidygraph
+Remotes:
+ github::neurogenomics/KGExplorer
VignetteBuilder: knitr
biocViews:
Genetics, Preprocessing, GeneTarget, SystemsBiology,
diff --git a/NAMESPACE b/NAMESPACE
index 1fd52e6..81ea4ea 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,14 +3,12 @@
export(add_ancestor)
export(add_death)
export(add_disease)
-export(add_disease_definition)
export(add_disease_genes)
export(add_evidence)
export(add_gene_frequency)
export(add_genes)
export(add_hpo_definition)
export(add_hpo_id)
-export(add_hpo_synonym)
export(add_info_content)
export(add_mondo)
export(add_ndisease)
@@ -21,89 +19,58 @@ export(add_pheno_frequency)
export(add_prevalence)
export(add_severity)
export(add_tier)
-export(adjacency_matrix)
export(annotate_phenos)
-export(assign_tiers)
-export(clear_cache)
-export(convert_ontology)
export(example_phenos)
-export(get_gencc)
+export(filter_descendants)
export(get_gene_lists)
export(get_hpo)
export(get_hpo_id_direct)
-export(get_monarch)
-export(get_mondo)
-export(get_ont_lvls)
-export(get_ont_lvls2)
-export(get_upheno)
-export(get_version)
export(ggnetwork_plot)
export(gpt_annot_check)
export(gpt_annot_codify)
export(gpt_annot_plot)
export(gpt_annot_read)
-export(harmonise_phenotypes)
export(hpo_api)
export(hpo_to_matrix)
-export(kde_surface)
export(list_columns)
export(list_deaths)
export(list_onsets)
-export(load_disease_genes)
export(load_phenotype_to_genes)
-export(make_hoverboxes)
export(make_igraph_object)
export(make_network_object)
export(make_phenos_dataframe)
-export(map_upheno)
-export(map_upheno_data)
-export(map_upheno_plot)
-export(network_3d)
+export(make_tiers)
+export(map_phenotypes)
export(newlines_to_definition)
export(per_branch_plot)
export(phenos_to_granges)
export(search_hpo)
-export(subset_descendants)
+import(KGExplorer)
import(data.table)
import(ggplot2)
import(network)
+import(simona)
+importFrom(KGExplorer,map_mondo)
importFrom(data.table,":=")
importFrom(data.table,.EACHI)
-importFrom(data.table,.N)
-importFrom(data.table,.SD)
importFrom(data.table,as.data.table)
importFrom(data.table,copy)
importFrom(data.table,data.table)
importFrom(data.table,dcast.data.table)
-importFrom(data.table,fcoalesce)
importFrom(data.table,fread)
importFrom(data.table,merge.data.table)
-importFrom(data.table,rbindlist)
-importFrom(data.table,setcolorder)
-importFrom(data.table,setkey)
-importFrom(data.table,setkeyv)
importFrom(data.table,setnafill)
importFrom(data.table,setnames)
importFrom(ggnetwork,geom_edges)
importFrom(ggnetwork,ggnetwork)
-importFrom(methods,show)
-importFrom(ontologyIndex,get_OBO)
-importFrom(ontologyIndex,get_ancestors)
-importFrom(ontologyIndex,get_descendants)
-importFrom(ontologyIndex,get_term_descendancy_matrix)
-importFrom(ontologyIndex,get_term_info_content)
importFrom(plotly,ggplotly)
importFrom(plotly,layout)
-importFrom(stats,as.dist)
importFrom(stats,as.formula)
importFrom(stats,cor)
-importFrom(stats,cutree)
-importFrom(stats,hclust)
importFrom(stats,na.omit)
importFrom(stats,setNames)
importFrom(stats,terms)
importFrom(stringr,str_split)
-importFrom(stringr,str_wrap)
importFrom(tools,R_user_dir)
importFrom(utils,data)
importFrom(utils,download.file)
diff --git a/NEWS.md b/NEWS.md
index 1d6750b..27c3b97 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,40 @@
+# HPOExplorer 1.0.0
+
+## New features
+
+* *MAJOR CHANGES*: Switched from `ontologyIndex` to `simona`,
+ which seems to be better written and has extensive analysis functions.
+* *MAJOR CHANGES*: Moved most non-HPO data annotation to [`KnowledgeNets`](https://github.com/neurogenomics/KnowledgeNets)
+ - Functions to assess disease or phenotype prevalence (https://github.com/neurogenomics/RareDiseasePrioritisation/issues/34)
+ - `get_prevalence_orphanet`
+ - `get_prevalence_oard`
+ - Functions to assess the existence of models for phenotypes or diseases (https://github.com/neurogenomics/RareDiseasePrioritisation/issues/33)
+ - `map_upheno`
+ - `map_upheno_data`
+ - `map_upheno_plots`
+ - `map_upheno_heatmap`
+ - `map_upheno_rainplot`
+ - `map_upheno_scatterplot`
+ - `get_monarch`
+ - `get_monarch_models`
+
+* New exported functions:
+ - Helper functions:
+ - `convert_hpo`
+ - Functions to map IDs across ontologies:
+ - `add_medgen`
+ - `add_omop`
+ - `add_prevalence`
+ - `add_prevalence_report`
+* New internal functions:
+ - `dt_to_matrix`
+
+## Bug fixes
+
+* `add_mondo`
+ - Drastically improved cross-ontology mappings to MONDO (https://github.com/monarch-initiative/mondo/issues/6873)
+ - Add subfunction `map_mondo`
+
# HPOExplorer 0.99.12
## New features
@@ -11,7 +48,7 @@
- `load_phenotypes_to_genes`: new attribute.
- New exported helper function: `get_version`
- New exported helper function: `clear_cache`
-* Add `make_hpo` internal function.
+* Add `make_ontology` internal function.
* Ensure consistent function naming:
- `create_node_data` --> `make_node_data`
* `get_gencc`
@@ -179,7 +216,7 @@
* `hpo_tiers`:
- Found lots of typos, outdated phenotype names, and mismatched HPO IDs.
We through and manually re-curated all of these and checked that they match up with the
- `harmonise_phenotypes` output.
+ `map_phenotypes` output.
- Add original `hpo_tiers` csv to *inst/extdata*.
* Delete `get_hpo_id` function in favor of `get_hpo_id_direct`
which is more reliable and comprehensive.
@@ -219,7 +256,7 @@
- `phenos_to_granges`
- `add_onset`
- `add_tier`
- - `harmonise_phenotypes`
+ - `map_phenotypes`
- `get_gene_lists`
- `get_gene_lengths`
- `list_onsets`
diff --git a/R/.DS_Store b/R/.DS_Store
new file mode 100644
index 0000000..5008ddf
Binary files /dev/null and b/R/.DS_Store differ
diff --git a/R/_docs.R b/R/_docs.R
new file mode 100644
index 0000000..1a388d0
--- /dev/null
+++ b/R/_docs.R
@@ -0,0 +1,118 @@
+#' @title Main functions
+#'
+#' @description
+#' Main functions.
+#' @param phenos A data.table containing HPO IDs and other metadata.
+#' @param hpo Human Phenotype Ontology object,
+#' loaded from \link[KGExplorer]{get_ontology}.
+#' @param add_ont_lvl_absolute Add the absolute ontology level of each HPO term.
+#' See \link[KGExplorer]{get_ontology_levels} for more details.
+#' @param add_ont_lvl_relative Add the relative ontology level of each HPO term.
+#' See \link[KGExplorer]{get_ontology_levels} for more details.
+#' @param add_description Whether to get the phenotype descriptions as
+#' well (slower).
+#' @param add_info_content Add information content column for each phenotype.
+#' @param add_disease_data Add all disease metadata columns.
+#' This will expand the data using \code{allow.cartesian=TRUE}.
+#' @param add_ndiseases Add the number of diseases per phenotype.
+#' @param add_pheno_frequencies Add the frequency of each phenotype in
+#' each disease.
+#' @param add_tiers Add severity Tiers column using
+#' \link[HPOExplorer]{add_tier}.
+#' @param add_severities Add severity column using
+#' \link[HPOExplorer]{add_severity}.
+#' @param add_hoverboxes Add hoverdata with
+#' \link[KGExplorer]{add_hoverboxes}.
+#' @param add_onsets Add age of onset columns using
+#' \link[HPOExplorer]{add_onset}.
+#' @param add_deaths Add age of death columns using
+#' \link[HPOExplorer]{add_death}.
+#' @param columns A named vector of columns in \code{phenos}
+#' to add to the hoverdata via \link[KGExplorer]{add_hoverboxes}.
+#' @param add_disease_definitions Add disease definitions.
+#' @param include_mondo Add \href{https://mondo.monarchinitiative.org/}{MONDO}
+#' IDs, names, and definitions to each disease.
+#' @param interactive Make the plot interactive.
+#' @family main
+#' @returns Data.
+#' @import data.table
+#' @import KGExplorer
+#' @name main
+NULL
+
+
+#' @title Get functions
+#'
+#' @description
+#' Functions to get data objects and extract elements.
+#' @param term One or more ontology IDs.
+#' @family get_
+#' @param force_new Force a new download or creation of a data resource.
+#' @param lvl How many levels deep into the ontology to get ancestors from.
+#' For example:
+#' \itemize{
+#' \item{1: }{"All"}
+#' \item{2: }{"Phenotypic abnormality"}
+#' \item{3: }{"Abnormality of the nervous system"}
+#' \item{4: }{"Abnormality of nervous system physiology"}
+#' \item{5: }{"Neurodevelopmental abnormality" or "Behavioral abnormality"}
+#' }
+#' @inheritParams main
+#' @inheritParams make_
+#' @returns Data.
+#' @name get_
+NULL
+
+#' @title Add functions
+#'
+#' @description
+#' Functions to add metadata to data.table objects.
+#' @family add_
+#' @param agg_by Column to aggregate metadata by.
+#' @param add_definitions Add disease definitions using \link{add_mondo}.
+#' @inheritParams main
+#' @inheritParams make_
+#' @inheritParams get_
+#' @inheritParams data.table::merge.data.table
+#' @returns Annotated data.
+#' @name add_
+NULL
+
+
+#' @title Filter functions
+#'
+#' @description
+#' Functions to filter data.table objects.
+#' @family filter_
+#' @inheritParams main
+#' @returns Filtered data.
+#' @name filter_
+NULL
+
+#' @title Map functions
+#'
+#' @description
+#' Functions to map IDs.
+#' @family map_
+#' @inheritParams main
+#' @inheritParams KGExplorer::map_ontology_terms
+#' @returns Mapped data.
+#' @name map_
+NULL
+
+
+#' @title Make functions
+#'
+#' @description
+#' Functions to make complex R objects.
+#' @param as R object class to return output as.
+#' @param phenotype_to_genes Output of
+#' \link{load_phenotype_to_genes} mapping phenotypes
+#' to gene annotations.
+#' @param ancestor The ancestor to get all descendants of. If \code{NULL},
+#' returns the entirely ontology.
+#' @family make_
+#' @inheritParams main
+#' @returns R object.
+#' @name make_
+NULL
diff --git a/R/add_ancestor.R b/R/add_ancestor.R
index 993e940..3f01697 100644
--- a/R/add_ancestor.R
+++ b/R/add_ancestor.R
@@ -1,59 +1,52 @@
+#' @describeIn add_ add_
#' Add ancestor
#'
#' Assign each HPO ID to the higher-order ancestral term that it is part of.
-#' @param lvl How many levels deep into the ontology to get ancestors from.
-#' For example:
-#' \itemize{
-#' \item{1: }{"All"}
-#' \item{2: }{"Phenotypic abnormality"}
-#' \item{3: }{"Abnormality of the nervous system"}
-#' \item{4: }{"Abnormality of nervous system physiology"}
-#' \item{5: }{"Neurodevelopmental abnormality" or "Behavioral abnormality"}
-#' }
#' @param remove_descendants Remove HPO terms that are descendants of a given
#' ancestral HPO term. Ancestral terms be provided as a character vector of
#' phenotype names (e.g. \code{c("Clinical course")}),
#' HPO IDs (e.g. \code{"HP:0031797" }) or a mixture of the two.
-#' See \link[HPOExplorer]{add_ancestor} for details.
-#' @inheritParams make_phenos_dataframe
-#' @inheritParams make_network_object
-#' @returns phenos data.table with extra column
+#' See \link{add_ancestor} for details.
#'
#' @export
-#' @importFrom data.table :=
+#' @import KGExplorer
#' @examples
#' phenos <- example_phenos()
#' phenos2 <- add_ancestor(phenos = phenos, lvl=5)
add_ancestor <- function(phenos,
lvl = 3,
hpo = get_hpo(),
- remove_descendants = NULL,
- verbose=TRUE){
-
- ancestor_name <- ancestor <- NULL;
+ remove_descendants = NULL){
+ ancestor <- NULL;
+ #### Check for existing columns ####
+ if(all(c("ancestor","ancestor_name") %in% names(phenos))){
+ messager("Ancestor columns already present. Skipping.")
+ return(phenos)
+ }
+ #### Add the new columns ####
if("hpo_id" %in% names(phenos)){
- messager(paste0("Adding level-",lvl),"ancestor to each HPO ID.",v=verbose)
- phenos$ancestor <- lapply(hpo$ancestors[phenos$hpo_id],
- function(x){
- if(length(x)>0){
- x[lvl]
- } else {NA}
- }) |> unlist()
- # phenos[,.(ancestor=unlist(ancestor))]
- phenos[,ancestor_name:=hpo$name[ancestor]]
+ messager(paste0("Adding level-",lvl),"ancestor to each HPO ID.")
+ hpo <- KGExplorer::add_ancestors(ont = hpo,
+ lvl = lvl)
+ ancestors <- hpo@elementMetadata[,c("id","ancestor","ancestor_name")] |>
+ unique()
+ phenos <- data.table::merge.data.table(phenos,
+ ancestors,
+ by.x = "hpo_id",
+ by.y = "id",
+ all.x = TRUE)
} else {
- messager("hpo_id column not found. Cannot add ancestors.",v=verbose)
+ messager("hpo_id column not found. Cannot add ancestors.")
}
#### Filter ####
if(!is.null(remove_descendants)){
messager("Removing remove descendants of:",
- paste(shQuote(remove_descendants),collapse = "\n -"),v=verbose)
- rmd <- HPOExplorer::harmonise_phenotypes(phenotypes = remove_descendants,
- hpo = hpo,
- as_hpo_ids = TRUE,
- keep_order = FALSE,
- verbose = verbose)
+ paste(shQuote(remove_descendants),collapse = "\n -"))
+ rmd <- map_phenotypes(terms = remove_descendants,
+ hpo = hpo,
+ to="id",
+ keep_order = FALSE)
phenos <- phenos[!ancestor %in% rmd,]
}
return(phenos)
diff --git a/R/add_death.R b/R/add_death.R
index 091d888..04b7c96 100644
--- a/R/add_death.R
+++ b/R/add_death.R
@@ -1,3 +1,4 @@
+#' @describeIn add_ add_
#' Add age of death
#'
#' Add age of death for each HPO ID.
@@ -18,10 +19,7 @@
#' @param keep_deaths The age of death associated with each HPO ID to keep.
#' If >1 age of death is associated with the term,
#' only the earliest age is considered.
-#' See \link[HPOExplorer]{add_death} for details.
-#' @param agg_by Column to aggregate age of death metadata by.
-#' @inheritParams make_network_object
-#' @inheritParams data.table::merge.data.table
+#' See \link{add_death} for details.
#' @returns phenos data.table with extra columns:
#' \itemize{
#' \item{"AgeOfDeath": }{AgeOfDeath HPO IDs of disease phenotypes associated
@@ -46,19 +44,17 @@ add_death <- function(phenos,
keep_deaths = NULL,
all.x = TRUE,
allow.cartesian = FALSE,
- agg_by = NULL,
- verbose = TRUE){
+ agg_by = NULL){
# devoptera::args2vars(add_death)
AgeOfDeath_earliest <- AgeOfDeath_name <- NULL;
if(!all(c("AgeOfDeath",
"AgeOfDeath_name") %in% names(phenos))){
- messager("Annotating phenos with AgeOfDeath.",v=verbose)
+ messager("Annotating phenos with AgeOfDeath.")
phenos <- add_disease(phenos = phenos,
all.x = all.x,
- allow.cartesian = allow.cartesian,
- verbose = verbose)
+ allow.cartesian = allow.cartesian)
annot <- pkg_data("hpo_deaths")
annot <- annot[,c("disease_id",
"AgeOfDeath_name",
diff --git a/R/add_disease.R b/R/add_disease.R
index 369efbd..bc50741 100644
--- a/R/add_disease.R
+++ b/R/add_disease.R
@@ -1,3 +1,4 @@
+#' @describeIn add_ add_
#' Add diseases
#'
#' Annotate each HPO term with diseases that they are associated with.
@@ -6,12 +7,6 @@
#' See
#' \href{https://hpo-annotation-qc.readthedocs.io/en/latest/annotationFormat.html}{
#' here for column descriptions}.
-#' @param add_definitions Add disease definitions using
-#' \link[HPOExplorer]{add_disease_definition}.
-#' @inheritParams add_disease_definition
-#' @inheritParams make_network_object
-#' @inheritParams data.table::merge.data.table
-#' @returns phenos data.table with extra columns
#'
#' @export
#' @importFrom data.table merge.data.table
@@ -22,23 +17,17 @@
add_disease <- function(phenos,
# extra_cols = c("Evidence","Reference","Biocuration"),
extra_cols = NULL,
- include_mondo = TRUE,
all.x = TRUE,
allow.cartesian = FALSE,
- add_definitions = FALSE,
- verbose = TRUE){
-
- # devoptera::args2vars(add_disease)
-
+ add_definitions = FALSE){
if(!"hpo_id" %in% names(phenos)){
stp <- paste("hpo_id column must be present in phenos.")
stop(stp)
}
if(!all(c("disease_name","disease_id") %in% names(phenos))){
- messager("Annotating phenos with Disease",v=verbose)
+ messager("Annotating phenos with Disease")
annot <- load_phenotype_to_genes(3)
#### From disease_id ####
- data.table::setnames(phenos,"disease_id","disease_id", skip_absent = TRUE)
if("disease_name" %in% names(phenos)){
return(phenos)
}
@@ -57,12 +46,9 @@ add_disease <- function(phenos,
all.x = all.x,
allow.cartesian = allow.cartesian)
}
- #### Add Definitions column and fill out missing disease_name columns ####
+ #### Add disease definitions and Mondo ID mappings ####
if(isTRUE(add_definitions)){
- phenos <- add_disease_definition(phenos = phenos,
- all.x = all.x,
- include_mondo = include_mondo,
- verbose = verbose)
+ phenos <- add_mondo(phenos = phenos)
}
return(phenos)
}
diff --git a/R/add_disease_definition.R b/R/add_disease_definition.R
deleted file mode 100644
index 266e68d..0000000
--- a/R/add_disease_definition.R
+++ /dev/null
@@ -1,88 +0,0 @@
-#' Add disease definition
-#'
-#' Add metadata for diseases using files from their respective databases:
-#' OMIM, DECIPHER, Orphanet.
-#' @param id_col Name of the disease ID column.
-#' @param cols Metadata columns to include.
-#' @param save_dir Directory to save metadata files to.
-#' @param include_mondo Add IDs/names/definitions from MONDO via
-#' \link[HPOExplorer]{add_mondo}.
-#' @inheritParams make_network_object
-#' @inheritParams data.table::merge.data.table
-#' @returns phenos data.table with extra columns.
-#'
-#' @export
-#' @importFrom data.table merge.data.table fcoalesce :=
-#' @importFrom utils data
-#' @examples
-#' phenos <- load_phenotype_to_genes(3)[seq(1000)]
-#' phenos2 <- add_disease_definition(phenos = phenos)
-add_disease_definition <- function(phenos,
- cols = c("Definitions","Preferred.Label"),
- id_col = "disease_id",
- save_dir = file.path(
- tools::R_user_dir("HPOExplorer",
- which="cache")),
- include_mondo = TRUE,
- all.x = TRUE,
- verbose = TRUE) {
-
- # devoptera::args2vars(add_disease_definition)
- disease_name <- disease_id <- Preferred.Label <- Definitions <-
- MONDO_definition <- NULL;
-
- #### Check if already filled out ####
- if(!is.null(phenos) &&
- all(cols %in% names(phenos))){
- return(phenos)
- }
- #### Gather metadata ####
- messager("Adding disease metadata:",paste(cols,collapse = ", "),v=verbose)
- meta <- list(
- ORPH = get_metadata_orphanet(save_dir = save_dir,
- verbose = verbose),
- OMIM = get_metadata_omim(save_dir = save_dir,
- verbose = verbose)
- ) |> data.table::rbindlist(fill=TRUE,
- use.names = TRUE,
- idcol = "Database")
- #### Return only metadata ####
- if(is.null(phenos)) {
- return(meta)
- #### Merge metdata ####
- } else {
- phenos <-
- data.table::merge.data.table(phenos,
- meta[,unique(c("id",cols)), with=FALSE],
- all.x = all.x,
- by.x = id_col[1],
- by.y = "id")
- if("disease_name" %in% names(phenos)){
- #### Report missing ####
- report_missing(phenos = phenos,
- id_col = id_col,
- report_col = c("disease_name","Definitions"),
- verbose = verbose)
- ## Needs to be lowercase to harmonise across metadata sources, eg.:
- ## "CEREBROOCULOFACIOSKELETAL SYNDROME 1" vs.
- ## "Cerebrooculofacioskeletal syndrome 1"
- phenos[,disease_name:=tolower(data.table::fcoalesce(Preferred.Label,
- disease_name,
- disease_id))]
- }
- #### Add mondo ####
- if(isTRUE(include_mondo)){
- phenos <- add_mondo(phenos = phenos,
- all.x = all.x,
- verbose = verbose)
- ## Coalesce definitions ##
- phenos[,Definitions:=tolower(data.table::fcoalesce(Definitions,
- MONDO_definition))]
- report_missing(phenos = phenos,
- id_col = id_col,
- report_col = "Definitions",
- verbose = verbose)
- }
- return(phenos)
- }
-}
diff --git a/R/add_disease_genes.R b/R/add_disease_genes.R
index 0204d4d..d96d398 100644
--- a/R/add_disease_genes.R
+++ b/R/add_disease_genes.R
@@ -1,13 +1,8 @@
+#' @describeIn add_ add_
#' Add disease genes
#'
#' Add genes that overlap between an HPO ID and an associated phenotype.
-#' @inheritParams make_network_object
-#' @inheritParams data.table::merge.data.table
-#' @returns phenos data.table with extra columns
-#'
#' @export
-#' @importFrom data.table merge.data.table
-#' @importFrom utils data
#' @examples
#' \dontrun{
#' phenos <- load_phenotype_to_genes()
@@ -30,7 +25,7 @@ add_disease_genes <- function(phenos,
dannot <- load_phenotype_to_genes(file = "phenotype.hpoa")
# annot <- annot[hpo_id %in% unique(phenos$hpo_id),]
# #### Add hpo_id associations ####
- # dgenes <- load_disease_genes()
+ # dgenes <- get_disease_genes()
# dgenes <- data.table::merge.data.table(
# dgenes,
# annot[,c("disease_id","disease_name","hpo_id")],
diff --git a/R/add_evidence.R b/R/add_evidence.R
index 97cb4e6..e28efd2 100644
--- a/R/add_evidence.R
+++ b/R/add_evidence.R
@@ -1,3 +1,4 @@
+#' @describeIn add_ add_
#' Add evidence
#'
#' Add the strength of evidence supporting each gene-disease association.
@@ -16,7 +17,6 @@
#' }
#' @param keep_evidence The evidence scores of each gene-disease
#' association to keep.
-#' @param agg_by Column to aggregate metadata by.
#' @param default_score Default evidence score to
#' apply to gene-disease associations that are present in the HPO annotations
#' but don't have evidence scores in the GenCC annotations.
@@ -40,21 +40,15 @@ add_evidence <- function(phenos,
allow.cartesian = FALSE,
agg_by = c("disease_id",
"gene_symbol"),
- default_score = 1,
- verbose = TRUE){
-
- # devoptera::args2vars(add_evidence)
+ default_score = 1){
evidence_score <- evidence_score_mean <- NULL;
if(!all(c("evidence_score_mean") %in% names(phenos))){
- messager("Annotating gene-disease associations with Evidence score",
- v=verbose)
+ messager("Annotating gene-disease associations with Evidence score")
phenos <- add_disease(phenos = phenos,
all.x = all.x,
- allow.cartesian = allow.cartesian,
- verbose = verbose)
- annot <- get_gencc(agg_by = agg_by,
- verbose = verbose)
+ allow.cartesian = allow.cartesian)
+ annot <- get_gencc(agg_by = agg_by)
#### Merge with input data ####
phenos <- data.table::merge.data.table(
x = phenos,
diff --git a/R/add_gene_frequency.R b/R/add_gene_frequency.R
index 23ea94c..16c1dd0 100644
--- a/R/add_gene_frequency.R
+++ b/R/add_gene_frequency.R
@@ -1,3 +1,4 @@
+#' @describeIn add_ add_
#' Add gene frequency
#'
#' Add gene-level frequency, i.e. how often mutations in a given gene
@@ -8,15 +9,12 @@
#' a gene that occurs 100% of the time in a given phenotype.
#' Include \code{NA} if you wish to retain genes that
#' do not have any frequency data.
-#' See \link[HPOExplorer]{add_gene_frequency} for details.
+#' See \link{add_gene_frequency} for details.
#' @inheritParams make_phenos_dataframe
#' @inheritParams data.table::merge.data.table
#' @returns phenos data.table with extra column
#'
#' @export
-#' @importFrom data.table setnames merge.data.table :=
-#' @importFrom utils data
-#' @importFrom stringr str_split
#' @examples
#' phenotype_to_genes <- load_phenotype_to_genes()[seq(1000),]
#' phenos2 <- add_gene_frequency(phenotype_to_genes = phenotype_to_genes)
@@ -31,8 +29,7 @@ add_gene_frequency <- function(phenotype_to_genes = load_phenotype_to_genes(),
gene_freq_min <- gene_freq_max <- . <- NULL;
phenotype_to_genes <- add_hpo_id(phenos = phenotype_to_genes,
- phenotype_to_genes= phenotype_to_genes,
- verbose = verbose)
+ phenotype_to_genes= phenotype_to_genes)
new_cols <- c("gene_freq_name","gene_freq_min",
"gene_freq_max","gene_freq_mean")
if(!all(new_cols %in% names(phenotype_to_genes))){
diff --git a/R/add_genes.R b/R/add_genes.R
index adc26d1..f84636b 100644
--- a/R/add_genes.R
+++ b/R/add_genes.R
@@ -1,3 +1,4 @@
+#' @describeIn add_ add_
#' Add genes
#'
#' Add genes associated with each phenotype
@@ -19,8 +20,7 @@ add_genes <- function(phenos = NULL,
by = c("hpo_id","disease_id"),
gene_col = "gene_symbol",
all.x = FALSE,
- allow.cartesian = FALSE,
- verbose = TRUE){
+ allow.cartesian = FALSE){
# devoptera::args2vars(add_genes, reassign = TRUE)
#### Prepare gene data ####
@@ -38,7 +38,7 @@ add_genes <- function(phenos = NULL,
## Genes driving celltype-symptom enrichment.
if(!is.null(gene_col) &&
gene_col %in% names(phenos)){
- phenos <- unlist_col(dt=phenos,
+ phenos <- unlist_col(dat=phenos,
col=gene_col)
data.table::setnames(phenos,gene_col,"gene_symbol")
by <- unique(c(by,"gene_symbol"))
@@ -46,11 +46,9 @@ add_genes <- function(phenos = NULL,
#### Ensure necessary columns are in phenos ####
phenos <- add_hpo_id(phenos = phenos,
phenotype_to_genes = phenotype_to_genes,
- hpo = hpo,
- verbose = verbose)
+ hpo = hpo)
phenos <- add_disease(phenos = phenos,
- allow.cartesian = allow.cartesian,
- verbose = verbose)
+ allow.cartesian = allow.cartesian)
#### Add Gene col to data ####
if(!"gene_symbol" %in% names(phenos)){
by <- by[by %in% names(phenos)]
diff --git a/R/add_hpo_definition.R b/R/add_hpo_definition.R
index a2a51ae..2137d25 100644
--- a/R/add_hpo_definition.R
+++ b/R/add_hpo_definition.R
@@ -1,3 +1,4 @@
+#' @describeIn add_ add_
#' Get term definition
#'
#' This function accesses the HPO API to get a description/definition of an
diff --git a/R/add_hpo_id.R b/R/add_hpo_id.R
index dcf2ffa..c8877b5 100644
--- a/R/add_hpo_id.R
+++ b/R/add_hpo_id.R
@@ -1,29 +1,19 @@
+#' @describeIn add_ add_
#' Add HPO ID column to dataframe
#'
-#' This adds the HPO term id column to the subset of ewce results data
-#' to be plotted
-#' in the cell select app. It also checks if it is a valid HPO term id to
-#' prevent error and adds
-#' a boolean column where TRUE if term is valid. If the HPO Id is not correct,
-#' it caused an error in the ontologyPlot package.
-#' @inheritParams make_phenos_dataframe
-#' @inheritParams make_network_object
-#' @returns The subset of EWCE results dataframe with a HPO Id column added.
-#'
+#' Adds the HPO term ID column "hpo_id".
#' @export
-#' @importFrom data.table := setkeyv
#' @examples
#' phenotype_to_genes <- load_phenotype_to_genes()
#' phenos <- unique(phenotype_to_genes[,c("hpo_id","hpo_name")])
#' phenos2 <- add_hpo_id(phenos=phenos)
add_hpo_id <- function(phenos,
hpo = get_hpo(),
- phenotype_to_genes = NULL,
- verbose = FALSE) {
+ phenotype_to_genes = NULL) {
HPO_term_valid <- hpo_id <- NULL;
if(!"hpo_id" %in% names(phenos)){
- messager("Adding HPO IDs.",v=verbose)
+ messager("Adding HPO IDs.")
alt_names <- grep("hpo_id","^id$",names(phenos),
value=TRUE, ignore.case = TRUE)
if(length(alt_names)>0){
@@ -33,10 +23,10 @@ add_hpo_id <- function(phenos,
if(is.null(phenotype_to_genes)) {
phenotype_to_genes <- load_phenotype_to_genes()
}
- phenos <- fix_hpo_ids(dt=phenos,
+ phenos <- fix_hpo_ids(dat=phenos,
phenotype_to_genes=phenotype_to_genes)
}
- phenos[,HPO_term_valid:=(hpo_id %in% hpo$id)]
+ phenos[,HPO_term_valid:=(hpo_id %in% hpo@terms)]
}
return(phenos)
}
diff --git a/R/add_hpo_synonym.R b/R/add_hpo_synonym.R
index 9d5f0cc..5aaa1ef 100644
--- a/R/add_hpo_synonym.R
+++ b/R/add_hpo_synonym.R
@@ -1,44 +1,44 @@
-#' Add HPO ID synonyms
-#'
-#' Map HPO IDs to synonynmous IDs from other ontologies
-#' (e.g. UMLS, SNOMEDCT_US, MSH, ICD-10)
-#' @inheritParams make_network_object
-#' @returns A data.table with the HPO ID synonyms in a new column with
-#' the same name as \code{to}.
-#'
-#' @param to The ontology to map to. Default is \code{UMLS}.
-#' @param to_col The name of the new column to add the synonyms to.
-#' @export
-#' @examples
-#' phenos <- HPOExplorer::example_phenos()
-#' phenos2 <- add_hpo_synonym(phenos)
-add_hpo_synonym <- function(phenos,
- to="UMLS",
- to_col=paste0(to,"_ID"),
- hpo=get_hpo(),
- verbose=TRUE){
- hpo_id <- NULL;
- messager("Adding HPO ID synonyms:",to,v=verbose)
- dict <- unlist(hpo$xref)
- ## Fix typos in mapping
- dict <- gsub("ICD10","ICD-10",dict)
- dict <- gsub("ICD9","ICD-10",dict)
- ## Get all prefixes
- # prefixes <- sort(
- # table(stringr::str_split(unname(dict),":",simplify = TRUE)[,1]),
- # decreasing = TRUE)
- if(!is.null(to)){
- dict <- grep(paste0("^",to,":"),dict, value = TRUE)
- phenos[,(to_col):=dict[hpo_id]]
- n_mapped <- length(unique(na.omit(phenos[[to_col]])))
- n_total <- length(unique(phenos$hpo_id))
- messager(formatC(n_mapped,big.mark = ","),"/",
- formatC(n_total,big.mark = ","),
- paste0("(",round(n_mapped/n_total*100,1),"%)"),
- "hpo_id mapped to",paste0(to_col,"."), v=verbose
- )
- return(phenos)
- } else{
- return(dict)
- }
-}
+##' @describeIn add_ add_
+##' Add HPO ID synonyms
+##'
+##' Map HPO IDs to synonynmous IDs from other ontologies
+##' (e.g. UMLS, SNOMEDCT_US, MSH, ICD-10),
+##' @returns A data.table with the HPO ID synonyms in a new column with
+##' the same name as \code{to}.
+##'
+##' @param to The ontology to map to. Default is \code{UMLS}.
+##' @param to_col The name of the new column to add the synonyms to.
+##' @export
+##' @examples
+##' phenos <- xample_phenos()
+##' phenos2 <- add_hpo_synonym(phenos)
+#add_hpo_synonym <- function(phenos,
+# to="UMLS",
+# to_col=paste0(to,"_ID"),
+# hpo=get_hpo()){
+# hpo_id <- NULL;
+# messager("Adding HPO ID synonyms:",to)
+# dict <- unlist(hpo$xref)
+# ## Fix typos in mapping
+# dict <- gsub("ICD10","ICD-10",dict)
+# dict <- gsub("ICD9","ICD-10",dict)
+# ## Get all prefixes
+# # prefixes <- sort(
+# # table(stringr::str_split(unname(dict),":",simplify = TRUE)[,1]),
+# # decreasing = TRUE)
+# if(!is.null(to)){
+# dict <- grep(paste0("^",to,":"),dict, value = TRUE)
+# phenos[,(to_col):=dict[hpo_id]]
+# n_mapped <- length(unique(na.omit(phenos[[to_col]])))
+# n_total <- length(unique(phenos$hpo_id))
+# messager(formatC(n_mapped,big.mark = ","),"/",
+# formatC(n_total,big.mark = ","),
+# paste0("(",round(n_mapped/n_total*100,1),"%)"),
+# "hpo_id mapped to",paste0(to_col,"."), v=verbose
+# )
+# return(phenos)
+# } else{
+# return(dict)
+# }
+#}
+#
diff --git a/R/add_info_content.R b/R/add_info_content.R
index a253bfb..655eb13 100644
--- a/R/add_info_content.R
+++ b/R/add_info_content.R
@@ -1,3 +1,4 @@
+#' @describeIn add_ add_
#' Add information content
#'
#' Add a column containing the information content score for each HPO ID.
@@ -6,22 +7,21 @@
#' @returns phenos data.table with extra column
#'
#' @export
-#' @importFrom data.table :=
-#' @importFrom ontologyIndex get_term_info_content
+#' @import simona
#' @examples
#' phenos <- example_phenos()
#' phenos2 <- add_info_content(phenos = phenos)
add_info_content <- function(phenos,
- hpo = get_hpo(),
- verbose = TRUE){
+ hpo = get_hpo()){
info_content <- hpo_id <- NULL;
if(!"info_content" %in% names(phenos)){
- messager("Adding information_content scores.",v=verbose)
- ic_dict <- ontologyIndex::get_term_info_content(
- ontology = hpo,
- term_sets = phenos$hpo_id,
- patch_missing = FALSE)
+ messager("Adding information_content scores.")
+ if(!"IC" %in% names(hpo@elementMetadata)){
+ simona::mcols(hpo)$IC <- simona::term_IC(hpo)
+ }
+ ic_dict <- KGExplorer::get_ontology_dict(ont=hpo,
+ to = "IC")
phenos[,info_content:=ic_dict[hpo_id]]
}
return(phenos)
diff --git a/R/add_medgen.R b/R/add_medgen.R
deleted file mode 100644
index 0545a93..0000000
--- a/R/add_medgen.R
+++ /dev/null
@@ -1,44 +0,0 @@
-add_medgen <- function(phenos,
- id_col = "hpo_id",
- to=""){
- source_id <- MONDO_ID<-definition<-DEF<-NULL;
-
- URL <- "https://ftp.ncbi.nlm.nih.gov/pub/medgen/"
- def <- data.table::fread(paste0(URL,"csv/MGDEF.csv.gz"), fill = TRUE)
- map <- data.table::fread(paste0(URL,"MedGenIDMappings.txt.gz"),
- header = TRUE,
- key = "source_id")
- data.table::setnames(map,"#CUI","CUI")
- # sort(table(map$source), decreasing = TRUE)
- #### Conform disease_id to MedGen format ####
- phenos <- HPOExplorer::make_phenos_dataframe(add_disease_data = FALSE)
- phenos <- HPOExplorer::add_mondo(phenos,
- id_col = id_col)
- phenos[,source_id:=get(id_col)]
- phenos <- phenos[!is.na(get(id_col))]
- if(id_col=="disease_id"){
- phenos[,source_id:=gsub("OMIM:","",source_id)]
- phenos[,source_id:=gsub("ORPHANET:","Orphanet_",source_id)]
- phenos[,source_id:=ifelse(source_id %in% unique(map$source_id), source_id, MONDO_ID)]
- }
- mapped <- data.table::merge.data.table(
- phenos,map,
- by="source_id",
- all.x = TRUE
- ) |>
- data.table::merge.data.table(def,
- by=c("CUI","source"),
- all.x = TRUE)
- data.table::uniqueN(phenos[!is.na(definition)]$hpo_id)
- data.table::uniqueN(grep("^HP:",map$source_id, value = TRUE))
- data.table::uniqueN(def[source=="HPO"]$CUI)
- data.table::uniqueN(mapped[!is.na(DEF)]$hpo_id)
- HPOExplorer:::report_missing(mapped,
- id_col = "disease_id",
- report_col = "CUI")
-
- ### CONCLUSIONS ####
- # Medgen has HPO-CUI mappings for 16569 HPO IDs
- # However, their definitions data only has definitions for 22 HPO IDs max
- # ....not very useful.
-}
diff --git a/R/add_mondo.R b/R/add_mondo.R
index c2f863a..d04d516 100644
--- a/R/add_mondo.R
+++ b/R/add_mondo.R
@@ -1,60 +1,29 @@
-#' Add MONDO
+#' @describeIn add_ add_
+#' Add Mondo metadata
+#'
+#' Add Mondo metadata (MONDO ID mappings, names, and definitions) for diseases
+#' using files from their respective databases:
+#' e.g. OMIM, DECIPHER, Orphanet.
+#' @inheritParams KGExplorer::map_mondo
+#' @inheritDotParams KGExplorer::map_mondo
+#' @returns phenos data.table with extra columns.
#'
-#' Add metadata from \href{https://mondo.monarchinitiative.org/}{MONDO},
-#' including:
-#' \itemize{
-#' \item{MONDO_ID: }{MONDO term ID.}
-#' \item{MONDO_name: }{MONDO term name.}
-#' \item{MONDO_definition: }{MONDO term definition.}
-#' }
-#' @param id_col ID column to map to MONDO IDs.
-#' @inheritParams add_disease
-#' @inheritParams data.table::merge.data.table
#' @export
-#' @importFrom data.table :=
+#' @importFrom KGExplorer map_mondo
#' @examples
-#' phenos <- example_phenos()
+#' phenos <- load_phenotype_to_genes(3)[seq(1000)]
#' phenos2 <- add_mondo(phenos = phenos)
add_mondo <- function(phenos,
- id_col = "disease_id",
- add_definitions = TRUE,
- all.x = TRUE,
- allow.cartesian = FALSE,
- verbose = TRUE){
-
- # devoptera::args2vars(add_mondo)
- MONDO_definition <- MONDO_name <- MONDO_ID <- NULL;
-
- if(!all(c("MONDO_ID","MONDO_name","MONDO_definition") %in% names(phenos))){
- phenos <- add_disease(phenos = phenos,
- all.x = all.x,
- allow.cartesian = allow.cartesian,
- add_definitions = add_definitions,
- include_mondo = FALSE,
- verbose = verbose)
- messager("Annotating phenos with MONDO metadata.",v=verbose)
- mondo <- get_mondo()
- dict <- mondo_dict(mondo = mondo)
-
- #### Assess missing values ####
- # sum(is.na(mondo$name))
- # sum(is.na(mondo$def))
- # xref_ids <- unique(names(unlist(mondo$xref)))
- # sum(!xref_ids %in% names(mondo$id))
- # sum(!xref_ids %in% names(mondo$name))
- # sum(!xref_ids %in% names(mondo$def))
- # names(mondo$name)[is.na(unname(mondo$name))]
- # sum(is.na(unname(mondo$name)))
- # names(unlist(mondo$xref))
+ input_col = "disease_id",
+ map_to = "hpo",
+ ...) {
- ### Add new columns ####
- phenos[,MONDO_ID:=dict[get(id_col)]]
- phenos[,MONDO_name:=mondo$name[MONDO_ID]]
- phenos[,MONDO_definition:=mondo$def[MONDO_ID]]
- report_missing(phenos = phenos,
- id_col = id_col,
- report_col = c("MONDO_ID","MONDO_name","MONDO_definition"),
- verbose = verbose)
- }
+ phenos[,(input_col):=gsub("^ORPHA","Orphanet",get(input_col))]
+ #### Map (skips is output_col already present) ####
+ phenos <- KGExplorer::map_mondo(dat = phenos,
+ input_col = input_col,
+ map_to = map_to,
+ ...)
+ phenos[,(input_col):=gsub("^Orphanet","ORPHA",get(input_col))]
return(phenos)
}
diff --git a/R/add_ndisease.R b/R/add_ndisease.R
index 96f8bd2..a2d7730 100644
--- a/R/add_ndisease.R
+++ b/R/add_ndisease.R
@@ -1,3 +1,4 @@
+#' @describeIn add_ add_
#' Add N diseases
#'
#' Annotate each HPO term with the total number of disease
diff --git a/R/add_omop.R b/R/add_omop.R
index 5a8ba9b..b580b0a 100644
--- a/R/add_omop.R
+++ b/R/add_omop.R
@@ -1,52 +1,50 @@
-#' Add MONDO
+#' @describeIn add_ add_
+#' Add OMOP
#'
#' Add metadata from \href{https://mondo.monarchinitiative.org/}{MONDO},
#' including:
#' \itemize{
-#' \item{MONDO_ID: }{MONDO term ID.}
-#' \item{MONDO_name: }{MONDO term name.}
-#' \item{MONDO_definition: }{MONDO term definition.}
+#' \item{mondo_id: }{MONDO term ID.}
+#' \item{mondo_name: }{MONDO term name.}
+#' \item{mondo_def: }{MONDO term definition.}
#' }
-#' @param id_col ID column to map to MONDO IDs.
+#' @param input_col ID column to map to MONDO IDs.
#' @param force_new Force a new query to the OARD API instead of
#' using pre-downloaded data.
-#' @inheritParams add_disease
-#' @inheritParams data.table::merge.data.table
+#'
#' @export
-#' @importFrom data.table :=
+#' @import data.table
#' @examples
#' phenos <- example_phenos()
#' phenos2 <- add_omop(phenos = phenos)
add_omop <- function(phenos,
- id_col = "hpo_id",
+ input_col = "hpo_id",
all.x = TRUE,
allow.cartesian = FALSE,
force_new = FALSE,
verbose = TRUE){
-
- # devoptera::args2vars(add_omop)
- if(!id_col %in% names(phenos)){
- stop("id_col not found in phenos.")
+ if(!input_col %in% names(phenos)){
+ stop("input_col not found in phenos.")
}
if(!all(c("OMOP_ID","OMOP_NAME") %in% names(phenos))){
messager("Annotating phenos with OMOP metadata.",v=verbose)
#### Select the correct map ####
- if(id_col=="hpo_id" || isTRUE(force_new)){
+ if(input_col=="hpo_id" || isTRUE(force_new)){
omop <- pkg_data("hpo_id_to_omop")
- } else if(id_col=="disease_id"){
+ } else if(input_col=="disease_id"){
omop <- pkg_data("disease_id_to_omop")
} else {
- omop <- oard_query_api(ids = phenos[[id_col]])
+ omop <- KGExplorer::query_oard(ids = phenos[[input_col]])
}
phenos2 <- data.table::merge.data.table(
phenos,
omop,
- by=id_col,
+ by=input_col,
all.x = all.x,
allow.cartesian = allow.cartesian,
)
- report_missing(phenos = phenos2,
- id_col = id_col,
+ report_missing(phenos2,
+ input_col = input_col,
report_col = c("OMOP_ID","OMOP_NAME"),
verbose = verbose)
return(phenos2)
diff --git a/R/add_onset.R b/R/add_onset.R
index e684c26..4dad09e 100644
--- a/R/add_onset.R
+++ b/R/add_onset.R
@@ -1,3 +1,4 @@
+#' @describeIn add_ add_
#' Add age of onset
#'
#' Add age of onset for each HPO ID.
@@ -45,19 +46,15 @@ add_onset <- function(phenos,
keep_onsets = NULL,
agg_by = NULL,
all.x = TRUE,
- allow.cartesian = FALSE,
- verbose = TRUE){
-
- # devoptera::args2vars(add_onset)
+ allow.cartesian = FALSE){
onset_latest <- onset_score <- onset_name <- NULL;
if(!any(c("onset","onset_name","onset_names","onset_score")
%in% names(phenos))){
- messager("Annotating phenos with onset.",v=verbose)
+ messager("Annotating phenos with onset.")
phenos <- add_disease(phenos = phenos,
all.x = all.x,
- allow.cartesian = allow.cartesian,
- verbose = verbose)
+ allow.cartesian = allow.cartesian)
hpo_onsets <- pkg_data("hpo_onsets")
dict <- hpo_dict(type = "onset")
hpo_onsets[,onset_score:=dict[onset_name]]
diff --git a/R/add_ont_lvl.R b/R/add_ont_lvl.R
index 1df0890..b080e9d 100644
--- a/R/add_ont_lvl.R
+++ b/R/add_ont_lvl.R
@@ -1,12 +1,12 @@
+#' @describeIn add_ add_
#' Add ontology level
#'
#' Add the relative ontology level for each HPO ID.
#' @param keep_ont_levels Only keep phenotypes at certain \emph{absolute}
#' ontology levels to keep.
-#' See \link[HPOExplorer]{add_ont_lvl} for details.
-#' @inheritParams get_ont_lvls
-#' @inheritParams make_network_object
-#' @inheritParams data.table::merge.data.table
+#' See \link{add_ont_lvl} for details.
+#' @inheritDotParams KGExplorer::get_ontology_levels
+#' @inheritParams KGExplorer::get_ontology_levels
#' @returns phenos data.table with extra column
#'
#' @export
@@ -16,35 +16,27 @@
#' phenos2 <- add_ont_lvl(phenos = phenos)
add_ont_lvl <- function(phenos,
hpo = get_hpo(),
- adjacency = NULL,
absolute = TRUE,
- exclude_top_lvl = TRUE,
keep_ont_levels = NULL,
- reverse = TRUE,
- verbose = TRUE){
-
+ ...){
ontLvl_geneCount_ratio <- geneCount <- hpo_id <- ontLvl <- tmp <- NULL;
col <- if(isTRUE(absolute)) "ontLvl" else "ontLvl_relative"
if(col %in% names(phenos)) return(phenos)
if("hpo_id" %in% names(phenos)){
- lvls_dict <- get_ont_lvls(terms = unique(phenos$hpo_id),
- hpo = hpo,
- adjacency = adjacency,
- absolute = absolute,
- reverse = reverse,
- exclude_top_lvl = exclude_top_lvl,
- verbose = verbose)
+ lvls_dict <- KGExplorer::get_ontology_levels(ont = hpo,
+ absolute = absolute,
+ ...)
#### Add the new column ####
phenos[,tmp:=lvls_dict[hpo_id]]
data.table::setnames(phenos,old = "tmp", new = col)
#### Compute gene ratio ####
if(all(c("ontLvl","geneCount") %in% names(phenos))){
- messager("Computing ontology level / gene count ratio.",v=verbose)
+ messager("Computing ontology level / gene count ratio.")
phenos[,ontLvl_geneCount_ratio:=(ontLvl/geneCount)]
}
} else {
- messager("hpo_id column not found. Cannot add ontology level.",v=verbose)
+ messager("hpo_id column not found. Cannot add ontology level.")
}
#### Filter ####
if(!is.null(keep_ont_levels)){
diff --git a/R/add_pheno_frequency.R b/R/add_pheno_frequency.R
index e73c832..14588e2 100644
--- a/R/add_pheno_frequency.R
+++ b/R/add_pheno_frequency.R
@@ -1,3 +1,4 @@
+#' @describeIn add_ add_
#' Add phenotype frequency
#'
#' Add phenotype-level frequency, i.e.
@@ -8,8 +9,6 @@
#' Include \code{NA} if you wish to retain phenotypes that
#' do not have any frequency data.
#' See \link[HPOExplorer]{add_pheno_frequency} for details.
-#' @inheritParams make_network_object
-#' @inheritParams data.table::merge.data.table
#' @returns phenos data.table with extra column
#'
#' @export
@@ -22,18 +21,15 @@
add_pheno_frequency <- function(phenos,
pheno_frequency_threshold = NULL,
all.x = TRUE,
- allow.cartesian = FALSE,
- verbose = TRUE){
- # devoptera::args2vars(add_pheno_frequency)
+ allow.cartesian = FALSE){
pheno_freq_mean <- NULL;
new_cols <- c("pheno_freq_min","pheno_freq_max","pheno_freq_mean")
if(!all(new_cols %in% names(phenos))){
- messager("Annotating phenotype frequencies.",v=verbose)
+ messager("Annotating phenotype frequencies.")
phenos <- add_disease(phenos = phenos,
all.x = all.x,
- allow.cartesian = allow.cartesian,
- verbose = verbose)
+ allow.cartesian = allow.cartesian)
#### Get precomputed phenotype-disease frequencies ####
hpo_frequencies <- pkg_data("hpo_frequencies")
# hpo_frequencies <- hpo_frequencies_agg(hpo_frequencies)
diff --git a/R/add_prevalence.R b/R/add_prevalence.R
index 8854db2..585ba05 100644
--- a/R/add_prevalence.R
+++ b/R/add_prevalence.R
@@ -1,31 +1,58 @@
+#' @describeIn add_ add_
#' Add prevalence
#'
#' Add a column containing the prevalence score for each disease ("disease_id")
#' or phenotype ("hpo_id").
#' @param method One of "orphanet" or "oard".
-#' @param id_col Name of the column containing the disease or phenotype IDs.
+#' @param input_col Name of the column containing the disease or phenotype IDs.
#' @param drop_na Whether to drop rows with missing prevalence data.
-#' @inheritParams make_network_object
-#' @returns phenos data.table with extra column
#'
#' @export
#' @examples
#' phenos <- example_phenos()
#' phenos2 <- add_prevalence(phenos = phenos)
add_prevalence <- function(phenos,
- id_col="disease_id",
+ input_col="disease_id",
drop_na=TRUE,
- verbose=TRUE,
method="orphanet"){
-
+ method <- match.arg(method)
+ #### Example phenos data
+ if(is.null(phenos)){
+ phenos <- make_phenos_dataframe(add_disease_data = TRUE,
+ include_mondo = FALSE)
+ } else {
+ phenos <- data.table::copy(phenos)
+ }
+ phenos <- add_disease(phenos)
#### From OARD ####
# res <- add_prevalence_oard(phenos$hpo_id[1:100])
#### From ORPHANET ####
if(method=="orphanet"){
- phenos2 <- add_prevalence_orphanet(phenos=phenos,
- id_col=id_col,
- drop_na=drop_na,
- verbose=verbose)
+ mean_prevalence <- NULL;
+ #### Add MONDO IDs ####
+ phenos <- add_mondo(phenos = phenos,
+ input_col = input_col)
+ #### Get Orphanet data ####
+ dprev <- KGExplorer:::get_prevalence_orphanet(agg_by=c("mondo_id",
+ "id","Name"))
+ phenos2 <- data.table::merge.data.table(
+ phenos,
+ dprev,
+ by="mondo_id",
+ all.x=TRUE
+ )[order(-mean_prevalence)]
+ #### Drop NAs ####
+ if(isTRUE(drop_na)) phenos2 <- phenos2[!is.na(mean_prevalence),]
+ #### Report missing ####
+ add_prevalence_report(phenos = phenos,
+ phenos2 = phenos2,
+ input_col = sort(
+ unique(c(input_col,
+ "hpo_id",
+ "disease_id",
+ "mondo_id")
+ )
+ ))
+ return(phenos2)
}
- return(phenos2)
}
diff --git a/R/add_prevalence_oard.R b/R/add_prevalence_oard.R
deleted file mode 100644
index 9d0484d..0000000
--- a/R/add_prevalence_oard.R
+++ /dev/null
@@ -1,10 +0,0 @@
-add_prevalence_oard <- function(ids){
-
- # ids <- unique(phenos$hpo_id)[1:100]
- res <- oard_query_api(ids=ids)
- res2 <- oard_query_api(ids=res$results$concept_id,
- dataset_id=1,
- endpoint="frequencies/singleConceptFreq",
- concept_prefix="concept_id=")
- return(res2)
-}
diff --git a/R/add_prevalence_orphanet.R b/R/add_prevalence_orphanet.R
deleted file mode 100644
index 6176906..0000000
--- a/R/add_prevalence_orphanet.R
+++ /dev/null
@@ -1,42 +0,0 @@
-add_prevalence_orphanet <- function(phenos=NULL,
- id_col="disease_id",
- drop_na=TRUE,
- verbose=TRUE){
- # devoptera::args2vars(add_prevalence_orphanet)
-
- mean_prevalence <- NULL;
- #### Example phenos data
- if(is.null(phenos)){
- phenos <- make_phenos_dataframe(add_disease_data = TRUE,
- include_mondo = FALSE,
- verbose = verbose)
- } else {
- phenos <- data.table::copy(phenos)
- }
- #### Add MONDO IDs ####
- phenos <- add_mondo(phenos = phenos,
- id_col = id_col,
- verbose = verbose)
- #### Get Orphanet data ####
- dprev <- get_orphanet_epidemiology(agg_by=c("MONDO_ID","id","Name"))
- phenos2 <- data.table::merge.data.table(
- phenos,
- dprev,
- by="MONDO_ID",
- all.x=TRUE
- )[order(-mean_prevalence)]
- #### Drop NAs ####
- if(isTRUE(drop_na)) phenos2[!is.na(mean_prevalence),]
- #### Report missing ####
- add_prevalence_report(phenos = phenos,
- phenos2 = phenos2,
- id_col = sort(
- unique(c(id_col,
- "hpo_id",
- "disease_id",
- "MONDO_ID")
- )
- ),
- verbose = verbose)
- return(phenos2)
-}
diff --git a/R/add_prevalence_report.R b/R/add_prevalence_report.R
index dd3e028..d8e7801 100644
--- a/R/add_prevalence_report.R
+++ b/R/add_prevalence_report.R
@@ -1,11 +1,11 @@
add_prevalence_report <- function(phenos,
phenos2,
- id_col,
+ input_col,
verbose=TRUE){
mean_prevalence <- NULL;
phenos2 <- data.table::copy(phenos2)
phenos2 <- phenos2[!is.na(mean_prevalence),]
- for(idc in id_col){
+ for(idc in input_col){
n_end <- length(unique(phenos2[[idc]]))
n_start <- length(unique(phenos[[idc]]))
messager(
diff --git a/R/add_severity.R b/R/add_severity.R
index 40db4d6..7f322db 100644
--- a/R/add_severity.R
+++ b/R/add_severity.R
@@ -1,3 +1,4 @@
+#' @describeIn add_ add_
#' Add HPO modifiers
#'
#' Annotate each HPO with modifier terms, including
@@ -28,21 +29,17 @@ add_severity <- function(phenos,
hpo = get_hpo(),
all.x = TRUE,
allow.cartesian = FALSE,
- severity_threshold = NULL,
- verbose = TRUE){
-
- # devoptera::args2vars(add_severity)
+ severity_threshold = NULL){
Severity_score <- NULL;
if(!all(c("modifier","modifier_name") %in% names(phenos))){
- messager("Annotating phenos with modifiers",v=verbose)
+ messager("Annotating phenos with modifiers")
phenos <- add_disease(phenos = phenos,
all.x = all.x,
- allow.cartesian = allow.cartesian,
- verbose = verbose)
+ allow.cartesian = allow.cartesian)
hpo_mod <- pkg_data("hpo_modifiers")
#### Aggregate to HPO level ####
- # hpo_agg <- hpo_modifiers_agg(dt = hpo_mod,
+ # hpo_agg <- hpo_modifiers_agg(dat = hpo_mod,
# by = by)
#### Add diseases ####
phenos <- data.table::merge.data.table(
diff --git a/R/add_tier.R b/R/add_tier.R
index 528d21f..cbbb7f1 100644
--- a/R/add_tier.R
+++ b/R/add_tier.R
@@ -1,3 +1,4 @@
+#' @describeIn add_ add_
#' Add severity Tiers
#'
#' Add severity Tier for each HPO ID, in accordance with the
diff --git a/R/adjacency_matrix.R b/R/adjacency_matrix.R
deleted file mode 100644
index c8b7369..0000000
--- a/R/adjacency_matrix.R
+++ /dev/null
@@ -1,55 +0,0 @@
-#' Adjacency matrix
-#'
-#' Create adjacency matrix of HPO child-parent relationships.
-#' @param terms Character vector of term IDs to include.
-#' @param remove_terms Character vector of term IDs to exclude.
-#' @param method Method to use to create the adjacency matrix.
-#' @inheritParams make_phenos_dataframe
-#' @inheritParams ontologyIndex::get_term_descendancy_matrix
-#' @returns adjacency matrix
-#'
-#' @export
-#' @importFrom ontologyIndex get_term_descendancy_matrix
-#' @examples
-#' terms <- get_hpo()$id[seq(1000)]
-#' adjacency <- adjacency_matrix(terms = terms)
-adjacency_matrix <- function(hpo = get_hpo(),
- terms = unique(hpo$id),
- remove_terms=grep(":",terms,
- invert = TRUE, value = TRUE),
- method = "HPOExplorer",
- verbose = TRUE) {
- #### Filter terms
- if(is.null(terms)){
- terms <- hpo$id
- }
- terms <- terms[!terms %in% remove_terms]
- #### Report
- messager("Creating adjacency matrix for",
- formatC(length(terms),big.mark = ","),"term(s).",
- v=verbose)
- if(tolower(method)=="ontologyindex"){
- #### ontologyIndex method ####
- ## Returns same object but not sure if it's the same in all conditions
- adjacency <- ontologyIndex::get_term_descendancy_matrix(ontology = hpo,
- terms = terms) *1
- } else {
- #### Custom script #####
- terms <- unique(terms)
- terms <- terms[terms %in% hpo$id]
- if(length(terms)==0) stop("0 valid HPO IDs (terms) provided.")
- size <- length(terms)
- adjacency <- matrix(nrow = size,
- ncol = size,
- dimnames = list(terms, terms))
- adjacency[is.na(adjacency)] <- 0
- for (id in terms) {
- children <- hpo$children[[id]]
- children <- children[children %in% terms]
- if(length(children)>0){
- adjacency[id, children] <- 1
- }
- }
- }
- return(adjacency)
-}
diff --git a/R/annotate_phenos.R b/R/annotate_phenos.R
index ac2eb46..33304d5 100644
--- a/R/annotate_phenos.R
+++ b/R/annotate_phenos.R
@@ -1,21 +1,18 @@
+#' @describeIn main main
#' Annotate phenotypes
#'
#' Annotate phenotypes \link[data.table]{data.table} without various types
#' of metadata.
-#' @inheritParams make_network_object
-#' @inheritParams make_phenos_dataframe
-#'
#' @export
#' @examples
#' phenos <- example_phenos()
#' phenos2 <- annotate_phenos(phenos)
annotate_phenos <- function(phenos,
hpo = get_hpo(),
- adjacency = NULL,
##### Phenotype metadata ####
add_ont_lvl_absolute = TRUE,
add_ont_lvl_relative = FALSE,
- add_info_contents = FALSE,
+ add_info_content = TRUE,
add_description = TRUE,
#### Disease/symptom metadata ####
add_disease_data = FALSE,
@@ -30,76 +27,58 @@ annotate_phenos <- function(phenos,
#### Extra #####
add_hoverboxes = FALSE,
columns = list_columns(),
- interactive = TRUE,
- verbose = TRUE){
+ interactive = TRUE){
#### Add ontology levels: absolute ####
if(isTRUE(add_ont_lvl_absolute)){
phenos <- add_ont_lvl(phenos = phenos,
hpo = hpo,
- absolute = TRUE,
- verbose = verbose)
+ absolute = TRUE)
}
#### Add ontology levels: relative ####
if(isTRUE(add_ont_lvl_relative)){
phenos <- add_ont_lvl(phenos = phenos,
hpo = hpo,
- adjacency = adjacency,
- absolute = FALSE,
- verbose = verbose)
+ absolute = FALSE)
}
#### Add info content ####
if(isTRUE(add_info_content)){
phenos <- add_info_content(phenos = phenos,
- hpo = hpo,
- verbose = verbose)
+ hpo = hpo)
}
- phenos <- add_hpo_definition(phenos = phenos,
- verbose = verbose)
- phenos <- add_ancestor(phenos = phenos,
- verbose = verbose)
+ phenos <- add_hpo_definition(phenos = phenos)
+ phenos <- add_ancestor(phenos = phenos)
if(isTRUE(add_ndiseases)){
- phenos <- add_ndisease(phenos = phenos,
- verbose = verbose)
+ phenos <- add_ndisease(phenos = phenos)
}
#### Add age of onset ####
if(isTRUE(add_onsets)){
- phenos <- add_onset(phenos = phenos,
- verbose = verbose)
+ phenos <- add_onset(phenos = phenos)
}
#### Add age of death ####
if(isTRUE(add_deaths)){
phenos <- add_death(phenos = phenos,
- allow.cartesian = TRUE,
- verbose = verbose)
+ allow.cartesian = TRUE)
}
#### Add Tiers ####
if(isTRUE(add_tiers)){
- phenos <- add_tier(phenos = phenos,
- verbose = verbose)
+ phenos <- add_tier(phenos = phenos)
}
#### Add Severities ####
if(isTRUE(add_severities)){
- phenos <- add_severity(phenos = phenos,
- verbose = verbose)
+ phenos <- add_severity(phenos = phenos)
}
#### Add phenotype-disease freqs ####
if(isTRUE(add_pheno_frequencies)){
- phenos <- add_pheno_frequency(phenos = phenos,
- verbose = verbose)
+ phenos <- add_pheno_frequency(phenos = phenos)
}
#### Add phenotype-disease freqs ####
if(isTRUE(add_disease_definitions)){
- phenos <- add_disease_definition(phenos = phenos,
- include_mondo = include_mondo,
- verbose = verbose)
+ phenos <- add_mondo(phenos = phenos)
}
#### Add hoverboxes ####
if(isTRUE(add_hoverboxes)){
- phenos <- make_hoverboxes(phenos = phenos,
- interactive = interactive,
- columns = columns,
- verbose = verbose)
+ phenos <- KGExplorer::add_hoverboxes(g=phenos)
}
return(phenos)
}
diff --git a/R/as_ascii.R b/R/as_ascii.R
index 4a44e4e..7e0a1b6 100644
--- a/R/as_ascii.R
+++ b/R/as_ascii.R
@@ -1,14 +1,14 @@
-as_ascii <- function(dt,
- cols=names(dt)){
- cols <- cols[cols %in% names(dt)]
+as_ascii <- function(dat,
+ cols=names(dat)){
+ cols <- cols[cols %in% names(dat)]
func <- function(v){
Encoding(v) <- "latin1"
iconv(v, "latin1", "UTF-8")
}
for(col in cols){
- if(is.character(dt[[col]])){
- dt[[col]] <- func(dt[[col]])
+ if(is.character(dat[[col]])){
+ dat[[col]] <- func(dat[[col]])
}
}
- return(dt)
+ return(dat)
}
diff --git a/R/clear_cache.R b/R/clear_cache.R
deleted file mode 100644
index 976c04c..0000000
--- a/R/clear_cache.R
+++ /dev/null
@@ -1,26 +0,0 @@
-#' Clear cache
-#'
-#' Remove all data cached by \pkg{HPOExplorer}.
-#' @param save_dir Path to cache directory.
-#' @param verbose Print messages.
-#' @inheritParams base::unlink
-#' @inheritDotParams base::unlink
-#' @returns Null.
-#'
-#' @export
-#' @importFrom tools R_user_dir
-#' @examples
-#' \dontrun{
-#' clear_cache()
-#' }
-clear_cache <- function(save_dir=tools::R_user_dir(package = "HPOExplorer",
- which = "cache"),
- recursive=TRUE,
- verbose=TRUE,
- ...
- ){
-
- f <- list.files(save_dir,recursive = recursive)
- messager("Clearing",length(f),"cached files from:",save_dir,v=verbose)
- unlink(x = save_dir,recursive = recursive,...)
-}
diff --git a/R/convert_ontology.R b/R/convert_ontology.R
deleted file mode 100644
index fbc93f4..0000000
--- a/R/convert_ontology.R
+++ /dev/null
@@ -1,99 +0,0 @@
-#' Convert ontology
-#'
-#' Convert an \link[ontologyIndex]{ontology_index} to
-#' a number of other useful formats.
-#' @param ont An \link[ontologyIndex]{ontology_index} object.
-#' @param to A character string specifying the format to convert to.
-#' @param as_sparse If TRUE, return a sparse matrix where possible.
-#' @inheritParams adjacency_matrix
-#' @returns An object of the specified format.
-#'
-#' @export
-#' @importFrom ontologyIndex get_term_descendancy_matrix
-#' @importFrom stats as.dist hclust cutree
-#' @examples
-#' ont <- get_hpo()
-#' terms <- head(ont$id,100)
-#' obj <- convert_ontology(ont=ont, terms=terms, to="igraph_dist_hclust")
-convert_ontology <- function(ont=get_hpo(),
- terms=unique(ont$id),
- remove_terms=grep(":",terms,
- invert = TRUE, value = TRUE),
- to=c("adjacency",
- "descendancy",
- "dist",
- "dist_hclust",
- "dist_hclust_dendrogram",
- "clusters",
- "igraph",
- "igraph_dist",
- "igraph_dist_hclust",
- "igraph_dist_hclust_dendrogram",
- "tidygraph",
- "data.frame",
- "data.table"),
- as_sparse=FALSE){
- # devoptera::args2vars(convert_ontology, reassign = TRUE)
- to <- match.arg(to)
- if(to=="tidygraph") requireNamespace("tidygraph")
- if(to=="igraph") requireNamespace("igraph")
- terms <- terms[!terms %in% remove_terms]
-
- if(to=="adjacency"){
- obj <- adjacency_matrix(hpo = ont,
- terms = terms,
- remove_terms = remove_terms)
- } else if(to=="descendancy"){
- obj <- ontologyIndex::get_term_descendancy_matrix(ontology = ont,
- terms = terms)
- } else if(to=="dist"){
- adj <- convert_ontology(ont, terms, remove_terms, to="adjacency")
- # obj <- stats::dist(adj) ### seems to take forever
- obj <- stats::as.dist(abs(adj-max(adj)))
- } else if(to=="dist_hclust"){
- d <- convert_ontology(ont, terms, remove_terms, to="dist")
- obj <- stats::hclust(d)
- } else if(to=="dist_hclust_dendrogram"){
- dh <- convert_ontology(ont, terms, remove_terms, to="dist_hclust")
- obj <- stats::as.dendrogram(dh)
- }else if(to=="clusters"){
- hc <- convert_ontology(ont, terms, remove_terms, to="hclust")
- obj <- stats::cutree(hc,k=10)
- } else if(to=="igraph"){
- adj <- convert_ontology(ont, terms, remove_terms, to="adjacency")
- obj <- igraph::graph_from_adjacency_matrix(adj)
- } else if (to=="igraph_dist"){
- g <- convert_ontology(ont, terms, remove_terms, to="igraph")
- obj <- igraph::distances(g)
- } else if(to=="igraph_dist_hclust"){
- gd <- convert_ontology(ont, terms, remove_terms, to="igraph_dist")
- if(any(is.infinite(gd))) gd[is.infinite(gd)] <- max(gd[!is.infinite(gd)])
- obj <- stats::hclust(stats::as.dist(gd))
- } else if(to=="igraph_dist_hclust_dendrogram"){
- gdh <- convert_ontology(ont, terms, remove_terms, to="igraph_dist_hclust")
- obj <- stats::as.dendrogram(gdh)
- } else if(to=="tidygraph"){
- g <- convert_ontology(ont, terms, remove_terms, to="igraph")
- obj <- tidygraph::as_tbl_graph(g)
- } else if(to=="data.frame"){
- obj <- as.data.frame(ont)
- if(!is.null(terms)) obj <- obj[terms,]
- if(!is.null(remove_terms)) obj <- obj[!obj$id %in% remove_terms,]
- } else if(to=="data.table"){
- df <- convert_ontology(ont, terms, remove_terms, to="data.frame")
- obj <- data.table::as.data.table(df)
- }else {
- stop("Unknown conversion type.")
- }
- #### Convert to sparse ####
- if(isTRUE(as_sparse)){
- if(methods::is(obj,"matrix")){
- obj <- Matrix::Matrix(adj, sparse=TRUE)
- }
- }
- ## Report
- messager("Converted ontology to:",to,
- if(as_sparse) paste0("(sparse)") else NULL
- )
- return(obj)
-}
diff --git a/R/data.R b/R/data.R
index 1831299..ea3c714 100644
--- a/R/data.R
+++ b/R/data.R
@@ -26,8 +26,7 @@
#' \code{
#' annot <- load_phenotype_to_genes(file = "phenotype.hpoa")
#' annot <- annot[onset!="",]
-#' annot$onset_name <- harmonise_phenotypes(phenotypes = annot$onset,
-#' as_hpo_ids = FALSE)
+#' annot$onset_name <- map_phenotypes(terms = annot$onset)
#' counts <- dplyr::group_by(annot, disease_id) |>
#' dplyr::summarise(hpo_ids=length(unique(hpo_id)),
#' onsets=length(unique(onset)))
@@ -46,10 +45,10 @@
#'
#' @description
#' HPO severity tiers automatically assigned using
-#' \link[HPOExplorer]{assign_tiers}.
+#' \link[HPOExplorer]{make_tiers}.
#' @source
#' \code{
-#' hpo_tiers_auto <- HPOExplorer:::assign_tiers(as_datatable=TRUE)
+#' hpo_tiers_auto <- HPOExplorer:::make_tiers(as_datatable=TRUE)
#' hpo_tiers_auto <- add_onset(phenos = hpo_tiers_auto)
#' #### Filter by onset criterion ####
#' ### Tier 1: Shortened life span: infancy
@@ -85,7 +84,7 @@
#' annot <- HPOExplorer::load_phenotype_to_genes(3)
#' annot <- annot[modifier!=""]
#' parse_mod <- function(x){
-#' unique(harmonise_phenotypes(strsplit(x,";")[[1]]))
+#' unique(map_phenotypes(strsplit(x,";")[[1]]))
#' }
#' annot <- annot[,modifier_name:=lapply(modifier,parse_mod)][modifier!="",]
#' annot <- annot[,.(modifier_name=unlist(modifier_name)),
@@ -119,7 +118,7 @@
#' \code{
#' annot <- load_phenotype_to_genes("phenotype.hpoa")
#' hpo_frequencies <- HPOExplorer:::parse_pheno_frequency(annot=annot)
-#' hpo_frequencies <- HPOExplorer:::as_ascii(dt=hpo_frequencies)
+#' hpo_frequencies <- HPOExplorer:::as_ascii(dat=hpo_frequencies)
#' data.table::setcolorder(hpo_frequencies,c("disease_id","hpo_id"))
#' usethis::use_data(hpo_frequencies, overwrite = TRUE)
#' }
@@ -134,9 +133,7 @@
#' Age of Death associated with each disease, and by extension, each phenotype.
#' @source
#' \code{
-#' terms <- ontologyIndex::get_descendants(ontology = get_hpo(),
-#' roots = "HP:0011420",
-#' exclude_roots = TRUE)
+#' terms <- simona::dag_offspring(get_hpo(), term = "HP:0011420")
#' aod <- lapply(stats::setNames(terms, terms),
#' function(hpo_id){
#' message("Extracting API data for",hpo_id)
@@ -144,8 +141,7 @@
#' }) |> data.table::rbindlist(fill = TRUE,
#' use.names = TRUE,
#' idcol = "AgeOfDeath")
-#' aod$AgeOfDeath_name <- harmonise_phenotypes(phenotypes = aod$AgeOfDeath,
-#' as_hpo_ids = FALSE)
+#' aod$AgeOfDeath_name <- map_phenotypes(terms = aod$AgeOfDeath)
#' #### Convert AoD to numeric scores ####
#' dict <- HPOExplorer:::hpo_dict(type="AgeOfDeath")
#' aod$AgeOfDeath_score <- dict[aod$AgeOfDeath_name]
@@ -167,14 +163,14 @@
#' @source
#' \code{
#' hpo <- get_hpo()
-#' ids <- unique(hpo$id)
+#' ids <- unique(hpo@terms)
#' hpo_id_to_omop <- oard_query_api(ids = ids, workers=10)
-#' id_col="hpo_id"
+#' input_col="hpo_id"
#' data.table::setnames(hpo_id_to_omop,
#' toupper(gsub("concept_","OMOP_",names(hpo_id_to_omop)))
#' )
-#' data.table::setnames(hpo_id_to_omop, "OMOP_CODE",id_col)
-#' hpo_id_to_omop <- hpo_id_to_omop[,c(id_col,"OMOP_ID","OMOP_NAME"),
+#' data.table::setnames(hpo_id_to_omop, "OMOP_CODE",input_col)
+#' hpo_id_to_omop <- hpo_id_to_omop[,c(input_col,"OMOP_ID","OMOP_NAME"),
#' with=FALSE]
#' usethis::use_data(hpo_id_to_omop, overwrite = TRUE)
#' }
@@ -190,7 +186,7 @@
#' @source
#' \code{
#' annot <- load_phenotype_to_genes(3)
-#' id_col <- "disease_id"
+#' input_col <- "disease_id"
#' ## NOTE: must keep batch_size=1 as the OARD API returns results only for
#' ## the IDs it can map. This leads to a mismatch between the input and output
#' ## which is exacerbated that the concept_id is automatically converted to
@@ -201,8 +197,8 @@
#' data.table::setnames(disease_id_to_omop,
#' toupper(gsub("concept_","OMOP_",names(disease_id_to_omop)))
#' )
-#' data.table::setnames(disease_id_to_omop,"QUERY",id_col)
-#' disease_id_to_omop <- disease_id_to_omop[,c(id_col,"OMOP_ID","OMOP_NAME"),
+#' data.table::setnames(disease_id_to_omop,"QUERY",input_col)
+#' disease_id_to_omop <- disease_id_to_omop[,c(input_col,"OMOP_ID","OMOP_NAME"),
#' with=FALSE]
#' usethis::use_data(disease_id_to_omop, overwrite = TRUE)
#' }
diff --git a/R/example_phenos.R b/R/example_phenos.R
index d3bf20b..3844718 100644
--- a/R/example_phenos.R
+++ b/R/example_phenos.R
@@ -1,3 +1,4 @@
+#' @describeIn main main
#' Example phenotypes dataframe
#'
#' Create a minimal example of a phenos dataframe.
@@ -8,6 +9,7 @@
#' @importFrom data.table data.table
#' @examples
#' phenos <- example_phenos()
-example_phenos <- function(i=seq(10)){
- data.table::data.table(hpo_id=grep("^HP:",get_hpo()$id, value = TRUE)[i])
+example_phenos <- function(i=seq(10),
+ hpo=get_hpo()){
+ data.table::data.table(hpo_id=grep("^HP:",hpo@terms, value = TRUE)[i])
}
diff --git a/R/filter_descendants.R b/R/filter_descendants.R
new file mode 100644
index 0000000..9929cdb
--- /dev/null
+++ b/R/filter_descendants.R
@@ -0,0 +1,39 @@
+#' @describeIn filter_ filter_
+#' Filter descendants
+#'
+#' Subset a \code{phenos} data.table to only
+#' descendants of an ancestor HPO ID term.
+#' @param ancestor Phenotype names (or HPO ID) of an ancestor in the
+#' \href{https://hpo.jax.org/}{Human Phenotype Ontology}.
+#' Only phenotypes that are descendants of this ancestor will be kept.
+#' Set to \code{NULL} (default) to skip this filtering step.
+#' @returns data.table of phenotypes, with additional columns:
+#' "ancestor", "ancestor_id"
+#'
+#' @export
+#' @examples
+#' phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay")
+#' phenos2 <- filter_descendants(phenos = phenos,
+#' ancestor = "Motor delay")
+filter_descendants <- function(phenos,
+ ancestor = NULL,
+ hpo = get_hpo()){
+
+ hpo_id <- NULL;
+
+ if(!is.null(ancestor)){
+ messager("Subsetting phenotypes to only ancestors of:",
+ paste(ancestor,collapse = ","))
+ ancestor_id <- map_phenotypes(terms = ancestor,
+ hpo = hpo,
+ to="id")
+ all_ids <- simona::dag_offspring(dag = hpo,
+ term=ancestor_id,
+ include_self = TRUE )
+ phenos <- phenos[hpo_id %in% all_ids,
+ ][,ancestor:=ancestor][,ancestor_id:=ancestor_id]
+ messager(formatC(nrow(phenos),big.mark = ","),
+ "associations remain after filtering.")
+ }
+ return(phenos)
+}
diff --git a/R/fix_hpo_ids.R b/R/fix_hpo_ids.R
index b62c489..7fe2498 100644
--- a/R/fix_hpo_ids.R
+++ b/R/fix_hpo_ids.R
@@ -2,39 +2,36 @@
#'
#' Fix missing HPO IDs by inferring their identity from the \code{hpo_name}
#' column using several methods.
-#' @param dt \link[data.table]{data.table}
+#' @param dat \link[data.table]{data.table}
#' containing a column named "hpo_name".
-#' @inheritParams make_phenos_dataframe
#' @returns data.table with the column "hpo_id"
#'
#' @keywords internal
-#' @importFrom data.table setcolorder setkey
-#' @importFrom stats setNames
-fix_hpo_ids <- function(dt,
+fix_hpo_ids <- function(dat,
phenotype_to_genes = load_phenotype_to_genes()) {
hpo_id <- NULL;
- dt$hpo_id <- harmonise_phenotypes(dt$hpo_name, as_hpo_ids = TRUE)
+ dat$hpo_id <- map_phenotypes(dat$hpo_name, to = "id")
dict <- stats::setNames(phenotype_to_genes$hpo_id,
phenotype_to_genes$hpo_name)
- dt[is.na(hpo_id)]$hpo_id <- dict[dt[is.na(hpo_id)]$hpo_name]
+ dat[is.na(hpo_id)]$hpo_id <- dict[dat[is.na(hpo_id)]$hpo_name]
#### Check if any IDs are still missing ####
- missing1 <- sum(is.na(dt$hpo_id))
+ missing1 <- sum(is.na(dat$hpo_id))
if(missing1>0){
stp <- paste(missing1,"HPO IDs are still missing.")
warning(stp)
}
- missing2 <- sum(!grepl("HP",dt$hpo_id))
+ missing2 <- sum(!grepl("HP",dat$hpo_id))
if(missing2>0){
stp <- paste(missing2,"HPO IDs are still missing.")
warning(stp)
}
- missing3 <- sum(is.na(dt$hpo_name))
+ missing3 <- sum(is.na(dat$hpo_name))
if(missing3>0){
stp <- paste(missing3,"hpo_names are still missing.")
warning(stp)
}
- data.table::setcolorder(dt,"hpo_id")
- data.table::setkey(dt,"hpo_id")
- return(dt)
+ data.table::setcolorder(dat,"hpo_id")
+ data.table::setkey(dat,"hpo_id")
+ return(dat)
}
diff --git a/R/get_data.R b/R/get_data.R
index cde6645..0549823 100644
--- a/R/get_data.R
+++ b/R/get_data.R
@@ -12,10 +12,7 @@
get_data <- function(file,
tag = "latest",
repo = "neurogenomics/HPOExplorer",
- save_dir = tools::R_user_dir(
- package = "HPOExplorer",
- which = "cache"
- ),
+ save_dir = KGExplorer::cache_dir(package = "HPOExplorer"),
add_version = FALSE,
overwrite = TRUE
){
diff --git a/R/get_gencc.R b/R/get_gencc.R
deleted file mode 100644
index 752bc34..0000000
--- a/R/get_gencc.R
+++ /dev/null
@@ -1,74 +0,0 @@
-#' Get GenCC
-#'
-#' Get phenotype-gene evidence score from the
-#' \href{https://thegencc.org/}{Gene Curation Coalition}.
-#' Data downloaded from \href{https://search.thegencc.org/download}{here}.\cr
-#' \emph{NOTE:} Due to licensing restrictions, a GenCC download does not
-#' include OMIM data. OMIM data can be accessed and downloaded through
-#' \href{https://omim.org/downloads/}{OMIM}.\cr
-#' \emph{NOTE:} GenCC does not currently have any systematic versioning.
-#' There for the \code{attr(obj,"version")} attribute is set to the date it was
-#' downloaded and cached by \link[HPOExplorer]{get_gencc}.
-#'
-#' @param dict A named vector of evidence score mappings.
-#' See \href{https://thegencc.org/faq.html#validity-termsdelphi-survey}{here}
-#' for more information.
-#' @inheritParams add_evidence
-#' @inheritParams get_data
-#' @returns \link[data.table]{data.table} with columns:
-#' \itemize{
-#' \item{"disease_id": }{Disease ID.}
-#' \item{"gene_symbol": }{Gene symbol.}
-#' \item{"evidence_score": }{Evidence score.}
-#' }
-#'
-#' @export
-#' @import data.table
-#' @importFrom utils download.file
-#' @examples
-#' d <- get_gencc()
-get_gencc <- function(agg_by=c("disease_id",
- "gene_symbol"),
- dict = c("Definitive"=6,# GENCC:100001
- "Strong"=5, # GENCC:100002
- "Moderate"=4, # GENCC:100003
- "Supportive"=3, # GENCC:100009
- "Limited"=2, # GENCC:100004
- "Disputed Evidence"=1, # GENCC:100005
- "Refuted Evidence"=0, # GENCC:100006
- "No Known Disease Relationship"=0 # GENCC:100008
- ),
- save_dir=tools::R_user_dir("HPOExplorer",
- which="cache"),
- verbose=TRUE){
- # devoptera::args2vars(get_gencc)
-
- disease_id <- disease_original_curie <- classification_title <-
- evidence_score <- NULL;
-
- messager("Gathering data from GenCC.",v=verbose)
- URL <- "https://search.thegencc.org/download/action/submissions-export-csv"
- f <- file.path(save_dir,"genCC_submission.csv")
- if(!file.exists(f)){
- dir.create(save_dir, showWarnings = FALSE, recursive = TRUE)
- utils::download.file(URL, f)
- } else {
- messager("Importing cached file.",v=verbose)
- }
- d <- data.table::fread(f)
- d[,disease_id:=gsub("^Orphanet","ORPHA",disease_original_curie)]
- #### Encode evidence numerically ####
- d[,evidence_score:=dict[classification_title]]
- #### Aggregate so that there's 1 entry/gene/disease ####
- if(!is.null(agg_by)){
- d <- d[,list(evidence_score_min=min(evidence_score, na.rm = TRUE),
- evidence_score_max=max(evidence_score, na.rm = TRUE),
- evidence_score_mean=mean(evidence_score, na.rm=TRUE)),
- by=agg_by]
- }
- #### Add version ####
- attr(d,"version") <- format(file.info(f)$mtime, "%Y-%m-%d")
- get_version(obj=d, verbose=verbose)
- #### Return ####
- return(d)
-}
diff --git a/R/get_gene_lengths.R b/R/get_gene_lengths.R
deleted file mode 100644
index 5492d2b..0000000
--- a/R/get_gene_lengths.R
+++ /dev/null
@@ -1,42 +0,0 @@
-get_gene_lengths <- function(gene_list,
- keep_seqnames = c(seq(22),"X","Y"),
- ensembl_version = 75,
- verbose = TRUE){
-
- requireNamespace("ensembldb")
- requireNamespace("AnnotationFilter")
-
- gene_list <- unique(gene_list)
- messager("Gathering metadata for",length(gene_list),"unique genes.",
- v=verbose)
- if(is.null(keep_seqnames)){
- keep_seqnames <- eval(formals(get_gene_lengths)$keep_seqnames)
- }
- #### Standardise seqnames ####
- keep_seqnames <- unique(
- c(tolower(keep_seqnames),
- paste0("chr",keep_seqnames),
- keep_seqnames)
- )
- #### Get gene reference database ####
- if(ensembl_version==75){
- #### Outdated ref ####
- requireNamespace("EnsDb.Hsapiens.v75")
- txdb <- EnsDb.Hsapiens.v75::EnsDb.Hsapiens.v75
- } else {
- #### Updated ref ####
- requireNamespace("AnnotationHub")
- hub <- AnnotationHub::AnnotationHub()
- q <- AnnotationHub::query(hub, c("EnsDb",ensembl_version,"sapiens"))
- txdb <- q[[length(q)]]
- }
- #### Set
- tx_gr <- ensembldb::genes(
- txdb,
- columns = c(ensembldb::listColumns(txdb, "gene"), "entrezid"),
- filter=AnnotationFilter::AnnotationFilterList(
- AnnotationFilter::SymbolFilter(value = gene_list),
- AnnotationFilter::SeqNameFilter(value = keep_seqnames)
- ))
- return(tx_gr)
-}
diff --git a/R/get_gene_lists.R b/R/get_gene_lists.R
index 043a9db..044d694 100644
--- a/R/get_gene_lists.R
+++ b/R/get_gene_lists.R
@@ -25,10 +25,9 @@ get_gene_lists <- function(phenotypes,
# devoptera::args2vars(get_gene_lists)
hpo_id <- NULL;
- hpo_ids <- harmonise_phenotypes(phenotypes = phenotypes,
- hpo = hpo,
- as_hpo_ids = TRUE,
- verbose = TRUE)
+ hpo_ids <- map_phenotypes(terms = phenotypes,
+ hpo = hpo,
+ to="id")
p2g <- phenotype_to_genes[hpo_id %in% hpo_ids]
#### Return at data.table #####
if(isFALSE(as_list)) return(p2g)
diff --git a/R/get_hpo.R b/R/get_hpo.R
index 59d0560..d0b41ee 100644
--- a/R/get_hpo.R
+++ b/R/get_hpo.R
@@ -1,6 +1,7 @@
+#' @describeIn get_ get_
#' Get Human Phenotype Ontology (HPO)
#'
-#' Updated version of Human Phenotype Ontology (HPO) from 2023-10-09.
+#' Updated version of Human Phenotype Ontology (HPO).
#' Created from the OBO files distributed by the HPO project's
#' \href{https://github.com/obophenotype/human-phenotype-ontology}{GitHub}.
#'
@@ -8,26 +9,28 @@
#' Note that the maximum ontology level depth in the 2016 version was 14,
#' whereas in the 2023 version the maximum ontology level depth is 16
#' (due to an expansion of the HPO).
-#' @source \href{https://bioportal.bioontology.org/ontologies/HP}{BioPortal}
-#' \code{
-#' hpo <- HPOExplorer:::make_ontology(upload_tag="latest")
-#' }
-#' @inheritParams get_data
-#' @inheritParams piggyback::pb_download
-#' @importFrom tools R_user_dir
-#' @returns \link[ontologyIndex]{ontology_index} object.
+#' @inheritParams KGExplorer::add_ancestors
+#' @inheritDotParams KGExplorer::get_ontology
+#' @returns \link[simona]{ontology_DAG} object.
#'
#' @export
+#' @import KGExplorer
#' @examples
#' hpo <- get_hpo()
-get_hpo <- function(save_dir=tools::R_user_dir("HPOExplorer",
- which="cache"),
- tag = "latest",
- overwrite = TRUE){
- #### ontologyIndex data outdated, from 2016. Don't use. ####
- # utils::data("hpo",package = "ontologyIndex")
- get_data(file = "hp-base.rds",
- tag = tag,
- overwrite = overwrite,
- save_dir = save_dir)
+get_hpo <- function(lvl,
+ add_ancestors = TRUE,
+ force_new = FALSE,
+ ...){
+ save_dir <- KGExplorer::cache_dir(package = "HPOExplorer")
+ file <- file.path(save_dir,"hpo.rds")
+ if(!file.exists(file) || isTRUE(force_new)){
+ ont <- KGExplorer::get_ontology(name = "hp",
+ add_ancestors = add_ancestors,
+ force_new = force_new,
+ ...)
+ saveRDS(ont,file)
+ } else {
+ ont <- readRDS(file)
+ }
+ return(ont)
}
diff --git a/R/get_hpo_id_direct.R b/R/get_hpo_id_direct.R
index 46a8702..f267289 100644
--- a/R/get_hpo_id_direct.R
+++ b/R/get_hpo_id_direct.R
@@ -1,16 +1,15 @@
+#' @describeIn get_ get_
#' Get HPO term ID direct
#'
#' Directly retrieves it from the HPO
-#' ontology object (found in the \pkg{ontologyIndex} package \code{data(hpo)}).
-#' @param phenotype One or more phenotype names in to get HPO IDs for.
-#' @inheritParams make_phenos_dataframe
+#' ontology object.
#' @returns The HPO ID(s) of phenotype(s)
#'
#' @export
#' @examples
-#' phenotype = "Phenotypic abnormality"
-#' pheno_abnormality_id <- get_hpo_id_direct(phenotype = phenotype)
-get_hpo_id_direct <- function(phenotype,
+#' term = "Phenotypic abnormality"
+#' pheno_abnormality_id <- get_hpo_id_direct(term = term)
+get_hpo_id_direct <- function(term,
hpo = get_hpo()) {
- return(hpo$id[which(phenotype == hpo$name)])
+ return(hpo@terms[which(term == hpo@elementMetadata$name)])
}
diff --git a/R/get_kg_monarch.R b/R/get_kg_monarch.R
deleted file mode 100644
index 7890f70..0000000
--- a/R/get_kg_monarch.R
+++ /dev/null
@@ -1,42 +0,0 @@
-#' Get knowledge graph: Monarch
-#'
-#'
-#' Option 1: Use the \href{https://api.monarchinitiative.org/api/}{biolink API}
-#' to efficiently extract specific subset of data
-#' from the Monarch server.
-#' Option 2: Import the entire knowledge graph from the
-#' \href{https://data.monarchinitiative.org/monarch-kg/latest/}{Monarch server}.
-#' @source \href{https://pubmed.ncbi.nlm.nih.gov/37707514/}{BioThings Explorer}
-get_kg_monarch <- function(){
- ### Option 1 ####
- #
- # con <- neo4r::neo4j_api$new(
- # url = "https://api.monarchinitiative.org/api/",
- # user = "neo4j",
- # password = "plop"
- # )
- # jsonlite::read_json("https://api.monarchinitiative.org/api/bioentity/MONDO:0012990")
- # neo2R::graphRequest(graph = ,
- # endpoint = "/bioentity/",
- # postText = "MONDO:0012990")
- # neo <- neo2R::graphRequest("https://data.monarchinitiative.org/monarch-kg-dev/latest/monarch-kg.neo4j.dump", )
- # top_targets <- add_mondo(top_targets)
-
- ### Option 2 ####
- d <- data.table::fread("https://data.monarchinitiative.org/monarch-kg/latest/monarch-kg-denormalized-edges.tsv.gz")
-
- sort(unique(d$subject_category))
-
- d[subject_category== "biolink:Cell",]$subject_label |>
- gsub(pattern="[0-9]+",replacement="") |>
- # stringr::str_trunc(20) |>
- unique() |> sort()
-
- d2 <- d[#(subject_category=="biolink:Disease" & object_category=="biolink:PhenotypicFeature") |
- (subject_category=="biolink:PhenotypicFeature" & object_category=="biolink:Gene") |
- (subject_category=="biolink:Disease" & object_category=="biolink:Gene")]
-
- #### Use evidence_count as connection weights ####
- round(table(d$evidence_count,useNA = "always")/nrow(d)*100,6)
-
-}
diff --git a/R/get_max_ont_lvl.R b/R/get_max_ont_lvl.R
deleted file mode 100644
index e30286b..0000000
--- a/R/get_max_ont_lvl.R
+++ /dev/null
@@ -1,15 +0,0 @@
-get_max_ont_lvl <- function(hpo = get_hpo(),
- absolute = TRUE,
- exclude_top_lvl=FALSE){
- #### Get the maximum ontology level ####
- max_lvl <- 0
- for(i in seq(length(hpo$id))){
- tmp <- get_ont_lvl(term = hpo$id[[i]],
- hpo = hpo,
- absolute = absolute)
- if(tmp > max_lvl) max_lvl <- tmp
- }
- #### Exclude the top level term "All" (HP:0000001) ####
- if(isTRUE(exclude_top_lvl)) max_lvl <- max_lvl-1
- return(max_lvl)
-}
diff --git a/R/get_metadata_omim.R b/R/get_metadata_omim.R
deleted file mode 100644
index d246227..0000000
--- a/R/get_metadata_omim.R
+++ /dev/null
@@ -1,20 +0,0 @@
-get_metadata_omim <- function(save_dir = file.path(
- tools::R_user_dir("HPOExplorer",
- which="cache")),
- verbose = TRUE){
- Class.ID <- id <- NULL;
-
- messager("Importing OMIM metadata.",v=verbose)
- f <- file.path(save_dir,"OMIM.csv.gz")
- if(!file.exists(f)){
- dir.create(save_dir, showWarnings = FALSE, recursive = TRUE)
- utils::download.file(paste0(
- "https://data.bioontology.org/ontologies/OMIM/download?",
- "apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb&download_format=csv"
- ), f)
- }
- meta <- data.table::fread(f)
- colnames(meta) <- gsub(" ",".",colnames(meta))
- meta[,id:=paste0("OMIM:",basename(Class.ID))]
- return(meta)
-}
diff --git a/R/get_metadata_orphanet.R b/R/get_metadata_orphanet.R
deleted file mode 100644
index 6cb59ae..0000000
--- a/R/get_metadata_orphanet.R
+++ /dev/null
@@ -1,21 +0,0 @@
-get_metadata_orphanet <- function(save_dir = file.path(
- tools::R_user_dir("HPOExplorer",
- which="cache")),
- verbose = TRUE){
- Class.ID <- id <- NULL;
-
- options(download.file.method = "curl")
- messager("Importing Orphanet metadata.",v=verbose)
- f <- file.path(save_dir,"ORDO.csv.gz")
- if(!file.exists(f)){
- dir.create(save_dir, showWarnings = FALSE, recursive = TRUE)
- utils::download.file(paste0(
- "https://data.bioontology.org/ontologies/ORDO/download?",
- "apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb&download_format=csv"
- ), f)
- }
- meta <- data.table::fread(f)
- colnames(meta) <- gsub(" ",".",colnames(meta))
- meta[,id:=gsub("Orphanet_","ORPHA:",basename(Class.ID))]
- return(meta)
-}
diff --git a/R/get_monarch.R b/R/get_monarch.R
deleted file mode 100644
index 6d84282..0000000
--- a/R/get_monarch.R
+++ /dev/null
@@ -1,58 +0,0 @@
-#' Get Monarch
-#'
-#' Get key datasets from the Monarch Initiative server.
-#' @param file Short name of the file to retrieve.
-#' \itemize{
-#' \item{"gene_to_gene"}{Gene-to-gene mappings across species orthologous.}
-#' \item{"phenotype_to_gene"}{Phenotype-to-gene mappings for multiple species.}
-#' \item{"disease_to_model"}{Disease-to-model mappings for multiple
-#' model species.}
-#' \item{"phenotype_to_phenotype"}{Phenotype-to-phenotype mappings across
-#' species homologs.}
-#' }
-#' @param file_map Mapping between short name and full name of each file.
-#' @param domain Monarch Initiative server domain.
-#' @inheritDotParams data.table::fread
-#' @returns \link[data.table]{data.table}
-#'
-#' @export
-#' @examples
-#' dat <- get_monarch()
-get_monarch <- function(file=c("gene_to_gene",
- "phenotype_to_gene",
- "disease_to_model",
- "phenotype_to_phenotype"
- ),
- file_map=list(
- ## Gene-gene homology across 12 species
- gene_to_gene=
- paste0(
- "monarch-kg-dev/latest/tsv/all_associations/",
- "gene_to_gene_homology_association.all.tsv.gz"
- ),
- ## Gene-phenotype associations across 8 species
- phenotype_to_gene=
- paste0(
- "monarch-kg-dev/latest/tsv/all_associations/",
- "gene_to_phenotypic_feature_association.all.tsv.gz"
- ),
- disease_to_model=
- paste0(
- "latest/tsv/model_associations/",
- "model_disease.all.tsv.gz"
- ),
- phenotype_to_phenotype=
- paste0(
- "upheno2/current/upheno-release/all/",
- "upheno_mapping_all.csv"
- )
- ),
- domain="https://data.monarchinitiative.org",
- ...
- ){
-
- file <- match.arg(file)
- URL <- paste(domain,file_map[[file]], sep="/")
- dat <- data.table::fread(URL,...)
- return(dat)
-}
diff --git a/R/get_mondo.R b/R/get_mondo.R
deleted file mode 100644
index 33703a7..0000000
--- a/R/get_mondo.R
+++ /dev/null
@@ -1,28 +0,0 @@
-#' Get Mondo Disease Ontology
-#'
-#' Import \href{https://mondo.monarchinitiative.org/}{Mondo} as an R object.
-#' @source
-#' \code{
-#' mondo <- HPOExplorer:::make_ontology(file="mondo-base.obo",
-#' repo="monarch-initiative/mondo",
-#' upload_tag="latest")
-#' }
-#' @inheritParams get_data
-#' @inheritParams piggyback::pb_download
-#' @importFrom tools R_user_dir
-#' @returns \link[ontologyIndex]{ontology_index}
-#'
-#' @export
-#' @examples
-#' mondo <- get_mondo()
-get_mondo <- function(save_dir=tools::R_user_dir("HPOExplorer",
- which="cache"),
- tag = "latest",
- overwrite = TRUE){
- #### ontoProc data outdated, from 2021 Don't use. ####
- # mondo <- ontoProc::getMondoOnto()
- get_data(file = "mondo-base.rds",
- tag = tag,
- overwrite = overwrite,
- save_dir = save_dir)
-}
diff --git a/R/get_ont_lvl.R b/R/get_ont_lvl.R
deleted file mode 100644
index e7adaf0..0000000
--- a/R/get_ont_lvl.R
+++ /dev/null
@@ -1,73 +0,0 @@
-#' Get HPO term level
-#'
-#' In this function, ontology level refers to the number of generations of
-#' sub-phenotypes a term has below it in the HPO DAG. For example, the root of
-#' the HPO is term "HP:0000001", the longest path to a term with no child terms
-#' is 14. In other words there are 14 generations of "is-a" relationships below
-#' HP:0000001 and it is therefore at ontology level 14. A term with no
-#' sub-phenotypes below it (a leaf node), is at ontology level 0.
-#' @inheritParams make_phenos_dataframe
-#' @inheritParams make_network_object
-#' @inheritParams adjacency_matrix
-#' @returns Ontology level of HPO ID.
-#'
-#' @keywords internal
-#' @source
-#' \code{
-#' lvl_absolute <- get_ont_lvl(term = "HP:0000001", absolute=TRUE)
-#' lvl_relative <- get_ont_lvl(term = "HP:0000001", absolute=FALSE)
-#' }
-get_ont_lvl <- function(term,
- hpo = get_hpo(),
- adjacency = NULL,
- absolute = TRUE,
- verbose = FALSE) {
-
- messager("Getting ontology level for:", paste(term,collapse = ", "),
- v=verbose)
- #### Children method ####
- if(isTRUE(absolute)){
- children <- unique(
- setdiff(unlist(hpo$children[term]),
- term)
- )
- if (length(children) == 0) {
- return(0)
- } else {
- return(1 + get_ont_lvl(term = children,
- hpo = hpo,
- absolute = absolute,
- verbose = verbose)) #<- recursion..
- }
- } else {
- #### Parents method ####
- if(is.null(adjacency)){
- stp <- "Must supply adjacency when absolute=FALSE."
- stop(stp)
- }
- phenotypes <- rownames(adjacency)
- pos_parents <- hpo$parents[term]
- paths <- lapply(phenotypes, function(p){
- if (adjacency[p, term] == 1) {
- if (p %in% pos_parents) {
- 1 + get_ont_lvl(term = p,
- adjacency = adjacency,
- absolute = absolute,
- hpo = hpo) # <- recursion
- }
- }
- }) |> unlist()
- #### Check count ####
- if (length(paths) == 0) {
- return(0)
- } else {
- parents <- 0
- for (i in seq(length(paths))) {
- if (paths[[i]] > parents) {
- parents <- paths[[i]]
- }
- }
- }
- return(parents)
- }
-}
diff --git a/R/get_ont_lvls.R b/R/get_ont_lvls.R
deleted file mode 100644
index bfd2e9d..0000000
--- a/R/get_ont_lvls.R
+++ /dev/null
@@ -1,77 +0,0 @@
-#' Get ontology level for HPO terms
-#'
-#' For a given set of HPO terms, get their level
-#' within the hierarchically organised HPO ontology.
-#' Ontology level can be computed either absolute mode (\code{absolute=TRUE})
-#' where the entire ontology is considered when assigning levels, or
-#' relative mode (\code{absolute=FALSE}) where only a subset of the ontology
-#' that is connected to a given term is considered when assigning levels.
-#' Relative mode can be helpful when trying to make plot where nodes are
-#' scaled to the ontology level.
-#' @param absolute Make the levels absolute in the sense that they consider
-#' the entire HPO ontology (\code{TRUE}).
-#' Otherwise, levels will be relative to only the HPO terms that are in
-#' the provided subset of \code{terms} AND are directly adjacent (connected)
-#' to a given cluster of terms (\code{FALSE}).
-#' @param reverse If \code{TRUE}, ontology
-#' level numbers with be revered such that the level of the parent terms
-#' are larger than the child terms.
-#' @param exclude_top_lvl Exclude the top level term of the HPO
-#' (i.e. "All" (HP:0000001)) when computing \emph{absolute} levels.
-#' This argument is not used when computing \emph{relative} levels.
-#' @inheritParams make_phenos_dataframe
-#' @inheritParams make_network_object
-#' @inheritParams adjacency_matrix
-#' @returns A named vector of relative ontology level,
-#' where names are HPO Ids and
-#' value is relative ontology level.
-#'
-#' @export
-#' @importFrom stats setNames
-#' @examples
-#' terms <- ontologyIndex::get_descendants(ontology = get_hpo(),
-#' roots = "HP:0000002")
-#' lvls <- get_ont_lvls(terms = terms)
-#' lvls_abs <- get_ont_lvls(terms = terms, absolute=TRUE)
-get_ont_lvls <- function(terms,
- hpo = get_hpo(),
- adjacency = NULL,
- absolute = TRUE,
- exclude_top_lvl=TRUE,
- reverse = TRUE,
- verbose = TRUE) {
-
- terms <- unique(terms)
- messager("Getting",if(absolute)"absolute"else"relative",
- "ontology level for",
- formatC(length(terms),big.mark = ","),"HPO IDs.", v=verbose)
- #### Create adjacency matrix if needed ####
- if(isFALSE(absolute) &
- is.null(adjacency)){
- adjacency <- adjacency_matrix(terms = terms,
- hpo = hpo,
- verbose = verbose)
- }
- #### Get ontology levle for each HPO term ####
- hierarchy <- lapply(stats::setNames(terms,
- terms),
- function(term){
- get_ont_lvl(term = term,
- hpo = hpo,
- adjacency = adjacency,
- absolute = absolute,
- verbose = FALSE)
- }) |> unlist()
- #### Reverse levels ####
- if (isTRUE(reverse) &&
- isFALSE(absolute)) {
- hierarchy <- max(hierarchy) - hierarchy
- }
- if(isFALSE(reverse) &&
- isTRUE(absolute)){
- max_lvl <- get_max_ont_lvl(hpo = hpo,
- exclude_top_lvl = exclude_top_lvl)
- hierarchy <- max_lvl- hierarchy
- }
- return(hierarchy)
-}
diff --git a/R/get_ont_lvls2.R b/R/get_ont_lvls2.R
deleted file mode 100644
index db38514..0000000
--- a/R/get_ont_lvls2.R
+++ /dev/null
@@ -1,49 +0,0 @@
-#' Get relative ontology level for multiple HPO terms
-#'
-#' This calls the \code{get_ont_lvl} function on all phenotypes in a
-#' subset of the HPO. The subset chosen when creating the adjacency from the main
-#' adjacency matrix of all phenotypes. So, the phenotypes can be found in the
-#' row and column names of adjacency.
-#' @param reverse A boolean, if TRUE it will reverse the ontology
-#' level numbers so that
-#' the parent terms are larger than the child terms.
-#' @param absolute Make the levels absolute in the sense that they consider
-#' the entire HPO ontology (\code{TRUE}).
-#' Otherwise, levels will be relative to only the subset \code{terms} provided
-#' (\code{FALSE}).
-#' @inheritParams make_phenos_dataframe
-#' @inheritParams make_network_object
-#' @inheritParams adjacency_matrix
-#' @returns A named vector of relative ontology level,
-#' where names are HPO Ids and
-#' value is relative ontology level.
-#'
-#' @export
-#' @importFrom stats setNames
-#' @examples
-#' terms <- ontologyIndex::get_descendants(ontology = get_hpo(),
-#' roots = "HP:0000002")
-#' lvls <- get_ont_lvls2(terms = terms)
-get_ont_lvls2 <- function(terms,
- hpo = get_hpo(),
- adjacency =
- adjacency_matrix(terms = terms,
- hpo = hpo),
- absolute = FALSE,
- reverse = TRUE,
- verbose = TRUE) {
-
- messager("Getting ontology level for",
- formatC(length(terms),big.mark = ","),"HPO IDs.", v=verbose)
- # if(isTRUE(absolute)){
- # adjacency <- adjacency_matrix(hpo = hpo,
- # verbose = verbose)
- # }
-
-
-
- # if (isTRUE(reverse)) {
- # hierarchy <- max(hierarchy) - hierarchy
- # }
- # return(hierarchy)
-}
diff --git a/R/get_orphanet_epidemiology.R b/R/get_orphanet_epidemiology.R
deleted file mode 100644
index e37fd2c..0000000
--- a/R/get_orphanet_epidemiology.R
+++ /dev/null
@@ -1,79 +0,0 @@
-get_orphanet_epidemiology <- function(agg_by=c("MONDO_ID","id","Name"),
- include_mondo=TRUE){
- # https://www.orphadata.com/epidemiology/
- # tmp <- tempfile(fileext = ".xml")
- # download.file("https://www.orphadata.com/data/xml/en_product9_prev.xml",
- # destfile = tmp)
- # xml <- rvest::read_html(tmp)
-
- # xml <- xml2::read_xml(tmp)
- # xmlconvert::xml_to_df(xml, records.xpath = "/html")
- # xmls <- xml |>
- # rvest::html_nodes("Disorder") |>
- # # xml2::xml_find_all(".//Disorder") |>
- # xml2::as_list()# |>
- # tibble::as_tibble(xmls[[1]]) |>
- # tidyr::unnest_longer(col=c(OrphaCode,ExpertLink,
- # Name,DisorderType,DisorderGroup), simplify = TRUE) |>
- # tidyr::unnest_wider(col=PrevalenceList) |>
- # tidyr::unnest_longer(col = c(Source,PrevalenceType,PrevalenceQualification,ValMoy))
- #
- #
- # nodes <- xml2::xml_find_all(xml, ".//Disorder")
- # ## Preview structure
- # nodes[1]|> xml2::html_structure()
- # # xml2::xml_path()|>
- # # xml2::xml_text()
- #
- # ### Convert each node to a flattened data.table
- # xml2::xml_find_all(nodes[1:10], ".//Prevalence") |>
- # xml2::as_list() |>
- # purrr::flatten() |>
- # data.table::as.data.table()# |>
- # data.table::transpose()
-
-
- mean_prevalence <- Prevalence.ValMoy <- prevalence_denominator <- MONDO_ID <-
- NULL;
- ## Open in Excel and convert to CSV first ##
- path <- system.file("extdata", "orphanet_epidemiology.csv.gz",
- package = "HPOExplorer")
- d <- data.table::fread(path)
- #### Fix column names ####
- names(d) <- trimws(whitespace = "[.]",
- gsub("[.]+",".",
- gsub("/|#",".",
- gsub(paste("/DisorderList",
- "/Disorder",
- "/PrevalenceList",
- sep = "|"),
- "",
- names(d)
- )
- )
- )
- ) |> `names<-`(names(d))
- #### Prepare numeric prevalence data ####
- d[,prevalence_numerator:=`Prevalence.ValMoy`]
- d$prevalence_denominator <- gsub(
- " +","",
- stringr::str_split(d$Prevalence.PrevalenceClass.Name," / ",
- simplify = TRUE)[,2]
- ) |> as.numeric()
- d[,prevalence:=prevalence_numerator/prevalence_denominator*100]
-
- #### Add MONDO ID ####
- if(isTRUE(include_mondo)){
- d[,MONDO_ID:=mondo_dict(ids = id)]
- }
-
- if(!is.null(agg_by)){
- ## Compute mean prevalence
- dprev <- d[,list(n=.N, mean_prevalence=mean(prevalence, na.rm=TRUE)),
- by=agg_by #"Prevalence.PrevalenceType.Name"
- ][order(-mean_prevalence)]
- return(dprev)
- } else {
- return(d)
- }
-}
diff --git a/R/get_term_definition_api.R b/R/get_term_definition_api.R
index d90d767..f552d71 100644
--- a/R/get_term_definition_api.R
+++ b/R/get_term_definition_api.R
@@ -4,7 +4,6 @@
#' HPO term. If a \code{line_length} \> 0 is passed to the function, it will add
#' newlines every nth word. This can be useful when displaying the description
#' in plots with limited space.
-#' @param term One or more HPO IDs.
#' @returns Character vector of definitions.
#' @keywords internal
get_term_definition_api <- function(term,
diff --git a/R/get_term_definition_data.R b/R/get_term_definition_data.R
index df27352..b243652 100644
--- a/R/get_term_definition_data.R
+++ b/R/get_term_definition_data.R
@@ -1,7 +1,9 @@
get_term_definition_data <- function(term,
line_length,
hpo = get_hpo()){
- definitions <- hpo$def[term]
+ dict <- stats::setNames(hpo@elementMetadata$definition,
+ hpo@elementMetadata$id)
+ definitions <- dict[term]
if (line_length > 0) {
definitions <- substr(x=definitions, start=0, stop=line_length)
}
diff --git a/R/get_upheno.R b/R/get_upheno.R
deleted file mode 100644
index 4d29dad..0000000
--- a/R/get_upheno.R
+++ /dev/null
@@ -1,68 +0,0 @@
-#' Get UPHENO
-#'
-#' Get data from the \href{https://github.com/obophenotype/upheno}{
-#' Unified Phenotype Ontology (UPHENO)}.
-#'
-#' @param file Can be one of the following:
-#' \itemize{
-#' \item{"ontology"}{Creates an \link[ontologyIndex]{ontologyIndex} R object by
-#' importing the OBO file directly from the official
-#' \href{https://github.com/obophenotype/upheno}{UPHENO GitHub repository}.}
-#' \item{"bestmatches"}{Returns a merged table with the best matches between
-#' human and non-human homologous phenotypes (from multiple species).
-#' Distributed by the official
-#' \href{https://github.com/obophenotype/upheno/tree/master/mappings}{
-#' UPHENO GitHub repository}.}
-#' \item{"upheno_mapping"}{Return a merged table with matches bteween human
-#' and non-human homologous phenotypes (from multiple species).
-#' Distributed by the
-#' \href{https://data.monarchinitiative.org/upheno2/current/upheno-release/all/index.html}{
-#' Monarch Initiative server}.}
-#' }
-#' @import data.table
-#' @importFrom stringr str_split
-#' @returns \link[ontologyIndex]{ontologyIndex} or
-#' \link[data.table]{data.table}.
-#'
-#' @export
-#' @examples
-#' upheno <- get_upheno()
-get_upheno <- function(file=c("ontology",
- "bestmatches",
- "upheno_mapping")){
- file <- match.arg(file)
- if(file=="ontology"){
- requireNamespace("ontologyIndex")
- upheno <- ontologyIndex::get_OBO(
- "https://github.com/obophenotype/upheno-dev/raw/master/upheno.obo",
- extract_tags = "everything")
- return(upheno)
- }
- if(file=="bestmatches"){
- #### Fuzzy query ####
- ## Only between HPO and 3 ontologies
- base_url <- "http://purl.obolibrary.org/obo/upheno/mappings/"
- URLs <- paste0(base_url,
- c("hp-to-zp-bestmatches.tsv",
- "hp-to-mp-bestmatches.tsv",
- "hp-to-wbphenotype-bestmatches.tsv"))
- pheno_map <- lapply(URLs, function(x){
- data.table::fread(x)
- }) |> `names<-`(gsub("-bestmatches.tsv","",basename(URLs))) |>
- data.table::rbindlist(idcol = "map", fill = TRUE) |>
- `names<-`(c("map","id1","label1","id2",
- "label2","equivalence_score","subclass_score"))
- pheno_map$db2 <- stringr::str_split(pheno_map$id2,":", simplify = TRUE)[,1]
- return(pheno_map)
- }
- if (file=="upheno_mapping"){
- id1 <- id2 <- p1 <- p2 <- NULL;
- pheno_map <- get_monarch("phenotype_to_phenotype")
- pheno_map[,id1:=gsub("_",":",basename(p1))
- ][,id2:=gsub("_",":",basename(p2))]
- pheno_map$db1 <- stringr::str_split(pheno_map$id1,":", simplify = TRUE)[,1]
- pheno_map$db2 <- stringr::str_split(pheno_map$id2,":", simplify = TRUE)[,1]
- data.table::setkeyv(pheno_map,"id1")
- return(pheno_map)
- }
-}
diff --git a/R/get_version.R b/R/get_version.R
deleted file mode 100644
index f23d222..0000000
--- a/R/get_version.R
+++ /dev/null
@@ -1,34 +0,0 @@
-#' Get version
-#'
-#' Extract the precise version of the HPO release that the data object
-#' was built from. All HPO Releases can be found at the official
-#' \href{https://github.com/obophenotype/human-phenotype-ontology/releases}{
-#' HPO GitHub Releases page}.
-#' @param obj An object from
-#' \link[HPOExplorer]{get_hpo} or
-#' \link[HPOExplorer]{load_phenotype_to_genes}.
-#' @param verbose Print messages.
-#' @param return_version Return the version as a character string.
-#' @returns HPO Release version a character string.
-#'
-#' @export
-#' @examples
-#' obj <- get_hpo()
-#' get_version(obj=obj)
-get_version <- function(obj,
- return_version = FALSE,
- verbose = TRUE){
- ## Extract
- if(methods::is(obj,"ontology_index")){
- x <- grep("data-version:",attr(obj,"version"),value=TRUE)
- v <- paste0("v",
- trimws(gsub("data-version:|hp/releases/|/hp-base.owl","",x))
- )
- } else {
- v <- attr(obj,"version")
- }
- ## Print
- messager("+ Version:",v,v=verbose)
- ## Return
- if(isTRUE(return_version)) return(v)
-}
diff --git a/R/gpt_annot_check.R b/R/gpt_annot_check.R
index d52a6e0..939c917 100644
--- a/R/gpt_annot_check.R
+++ b/R/gpt_annot_check.R
@@ -27,8 +27,8 @@ gpt_annot_check <- function(annot = gpt_annot_read(),
# prior_ids <- unique(HPOExplorer::hpo_modifiers$hpo_id)
# new_ids <- unique(annot$hpo_id)
# length(new_ids)/length(prior_ids)
- # length(prior_ids)/length(hpo$id)
- # length(new_ids)/length(hpo$id)
+ # length(prior_ids)/length(hpo@terms)
+ # length(new_ids)/length(hpo@terms)
#### Check annotation consistency ####
nm <- names(annot)[!grepl("hpo_name|justification|hpo_id",names(annot),
ignore.case = TRUE)]
diff --git a/R/harmonise_phenotypes.R b/R/harmonise_phenotypes.R
deleted file mode 100644
index b3f6d30..0000000
--- a/R/harmonise_phenotypes.R
+++ /dev/null
@@ -1,83 +0,0 @@
-#' Harmonise phenotypes
-#'
-#' Harmonise a mixed vector of phenotype names (e.g. "Focal motor seizure")
-#' and HPO IDs (e.g. c("HP:0000002","HP:0000003")).
-#' @param phenotypes A character vector of phenotype names and/or HPO IDs.
-#' @param as_hpo_ids Return all \code{phenotypes} as HPO IDs (\code{TRUE}),
-#' or phenotype names (\code{FALSE}).
-#' @param ignore_case Ignore casing when searching for matches.
-#' @param keep_order Return a named list of the same length and order
-#' as \code{phenotypes}.
-#' If \code{FALSE}, return a named list of only the unique \code{phenotypes},
-#' sometimes in a different order.
-#' @param validate Remove any phenotype names or IDs that cannot be found in the
-#' \code{hpo}.
-#' @param invert Invert the keys/values of the dictionary,
-#' such that the key becomes the values (and vice versa).
-#' @param verbose Print messages.
-#' @inheritParams make_phenos_dataframe
-#' @returns Character vector
-#'
-#' @export
-#' @importFrom stats setNames
-#' @examples
-#' phenotypes <- c("Focal motor seizure","HP:0000002","HP:0000003")
-#' #### As phenotype names ####
-#' pheno_names <- harmonise_phenotypes(phenotypes=phenotypes)
-#' #### As HPO IDs ####
-#' pheno_ids <- harmonise_phenotypes(phenotypes=phenotypes, as_hpo_ids=TRUE)
-harmonise_phenotypes <- function(phenotypes,
- hpo = get_hpo(),
- as_hpo_ids = FALSE,
- validate = TRUE,
- ignore_case = TRUE,
- keep_order = TRUE,
- invert = FALSE,
- verbose = TRUE){
-
- # devoptera::args2vars(harmonise_phenotypes)
- phenotypes_og <- phenotypes
- phenotypes <- unique(phenotypes)
- hpo_ids <- grep("^HP:|^HP_",phenotypes,
- value = TRUE,
- ignore.case = ignore_case)
- ptypes <- phenotypes[!phenotypes %in% hpo_ids]
- #### Validate phenotypes ####
- if(isTRUE(as_hpo_ids)){
- messager("Translating all phenotypes to HPO IDs.",v=verbose)
- out <- stats::setNames(hpo_ids,hpo_ids)
- if(length(ptypes)>0) {
- id_dict <- stats::setNames(names(hpo$name),
- unname(hpo$name))
- out <- c(out,id_dict[ptypes])
- }
- if(isTRUE(validate)){
- out <- out[out %in% unname(hpo$id)]
- }
- } else {
- messager("Translating all phenotypes to names.",v=verbose)
- out <- stats::setNames(ptypes,ptypes)
- if(length(hpo_ids)>0){
- out <- c(out, hpo$name[hpo_ids])
- }
- if(isTRUE(validate)){
- out <- out[out %in% unname(hpo$name)]
- }
- }
- #### Return ####
- if(isFALSE(keep_order)){
- messager("+ Returning a dictionary of phenotypes",
- "(different order as input).",
- v=verbose)
- } else {
- messager("+ Returning a vector of phenotypes",
- "(same order as input).",
- v=verbose)
- out <- out[phenotypes_og]
- }
- #### Invert ####
- if(isTRUE(invert)){
- out <- stats::setNames(names(out), unname(out))
- }
- return(out)
-}
diff --git a/R/hpo_modifiers_agg.R b/R/hpo_modifiers_agg.R
index 376ec14..598eee1 100644
--- a/R/hpo_modifiers_agg.R
+++ b/R/hpo_modifiers_agg.R
@@ -1,11 +1,11 @@
-hpo_modifiers_agg <- function(dt,
+hpo_modifiers_agg <- function(dat,
by = c("disease_id","hpo_id")){
modifier <- modifier_name <- disease_name <- disease_name <- disease_id <-
Severity_score <- Severity_score_min <- hpo_id <- NULL;
by <- by[1]
if(by=="hpo_id"){
- dt2 <- dt[,list(
+ dt2 <- dat[,list(
modifier=paste(unique(modifier),collapse=";"),
modifier_name=paste(unique(modifier_name),collapse=";"),
modifier_count=paste(table(unlist(modifier_name)),collapse=";"),
@@ -16,7 +16,7 @@ hpo_modifiers_agg <- function(dt,
Severity_score_min=min(Severity_score, na.rm=TRUE)
), by=by]
} else if (by=="disease_id"){
- dt2 <- dt[,list(
+ dt2 <- dat[,list(
disease_name=disease_name,
modifier=paste(unique(modifier),collapse=";"),
modifier_name=paste(unique(modifier_name),collapse=";"),
diff --git a/R/hpo_to_matrix.R b/R/hpo_to_matrix.R
index fd8aff9..324c645 100644
--- a/R/hpo_to_matrix.R
+++ b/R/hpo_to_matrix.R
@@ -55,8 +55,7 @@ hpo_to_matrix <- function(terms = NULL,
phenotype_to_genes[,dummy:=1]
value.var <- "dummy"
} else if(grepl("^evidence_score",value.var)){
- phenotype_to_genes <- add_evidence(phenos = phenotype_to_genes,
- verbose = verbose)
+ phenotype_to_genes <- add_evidence(phenos = phenotype_to_genes)
} else if(!value.var %in% names(phenotype_to_genes)){
stp <- paste("value.var not found in phenotype_to_genes.")
stop(stp)
@@ -82,7 +81,10 @@ hpo_to_matrix <- function(terms = NULL,
#### Format and return ####
if(isTRUE(as_matrix) ||
isTRUE(run_cor)){
- X <- as.matrix(X_dt[,-meta_vars,with=FALSE])|> `rownames<-`(rn)
+ X <- dt_to_matrix(dat=X_dt,
+ omit_cols = meta_vars,
+ rownames = rn,
+ as_sparse = FALSE)
if(isTRUE(run_cor)){
messager("Computing all parwise correlations.",v=verbose)
X_cor <- stats::cor(X, method = method)
diff --git a/R/ids_to_mondo.R b/R/ids_to_mondo.R
deleted file mode 100644
index 97d738f..0000000
--- a/R/ids_to_mondo.R
+++ /dev/null
@@ -1,12 +0,0 @@
-mondo_dict <- function(mondo=get_mondo(),
- ids=NULL){
- xref <- unlist(mondo$xref)
- # xref <- xref[names(xref) %in% names(mondo$id)]
- dict <- xref |> invert_dict()
- names(dict) <- gsub("^Orphanet","ORPHA",names(dict))
- if(!is.null(ids)) {
- return(dict[ids])
- } else {
- return(dict)
- }
-}
diff --git a/R/igraph_to_ggnetwork.R b/R/igraph_to_ggnetwork.R
deleted file mode 100644
index b494293..0000000
--- a/R/igraph_to_ggnetwork.R
+++ /dev/null
@@ -1,5 +0,0 @@
-igraph_to_ggnetwork <- function(g){
- g |>
- igraph::simplify() |>
- ggnetwork::fortify()
-}
diff --git a/R/igraph_to_plotly.R b/R/igraph_to_plotly.R
deleted file mode 100644
index 0fdf6a9..0000000
--- a/R/igraph_to_plotly.R
+++ /dev/null
@@ -1,69 +0,0 @@
-#' igraph to plotly data
-#'
-#' Convert an igraph to data for a 3D plotly plot.
-#' @param dim Number of dimensions to create layout in.
-#' @inheritParams network_3d
-#' @returns Named list of data.frames.
-#'
-#' @keywords internal
-#' @importFrom data.table data.table as.data.table .SD :=
-igraph_to_plotly <- function(g,
- layout_func = igraph::layout.fruchterman.reingold,
- dim = 3,
- seed = 2023,
- verbose = TRUE){
-
- requireNamespace("igraph")
- .SD <- NULL;
-
- set.seed(seed)
- messager("Converting igraph to plotly data.",v=verbose)
- layout_coords <- cbind(
- name=names(igraph::V(g)),
- layout_func(g,dim=dim) |>
- data.table::as.data.table()|>
- `colnames<-`(c("x","y","z")[seq(dim)])
- )
- # d <- data.table::as.data.table(igraph::as_edgelist(g))
- d <- igraph::as_data_frame(g, what = "both")
- # d <- igraph::as_long_data_frame(g)
- # gnet <- data.table::data.table(
- # igraph_to_ggnetwork(g)
- # )[,-c("x","y","xend","yend")]
- # if("node_type" %in% names(gnet)){
- # gnet <- unique(gnet[node_type %in% c("hpo_name","ancestor_name")])
- # }
-
- #### Vertex data ####
- #### Merge layout coordinates with vertex metadata ####
- vdf <- merge(
- layout_coords,
- d$vertices,
- by = "name",
- all = TRUE
- )
- ## Make NAs into characters so that they still get assigned a point color
- cols <- names(vdf)[sapply(vdf, class) == 'character']
- vdf[, (cols) := lapply(.SD, replace,NA,"NA"),.SDcols=cols]
- #### Edge data ####
- edf <- data.table::merge.data.table(
- data.table::data.table(d$edges),
- layout_coords,
- by.x="from",
- by.y="name"
- ) |> data.table::merge.data.table(
- data.table::copy(layout_coords) |>
- data.table::setnames(c("x","y","z")[seq(dim)],
- c("xend","yend","zend")[seq(dim)]
- ),
- by.x="to",
- by.y="name"
- )
-
- if(!"hpo_name" %in% names(vdf)){
- vdf$hpo_name <- harmonise_phenotypes(phenotypes = vdf$hpo_id,
- as_hpo_ids = FALSE)
- }
- return(list(vertices=vdf,
- edges=edf))
-}
diff --git a/R/invert_dict.R b/R/invert_dict.R
deleted file mode 100644
index 14100f8..0000000
--- a/R/invert_dict.R
+++ /dev/null
@@ -1,3 +0,0 @@
-invert_dict <- function(dict){
- stats::setNames(names(dict),unname(dict))
-}
diff --git a/R/kde_surface.R b/R/kde_surface.R
deleted file mode 100644
index 416f842..0000000
--- a/R/kde_surface.R
+++ /dev/null
@@ -1,57 +0,0 @@
-#' Kernel Density Estimation (KDE) surface
-#'
-#' Compute a KDE surface based on the density of points along
-#' an x- and y-axis.
-#' @param xyz A data.frame containing the columns "x" and "y" (required).
-#' Can also include a third column "z" (optional).
-#' @param extend_kde Extend the limits of the KDE area by setting
-#' \code{extend_kde>1}.
-#' This makes the "mountains' of the KDE end more smoothly along the edges of
-#' the x/y coordinates.
-#' @param rescale_z Rescale the z-axis so that the KDE can be positioned
-#' below the original data points (when plotting them together).
-#' @inheritParams MASS::kde2d
-#' @returns
-#' \itemize{
-#' \item{\code{x, y}: }{
-#' The x and y coordinates of the grid points, vectors of length n.
-#' }
-#' \item{\code{z}: }{
-#' An \code{n[1]} by \code{n[2]} matrix of the estimated density:
-#' rows correspond to the value of \code{x}, columns to the value of \code{y}.
-#' }
-#' }
-#'
-#' @export
-#' @examples
-#' xyz <- data.frame(x=stats::rnorm(50),
-#' y=stats::rnorm(50),
-#' z=stats::rnorm(50))
-#' kd <- kde_surface(xyz = xyz)
-kde_surface <- function(xyz,
- n = 50,
- extend_kde = 1,
- rescale_z = TRUE){
- # devoptera::args2vars(kde_surface)
- requireNamespace("MASS")
- requireNamespace("scales")
-
- kd <- MASS::kde2d(x = xyz$x,
- y = xyz$y,
- n = n,
- lims = c(range(xyz$x),
- range(xyz$y)
- )*extend_kde)
- #### Rescale the z-axis so that the KDE plot is below the points ####
- if(isTRUE(rescale_z)){
- if("z" %in% names(xyz)){
- kd$z <- scales::rescale(x = kd$z,
- to = c(min(xyz$z*2),
- min(xyz$z)-.25))
- } else{
- wrn <- "Cannot rescale_z when z column not present in xyz data.frame."
- warning(wrn)
- }
- }
- return(kd)
-}
diff --git a/R/list_deaths.R b/R/list_deaths.R
index 932273d..9d91160 100644
--- a/R/list_deaths.R
+++ b/R/list_deaths.R
@@ -1,3 +1,4 @@
+#' @describeIn map_ map_
#' List age of death HPO terms
#'
#' List age of death phenotypes in the HPO.
@@ -10,22 +11,19 @@
#' deaths <- list_deaths()
list_deaths <- function(hpo = get_hpo(),
exclude = FALSE,# c("Miscarriage","Stillbirth","Prenatal death" )
- as_hpo_ids = FALSE,
- include_na = TRUE,
- verbose = TRUE){
- # devoptera::args2vars(list_opts)
-
- opts <- harmonise_phenotypes(phenotypes = names(hpo_dict(type = "AgeOfDeath")),
- hpo = hpo,
- as_hpo_ids = TRUE,
- keep_order = FALSE,
- invert = TRUE,
- verbose = verbose)
+ to = c("name","id"),
+ include_na = TRUE){
+ to <- match.arg(to)
+ opts <- map_phenotypes(terms = names(hpo_dict(type = "AgeOfDeath")),
+ hpo = hpo,
+ to="id",
+ keep_order = FALSE,
+ invert = TRUE)
if(!is.null(exclude)){
opts <- opts[!grepl(paste(exclude,collapse = "|"),opts,
ignore.case = TRUE)]
}
- if(isTRUE(as_hpo_ids)){
+ if(to=="id"){
ids <- names(opts)
if(isTRUE(include_na)){
ids <- c(ids,NA)
diff --git a/R/list_onsets.R b/R/list_onsets.R
index 5b36850..153cc47 100644
--- a/R/list_onsets.R
+++ b/R/list_onsets.R
@@ -1,8 +1,8 @@
+#' @describeIn map_ map_
#' List age of onset HPO terms
#'
#' List age of onset phenotypes in the HPO.
#' @param exclude HPO phenotype names to exclude.
-#' @param as_hpo_ids Return as a character vector vector HPO IDs only.
#' @param include_na Include NA values for onset.
#' @inheritParams make_phenos_dataframe
#' @returns Named list of HPO IDs.
@@ -12,22 +12,20 @@
#' onsets <- list_onsets()
list_onsets <- function(hpo = get_hpo(),
exclude = FALSE, #c("Antenatal","Fetal","Congenital")
- as_hpo_ids = FALSE,
+ to = c("name","id"),
include_na = TRUE,
verbose = TRUE){
- # devoptera::args2vars(list_onsets)
-
- opts <- harmonise_phenotypes(phenotypes = names(hpo_dict(type = "onset")),
- hpo = hpo,
- as_hpo_ids = TRUE,
- keep_order = FALSE,
- invert = TRUE,
- verbose = verbose)
+ to <- match.arg(to)
+ opts <- map_phenotypes(terms = names(hpo_dict(type = "onset")),
+ hpo = hpo,
+ to="id",
+ keep_order = FALSE,
+ invert = TRUE)
if(!is.null(exclude)){
opts <- opts[!grepl(paste(exclude,collapse = "|"),opts,
ignore.case = TRUE)]
}
- if(isTRUE(as_hpo_ids)){
+ if(to=="id"){
ids <- names(opts)
if(isTRUE(include_na)){
ids <- c(ids,NA)
diff --git a/R/load_decipher_genes.R b/R/load_decipher_genes.R
deleted file mode 100644
index f0f201b..0000000
--- a/R/load_decipher_genes.R
+++ /dev/null
@@ -1,27 +0,0 @@
-load_decipher_genes <- function(verbose=TRUE){
-
- requireNamespace("R.utils")
- disease_name <- disease_name <- ID <- `disease_id` <- NULL;
-
- messager("Loading DECIPHER genes.",v=verbose)
- #### UCSC bigbed file ####
- # "https://hgdownload.soe.ucsc.edu/gbdb/hg38/"
-
- #### EBI annotations file ####
- dat <- data.table::fread(
- "https://www.ebi.ac.uk/gene2phenotype/downloads/DDG2P.csv.gz")
- names(dat) <- gsub(" ","_",names(dat))
- dat[,disease_name:=tolower(disease_name)]
- ## Bizarrely, the do not provide their disease IDs in their own database.
- ## Have to instead infer from the HPO data
- d <- load_phenotype_to_genes("phenotype.hpoa")
- d <- d[grepl("DECIPHER",d$disease_id),]
- d[,ID:=disease_id][,disease_name:=tolower(disease_name)]
- dat <- data.table::merge.data.table(dat,
- d[,c("ID","disease_name")],
- by.x = "disease_name",
- by.y = "disease_name",
- all.x = TRUE)
- # sum(!is.na(dat$ID))
- return(dat)
-}
diff --git a/R/load_disease_genes.R b/R/load_disease_genes.R
deleted file mode 100644
index d7b1fdb..0000000
--- a/R/load_disease_genes.R
+++ /dev/null
@@ -1,31 +0,0 @@
-#' Load disease genes
-#'
-#' Load gene lists associated with each disease phenotype from:
-#' \itemize{
-#' \item{DECIPHER}
-#' \item{ORPHANET}
-#' \item{OMIM}
-#' }
-#' @param verbose Print messages.
-#' @returns data.table
-#'
-#' @export
-#' @importFrom data.table := rbindlist
-#' @examples
-#' dat <- load_disease_genes()
-load_disease_genes <- function(verbose=TRUE){
- # devoptera::args2vars(load_disease_genes)
-
- #### Import data ####
- decipher <- load_decipher_genes(verbose = verbose)
- orphanet <- load_orphanet_genes(verbose = verbose)
- omim <- load_omim_genes(verbose = verbose)
- #### Merge data ####
- nms <- c("disease_id","disease_name_og","gene_symbol")
- dgenes <- list(
- DECIPHER=decipher[,c("ID","disease_name","gene_symbol")] |> `names<-`(nms),
- ORPHANET=orphanet[,c("Orphanet_ID","phenotype","gene")] |> `names<-`(nms),
- OMIMG=omim[,c("OMIMID","Disease","GeneSym")] |> `names<-`(nms)
- ) |> data.table::rbindlist(fill = TRUE,use.names = TRUE, idcol = "Database")
- return(dgenes)
-}
diff --git a/R/load_harmonizome_genes.R b/R/load_harmonizome_genes.R
deleted file mode 100644
index e98b2af..0000000
--- a/R/load_harmonizome_genes.R
+++ /dev/null
@@ -1,11 +0,0 @@
-load_harmonizome_genes <- function(dataset){
- # dataset <- c("omim","clinvar")
- dataset <- tolower(dataset[1])
- baseurl <- paste0(
- "https://maayanlab.cloud/static/hdfs/harmonizome/data/",dataset
- )
- genes <- data.table::fread(paste0(baseurl,"/gene_list_terms.txt.gz"))
- terms <- data.table::fread(paste0(baseurl,"/attribute_list_entries.txt.gz"))
- dat <- data.table::merge.data.table(terms, genes)
- return(dat)
-}
diff --git a/R/load_omim_genes.R b/R/load_omim_genes.R
deleted file mode 100644
index 8be2f6e..0000000
--- a/R/load_omim_genes.R
+++ /dev/null
@@ -1,12 +0,0 @@
-load_omim_genes <- function(verbose=TRUE){
-
- messager("Loading OMIM genes.",v=verbose)
- #### From enrichr #####
- ## Outdated file
- # dat <- read_enrichr(file = "https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=OMIM_Expanded")
-
- #### From Harmonizome ####
- # j <- jsonlite::fromJSON("https://maayanlab.cloud/Harmonizome/api/1.0/dataset/OMIM+Gene-Disease+Associations")
- dat <- load_harmonizome_genes(dataset = "omim")
- return(dat)
-}
diff --git a/R/load_orphanet_genes.R b/R/load_orphanet_genes.R
deleted file mode 100644
index 644ac3a..0000000
--- a/R/load_orphanet_genes.R
+++ /dev/null
@@ -1,35 +0,0 @@
-load_orphanet_genes <- function(verbose=TRUE){
-
- Orphanet_ID <- NULL;
- messager("Loading Orphanet genes.",v=verbose)
- # URL <- "https://hgdownload.soe.ucsc.edu/gbdb/hg38/bbi/orphanet/orphadata.bb"
- # bb <- rtracklayer::import(URL)
-
- #### From Orphanet XML file ####
- # (they don't provide the data in any easy-to-use format)
- # xml <- xml2::read_xml("https://www.orphadata.com/data/xml/en_product6.xml")
- # xml_list <- xml2::as_list(xml)
- # df <- lapply(xml_list$JDBOR$DisorderList, function(x){
- # data.table::data.table(
- # t(unlist(x[names(x)!="DisorderGeneAssociationList"])),
- # t(unlist(x$DisorderGeneAssociationList$DisorderGeneAssociation$Gene[
- # c("Symbol")
- # ])),
- # t(unlist(x$DisorderGeneAssociationList$DisorderGeneAssociation[
- # c("SourceOfValidation","DisorderGeneAssociationType","DisorderGeneAssociationStatus")
- # ]))
- # )
- # }) |> data.table::rbindlist(fill = TRUE)
-
- #### From enrichrR ####
- dat <- read_enrichr(file = "https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=Orphanet_Augmented_2021")
- splt <- cbind(
- (
- data.table::as.data.table(
- stringr::str_split(dat$term,"ORPHA:",simplify = TRUE)) |>
- `colnames<-`(c("phenotype","Orphanet_ID"))
- )[,Orphanet_ID:=paste0("ORPHA:",Orphanet_ID)],
- gene=dat$gene
- )
- return(splt)
-}
diff --git a/R/load_phenotype_to_genes.R b/R/load_phenotype_to_genes.R
index cffb1ff..1ad0d48 100644
--- a/R/load_phenotype_to_genes.R
+++ b/R/load_phenotype_to_genes.R
@@ -20,8 +20,9 @@ load_phenotype_to_genes <- function(file = c("phenotype_to_genes.txt",
"genes_to_phenotype.txt",
"phenotype.hpoa"),
save_dir = file.path(
- tools::R_user_dir("HPOExplorer",
- which="cache"),
+ KGExplorer::cache_dir(
+ package = "HPOExplorer"
+ ),
"data"),
repo = paste(
"obophenotype",
@@ -49,7 +50,7 @@ load_phenotype_to_genes <- function(file = c("phenotype_to_genes.txt",
!overwrite){
messager("Reading cached RDS file:",file,v=verbose)
obj <- readRDS(path_rds)
- get_version(obj,verbose=verbose)
+ KGExplorer::get_version(obj,verbose=verbose)
return(obj)
}
#### Stored in GitHub Releases ####
@@ -72,7 +73,7 @@ load_phenotype_to_genes <- function(file = c("phenotype_to_genes.txt",
#### Add version ####
if(!is.null(attr(f,"version"))){
attr(phenotype_to_genes,"version") <- attr(f,"version")
- get_version(phenotype_to_genes,verbose=verbose)
+ KGExplorer::get_version(phenotype_to_genes, verbose=verbose)
}
#### Save RDS ####
## This lets up keep track of the version of the data without having to
diff --git a/R/make_hoverboxes.R b/R/make_hoverboxes.R
deleted file mode 100644
index 30c5355..0000000
--- a/R/make_hoverboxes.R
+++ /dev/null
@@ -1,69 +0,0 @@
-#' Make hoverboxes
-#'
-#' A hoverbox is a box of text that shows up when the cursor
-#' hovers over something.
-#' These can be useful when making interactive network plots
-#' of the HPO phenotypes because we can include a hoverbox that gives
-#' information and data associated with each phenotype.
-#'
-#' This function expects a dataframe of with a "hpo_name" column that has the
-#' name of each phenotype. It must then include columns
-#' for all of the parameters you wish to include in the hoverbox.
-#' @inheritParams make_phenos_dataframe
-#' @inheritParams make_network_object
-#' @inheritParams base::round
-#' @inheritParams stringr::str_wrap
-#' @returns A nicely formatted string with newlines etc,
-#' to be used as a hoverbox.
-#'
-#' @export
-#' @importFrom stats setNames
-#' @importFrom data.table :=
-#' @importFrom stringr str_wrap
-#' @examples
-#' phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay",
-#' add_hoverboxes = FALSE)
-#' phenos <- make_hoverboxes(phenos = phenos)
-make_hoverboxes <- function(phenos,
- columns = list_columns(),
- interactive = TRUE,
- width = 60,
- digits = 3,
- verbose = TRUE) {
- # templateR:::source_all()
- # devoptera::args2vars(make_hoverboxes)
-
- hover <- hpo_id <- hpo_name <- NULL;
-
- #### Select sep ####
- sep <- if(isTRUE(interactive)) "
" else "\n"
- columns <- columns[unname(columns) %in% names(phenos)]
- if(length(columns)==0){
- messager("No columns found. Making hoverboxes from hpo_id only.",v=verbose)
- phenos[hover:=paste("hpo_id:",hpo_id)]
- } else {
- messager("Making hoverboxes from:",
- paste(shQuote(columns),collapse = ", "),v=verbose)
- #### Iterate over phenotypes ####
- hoverBoxes <- lapply(stats::setNames(
- unique(phenos$hpo_name),
- unique(phenos$hpo_name)
- ), function(pheno_i){
- lapply(seq(length(columns)), function(i){
- val <- phenos[hpo_name == pheno_i, ][1,][[columns[[i]]]]
- val <- if(is.numeric(val)) round(val,digits = digits) else {
- paste(
- stringr::str_wrap(val, width = width),
- collapse = sep
- )
- }
- paste0("",names(columns)[[i]],"",
- ": ",val
- )
- }) |> paste(collapse = sep)
- })
- #### Assign to each row #####
- phenos$hover <- unlist(hoverBoxes[phenos$hpo_name])
- }
- return(phenos)
-}
diff --git a/R/make_igraph_object.R b/R/make_igraph_object.R
index 7a9832c..a5a04fc 100644
--- a/R/make_igraph_object.R
+++ b/R/make_igraph_object.R
@@ -1,3 +1,4 @@
+#' @describeIn make_ make_
#' Make an \link[igraph]{igraph} object
#'
#' This uses the network package to coerce the adjacency matrix into a
@@ -8,8 +9,6 @@
#' hpo_id.
#' @inheritParams make_phenos_dataframe
#' @inheritParams make_network_object
-#' @inheritParams ggnetwork::fortify.network
-#' @inheritDotParams ggnetwork::ggnetwork
#' @returns A \link[igraph]{igraph} object.
#'
#' @export
@@ -18,11 +17,7 @@
#' g <- make_igraph_object(phenos = phenos)
make_igraph_object <- function(phenos,
hpo = get_hpo(),
- adjacency = adjacency_matrix(
- terms = phenos$hpo_id,
- hpo = hpo),
colour_var = "fold_change",
- add_ont_lvl_absolute = FALSE,
cols = list_columns(
extra_cols = c(
colour_var,
@@ -31,27 +26,20 @@ make_igraph_object <- function(phenos,
value = TRUE)
)
),
- layout = "fruchtermanreingold",
- verbose = TRUE,
...
){
- requireNamespace("igraph")
- phenoNet <- make_network_object(phenos = phenos,
- hpo = hpo,
- adjacency = adjacency,
- colour_var = colour_var,
- add_ont_lvl_absolute = add_ont_lvl_absolute,
- cols = cols,
- verbose = verbose,
- ...)
- messager("Creating igraph object.",v=verbose)
- vertices <- unique(
- phenoNet[,!names(phenoNet) %in% c("x","y","vertex.names","xend","yend")]
- )
- rownames(vertices) <- vertices$hpo_id
- g <- igraph::graph_from_adjacency_matrix(adjacency)
- for(n in names(vertices)){
- igraph::vertex_attr(g,name = n) <- vertices[names(igraph::V(g)),n]
+ requireNamespace("tidygraph")
+ g <- KGExplorer::ontology_to(ont = hpo,
+ terms = phenos$hpo_id,
+ to = "tbl_graph")
+ pcols <- intersect(names(phenos), names(cols))
+ gcols <- KGExplorer:::get_graph_colnames(g)
+ cols <- setdiff(pcols, gcols)
+ if(length(cols)>0){
+ phenos <- phenos[,cols,with=FALSE]
+ g <- g |> tidygraph::activate("nodes") |>
+ tidygraph::left_join(by=c("name" = "hpo_id"),
+ phenos)
}
return(g)
}
diff --git a/R/make_network_object.R b/R/make_network_object.R
index 569c3e0..903b493 100644
--- a/R/make_network_object.R
+++ b/R/make_network_object.R
@@ -1,3 +1,4 @@
+#' @describeIn make_ make_
#' Make a \link[ggnetwork]{ggnetwork} object
#'
#' This uses the network package to coerce the adjacency matrix into a
@@ -6,15 +7,11 @@
#'
#' It expects there to be a column of HPO IDs in the phenos dataframe called
#' hpo_id.
-#' @param phenos dataframe of phenotypes and values / parameters.
-#' @param adjacency An adjacency matrix generated
-#' by \link[HPOExplorer]{adjacency_matrix}.
#' @param colour_var The column from phenos that you wish
#' to map to node colour.
#' @param cols Columns to add to metadata of \link[ggnetwork]{ggnetwork} object.
-#' @inheritParams make_phenos_dataframe
-#' @inheritParams ggnetwork::fortify.network
-#' @inheritDotParams ggnetwork::ggnetwork
+#' @inheritParams ggnetwork::fortify
+#' @inheritDotParams ggnetwork::fortify
#' @returns A \link[ggnetwork]{ggnetwork} object.
#'
#' @export
@@ -27,10 +24,6 @@
#' colour_var = "ontLvl_geneCount_ratio")
make_network_object <- function(phenos,
hpo = get_hpo(),
- adjacency =
- adjacency_matrix(
- terms = phenos$hpo_id,
- hpo = hpo),
colour_var = "fold_change",
add_ont_lvl_absolute = FALSE,
cols = list_columns(
@@ -41,42 +34,35 @@ make_network_object <- function(phenos,
value = TRUE)
)
),
- layout = "fruchtermanreingold",
- verbose = TRUE,
+ as=c("ggnetwork","tbl_graph"),
...) {
- # devoptera::args2vars(make_network_object, reassign = TRUE)
-
- messager("Making phenotype network object.",v=verbose)
- if("hpo_id" %in% names(phenos)){
- adjacency <- adjacency[unique(phenos$hpo_id),
- unique(phenos$hpo_id)]
- }
- if(!"ontLvl" %in% names(phenos) &&
- isTRUE(add_ont_lvl_absolute)){
- phenos <- add_ont_lvl(phenos = phenos,
- hpo = hpo,
- absolute = TRUE,
- verbose = verbose)
- }
- #### Create phenoNet obj ####
- messager("Creating ggnetwork object.",v=verbose)
- phenoNet <- ggnetwork::ggnetwork(x = adjacency,
- arrow.gap = 0,
- layout = layout,
- ...)
- cols <- cols[cols %in% names(phenos)]
- for(col in cols){
- #### Add metadata to phenoNet ####
- phenoNet <- make_node_data(phenoNet = phenoNet,
- phenos = phenos,
- phenos_column = col,
- verbose = verbose)
- }
- #### Add number of total edges for each node ####
- if("hpo_id" %in% names(phenos)){
- messager("Adding n_edges per node.",v=verbose)
- phenoNet$n_edges <- rowSums(adjacency)[phenoNet$vertex.names]
- }
- phenoNet <- unique(phenoNet)
- return(phenoNet)
+ as <- match.arg(as)
+ messager("Making phenotype network object.")
+ if(!"ontLvl" %in% names(phenos) &&
+ isTRUE(add_ont_lvl_absolute)){
+ phenos <- add_ont_lvl(phenos = phenos,
+ hpo = hpo,
+ absolute = TRUE)
+ }
+ #### Create phenoNet obj ####
+ g <- make_igraph_object(phenos,
+ hpo = hpo,
+ colour_var = colour_var,
+ cols = cols)
+ #### Return tbl_graph object if requested ####
+ if(as == "tbl_graph") return(g)
+ #### Proceed to convert to ggnetwork object ####
+ messager("Creating ggnetwork object.")
+ phenoNet <- ggnetwork::fortify(g,
+ ...)
+ cols <- cols[cols %in% names(phenos)]
+ #### No longer needed as make_igraph_object() adds these columns ####
+ # for(col in cols){
+ # #### Add metadata to phenoNet ####
+ # phenoNet <- make_node_data(phenoNet = phenoNet,
+ # phenos = phenos,
+ # phenos_column = col)
+ # }
+ phenoNet <- unique(phenoNet)
+ return(phenoNet)
}
diff --git a/R/make_node_data.R b/R/make_node_data.R
index 821002d..c6b7f2e 100644
--- a/R/make_node_data.R
+++ b/R/make_node_data.R
@@ -1,19 +1,18 @@
make_node_data <- function(phenoNet,
- phenos,
- phenos_column,
- new_column=phenos_column,
- verbose=TRUE) {
+ phenos,
+ phenos_column,
+ new_column=phenos_column) {
if(!phenos_column %in% names(phenos)){
messager(paste0("phenos_column=",shQuote(phenos_column)),
- "not found in phenos.",v=verbose)
+ "not found in phenos.")
return(phenoNet)
}
- messager("Adding",paste0("new_column=",shQuote(new_column)),v=verbose)
+ messager("Adding",paste0("new_column=",shQuote(new_column)))
# data.table::setkeyv(phenos,"hpo_id")
# phenoNet[[new_column]] <- phenos[phenoNet$vertex.names, ][[phenos_column]]
dat <- unique(phenos[,c("hpo_id",phenos_column), with=FALSE])
data_dict <- stats::setNames(dat[[phenos_column]], dat$hpo_id)
- phenoNet[[new_column]] <- data_dict[phenoNet$vertex.names]
+ phenoNet[[new_column]] <- data_dict[phenoNet$name]
return(phenoNet)
}
diff --git a/R/make_ontology.R b/R/make_ontology.R
deleted file mode 100644
index 5b77dc1..0000000
--- a/R/make_ontology.R
+++ /dev/null
@@ -1,58 +0,0 @@
-#' Make an ontologyIndex object
-#'
-#' Make a \link[ontologyIndex]{ontology_index}
-#' object from the Open Biomedical Ontologies (OBO) file.
-#' By default, this function uses the Human Phenotype Ontology (HPO)
-#' distributed via the official
-#' \href{https://github.com/obophenotype/human-phenotype-ontology/releases}{
-#' HPO GitHub Releases}.
-#' @param fix_ascii Fix non-ASCII characters in metadata.
-#' @inheritParams get_data
-#' @inheritParams piggyback::pb_download
-#' @inheritParams ontologyIndex::get_OBO
-#' @inheritDotParams ontologyIndex::get_OBO
-#' @returns \link[ontologyIndex]{ontology_index} object
-#'
-#' @keywords internal
-#' @importFrom ontologyIndex get_OBO
-make_ontology <- function(file = "hp-base.obo",
- repo = "obophenotype/human-phenotype-ontology",
- tag = "latest",
- save_dir = tools::R_user_dir(
- package = "HPOExplorer",
- which = "cache"
- ),
- extract_tags = "everything",
- fix_ascii = TRUE,
- overwrite = TRUE,
- upload_tag = NULL,
- ...){
-
- #### Download data ####
- f <- get_data(file = file,
- repo = repo,
- tag = tag,
- save_dir = save_dir,
- overwrite = overwrite)
- #### Import data ####
- hpo <- ontologyIndex::get_OBO(file = f,
- extract_tags = "everything",
- ...)
- #### Fix non-ASCII characters in metadata ####
- if(fix_ascii){
- func <- function(v){
- Encoding(v) <- "latin1"
- iconv(v, "latin1", "UTF-8")
- }
- attributes(hpo)$version <- func(attributes(hpo)$version)
- }
- if(!is.null(upload_tag)){
- save_path <- file.path(save_dir,gsub("\\.obo",".rds",file))
- saveRDS(hpo,save_path)
- piggyback::pb_upload(file = save_path,
- repo = "neurogenomics/HPOExplorer",
- overwrite = TRUE,
- tag = upload_tag)
- }
- return(hpo)
-}
diff --git a/R/make_phenos_dataframe.R b/R/make_phenos_dataframe.R
index 6513d50..d9caf6a 100644
--- a/R/make_phenos_dataframe.R
+++ b/R/make_phenos_dataframe.R
@@ -1,58 +1,22 @@
+#' @describeIn make_ make_
#' Make phenotypes dataframe
#'
#' Make a dataframe from a subset of the Human Phenotype Ontology.
-#' @param hpo Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.
-#' @param phenotype_to_genes Output of
-#' \link[HPOExplorer]{load_phenotype_to_genes} mapping phenotypes
-#' to gene annotations.
-#' @param ancestor The ancestor to get all descendants of. If \code{NULL},
-#' returns the entirely ontology.
-#' @param add_ont_lvl_absolute Add the absolute ontology level of each HPO term.
-#' See \link[HPOExplorer]{get_ont_lvls} for more details.
-#' @param add_ont_lvl_relative Add the relative ontology level of each HPO term.
-#' See \link[HPOExplorer]{get_ont_lvls} for more details.
-#' @param add_description Whether to get the phenotype descriptions as
-#' well (slower).
-#' @param add_info_contents Add information content column for each phenotype.
-#' @param add_disease_data Add all disease metadata columns.
-#' This will expand the data using \code{allow.cartesian=TRUE}.
-#' @param add_ndiseases Add the number of diseases per phenotype.
-#' @param add_pheno_frequencies Add the frequency of each phenotype in
-#' each disease.
-#' @param add_tiers Add severity Tiers column using
-#' \link[HPOExplorer]{add_tier}.
-#' @param add_severities Add severity column using
-#' \link[HPOExplorer]{add_severity}.
-#' @param add_hoverboxes Add hoverdata with
-#' \link[HPOExplorer]{make_hoverboxes}.
-#' @param add_onsets Add age of onset columns using
-#' \link[HPOExplorer]{add_onset}.
-#' @param add_deaths Add age of death columns using
-#' \link[HPOExplorer]{add_death}.
-#' @param columns A named vector of columns in \code{phenos}
-#' to add to the hoverdata via \link[HPOExplorer]{make_hoverboxes}.
-#' @param add_disease_definitions Add disease definitions.
-#' @param include_mondo Add \href{https://mondo.monarchinitiative.org/}{MONDO}
-#' IDs, names, and definitions to each disease.
-#' @param verbose Print messages.
#' @inheritParams ggnetwork_plot
#' @inheritParams make_network_object
#' @returns The HPO in dataframe format.
#'
#' @export
-#' @importFrom ontologyIndex get_descendants
-#' @importFrom data.table setnames := .N
#' @examples
#' phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay")
make_phenos_dataframe <- function(ancestor = NULL,
hpo = get_hpo(),
phenotype_to_genes =
load_phenotype_to_genes(),
- adjacency = NULL,
##### Phenotype metadata ####
add_ont_lvl_absolute = TRUE,
add_ont_lvl_relative = FALSE,
- add_info_contents = FALSE,
+ add_info_content = FALSE,
add_description = TRUE,
#### Disease/symptom metadata ####
add_disease_data = FALSE,
@@ -70,34 +34,30 @@ make_phenos_dataframe <- function(ancestor = NULL,
interactive = TRUE,
verbose = TRUE
){
- # devoptera::args2vars(make_phenos_dataframe)
- # ancestor = "Neurodevelopmental delay"
-
hpo_id <- . <- NULL;
if(!is.null(ancestor)){
- IDx <- get_hpo_id_direct(phenotype = ancestor,
+ IDx <- get_hpo_id_direct(term = ancestor,
hpo = hpo)
- IDx_all <- ontologyIndex::get_descendants(ontology = hpo,
- roots = IDx,
- exclude_roots = FALSE)
+ IDx_all <- simona::dag_offspring(hpo,
+ term=IDx,
+ include_self = TRUE)
descendants <- phenotype_to_genes[hpo_id %in% IDx_all,]
} else {
descendants <- phenotype_to_genes
}
messager("Extracting data for",
formatC(length(unique(descendants$hpo_name)),big.mark = ","),
- "descendents.",v=verbose)
- messager("Computing gene counts.",v=verbose)
+ "descendents.")
+ messager("Computing gene counts.")
phenos <- descendants[,.(geneCount=.N), by=c("hpo_id","hpo_name")]
#### Annotate phenotypes ####
phenos <- annotate_phenos(phenos = phenos,
hpo = hpo,
- adjacency = adjacency,
##### Phenotype metadata ####
add_ont_lvl_absolute = add_ont_lvl_absolute,
add_ont_lvl_relative = add_ont_lvl_relative,
- add_info_contents = add_info_contents,
+ add_info_content = add_info_content,
add_description = add_description,
#### Disease/symptom metadata ####
add_disease_data = add_disease_data,
@@ -112,7 +72,6 @@ make_phenos_dataframe <- function(ancestor = NULL,
#### Extra #####
add_hoverboxes = add_hoverboxes,
columns = columns,
- interactive = interactive,
- verbose = verbose)
+ interactive = interactive)
return(phenos)
}
diff --git a/R/assign_tiers.R b/R/make_tiers.R
similarity index 77%
rename from R/assign_tiers.R
rename to R/make_tiers.R
index 286d7c5..7df6ed5 100644
--- a/R/assign_tiers.R
+++ b/R/make_tiers.R
@@ -1,4 +1,5 @@
-#' Add severity Tiers (auto)
+#' @describeIn make_ make_
+#' Make severity Tiers (auto)
#'
#' Automatically add severity Tier for each HPO ID, in accordance with the
#' rating system provided by
@@ -32,21 +33,15 @@
#' @param search_ancestors Inherit Tiers of ancestors.
#' @param search_descendants Inherit Tiers of descendants.
#' @param as_datatable Return the results as a \link[data.table]{data.table}.
-#' @inheritParams make_phenos_dataframe
-#' @inheritParams make_network_object
#' @returns Tier assignments for each term in \code{terms}.
#' Will be returned as either a named vector or a \link[data.table]{data.table}.
#'
#' @export
-#' @importFrom data.table data.table
-#' @importFrom utils data
-#' @importFrom stats setNames
-#' @importFrom ontologyIndex get_ancestors get_descendants
#' @examples
-#' terms <- get_hpo()$id[seq(10)]
-#' tiers <- assign_tiers(terms = terms)
-assign_tiers <- function(hpo=get_hpo(),
- terms=hpo$id,
+#' terms <- get_hpo()@terms[seq(100)]
+#' tiers <- make_tiers(terms = terms)
+make_tiers <- function(hpo=get_hpo(),
+ terms=hpo@terms,
keyword_sets = list(
Tier1=c("intellectual disability","death"),
Tier2=c("impaired mobility","malform"),
@@ -62,10 +57,7 @@ assign_tiers <- function(hpo=get_hpo(),
search_descendants=TRUE,
as_datatable=FALSE,
verbose=TRUE){
-
- # devoptera::args2vars(assign_tiers)
-
- disease_name <- definition <- NULL;
+ disease_name <- NULL;
#### Gather sets of IDs in the HPO that would qualify ####
annot <- load_phenotype_to_genes("phenotype.hpoa")
hpo_sets <- lapply(stats::setNames(names(keyword_sets),
@@ -76,28 +68,28 @@ assign_tiers <- function(hpo=get_hpo(),
#### Matched in the name #####
if(isTRUE(check_names)){
ids1 <- grep(paste(k,collapse = "|"),
- hpo$name,value = TRUE, ignore.case = TRUE)
+ hpo@elementMetadata$name,value = TRUE, ignore.case = TRUE)
ids <- c(ids,ids1)
}
#### Matched in the definition ####
if(isTRUE(check_definitions)){
hp_sub <- grep(paste(k,collapse = "|"),
- hpo$def,
+ hpo@elementMetadata$definition,
ignore.case = TRUE,value = TRUE)
- ids2 <- hpo$name[names(hp_sub)]
+ ids2 <- hpo@elementMetadata$name[names(hp_sub)]
ids <- c(ids,ids2)
}
#### Matches in the disease association ####
if(isTRUE(check_diseases)){
annot_sub <- annot[grepl(paste(k,collapse = "|"), disease_name,
ignore.case = TRUE),]
- ids3 <- hpo$name[names(hpo$name) %in% annot_sub$hpo_id]
+ ids3 <- hpo@terms[hpo@terms %in% annot_sub$hpo_id]
ids <- c(ids,ids3)
}
ids <- ids[!duplicated(names(ids))]
messager(paste0("keyword_set=",nm,":"),
"Found",formatC(length(unique(ids)),big.mark = ","),
- "terms matching category.",v=verbose)
+ "terms matching category.")
return(ids)
})
#### Remove phenotypes belonging to multiple tiers ####
@@ -108,8 +100,7 @@ assign_tiers <- function(hpo=get_hpo(),
terms <- unique(terms)
messager("Auto-assigning severity Tiers to",
formatC(length(terms),big.mark = ","),
- "terms.",
- v=verbose)
+ "terms.")
tiers <- lapply(stats::setNames(terms,
terms),
function(term){
@@ -118,17 +109,20 @@ assign_tiers <- function(hpo=get_hpo(),
return("direct")
} else if(isTRUE(search_descendants) &&
any(names(h) %in%
- ontologyIndex::get_descendants(
- ontology = hpo,
- roots = term))){
+ simona::dag_offspring(
+ dag = hpo,
+ term = term)
+ )
+ ){
return("descendant")
} else if(isTRUE(search_ancestors) &&
any(names(h) %in%
- ontologyIndex::get_ancestors(
- ontology = hpo,
- terms = term))){
+ simona::dag_ancestors(
+ dag = hpo,
+ term = term)
+ )
+ ){
return("ancestor")
-
} else {
NULL
}
@@ -136,11 +130,11 @@ assign_tiers <- function(hpo=get_hpo(),
}) |> unlist()
#### Return ####
if(isTRUE(as_datatable)){
- dt <- data.table::data.table(hpo_id=names(tiers),
- tier_auto=unname(tiers),
- key="hpo_id")
- dups <- dt$hpo_id[duplicated(dt$hpo_id)]
- return(dt)
+ dat <- data.table::data.table(hpo_id=names(tiers),
+ tier_auto=unname(tiers),
+ key="hpo_id")
+ dups <- dat$hpo_id[duplicated(dat$hpo_id)]
+ return(dat)
} else {
return(tiers)
}
diff --git a/R/map_models.R b/R/map_models.R
deleted file mode 100644
index 9690575..0000000
--- a/R/map_models.R
+++ /dev/null
@@ -1,26 +0,0 @@
-map_models <- function(){
-
- #### Get disease models ####
- ## Link by disease (MONDO_ID)
- models <- get_monarch("disease_to_model")
- top_targets <- MultiEWCE::example_targets$top_targets
- top_targets_map <- data.table::merge.data.table(
- top_targets,
- map,
- by.x = "hpo_id",
- by.y = "id1",
- all = FALSE
- )
- top_targets_fmap <- data.table::merge.data.table(top_targets,
- map2[,-c("hpo_name")],
- by="hpo_id")
- messager(round(length(unique(top_targets_map$hpo_id)) /
- length(unique(top_targets$hpo_id))*100,1),
- "% of hpo_ids mapped across non-human ontologies.")
- messager(round(length(unique(top_targets_fmap$hpo_id)) /
- length(unique(top_targets$hpo_id))*100,1),
- "% of hpo_ids mapped across non-human ontologies.")
-
-
- sum(top_targets$hpo_id %in% map2$hpo_id)
-}
diff --git a/R/map_phenotypes.R b/R/map_phenotypes.R
new file mode 100644
index 0000000..ff64ba1
--- /dev/null
+++ b/R/map_phenotypes.R
@@ -0,0 +1,26 @@
+#' @describeIn map_ map_
+#' Harmonise phenotypes
+#'
+#' Harmonise a mixed vector of phenotype names (e.g. "Focal motor seizure")
+#' and HPO IDs (e.g. c("HP:0000002","HP:0000003")).
+#' @returns Character vector
+#'
+#' @export
+#' @import KGExplorer
+#' @examples
+#' terms <- c("Focal motor seizure","HP:0000002","HP:0000003")
+#' #### As phenotype names ####
+#' term_names <- map_phenotypes(terms=terms)
+#' #### As HPO IDs ####
+#' term_ids <- map_phenotypes(terms=terms, to="id")
+map_phenotypes <- function(terms,
+ hpo = get_hpo(),
+ to=c("name","id"),
+ keep_order = TRUE,
+ invert = FALSE){
+ KGExplorer::map_ontology_terms(terms = terms,
+ ont = hpo,
+ to = to,
+ keep_order = keep_order,
+ invert = invert)
+}
diff --git a/R/map_upheno.R b/R/map_upheno.R
deleted file mode 100644
index e5658de..0000000
--- a/R/map_upheno.R
+++ /dev/null
@@ -1,69 +0,0 @@
-#' Map phenotypes across uPheno
-#'
-#' Map phenotypes across species within the Unified Phenotype Ontology (uPheno).
-#' First, gathers phenotype-phenotype mappings across ontologies.
-#' Next, gathers all phenotype-gene associations for each ontology,
-#' converts all genes to human HGNC orthologs, and computes the number of
-#' overlapping orthologs between all pairs mapped phenotypes.
-#' Finally, plots the results as the proportion of intersecting genes between
-#' all pairs of phenotypes.
-#' @source https://data.monarchinitiative.org/upheno2/current/qc/index
-#' @source https://data.monarchinitiative.org/upheno2/current/upheno-release/all/index.html
-#'
-#' @param save_dir Directory to save cached data.
-#' @param pheno_map_method Method to use for mapping phenotypes across ontologies.
-#' \itemize{
-#' \item{"upheno"}{Use uPheno's phenotype-to-phenotype mappings.
-#' Contains fewer ontologies but with greater coverage of phenotypes.}
-#' \item{"monarch"}{Use Monarch's phenotype-to-phenotype mappings.
-#' Contains more ontologies but with less coverage of phenotypes.
-#' }
-#' }
-#' @param gene_map_method Method to use for mapping genes across species.
-#' \itemize{
-#' \item{"monarch"}{Use Monarch's gene-to-gene mappings.}
-#' \item{"orthogene"}{Use \link[orthogene]{convert_orthologs} to generate
-#' gene-to-gene mappings.}
-#' }
-#' @param force_new Force new data to be downloaded and processed.
-#' @param subset_db1 Subset of ontologies to include in the plot.
-#' @param save_dir Directory to save cached data.
-#' @param show_plot Show the plot.
-#' @param fill_scores Fill missing scores in the "equivalence_score" and
-#' "subclass_score" columns with this value. These columns represent the
-#' quality of mapping between two phenotypes on a scale from 0-1.
-#' @param terms A subset of HPO IDs to include in the final dataset and plots
-#' (e.g. c("HP:0001508","HP:0001507")).
-#' @returns A list containing the data and plot.
-#'
-#' @export
-#' @import data.table
-#' @importFrom tools R_user_dir
-#' @examples
-#' res <- map_upheno()
-map_upheno <- function(pheno_map_method=c("upheno","monarch"),
- gene_map_method=c("monarch","orthogene"),
- subset_db1=c("HP"),
- terms=NULL,
- fill_scores=NULL,
- show_plot=TRUE,
- force_new=FALSE,
- save_dir=tools::R_user_dir(package = "HPOExplorer",
- which = "cache")){
- # devoptera::args2vars(map_upheno)
-
- #### Get data ####
- pheno_map_genes_match <- map_upheno_data(pheno_map_method=pheno_map_method,
- gene_map_method=gene_map_method,
- fill_scores=fill_scores,
- force_new = force_new,
- terms=terms)
- #### Plots ####
- plots <- map_upheno_plot(pheno_map_genes_match,
- subset_db1=subset_db1)
- #### Return ####
- return(
- list(data=pheno_map_genes_match,
- plots=plots)
- )
-}
diff --git a/R/map_upheno_data.R b/R/map_upheno_data.R
deleted file mode 100644
index a666e86..0000000
--- a/R/map_upheno_data.R
+++ /dev/null
@@ -1,61 +0,0 @@
-#' Map uPheno data
-#'
-#' Get uPheno cross-species mapping data by:
-#' \itemize{
-#' \item{Downloading cross-species phenotype-phenotype mappings.}
-#' \item{Downloading within-spceies phenotype-gene mappings,
-#' and converting these genes to human orthologs.}
-#' \item{Merging the phenotype-phenotype and phenotype-gene mappings.}
-#' \item{Filtering out any mappings that do not have a human ortholog within
-#' each respective phenotype.}
-#' \item{Calculating the proportion of orthologous genes overlapping across
-#' the species-specific phenotype-gene mappings.}
-#' \item{Iterating the above steps using multiple methods
-#' (\code{pheno_map_method}) and concatenating the results together.}
-#' }
-#' @param keep_nogenes Logical indicating whether to keep mappings that do not
-#' have any orthologous genes.
-#' @inheritParams map_upheno
-#' @returns A data.table containing the mapped data.
-#'
-#' @export
-#' @examples
-#' pheno_map_genes_match <- map_upheno_data()
-map_upheno_data <- function(pheno_map_method=c("upheno","monarch"),
- gene_map_method=c("monarch","orthogene"),
- keep_nogenes=FALSE,
- fill_scores=NULL,
- terms=NULL,
- save_dir=tools::R_user_dir(package = "HPOExplorer",
- which = "cache"),
- force_new=FALSE){
- # devoptera::args2vars(map_upheno_data)
-
- #### Check for cached data ####
- path <- file.path(save_dir,"pheno_map_genes_match.rds")
- if(file.exists(path) &&
- isFALSE(force_new)){
- ## Read from cache
- messager("Importing cached data:",path)
- pheno_map_genes_match <- readRDS(path)
- } else {
- ## Create
- pheno_map_genes_match <- lapply(
- stats::setNames(pheno_map_method,
- pheno_map_method),
- function(m){
- map_upheno_data_i(pheno_map_method=m,
- gene_map_method=gene_map_method,
- keep_nogenes=keep_nogenes,
- fill_scores=fill_scores,
- terms=terms)
- }) |> data.table::rbindlist(fill=TRUE, idcol = "pheno_map_method")
- ## Save
- messager("Caching processed file -->",path)
- attr(pheno_map_genes_match,"version") <- format(Sys.Date(), "%Y-%m-%d")
- dir.create(save_dir, showWarnings = FALSE, recursive = TRUE)
- saveRDS(pheno_map_genes_match,path)
- }
- ## Return
- return(pheno_map_genes_match)
-}
diff --git a/R/map_upheno_data_i.R b/R/map_upheno_data_i.R
deleted file mode 100644
index 575a54b..0000000
--- a/R/map_upheno_data_i.R
+++ /dev/null
@@ -1,198 +0,0 @@
-map_upheno_data_i <- function(pheno_map_method,
- gene_map_method,
- keep_nogenes,
- fill_scores,
- terms){
- hgnc_id <- gene_label1 <- subject <- subject_taxon_label <-
- hgnc_label <- n_phenotypes <- n_genes_intersect <-
- prop_intersect <- p1 <- p2 <- db1 <- db2 <- id1 <- id2 <-
- equivalence_score <- subclass_score <- hgnc_label1 <-
- hgnc_label2 <- hgnc_id2 <- gene_id <- gene_label1 <- gene_label2 <-
- n_genes_db1 <- object <-
- n_genes_db2 <- subject_taxon_label1 <- subject_taxon_label2 <-
- phenotype_genotype_score <- equivalence_score <- NULL;
-
- pheno_map_method <- pheno_map_method[1]
- gene_map_method <- gene_map_method[1]
- messager(paste0("map_upheno_data: pheno_map_method=",
- shQuote(pheno_map_method)))
- #### Import data ####
- ## Cross-species phenotype map
- {
- if(pheno_map_method=="upheno"){
- pheno_map <- get_upheno("bestmatches")
- pheno_map[,db1:=gsub("*:.*","",basename(id1))]
- } else if(pheno_map_method=="monarch"){
- pheno_map <- get_monarch("phenotype_to_phenotype") |>
- data.table::setnames(c("label_x","label_y"),c("label1","label2"))
- pheno_map[,id1:=gsub("_",":",basename(p1))
- ][,id2:=gsub("_",":",basename(p2))]
- pheno_map[,db1:=gsub("*_.*","",basename(p1))
- ][,db2:=gsub("*_.*","",basename(p2))]
- pheno_map[,equivalence_score:=NA][,subclass_score:=NA]
- }
- #### Filter data ####
- if(!is.null(terms)){
- pheno_map <- pheno_map[id1 %in% terms,]
- if(nrow(pheno_map)==0){
- stop("No terms found in pheno_map")
- }
- }
- }
-
- ## Gene-phenotype associations across 8 species
- {
- genes <- get_monarch("phenotype_to_gene")
- data.table::setkeyv(genes,"object")
- messager("Unique species with genes:",
- data.table::uniqueN(genes$subject_taxon_label))
- # genes[,db:=gsub("*:.*","",object)]
- ## Create an ID-label map for each phenotype
- genes_map <- genes[,c("object","object_label",
- "subject_taxon","subject_taxon_label")] |> unique()
- genes_map <- genes_map[,.SD[1], by="object"
- ][,db:=gsub("*:.*","",object)]
- ## Create an db-species map for each Ontology
- species_map <- genes_map[,.SD[1], keyby="db"][,.(db,subject_taxon_label)]
- }
-
- ## Gene-gene homology across 12 species
- if(gene_map_method=="monarch"){
- {
- homol <- get_monarch("gene_to_gene")
- messager("Unique species with ortholog:",
- data.table::uniqueN(homol$subject_taxon_label))
- ## Subset to only convert human --> non-human
- homol <- homol[grep("^HGNC",subject),] |>
- data.table::setnames(
- c("subject","subject_label","object","object_label"),
- c("hgnc_id","hgnc_label","gene_id","gene_label"))
- ## Add human-to-human back into map
- hhomol <- (homol[,c("hgnc_id","hgnc_label",
- "subject_taxon","subject_taxon_label")] |> unique()
- )[,gene_id:=hgnc_id][,gene_label:=hgnc_label]
- homol <- rbind(hhomol,homol,fill=TRUE)
- message("Unique orthologs: ",data.table::uniqueN(homol$gene_id))
- }
- ## Orthogene
- } else if(gene_map_method=="orthogene"){
- requireNamespace("orthogene")
- homol <- lapply(stats::setNames(unique(genes$subject_taxon_label),
- unique(genes$subject_taxon_label)),
- function(x){
-
- input_species <- if(x=="Xenopus laevis"){
- "Xenopus"
- } else if(x=="Caenorhabditis elegans"){
- "celegans"
- } else {
- x
- }
- orthogene::convert_orthologs(
- gene_df = genes[subject_taxon_label==x,]$subject_label|>
- unique(),
- # gene_input = "subject_label",
- gene_output = "columns",
- input_species = input_species,
- non121_strategy = "kbs",
- output_species = "human")
- }) |>
- data.table::rbindlist(fill=TRUE, idcol = "subject_taxon_label") |>
- data.table::setkeyv(c("subject_taxon_label","input_gene"))
- data.table::setnames(homol,"input_gene","subject_label")
- message("Unique orthologs: ",
- data.table::uniqueN(
- homol[subject_taxon_label!="Homo sapiens"]$subject_label))
- }
- #### Map non-human genes onto human orthologs ####
- {
- genes_homol <- data.table::merge.data.table(
- genes,
- homol[,c("hgnc_id","hgnc_label","gene_id","gene_label")],
- by.x="subject",
- by.y="gene_id")
- messager(formatC(nrow(genes_homol),big.mark = ","),"/",
- formatC(nrow(genes),big.mark = ","),
- "rows remain after cross-species gene mapping.")
- messager(data.table::uniqueN(genes_homol$subject_taxon_label),"/",
- data.table::uniqueN(genes$subject_taxon_label),
- "species remain after cross-species gene mapping.")
- }
-
- #### Map non-human phenotypes onto human phenotypes ####
- #### Merge nonhuman ontology genes with human HPO genes ####
- {
- pheno_map_genes <- data.table::merge.data.table(
- pheno_map,
- genes_homol,
- by.x="id1",
- by.y="object",
- all.x = keep_nogenes,
- allow.cartesian = TRUE) |>
- #### Merge nonhuman ontology genes with human HPO genes ####
- data.table::merge.data.table(
- genes_homol,
- by.x="id2",
- by.y="object",
- all.y = keep_nogenes,
- suffixes = c(1,2),
- allow.cartesian = TRUE
- )
- ## Fill in missing species for those without gene data
- pheno_map_genes[
- is.na(subject_taxon_label1),
- subject_taxon_label1:=species_map[db1]$subject_taxon_label]
- pheno_map_genes[
- is.na(subject_taxon_label2),
- subject_taxon_label2:=species_map[db2]$subject_taxon_label]
- ## Add gene counts
- pheno_map_genes[,n_genes_db1:=data.table::uniqueN(gene_label1), by="id1"]
- pheno_map_genes[,n_genes_db2:=data.table::uniqueN(gene_label2), by="id2"]
- ## Report
- messager(data.table::uniqueN(pheno_map_genes$subject_taxon_label2),"/",
- data.table::uniqueN(genes_homol$subject_taxon_label),
- "species remain after cross-species phenotype mapping.")
- ## Remove
- # remove(genes_human,genes_nonhuman,pheno_map)
- }
-
- #### Count the number of overlapping genes
- {
- if(isFALSE(keep_nogenes)){
- pheno_map_genes_match <- pheno_map_genes[hgnc_label1==hgnc_label2,]
- } else {
- pheno_map_genes_match <- pheno_map_genes |> data.table::copy()
- }
- pheno_map_genes_match <-
- pheno_map_genes_match[,
- list(n_genes_intersect=data.table::uniqueN(hgnc_id2)),
- by=c("id1","db1","label1","n_genes_db1",
- "id2","db2","label2","n_genes_db2",
- "subject_taxon1","subject_taxon_label1",
- "subject_taxon2","subject_taxon_label2",
- "equivalence_score","subclass_score")
- ] |>
- data.table::setorderv("n_genes_intersect",-1)
- pheno_map_genes_match[,n_phenotypes:=data.table::uniqueN(id1),
- by=c("db1","db2",
- "subject_taxon1","subject_taxon2",
- "subject_taxon_label1","subject_taxon_label2"
- )]
- pheno_map_genes_match[,prop_intersect:=(n_genes_intersect/n_genes_db1)]
- ## Compute a score that captures both the phenotype mapping score and
- ## the poportional gene overlap score.
- pheno_map_genes_match[,phenotype_genotype_score:=data.table::nafill(
- (equivalence_score*prop_intersect)^(1/2),fill = 0)]
- # remove(pheno_map_genes)
- }
- ## Fill missing data
- if(!is.null(fill_scores)){
- data.table::setnafill(x = pheno_map_genes_match,
- fill = fill_scores,
- cols=c("equivalence_score","subclass_score"))
- }
- ## Check that the number of intersecting nonhuman ontology genes is always
- ## less than or equal to the number of total HPO genes.
- # pheno_map_genes_match[n_genes>n_genes_hpo,]
- return(pheno_map_genes_match)
-}
diff --git a/R/map_upheno_heatmap.R b/R/map_upheno_heatmap.R
deleted file mode 100644
index 9ad5911..0000000
--- a/R/map_upheno_heatmap.R
+++ /dev/null
@@ -1,97 +0,0 @@
-map_upheno_heatmap <- function(plot_dat,
- hpo_ids=NULL,
- value.var=c("phenotype_genotype_score",
- "prop_intersect",
- "equivalence_score",
- "subclass_score"),
- min_rowsums=NULL,
- cluster_from_ontology=FALSE,
- save_dir=tempdir(),
- height = 15,
- width = 10){
- requireNamespace("scKirby")
- requireNamespace("ComplexHeatmap")
- id1 <- hpo_id <- NULL;
- set.seed(2023)
- value.var <- match.arg(value.var)
- # hpo_ids <- MultiEWCE::example_targets$top_targets$hpo_id
-
- ### Subset phenotypes
- if(!is.null(hpo_ids)) plot_dat <- plot_dat[id1 %in% unique(hpo_ids)]
- plot_dat[,hpo_id:=id1][,label1:=gsub(" (HPO)","",label1,fixed = TRUE)]
- plot_dat <- add_ancestor(plot_dat)
- data.table::setkeyv(plot_dat,"label1")
-
- ### Plot
- X <- data.table::dcast.data.table(plot_dat,
- formula = label1 ~ subject_taxon_label2,
- fill = 0,
- value.var = value.var,
- fun.aggregate = mean,
- na.rm=TRUE) |>
- scKirby::to_sparse()
- #### Filter by min_rowsums ####
- if(!is.null(min_rowsums)){
- X <- X[Matrix::rowSums(X, na.rm = TRUE)>=min_rowsums,]
- }
- if(nrow(X)==0) stop("No rows remain after filtering by `min_rowsums`.")
-
- #### Get clusters from ontology ####
- if(isTRUE(cluster_from_ontology)){
- ids <- harmonise_phenotypes(plot_dat$id1,keep_order = FALSE)
- ids <- ids[ids %in% rownames(X)]
- ## best to do this on the entire HPO, then subset
- cluster_rows <- convert_ontology(to="igraph_dist_hclust",
- terms = names(ids))
- # leaves <- dendextend::get_leaves_attr(cluster_rows,"label")
- # c2 <- dendextend::prune(cluster_rows,
- # leaves[!leaves %in% names(ids)],
- # keep_branches = FALSE)
- ## subset to only those in the heatmap
- X <- X[ids,]
- } else {
- cluster_rows <- TRUE
- }
- #### Add annotation ####
- annot_dat <- plot_dat[rownames(X),
- c("label1","n_genes_db1",
- "ancestor_name")] |> unique()
- col_fun <- colorRamp2::colorRamp2(
- seq(min(annot_dat$n_genes_db1),
- max(annot_dat$n_genes_db1),
- length.out=4),
- pals::gnuplot(4))
- la <- ComplexHeatmap::rowAnnotation(
- df=data.frame(annot_dat[,-c("label1")],
- row.names = annot_dat$label1),
- # ?ComplexHeatmap::Legend
- show_legend = c(TRUE, FALSE),
- annotation_legend_param = list(
- n_genes_db1= list(title = "HPO genes",
- col_fun = col_fun))
- )
- row_split <- if(isFALSE(cluster_from_ontology)) annot_dat$ancestor_name
- #### make heatmap ####
- ch <- ComplexHeatmap::Heatmap(matrix = as.matrix(X),
- name = value.var,
- cluster_rows = cluster_rows,
- row_title_rot = 0,
- row_names_gp = grid::gpar(fontsize = 7),
- row_title_gp = grid::gpar(fontsize = 9),
- row_split = row_split,
- col = pals::viridis(n = 20),
- cluster_columns = FALSE,
- left_annotation = la)
- # methods::show(ch)
- #### Save ####
- if(!is.null(save_dir)){
- grDevices::pdf(file = file.path(save_dir,
- paste0("map_upheno_heatmap-",
- value.var,".pdf")),
- height = height,
- width = width)
- methods::show(ch)
- grDevices::dev.off()
- }
- return(ch)
-}
diff --git a/R/map_upheno_nn.R b/R/map_upheno_nn.R
deleted file mode 100644
index d629cb4..0000000
--- a/R/map_upheno_nn.R
+++ /dev/null
@@ -1,62 +0,0 @@
-map_upheno_nn <- function(){
-
- #### Find nearest neighbors for each HPO term in each non-human ontology ####
- Xnonhuman <- data.table::dcast.data.table(
- genes_nonhuman,
- formula = ortholog_hgnc_label ~ alt_id,
- fun.aggregate = length,
- ) |> scKirby::to_sparse()
- Xhuman <- data.table::dcast.data.table(
- genes_human,
- formula = hgnc_label ~ hpo_id,
- fun.aggregate = length,
- ) |> scKirby::to_sparse()
- X <- Seurat::RowMergeSparseMatrices(mat1 = Xhuman, Xnonhuman)
- obj <- scKirby::process_seurat(
- list(data=list(genes=X),
- obs=data.frame(genes_map,
- row.names = genes_map$object)
- ),
- nfeatures = 4600
- )
-
- dbs <- stringr::str_split(colnames(Xnonhuman),"[:]",simplify = TRUE)[,1] |>
- unique()
- BPPARAM <- BiocParallel::MulticoreParam(10,progressbar = TRUE)
- hpo_ids <- grep("^HP:",rownames(obj@graphs$RNA_snn), value = TRUE)
- nei_mat <- BiocParallel::bplapply(
- BPPARAM = BPPARAM,
- X = stats::setNames(hpo_ids,hpo_ids),
- FUN = function(hpo_id){
- lapply(dbs,
- function(db){
- obj@graphs$RNA_snn[
- hpo_id,
- grepl(db,colnames(obj@graphs$RNA_snn))] |>
- sort() |>
- tail(10)
- })
- })
- dt <- lapply(nei_mat,
- function(x) {
- lapply(x,
- function(y){
- data.table::as.data.table(data.frame(val=y),
- keep.rownames = "alt_id")
- }) |>
- data.table::rbindlist()
- }) |>
- data.table::rbindlist(idcol = "hpo_id")
-
- dt_map <- genes_map[dt[val>.5,], on=c("object"="alt_id"), allow.cartesian = TRUE]
- dt_map <- genes_map[dt_map, on=c("object"="hpo_id"), allow.cartesian = TRUE]
-
- # View(dt_map[val>.95])
- # o = order(obj@graphs$RNA_snn, decreasing=TRUE)[1:10]
- # mm <- MatrixModels::glm4()
-
- # res <- phenomix::iterate_lm(xmat = Xhuman, ymat = Xnonhuman, workers = 10)
- # orthogene:::sparsity(Xg)
- # Xc <- fastcluster::hclust(dist(Xg))
-
-}
diff --git a/R/map_upheno_plots.R b/R/map_upheno_plots.R
deleted file mode 100644
index f5fbdba..0000000
--- a/R/map_upheno_plots.R
+++ /dev/null
@@ -1,52 +0,0 @@
-#' Map uPheno plot
-#'
-#' Generate multiple kinds of plots summarising mappings of phenotypes and genes
-#' between the HPO and various non-human ontologies within uPheno.
-#' @param types A character vector of plot types to generate.
-#' @param pheno_map_genes_match uPheno cross-species mapping data generated by
-#' \link[HPOExplorer]{map_upheno_data}.
-#' @param subset_db1 A character vector of reference ontologies to subset
-#' \code{pheno_map_genes_match} by.
-#' @returns A named list of \pkg{ggplot2} objects.
-#'
-#' @export
-#' @examples
-#' pheno_map_genes_match <- map_upheno_data()
-#' upheno_plots <- map_upheno_plot(
-#' pheno_map_genes_match = pheno_map_genes_match)
-map_upheno_plot <- function(pheno_map_genes_match=NULL,
- subset_db1="HP",
- types=c("rainplot","scatterplot","heatmap")){
- ## Prepare plot data
- n_phenotypes <- db1 <- id1 <-NULL;
- {
- #### Subset data ####
- if(!is.null(subset_db1)) {
- plot_dat <- pheno_map_genes_match[db1 %in% subset_db1,]
- messager(data.table::uniqueN(plot_dat$subject_taxon_label2),"/",
- data.table::uniqueN(pheno_map_genes_match$subject_taxon_label2),
- "species remain after filtering by `subset_db1`.")
- }else {
- plot_dat <- data.table::copy(pheno_map_genes_match)
- }
- ## Only use one phenotype-phenotype per row
- plot_dat <- plot_dat[,.SD[1],by=c("db1","id1","db2","id2")]
- ## Recompute n_phenotypes
- plot_dat[,n_phenotypes:=data.table::uniqueN(id1),
- by=c("db1","db2",
- "subject_taxon1","subject_taxon2",
- "subject_taxon_label1","subject_taxon_label2"
- )]
- }
- plots <- list()
- if("rainplot" %in% types){
- plots[["rainplot"]] <- map_upheno_rainplot(plot_dat = plot_dat)
- }
- if("scatterplot" %in% types){
- plots[["scatterplot"]] <- map_upheno_scatterplot(plot_dat = plot_dat)
- }
- if("heatmap" %in% types){
- plots[["heatmap"]] <- map_upheno_heatmap(plot_dat = plot_dat)
- }
- return(plots)
-}
diff --git a/R/map_upheno_rainplot.R b/R/map_upheno_rainplot.R
deleted file mode 100644
index e3a5ab4..0000000
--- a/R/map_upheno_rainplot.R
+++ /dev/null
@@ -1,51 +0,0 @@
-map_upheno_rainplot <- function(plot_dat){
-
- requireNamespace("ggplot2")
- requireNamespace("ggdist")
- requireNamespace("tidyquant")
-
-
- ### Plot proportion of intersecting orthologs per ontology ####
- ggplot(plot_dat,
- aes(x=paste0(subject_taxon_label2,
- "\n(n = ",n_phenotypes," phenotypes)"),
- y=(n_genes_intersect/n_genes_db1),
- fill=factor(db2))) +
- facet_grid(db1~.,
- scales = "free_y",
- space = "free_y") +
- # add half-violin from {ggdist} package
- ggdist::stat_halfeye(
- # adjust bandwidth
- adjust = 0.5,
- # move to the right
- justification = -0.1,
- # remove the slub interval
- .width = 0,
- point_colour = NA
- ) +
- geom_boxplot(
- show.legend = FALSE,
- width = 0.12,
- # removing outliers
- outlier.color = NA,
- alpha = 0.5
- ) +
- # ggdist::stat_dots(
- # aes(color=factor(db2)),
- # show.legend = FALSE,
- # # ploting on left side
- # side = "left",
- # # adjusting position
- # justification = 1.1,
- # # adjust grouping (binning) of observations
- # # binwidth = 0.25
- # ) +
- # Themes and Labels
- tidyquant::scale_fill_tq() +
- tidyquant::scale_color_tq() +
- tidyquant::theme_tq() +
- coord_flip() +
- labs(x="Species",
- fill="Ontology")
-}
diff --git a/R/map_upheno_scatterplot.R b/R/map_upheno_scatterplot.R
deleted file mode 100644
index 81527e1..0000000
--- a/R/map_upheno_scatterplot.R
+++ /dev/null
@@ -1,11 +0,0 @@
-map_upheno_scatterplot <- function(plot_dat){
-
- #### Check if there's a relationship between phenotype mapping scores
- # and number of shared genes ####
- ggplot(plot_dat,
- aes(x=(n_genes_intersect/n_genes_db1),
- y=equivalence_score)) +
- geom_point() +
- facet_grid(rows = "subject_taxon_label2") +
- geom_smooth()
-}
diff --git a/R/mondo_to_matrix.R b/R/mondo_to_matrix.R
deleted file mode 100644
index 0e3d54c..0000000
--- a/R/mondo_to_matrix.R
+++ /dev/null
@@ -1,48 +0,0 @@
-mondo_to_matrix <- function(species=c("Homo sapiens")){
-
- subject_taxon_label <- evidence_score <- evidence <- object_definition <-
- object <- NULL;
-
- #### Prepare data ####
- dat <- data.table::fread(paste0(
- "https://data.monarchinitiative.org/latest/tsv/",
- "all_associations/gene_disease.all.tsv.gz"
- ))
- dat[,evidence_score:=sapply(evidence,
- function(x){length(strsplit(x,"|")[[1]])})]
- dat <- dat[subject_taxon_label %in% species]
- mondo <- get_mondo()
- dat[,object_definition:=mondo$def[object]]
- #### Make matrix ####
- X_dt <- data.table::dcast.data.table(data = dat,
- formula = "subject_label ~ object",
- value.var = "evidence_score",
- fun.aggregate = sum,
- fill = 0)
- X <- as.matrix(X_dt[,-1], rownames = X_dt[[1]]) |>
- methods::as("sparseMatrix")
- #### Make col meta.data ####
- col_data <- dat[,c("subject_taxon",
- "subject_taxon_label",
- "object",
- "object_label",
- "object_definition")] |>
- unique() |>
- data.frame()
- rownames(col_data) <- col_data$object
-
- # obj <- scNLP::seurat_pipeline(obj = X,
- # meta.data = col_data,
- # vars.to.regress = "nFeature_RNA",
- # nfeatures = NULL)
- # tfidf_plt <- scNLP::plot_tfidf(object = obj,
- # label_var = "object_definition")
- # dp <- Seurat::DimPlot(obj, group.by = "object_label") + Seurat::NoLegend()
- # plotly::ggplotly(dp)
- # fp <- Seurat::FeaturePlot(obj, features = "n_genes")
-
- return(list(X=X,
- col_data=col_data,
- row_data=dat))
-
-}
diff --git a/R/network_3d.R b/R/network_3d.R
deleted file mode 100644
index a64ae4b..0000000
--- a/R/network_3d.R
+++ /dev/null
@@ -1,231 +0,0 @@
-#' 3D network
-#'
-#' Plot a subset of the HPO as a 3D network.
-#' @param g \link[igraph]{igraph} object
-#' generated by \link[HPOExplorer]{make_igraph_object}.
-#' @param layout_func Layout function for the graph.
-#' @param node_color_var Variable in the vertex metadata to color nodes by.
-#' @param edge_color_var Variable in the edge metadata to color edges by.
-#' @param text_color_var Variable in the node metadata to color text by.
-#' @param node_symbol_var Variable in the vertex metadata to shape nodes by.
-#' @param node_opacity Node opacity.
-#' @param edge_opacity Edge opacity.
-#' @param node_palette Color palette function for the nodes/points.
-#' @param edge_palette Color palette function for the edges/lines.
-#' @param kde_palette Color palette function for the KDE plot.
-#' @param add_kde Add a kernel density estimation (KDE) plot
-#' below the 3D scatter plot (i.e. the "mountains" beneath the points).
-#' @param extend_kde Extend the area that the KDE plot covers.
-#' @param bg_color Plot background color.
-#' @param add_labels Add phenotype name labels to each point.
-#' @param keep_grid Keep all grid lines and axis labels.
-#' @param aspectmode The proportions of the 3D plot. See the
-#' \href{https://plotly.com/python/reference/layout/scene/#layout-scene-aspectmode}{
-#' plotly documentation site} for details.
-#' @param hover_width Maximum width of the hover text.
-#' @param label_width Maximum width of the label text.
-#' @param seed Random seed to enable reproducibility.
-#' @param showlegend Show node fill legend.
-#' @param show_plot Print the plot after it's been generated.
-#' @param save_path Path to save interactive plot to
-#' as a self-contained HTML file.
-#' @param verbose Print messages.
-#' @returns A 3D interactive \link[plotly]{plotly} object.
-#'
-#' @source
-#' \href{https://www.r-bloggers.com/2013/09/network-visualization-part-4-3d-networks/}{
-#' R bloggers}
-#' @source
-#' \href{https://community.plotly.com/t/is-it-possible-to-connect-scatters-in-3d-scatter-plot/}{
-#' Plotly: Connect points in 3D plot}
-#' @source \href{https://plotly.com/r/line-charts/}{
-#' Plotly: Connect points in 2D plot}
-#' @source \href{https://plotly.com/r/3d-line-plots/}{
-#' Plotly: 3D lines plots}
-#' @source \href{https://plotly.com/python/v3/3d-network-graph/}{
-#' Plotly: 3D network plot}
-#'
-#' @export
-#' @importFrom stringr str_wrap
-#' @importFrom methods show
-#' @examples
-#' phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay")
-#' g <- make_igraph_object(phenos = phenos)
-#' plt <- network_3d(g=g, show_plot=FALSE)
-network_3d <- function(g,
- layout_func = igraph::layout.fruchterman.reingold,
- node_color_var = "ontLvl",
- edge_color_var = "zend",
- text_color_var = node_color_var,
- node_symbol_var = "ancestor_name",
- node_palette = pals::kovesi.cyclic_mrybm_35_75_c68_s25,
- edge_palette = pals::kovesi.cyclic_mrybm_35_75_c68_s25,
- node_opacity = .75,
- edge_opacity = .5,
- kde_palette = pals::gnuplot,
- add_kde = TRUE,
- extend_kde = 1.5,
- bg_color = kde_palette(6)[1],
- add_labels = FALSE,
- keep_grid = FALSE,
- aspectmode = 'cube',
- hover_width=80,
- label_width=80,
- seed = 2023,
- showlegend = TRUE,
- show_plot = TRUE,
- save_path = tempfile(fileext = "network_3d.html"),
- verbose = TRUE){
- # devoptera::args2vars(network_3d)
- # anc <- "Abnormality of the nervous system"
- # phenos <- make_phenos_dataframe(ancestor = anc)[ancestor_name==anc,]
- # g <- make_igraph_object(phenos = phenos)
- # g <- readRDS("~/Downloads/priotised_igraph.rds")
- requireNamespace("plotly")
- requireNamespace("pals")
- requireNamespace("htmlwidgets")
- label <- hpo_name <- NULL;
-
- #### Convert igraph to plotly data ####
- d <- igraph_to_plotly(g = g,
- layout_func = layout_func,
- dim = 3,
- seed = seed,
- verbose = verbose)
- vdf <- d$vertices
- vdf[,label:=gsub("\n","
",
- stringr::str_wrap(hpo_name, width = label_width))]
- edf <- d$edges
- remove(d)
- #### Create paths and nodes ####
- fig <-
- plotly::plot_ly() |>
- plotly::add_paths(data = edf,
- x = ~xend,
- y = ~yend,
- z = ~zend,
- xend = ~xend,
- yend = ~yend,
- # zend = ~zend,
- color = ~get(edge_color_var),
- colors = rev(edge_palette(50)),
- line = list(shape = "spline"),
- type = "scatter3d",
- name = "edge",
- opacity = edge_opacity,
- mode = "lines",
- hoverinfo = "none",
- showlegend = FALSE,
- inherit = FALSE) |>
- plotly::add_markers(data = vdf,
- x = ~x,
- y = ~y,
- z = ~z,
- symbol = ~stringr::str_wrap(
- get(node_symbol_var),
- width = label_width
- ),
- # size = ~ontLvl,
- color = ~ get(node_color_var),
- colors = (
- node_palette(length(
- unique(vdf[[node_color_var]])
- ))
- ),
- marker = list(
- line = list(
- color = bg_color
- )
- ),
- hovertext = ~ stringr::str_wrap(
- paste(
- paste0('name: ',name),
- paste0('hpo_id: ',hpo_id),
- paste0('hpo_name: ',hpo_name),
- paste0('ontLvl: ',ontLvl),
- paste0('ancestor_name: ',ancestor_name),
- sep = "
"),
- width = hover_width
- ),
- opacity = node_opacity,
- showlegend = showlegend,
- type = "scatter3d",
- mode = "markers")
- #### Add KDE ####
- if(isTRUE(add_kde)){
- kd <- kde_surface(xyz = vdf,
- extend_kde = extend_kde)
- fig <- fig |>
- plotly::add_surface(data = kd,
- x = ~x,
- y = ~y,
- z = ~z,
- opacity = 1,
- hoverinfo = "none",
- colorscale = list(
- seq(0,1,length.out=6),
- kde_palette(n = 6)
- ),
- showlegend = FALSE,
- inherit = FALSE)
- }
- #### Add text ####
- if(isTRUE(add_labels)){
- fig <- fig |>
- plotly::add_text(data = vdf,
- x = ~x,
- y = ~y,
- z = ~z,
- text = ~label,
- color = ~ get(text_color_var),
- colors = (
- node_palette(length(
- unique(vdf[[text_color_var]])
- ))
- ),
- # textfont = list(color="rbga(255,255,255,.8"),
- hoverinfo = "none",
- inherit = FALSE,
- showlegend = FALSE)
- }
- #### Add layout ####
- scene <- lapply(stats::setNames(c("xaxis","yaxis","zaxis"),
- c("xaxis","yaxis","zaxis")),
- function(x){list(showgrid = keep_grid,
- showline = keep_grid,
- showspikes = keep_grid,
- zeroline = keep_grid,
- title = list(
- text=if(isTRUE(keep_grid)){
- x
- } else {""}
- ),
- showticklabels = keep_grid)}
- )
- scene$aspectmode <- aspectmode
- fig <- fig |>
- plotly::layout(
- hoverlabel = list(align = "left"),
- plot_bgcolor = bg_color,
- paper_bgcolor = bg_color,
- scene = scene
- # showlegend = FALSE
- ) |>
- plotly::colorbar(title=edge_color_var)
- # file <- file.path("~/Downloads","hpo_plotly.png")
- # plotly::export(p = fig,
- # file = file,
- # selenium = RSelenium::rsDriver(browser = "chrome"))
-
- # reticulate::py_install(packages = "kaleido",)
- # plotly::save_image(p = fig,file = file, width = 10, height =10)
- if(isTRUE(show_plot)) methods::show(fig)
- if(!is.null(save_path)) {
- messager("Saving interactive plot -->",save_path,v=verbose)
- dir.create(dirname(save_path),showWarnings = FALSE, recursive = TRUE)
- htmlwidgets::saveWidget(widget = fig,
- file = save_path,
- selfcontained = TRUE)
- }
- return(fig)
-}
diff --git a/R/oard_query_api.R b/R/oard_query_api.R
deleted file mode 100644
index b5b07f9..0000000
--- a/R/oard_query_api.R
+++ /dev/null
@@ -1,49 +0,0 @@
-oard_query_api <- function(ids,
- concept_prefix="q=",
- domain="https://rare.cohd.io/api/",
- dataset_id=NULL,
- endpoint="vocabulary/findConceptByAny",
- batch_size=100,
- workers=1,
- verbose=TRUE){
- # devoptera::args2vars(oard_query_api)
- requireNamespace("BiocParallel")
-
- ids <- na.omit(unique(ids))
- messager("Querying OARD API for",formatC(length(ids),big.mark = ","),
- "IDs.",v=verbose)
- batches <- split(ids, ceiling(seq_along(ids)/batch_size))
- oard_query_api_i <- function(batch,
- ...){
- URL <- paste0(paste0(domain,endpoint),"?",
- ifelse(is.null(dataset_id),
- "",
- paste0("dataset_id=",dataset_id,"&")
- ),
- if(!is.null(batch)){
- paste0(concept_prefix,
- htmltools::urlEncodePath(
- paste(batch,collapse = ";")
- )
- )
- } else {
- ""
- }
- )
- # message(URL)
- res <- httr::GET(URL)
- cont <- httr::content(res, as = "text", encoding = "UTF-8")
- js <- jsonlite::fromJSON(cont, simplifyDataFrame = TRUE)
- dt <- js$results
- if(length(dt)==0) return(dt)
- if(nrow(dt)==length(js$parameter$q)) dt$query <- js$parameter$q
- return(dt)
- }
- BiocParallel::register(BiocParallel::MulticoreParam(workers=workers,
- progressbar = TRUE))
- RES <- BiocParallel::bplapply(batches,
- oard_query_api_i,
- match.call()) |>
- data.table::rbindlist(idcol = "batch", fill = TRUE)
- return(RES)
-}
diff --git a/R/parse_pheno_frequency.R b/R/parse_pheno_frequency.R
index 7bff400..e286ad0 100644
--- a/R/parse_pheno_frequency.R
+++ b/R/parse_pheno_frequency.R
@@ -5,28 +5,28 @@ parse_pheno_frequency <- function(annot){
freq_dt <- lapply(seq(nrow(annot)), function(i){
f <- annot[i,]$frequency
if(grepl("^HP",f)){
- dt <- data.table::data.table(
+ dat <- data.table::data.table(
pheno_freq_name = get_freq_dict()[f],
pheno_freq_min = get_freq_dict(type="min")[f],
pheno_freq_max = get_freq_dict(type="max")[f])
} else if(grepl("/",f)){
splt <- as.numeric(strsplit(f,"/")[[1]])
val <- splt[[1]] / splt[[2]]*100
- dt <- data.table::data.table(
+ dat <- data.table::data.table(
pheno_freq_name="ratio",
pheno_freq_min=val,
pheno_freq_max=val)
} else if(grepl("[%]",f)){
val <- as.numeric(gsub("[%]","",f))
- dt <- data.table::data.table(
+ dat <- data.table::data.table(
pheno_freq_name="percentage",
pheno_freq_min=val,
pheno_freq_max=val)
} else {
return(NULL)
}
- dt[,hpo_id:=annot[i,]$hpo_id][,disease_id:=annot[i,]$disease_id]
- return(dt)
+ dat[,hpo_id:=annot[i,]$hpo_id][,disease_id:=annot[i,]$disease_id]
+ return(dat)
}) |> data.table::rbindlist(fill=TRUE)
freq_dt$pheno_freq_mean <- rowMeans(
freq_dt[,c("pheno_freq_min","pheno_freq_max")], na.rm = TRUE
diff --git a/R/per_branch_plot.R b/R/per_branch_plot.R
index d158678..97005d7 100644
--- a/R/per_branch_plot.R
+++ b/R/per_branch_plot.R
@@ -17,54 +17,60 @@
#' @returns ggplot object
#'
#' @export
-#' @import ggplot2
-#' @importFrom ontologyIndex get_descendants
#' @examples
-#' ## Selecting child terms of
-#' ## "Abnormality of the nervous system" as background branches
#' plt <- per_branch_plot(
#' highlighted_branches = "Abnormality of nervous system physiology",
#' ancestor = "Abnormality of the nervous system")
per_branch_plot <- function(highlighted_branches,
ancestor,
hpo = get_hpo(),
- background_branches = hpo$children[
- hpo$id[match(ancestor, hpo$name)]
- ][[1]],
+ background_branches = simona::dag_children(
+ hpo,
+ term = map_phenotypes(terms = ancestor,
+ hpo = hpo,
+ to = "id")
+ ),
phenotype_to_genes = load_phenotype_to_genes()) {
-
- # templateR:::source_all()
- # devoptera::args2vars(per_branch_plot)
-
requireNamespace("ggplot2")
- n_phenos <- branch <- target <- NULL;
- highlighted_branches_ids <- hpo$id[match(highlighted_branches, hpo$name)]
+ highlighted_branches_ids <- map_phenotypes(terms = highlighted_branches,
+ hpo = hpo,
+ to = "id",
+ keep_order = FALSE)
#### Count phenotypes per branch ####
phenos_per_branch <- lapply(background_branches, function(b){
- n <- length(ontologyIndex::get_descendants(ontology = hpo,
- roots = b))
+ n <- length(simona::dag_offspring(hpo,term = b))
if (b %in% highlighted_branches_ids) {
target_branch <- "target"
} else {
target_branch <- "other"
}
- data.table::data.table("branch" = hpo$name[b],
+ data.table::data.table("branch_id" = b,
+ "branch_name"=map_phenotypes(terms = b,
+ hpo = hpo,
+ to = "name"),
"n_phenos" = n,
"target" = target_branch)
}) |> data.table::rbindlist()
data.table::setkeyv(phenos_per_branch, "n_phenos")
- phenos_per_branch$branch <- factor(x = phenos_per_branch$branch,
- levels = unique(phenos_per_branch$branch),
- ordered = TRUE)
+ phenos_per_branch$branch_name <- factor(
+ x = phenos_per_branch$branch_name,
+ levels = unique(phenos_per_branch$branch_name),
+ ordered = TRUE)
+ #### Plot ####
plt <- ggplot(phenos_per_branch,
- aes_string(x = "n_phenos", y = "branch",
- color = "target", fill = "target")) +
+ aes(x = !!sym("n_phenos"),
+ y = !!sym("branch_name"),
+ color = !!sym("target"),
+ fill = !!sym("target")
+ )
+ )+
geom_col(width = .5) +
labs(x="Descendants (n)",
y="hpo_name",
fill="Group",
color="Group") +
theme_bw()
+ #### Return ####
return(plt)
}
diff --git a/R/phenos_to_granges.R b/R/phenos_to_granges.R
index f74bde4..a5c86cd 100644
--- a/R/phenos_to_granges.R
+++ b/R/phenos_to_granges.R
@@ -6,7 +6,7 @@
#' The resulting object will contain genes (and gene metadata) for all
#' genes associated with each phenotypes.
#' @param as_datatable Return as a \link[data.table]{data.table}.
-#' @param keep_seqnames Chromosomes to keep.
+#' @param keep_chr Chromosomes to keep.
#' @inheritParams add_genes
#' @inheritParams make_network_object
#' @inheritParams make_phenos_dataframe
@@ -23,7 +23,7 @@ phenos_to_granges <- function(phenos = NULL,
phenotype_to_genes =
load_phenotype_to_genes(),
hpo = get_hpo(),
- keep_seqnames = c(seq(22),"X","Y"),
+ keep_chr = c(seq(22),"X","Y"),
by = c("hpo_id","disease_id"),
gene_col = "intersection",
split.field = "hpo_id",
@@ -42,12 +42,10 @@ phenos_to_granges <- function(phenos = NULL,
hpo = hpo,
by = by,
gene_col = gene_col,
- allow.cartesian = allow.cartesian,
- verbose = verbose)
+ allow.cartesian = allow.cartesian)
#### Get gene lengths #####
- gr <- get_gene_lengths(gene_list = phenos$gene_symbol,
- keep_seqnames = keep_seqnames,
- verbose = verbose)
+ gr <- KGExplorer::get_gene_lengths(genes = phenos$gene_symbol,
+ keep_chr = keep_chr)
#### Merge in gene length data ####
gr_dt <- data.table::merge.data.table(
phenos,
diff --git a/R/read_enrichr.R b/R/read_enrichr.R
deleted file mode 100644
index 8b5bd35..0000000
--- a/R/read_enrichr.R
+++ /dev/null
@@ -1,7 +0,0 @@
-read_enrichr <- function(file){
- lines <- readLines(file)
- lapply(strsplit(lines,"\t"), function(l){
- data.table::data.table(term=l[1],
- gene=l[-c(1,2)])
- }) |> data.table::rbindlist(fill = TRUE)
-}
diff --git a/R/report_missing.R b/R/report_missing.R
index 1f09289..1b9d74a 100644
--- a/R/report_missing.R
+++ b/R/report_missing.R
@@ -1,12 +1,12 @@
-report_missing <- function(phenos,
- id_col,
+report_missing <- function(dat,
+ input_col,
report_col,
verbose=TRUE){
- report_missing_i <- function(phenos,
- id_col,
+ report_missing_i <- function(dat,
+ input_col,
report_col,
verbose=TRUE){
- phenos2 <- phenos[,c(id_col,report_col),with=FALSE] |> unique()
+ phenos2 <- dat[,c(input_col,report_col),with=FALSE] |> unique()
n_missing <- sum(is.na(phenos2[[report_col]]))
n_total <- nrow(phenos2)
messager(formatC(n_missing,big.mark = ","),
@@ -16,8 +16,8 @@ report_missing <- function(phenos,
report_col,"missing.",v=verbose)
}
for(rc in report_col){
- report_missing_i(phenos = phenos,
- id_col = id_col,
+ report_missing_i(dat = dat,
+ input_col = input_col,
report_col = rc,
verbose = verbose)
}
diff --git a/R/search_hpo.R b/R/search_hpo.R
index 1b67a95..52aaa64 100644
--- a/R/search_hpo.R
+++ b/R/search_hpo.R
@@ -11,7 +11,6 @@
#' @returns Named list of HPO IDs.
#'
#' @export
-#' @importFrom ontologyIndex get_descendants
#' @examples
#' query_hits <- search_hpo()
search_hpo <- function(hpo = get_hpo(),
@@ -38,24 +37,24 @@ search_hpo <- function(hpo = get_hpo(),
include_descendants=TRUE,
include_ancestors=FALSE,
verbose=TRUE){
- # devoptera::args2vars(search_hpo)
+ name <- NULL;
query_hits <- lapply(queries, function(q){
terms <- grep(paste(q,collapse = "|"),
- hpo$name,
+ hpo@elementMetadata$name,
ignore.case = TRUE, value = TRUE)
+ dat <- subset(hpo@elementMetadata, name %in% terms)
res <- c()
if(isTRUE(include_descendants)){
res <- c(res,
- ontologyIndex::get_descendants(ontology = hpo,
- roots = names(terms),
- exclude_roots = FALSE)
+ simona::dag_offspring(hpo,
+ term = dat$id)
)
}
if(isTRUE(include_ancestors)){
res <- c(res,
- ontologyIndex::get_ancestors(ontology = hpo,
- terms = names(terms))
+ simona::dag_ancestors(hpo,
+ term = dat$id)
)
}
return(unique(res))
diff --git a/R/subset_descendants.R b/R/subset_descendants.R
deleted file mode 100644
index bfb0006..0000000
--- a/R/subset_descendants.R
+++ /dev/null
@@ -1,46 +0,0 @@
-#' Subset descendants
-#'
-#' Subset a \code{phenos} datatable to only
-#' descendants of an ancestor HPO ID term.
-#' @param ancestor Phenotype names (or HPO ID) of an ancestor in the
-#' \href{https://hpo.jax.org/}{Human Phenotype Ontology}.
-#' Only phenotypes that are descendants of this ancestor will be kept.
-#' Set to \code{NULL} (default) to skip this filtering step.
-#' @inheritParams make_phenos_dataframe
-#' @inheritParams make_network_object
-#' @inheritParams harmonise_phenotypes
-#' @returns data.table of phenotypes, with additional columns:
-#' "ancestor", "ancestor_id"
-#'
-#' @export
-#' @importFrom ontologyIndex get_descendants
-#' @importFrom data.table :=
-#' @examples
-#' phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay")
-#' phenos2 <- subset_descendants(phenos = phenos,
-#' ancestor = "Motor delay")
-subset_descendants <- function(phenos,
- ancestor = NULL,
- hpo = get_hpo(),
- ignore_case = TRUE,
- verbose = TRUE){
-
- hpo_id <- NULL;
-
- if(!is.null(ancestor)){
- messager("Subsetting phenotypes to only ancestors of:",
- paste(ancestor,collapse = ","),v=verbose)
- ancestor_id <- harmonise_phenotypes(phenotypes = ancestor,
- hpo = hpo,
- as_hpo_ids = TRUE,
- ignore_case = ignore_case)
- all_ids <- ontologyIndex::get_descendants(ontology = hpo,
- roots = ancestor_id,
- exclude_roots = FALSE)
- phenos <- phenos[hpo_id %in% all_ids,
- ][,ancestor:=ancestor][,ancestor_id:=ancestor_id]
- messager(formatC(nrow(phenos),big.mark = ","),
- "associations remain after filtering.",v=verbose)
- }
- return(phenos)
-}
diff --git a/R/unlist_col.R b/R/unlist_col.R
index d3e1ec0..3f58163 100644
--- a/R/unlist_col.R
+++ b/R/unlist_col.R
@@ -1,13 +1,13 @@
#' Unlist column
#'
#' Unlist a column in a \link[data.table]{data.table}.
-#' @param dt \link[data.table]{data.table}
+#' @param dat \link[data.table]{data.table}
#' @param col Column name.
#' @returns \link[data.table]{data.table}
#'
#' @keywords internal
-unlist_col <- function(dt,
+unlist_col <- function(dat,
col){
- dt[rep(dt[,.I],lengths(get(col)))
- ][, eval(col) := unlist(dt[[col]])][]
+ dat[rep(dat[,.I],lengths(get(col)))
+ ][, eval(col) := unlist(dat[[col]])][]
}
diff --git a/inst/.DS_Store b/inst/.DS_Store
index aa49b00..368127d 100644
Binary files a/inst/.DS_Store and b/inst/.DS_Store differ
diff --git a/inst/extdata/.DS_Store b/inst/extdata/.DS_Store
new file mode 100644
index 0000000..5008ddf
Binary files /dev/null and b/inst/extdata/.DS_Store differ
diff --git a/inst/extdata/orphanet_epidemiology.csv.gz b/inst/extdata/orphanet_epidemiology.csv.gz
deleted file mode 100644
index 580368d..0000000
Binary files a/inst/extdata/orphanet_epidemiology.csv.gz and /dev/null differ
diff --git a/inst/hex/hexSticker.Rmd b/inst/hex/hexSticker.Rmd
index ef23e9f..2047054 100644
--- a/inst/hex/hexSticker.Rmd
+++ b/inst/hex/hexSticker.Rmd
@@ -2,7 +2,7 @@
title: "hexSticker"
date: "