diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..19582aa Binary files /dev/null and b/.DS_Store differ diff --git a/DESCRIPTION b/DESCRIPTION index 263761f..604e0f8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: HPOExplorer Title: Analysis and Visualisation of the Human Phenotype Ontology -Version: 0.99.12 +Version: 1.0.0 Authors@R: c( person(given = "Brian", @@ -18,25 +18,21 @@ Authors@R: role = c("aut"), email = "nathan.skene@gmail.com", comment = c(ORCID = "0000-0002-6807-3180"))) -Description: HPOExplorer contains useful functions for working with - the Human Phenotype Ontology (HPO). - It allows you to create interactive phenotype network plots, - which can be useful for making web applications. - It also has functions for finding ontology level - (n generations of terms below the selected term), - and getting term definitions from HPO API. +Description: Import, annotate and visualise the 18k+ hierarchically structured + clinical phenotypes across the Human Phenotype Ontology. License: GPL-3 URL: https://github.com/neurogenomics/HPOExplorer BugReports: https://github.com/neurogenomics/HPOExplorer/issues Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.0 Depends: R (>= 4.1.0) Imports: + KGExplorer, + simona, ggplot2, - plotly, - ontologyIndex, + plotly, utils, stats, methods, @@ -45,31 +41,27 @@ Imports: tools, data.table, stringr, - EnsDb.Hsapiens.v75, - ensembldb, - AnnotationFilter, - GenomicRanges, - AnnotationHub + GenomicRanges Suggests: rmarkdown, knitr, devtools, - BiocStyle, rworkflows, here, httr, jsonlite, testthat (>= 3.0.0), - igraph, pals, Matrix, MASS, scales, htmlwidgets, - R.utils, piggyback, patchwork, - BiocParallel + igraph, + tidygraph +Remotes: + github::neurogenomics/KGExplorer VignetteBuilder: knitr biocViews: Genetics, Preprocessing, GeneTarget, SystemsBiology, diff --git a/NAMESPACE b/NAMESPACE index 1fd52e6..81ea4ea 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,14 +3,12 @@ export(add_ancestor) export(add_death) export(add_disease) -export(add_disease_definition) export(add_disease_genes) export(add_evidence) export(add_gene_frequency) export(add_genes) export(add_hpo_definition) export(add_hpo_id) -export(add_hpo_synonym) export(add_info_content) export(add_mondo) export(add_ndisease) @@ -21,89 +19,58 @@ export(add_pheno_frequency) export(add_prevalence) export(add_severity) export(add_tier) -export(adjacency_matrix) export(annotate_phenos) -export(assign_tiers) -export(clear_cache) -export(convert_ontology) export(example_phenos) -export(get_gencc) +export(filter_descendants) export(get_gene_lists) export(get_hpo) export(get_hpo_id_direct) -export(get_monarch) -export(get_mondo) -export(get_ont_lvls) -export(get_ont_lvls2) -export(get_upheno) -export(get_version) export(ggnetwork_plot) export(gpt_annot_check) export(gpt_annot_codify) export(gpt_annot_plot) export(gpt_annot_read) -export(harmonise_phenotypes) export(hpo_api) export(hpo_to_matrix) -export(kde_surface) export(list_columns) export(list_deaths) export(list_onsets) -export(load_disease_genes) export(load_phenotype_to_genes) -export(make_hoverboxes) export(make_igraph_object) export(make_network_object) export(make_phenos_dataframe) -export(map_upheno) -export(map_upheno_data) -export(map_upheno_plot) -export(network_3d) +export(make_tiers) +export(map_phenotypes) export(newlines_to_definition) export(per_branch_plot) export(phenos_to_granges) export(search_hpo) -export(subset_descendants) +import(KGExplorer) import(data.table) import(ggplot2) import(network) +import(simona) +importFrom(KGExplorer,map_mondo) importFrom(data.table,":=") importFrom(data.table,.EACHI) -importFrom(data.table,.N) -importFrom(data.table,.SD) importFrom(data.table,as.data.table) importFrom(data.table,copy) importFrom(data.table,data.table) importFrom(data.table,dcast.data.table) -importFrom(data.table,fcoalesce) importFrom(data.table,fread) importFrom(data.table,merge.data.table) -importFrom(data.table,rbindlist) -importFrom(data.table,setcolorder) -importFrom(data.table,setkey) -importFrom(data.table,setkeyv) importFrom(data.table,setnafill) importFrom(data.table,setnames) importFrom(ggnetwork,geom_edges) importFrom(ggnetwork,ggnetwork) -importFrom(methods,show) -importFrom(ontologyIndex,get_OBO) -importFrom(ontologyIndex,get_ancestors) -importFrom(ontologyIndex,get_descendants) -importFrom(ontologyIndex,get_term_descendancy_matrix) -importFrom(ontologyIndex,get_term_info_content) importFrom(plotly,ggplotly) importFrom(plotly,layout) -importFrom(stats,as.dist) importFrom(stats,as.formula) importFrom(stats,cor) -importFrom(stats,cutree) -importFrom(stats,hclust) importFrom(stats,na.omit) importFrom(stats,setNames) importFrom(stats,terms) importFrom(stringr,str_split) -importFrom(stringr,str_wrap) importFrom(tools,R_user_dir) importFrom(utils,data) importFrom(utils,download.file) diff --git a/NEWS.md b/NEWS.md index 1d6750b..27c3b97 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,40 @@ +# HPOExplorer 1.0.0 + +## New features + +* *MAJOR CHANGES*: Switched from `ontologyIndex` to `simona`, + which seems to be better written and has extensive analysis functions. +* *MAJOR CHANGES*: Moved most non-HPO data annotation to [`KnowledgeNets`](https://github.com/neurogenomics/KnowledgeNets) + - Functions to assess disease or phenotype prevalence (https://github.com/neurogenomics/RareDiseasePrioritisation/issues/34) + - `get_prevalence_orphanet` + - `get_prevalence_oard` + - Functions to assess the existence of models for phenotypes or diseases (https://github.com/neurogenomics/RareDiseasePrioritisation/issues/33) + - `map_upheno` + - `map_upheno_data` + - `map_upheno_plots` + - `map_upheno_heatmap` + - `map_upheno_rainplot` + - `map_upheno_scatterplot` + - `get_monarch` + - `get_monarch_models` + +* New exported functions: + - Helper functions: + - `convert_hpo` + - Functions to map IDs across ontologies: + - `add_medgen` + - `add_omop` + - `add_prevalence` + - `add_prevalence_report` +* New internal functions: + - `dt_to_matrix` + +## Bug fixes + +* `add_mondo` + - Drastically improved cross-ontology mappings to MONDO (https://github.com/monarch-initiative/mondo/issues/6873) + - Add subfunction `map_mondo` + # HPOExplorer 0.99.12 ## New features @@ -11,7 +48,7 @@ - `load_phenotypes_to_genes`: new attribute. - New exported helper function: `get_version` - New exported helper function: `clear_cache` -* Add `make_hpo` internal function. +* Add `make_ontology` internal function. * Ensure consistent function naming: - `create_node_data` --> `make_node_data` * `get_gencc` @@ -179,7 +216,7 @@ * `hpo_tiers`: - Found lots of typos, outdated phenotype names, and mismatched HPO IDs. We through and manually re-curated all of these and checked that they match up with the - `harmonise_phenotypes` output. + `map_phenotypes` output. - Add original `hpo_tiers` csv to *inst/extdata*. * Delete `get_hpo_id` function in favor of `get_hpo_id_direct` which is more reliable and comprehensive. @@ -219,7 +256,7 @@ - `phenos_to_granges` - `add_onset` - `add_tier` - - `harmonise_phenotypes` + - `map_phenotypes` - `get_gene_lists` - `get_gene_lengths` - `list_onsets` diff --git a/R/.DS_Store b/R/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/R/.DS_Store differ diff --git a/R/_docs.R b/R/_docs.R new file mode 100644 index 0000000..1a388d0 --- /dev/null +++ b/R/_docs.R @@ -0,0 +1,118 @@ +#' @title Main functions +#' +#' @description +#' Main functions. +#' @param phenos A data.table containing HPO IDs and other metadata. +#' @param hpo Human Phenotype Ontology object, +#' loaded from \link[KGExplorer]{get_ontology}. +#' @param add_ont_lvl_absolute Add the absolute ontology level of each HPO term. +#' See \link[KGExplorer]{get_ontology_levels} for more details. +#' @param add_ont_lvl_relative Add the relative ontology level of each HPO term. +#' See \link[KGExplorer]{get_ontology_levels} for more details. +#' @param add_description Whether to get the phenotype descriptions as +#' well (slower). +#' @param add_info_content Add information content column for each phenotype. +#' @param add_disease_data Add all disease metadata columns. +#' This will expand the data using \code{allow.cartesian=TRUE}. +#' @param add_ndiseases Add the number of diseases per phenotype. +#' @param add_pheno_frequencies Add the frequency of each phenotype in +#' each disease. +#' @param add_tiers Add severity Tiers column using +#' \link[HPOExplorer]{add_tier}. +#' @param add_severities Add severity column using +#' \link[HPOExplorer]{add_severity}. +#' @param add_hoverboxes Add hoverdata with +#' \link[KGExplorer]{add_hoverboxes}. +#' @param add_onsets Add age of onset columns using +#' \link[HPOExplorer]{add_onset}. +#' @param add_deaths Add age of death columns using +#' \link[HPOExplorer]{add_death}. +#' @param columns A named vector of columns in \code{phenos} +#' to add to the hoverdata via \link[KGExplorer]{add_hoverboxes}. +#' @param add_disease_definitions Add disease definitions. +#' @param include_mondo Add \href{https://mondo.monarchinitiative.org/}{MONDO} +#' IDs, names, and definitions to each disease. +#' @param interactive Make the plot interactive. +#' @family main +#' @returns Data. +#' @import data.table +#' @import KGExplorer +#' @name main +NULL + + +#' @title Get functions +#' +#' @description +#' Functions to get data objects and extract elements. +#' @param term One or more ontology IDs. +#' @family get_ +#' @param force_new Force a new download or creation of a data resource. +#' @param lvl How many levels deep into the ontology to get ancestors from. +#' For example: +#' \itemize{ +#' \item{1: }{"All"} +#' \item{2: }{"Phenotypic abnormality"} +#' \item{3: }{"Abnormality of the nervous system"} +#' \item{4: }{"Abnormality of nervous system physiology"} +#' \item{5: }{"Neurodevelopmental abnormality" or "Behavioral abnormality"} +#' } +#' @inheritParams main +#' @inheritParams make_ +#' @returns Data. +#' @name get_ +NULL + +#' @title Add functions +#' +#' @description +#' Functions to add metadata to data.table objects. +#' @family add_ +#' @param agg_by Column to aggregate metadata by. +#' @param add_definitions Add disease definitions using \link{add_mondo}. +#' @inheritParams main +#' @inheritParams make_ +#' @inheritParams get_ +#' @inheritParams data.table::merge.data.table +#' @returns Annotated data. +#' @name add_ +NULL + + +#' @title Filter functions +#' +#' @description +#' Functions to filter data.table objects. +#' @family filter_ +#' @inheritParams main +#' @returns Filtered data. +#' @name filter_ +NULL + +#' @title Map functions +#' +#' @description +#' Functions to map IDs. +#' @family map_ +#' @inheritParams main +#' @inheritParams KGExplorer::map_ontology_terms +#' @returns Mapped data. +#' @name map_ +NULL + + +#' @title Make functions +#' +#' @description +#' Functions to make complex R objects. +#' @param as R object class to return output as. +#' @param phenotype_to_genes Output of +#' \link{load_phenotype_to_genes} mapping phenotypes +#' to gene annotations. +#' @param ancestor The ancestor to get all descendants of. If \code{NULL}, +#' returns the entirely ontology. +#' @family make_ +#' @inheritParams main +#' @returns R object. +#' @name make_ +NULL diff --git a/R/add_ancestor.R b/R/add_ancestor.R index 993e940..3f01697 100644 --- a/R/add_ancestor.R +++ b/R/add_ancestor.R @@ -1,59 +1,52 @@ +#' @describeIn add_ add_ #' Add ancestor #' #' Assign each HPO ID to the higher-order ancestral term that it is part of. -#' @param lvl How many levels deep into the ontology to get ancestors from. -#' For example: -#' \itemize{ -#' \item{1: }{"All"} -#' \item{2: }{"Phenotypic abnormality"} -#' \item{3: }{"Abnormality of the nervous system"} -#' \item{4: }{"Abnormality of nervous system physiology"} -#' \item{5: }{"Neurodevelopmental abnormality" or "Behavioral abnormality"} -#' } #' @param remove_descendants Remove HPO terms that are descendants of a given #' ancestral HPO term. Ancestral terms be provided as a character vector of #' phenotype names (e.g. \code{c("Clinical course")}), #' HPO IDs (e.g. \code{"HP:0031797" }) or a mixture of the two. -#' See \link[HPOExplorer]{add_ancestor} for details. -#' @inheritParams make_phenos_dataframe -#' @inheritParams make_network_object -#' @returns phenos data.table with extra column +#' See \link{add_ancestor} for details. #' #' @export -#' @importFrom data.table := +#' @import KGExplorer #' @examples #' phenos <- example_phenos() #' phenos2 <- add_ancestor(phenos = phenos, lvl=5) add_ancestor <- function(phenos, lvl = 3, hpo = get_hpo(), - remove_descendants = NULL, - verbose=TRUE){ - - ancestor_name <- ancestor <- NULL; + remove_descendants = NULL){ + ancestor <- NULL; + #### Check for existing columns #### + if(all(c("ancestor","ancestor_name") %in% names(phenos))){ + messager("Ancestor columns already present. Skipping.") + return(phenos) + } + #### Add the new columns #### if("hpo_id" %in% names(phenos)){ - messager(paste0("Adding level-",lvl),"ancestor to each HPO ID.",v=verbose) - phenos$ancestor <- lapply(hpo$ancestors[phenos$hpo_id], - function(x){ - if(length(x)>0){ - x[lvl] - } else {NA} - }) |> unlist() - # phenos[,.(ancestor=unlist(ancestor))] - phenos[,ancestor_name:=hpo$name[ancestor]] + messager(paste0("Adding level-",lvl),"ancestor to each HPO ID.") + hpo <- KGExplorer::add_ancestors(ont = hpo, + lvl = lvl) + ancestors <- hpo@elementMetadata[,c("id","ancestor","ancestor_name")] |> + unique() + phenos <- data.table::merge.data.table(phenos, + ancestors, + by.x = "hpo_id", + by.y = "id", + all.x = TRUE) } else { - messager("hpo_id column not found. Cannot add ancestors.",v=verbose) + messager("hpo_id column not found. Cannot add ancestors.") } #### Filter #### if(!is.null(remove_descendants)){ messager("Removing remove descendants of:", - paste(shQuote(remove_descendants),collapse = "\n -"),v=verbose) - rmd <- HPOExplorer::harmonise_phenotypes(phenotypes = remove_descendants, - hpo = hpo, - as_hpo_ids = TRUE, - keep_order = FALSE, - verbose = verbose) + paste(shQuote(remove_descendants),collapse = "\n -")) + rmd <- map_phenotypes(terms = remove_descendants, + hpo = hpo, + to="id", + keep_order = FALSE) phenos <- phenos[!ancestor %in% rmd,] } return(phenos) diff --git a/R/add_death.R b/R/add_death.R index 091d888..04b7c96 100644 --- a/R/add_death.R +++ b/R/add_death.R @@ -1,3 +1,4 @@ +#' @describeIn add_ add_ #' Add age of death #' #' Add age of death for each HPO ID. @@ -18,10 +19,7 @@ #' @param keep_deaths The age of death associated with each HPO ID to keep. #' If >1 age of death is associated with the term, #' only the earliest age is considered. -#' See \link[HPOExplorer]{add_death} for details. -#' @param agg_by Column to aggregate age of death metadata by. -#' @inheritParams make_network_object -#' @inheritParams data.table::merge.data.table +#' See \link{add_death} for details. #' @returns phenos data.table with extra columns: #' \itemize{ #' \item{"AgeOfDeath": }{AgeOfDeath HPO IDs of disease phenotypes associated @@ -46,19 +44,17 @@ add_death <- function(phenos, keep_deaths = NULL, all.x = TRUE, allow.cartesian = FALSE, - agg_by = NULL, - verbose = TRUE){ + agg_by = NULL){ # devoptera::args2vars(add_death) AgeOfDeath_earliest <- AgeOfDeath_name <- NULL; if(!all(c("AgeOfDeath", "AgeOfDeath_name") %in% names(phenos))){ - messager("Annotating phenos with AgeOfDeath.",v=verbose) + messager("Annotating phenos with AgeOfDeath.") phenos <- add_disease(phenos = phenos, all.x = all.x, - allow.cartesian = allow.cartesian, - verbose = verbose) + allow.cartesian = allow.cartesian) annot <- pkg_data("hpo_deaths") annot <- annot[,c("disease_id", "AgeOfDeath_name", diff --git a/R/add_disease.R b/R/add_disease.R index 369efbd..bc50741 100644 --- a/R/add_disease.R +++ b/R/add_disease.R @@ -1,3 +1,4 @@ +#' @describeIn add_ add_ #' Add diseases #' #' Annotate each HPO term with diseases that they are associated with. @@ -6,12 +7,6 @@ #' See #' \href{https://hpo-annotation-qc.readthedocs.io/en/latest/annotationFormat.html}{ #' here for column descriptions}. -#' @param add_definitions Add disease definitions using -#' \link[HPOExplorer]{add_disease_definition}. -#' @inheritParams add_disease_definition -#' @inheritParams make_network_object -#' @inheritParams data.table::merge.data.table -#' @returns phenos data.table with extra columns #' #' @export #' @importFrom data.table merge.data.table @@ -22,23 +17,17 @@ add_disease <- function(phenos, # extra_cols = c("Evidence","Reference","Biocuration"), extra_cols = NULL, - include_mondo = TRUE, all.x = TRUE, allow.cartesian = FALSE, - add_definitions = FALSE, - verbose = TRUE){ - - # devoptera::args2vars(add_disease) - + add_definitions = FALSE){ if(!"hpo_id" %in% names(phenos)){ stp <- paste("hpo_id column must be present in phenos.") stop(stp) } if(!all(c("disease_name","disease_id") %in% names(phenos))){ - messager("Annotating phenos with Disease",v=verbose) + messager("Annotating phenos with Disease") annot <- load_phenotype_to_genes(3) #### From disease_id #### - data.table::setnames(phenos,"disease_id","disease_id", skip_absent = TRUE) if("disease_name" %in% names(phenos)){ return(phenos) } @@ -57,12 +46,9 @@ add_disease <- function(phenos, all.x = all.x, allow.cartesian = allow.cartesian) } - #### Add Definitions column and fill out missing disease_name columns #### + #### Add disease definitions and Mondo ID mappings #### if(isTRUE(add_definitions)){ - phenos <- add_disease_definition(phenos = phenos, - all.x = all.x, - include_mondo = include_mondo, - verbose = verbose) + phenos <- add_mondo(phenos = phenos) } return(phenos) } diff --git a/R/add_disease_definition.R b/R/add_disease_definition.R deleted file mode 100644 index 266e68d..0000000 --- a/R/add_disease_definition.R +++ /dev/null @@ -1,88 +0,0 @@ -#' Add disease definition -#' -#' Add metadata for diseases using files from their respective databases: -#' OMIM, DECIPHER, Orphanet. -#' @param id_col Name of the disease ID column. -#' @param cols Metadata columns to include. -#' @param save_dir Directory to save metadata files to. -#' @param include_mondo Add IDs/names/definitions from MONDO via -#' \link[HPOExplorer]{add_mondo}. -#' @inheritParams make_network_object -#' @inheritParams data.table::merge.data.table -#' @returns phenos data.table with extra columns. -#' -#' @export -#' @importFrom data.table merge.data.table fcoalesce := -#' @importFrom utils data -#' @examples -#' phenos <- load_phenotype_to_genes(3)[seq(1000)] -#' phenos2 <- add_disease_definition(phenos = phenos) -add_disease_definition <- function(phenos, - cols = c("Definitions","Preferred.Label"), - id_col = "disease_id", - save_dir = file.path( - tools::R_user_dir("HPOExplorer", - which="cache")), - include_mondo = TRUE, - all.x = TRUE, - verbose = TRUE) { - - # devoptera::args2vars(add_disease_definition) - disease_name <- disease_id <- Preferred.Label <- Definitions <- - MONDO_definition <- NULL; - - #### Check if already filled out #### - if(!is.null(phenos) && - all(cols %in% names(phenos))){ - return(phenos) - } - #### Gather metadata #### - messager("Adding disease metadata:",paste(cols,collapse = ", "),v=verbose) - meta <- list( - ORPH = get_metadata_orphanet(save_dir = save_dir, - verbose = verbose), - OMIM = get_metadata_omim(save_dir = save_dir, - verbose = verbose) - ) |> data.table::rbindlist(fill=TRUE, - use.names = TRUE, - idcol = "Database") - #### Return only metadata #### - if(is.null(phenos)) { - return(meta) - #### Merge metdata #### - } else { - phenos <- - data.table::merge.data.table(phenos, - meta[,unique(c("id",cols)), with=FALSE], - all.x = all.x, - by.x = id_col[1], - by.y = "id") - if("disease_name" %in% names(phenos)){ - #### Report missing #### - report_missing(phenos = phenos, - id_col = id_col, - report_col = c("disease_name","Definitions"), - verbose = verbose) - ## Needs to be lowercase to harmonise across metadata sources, eg.: - ## "CEREBROOCULOFACIOSKELETAL SYNDROME 1" vs. - ## "Cerebrooculofacioskeletal syndrome 1" - phenos[,disease_name:=tolower(data.table::fcoalesce(Preferred.Label, - disease_name, - disease_id))] - } - #### Add mondo #### - if(isTRUE(include_mondo)){ - phenos <- add_mondo(phenos = phenos, - all.x = all.x, - verbose = verbose) - ## Coalesce definitions ## - phenos[,Definitions:=tolower(data.table::fcoalesce(Definitions, - MONDO_definition))] - report_missing(phenos = phenos, - id_col = id_col, - report_col = "Definitions", - verbose = verbose) - } - return(phenos) - } -} diff --git a/R/add_disease_genes.R b/R/add_disease_genes.R index 0204d4d..d96d398 100644 --- a/R/add_disease_genes.R +++ b/R/add_disease_genes.R @@ -1,13 +1,8 @@ +#' @describeIn add_ add_ #' Add disease genes #' #' Add genes that overlap between an HPO ID and an associated phenotype. -#' @inheritParams make_network_object -#' @inheritParams data.table::merge.data.table -#' @returns phenos data.table with extra columns -#' #' @export -#' @importFrom data.table merge.data.table -#' @importFrom utils data #' @examples #' \dontrun{ #' phenos <- load_phenotype_to_genes() @@ -30,7 +25,7 @@ add_disease_genes <- function(phenos, dannot <- load_phenotype_to_genes(file = "phenotype.hpoa") # annot <- annot[hpo_id %in% unique(phenos$hpo_id),] # #### Add hpo_id associations #### - # dgenes <- load_disease_genes() + # dgenes <- get_disease_genes() # dgenes <- data.table::merge.data.table( # dgenes, # annot[,c("disease_id","disease_name","hpo_id")], diff --git a/R/add_evidence.R b/R/add_evidence.R index 97cb4e6..e28efd2 100644 --- a/R/add_evidence.R +++ b/R/add_evidence.R @@ -1,3 +1,4 @@ +#' @describeIn add_ add_ #' Add evidence #' #' Add the strength of evidence supporting each gene-disease association. @@ -16,7 +17,6 @@ #' } #' @param keep_evidence The evidence scores of each gene-disease #' association to keep. -#' @param agg_by Column to aggregate metadata by. #' @param default_score Default evidence score to #' apply to gene-disease associations that are present in the HPO annotations #' but don't have evidence scores in the GenCC annotations. @@ -40,21 +40,15 @@ add_evidence <- function(phenos, allow.cartesian = FALSE, agg_by = c("disease_id", "gene_symbol"), - default_score = 1, - verbose = TRUE){ - - # devoptera::args2vars(add_evidence) + default_score = 1){ evidence_score <- evidence_score_mean <- NULL; if(!all(c("evidence_score_mean") %in% names(phenos))){ - messager("Annotating gene-disease associations with Evidence score", - v=verbose) + messager("Annotating gene-disease associations with Evidence score") phenos <- add_disease(phenos = phenos, all.x = all.x, - allow.cartesian = allow.cartesian, - verbose = verbose) - annot <- get_gencc(agg_by = agg_by, - verbose = verbose) + allow.cartesian = allow.cartesian) + annot <- get_gencc(agg_by = agg_by) #### Merge with input data #### phenos <- data.table::merge.data.table( x = phenos, diff --git a/R/add_gene_frequency.R b/R/add_gene_frequency.R index 23ea94c..16c1dd0 100644 --- a/R/add_gene_frequency.R +++ b/R/add_gene_frequency.R @@ -1,3 +1,4 @@ +#' @describeIn add_ add_ #' Add gene frequency #' #' Add gene-level frequency, i.e. how often mutations in a given gene @@ -8,15 +9,12 @@ #' a gene that occurs 100% of the time in a given phenotype. #' Include \code{NA} if you wish to retain genes that #' do not have any frequency data. -#' See \link[HPOExplorer]{add_gene_frequency} for details. +#' See \link{add_gene_frequency} for details. #' @inheritParams make_phenos_dataframe #' @inheritParams data.table::merge.data.table #' @returns phenos data.table with extra column #' #' @export -#' @importFrom data.table setnames merge.data.table := -#' @importFrom utils data -#' @importFrom stringr str_split #' @examples #' phenotype_to_genes <- load_phenotype_to_genes()[seq(1000),] #' phenos2 <- add_gene_frequency(phenotype_to_genes = phenotype_to_genes) @@ -31,8 +29,7 @@ add_gene_frequency <- function(phenotype_to_genes = load_phenotype_to_genes(), gene_freq_min <- gene_freq_max <- . <- NULL; phenotype_to_genes <- add_hpo_id(phenos = phenotype_to_genes, - phenotype_to_genes= phenotype_to_genes, - verbose = verbose) + phenotype_to_genes= phenotype_to_genes) new_cols <- c("gene_freq_name","gene_freq_min", "gene_freq_max","gene_freq_mean") if(!all(new_cols %in% names(phenotype_to_genes))){ diff --git a/R/add_genes.R b/R/add_genes.R index adc26d1..f84636b 100644 --- a/R/add_genes.R +++ b/R/add_genes.R @@ -1,3 +1,4 @@ +#' @describeIn add_ add_ #' Add genes #' #' Add genes associated with each phenotype @@ -19,8 +20,7 @@ add_genes <- function(phenos = NULL, by = c("hpo_id","disease_id"), gene_col = "gene_symbol", all.x = FALSE, - allow.cartesian = FALSE, - verbose = TRUE){ + allow.cartesian = FALSE){ # devoptera::args2vars(add_genes, reassign = TRUE) #### Prepare gene data #### @@ -38,7 +38,7 @@ add_genes <- function(phenos = NULL, ## Genes driving celltype-symptom enrichment. if(!is.null(gene_col) && gene_col %in% names(phenos)){ - phenos <- unlist_col(dt=phenos, + phenos <- unlist_col(dat=phenos, col=gene_col) data.table::setnames(phenos,gene_col,"gene_symbol") by <- unique(c(by,"gene_symbol")) @@ -46,11 +46,9 @@ add_genes <- function(phenos = NULL, #### Ensure necessary columns are in phenos #### phenos <- add_hpo_id(phenos = phenos, phenotype_to_genes = phenotype_to_genes, - hpo = hpo, - verbose = verbose) + hpo = hpo) phenos <- add_disease(phenos = phenos, - allow.cartesian = allow.cartesian, - verbose = verbose) + allow.cartesian = allow.cartesian) #### Add Gene col to data #### if(!"gene_symbol" %in% names(phenos)){ by <- by[by %in% names(phenos)] diff --git a/R/add_hpo_definition.R b/R/add_hpo_definition.R index a2a51ae..2137d25 100644 --- a/R/add_hpo_definition.R +++ b/R/add_hpo_definition.R @@ -1,3 +1,4 @@ +#' @describeIn add_ add_ #' Get term definition #' #' This function accesses the HPO API to get a description/definition of an diff --git a/R/add_hpo_id.R b/R/add_hpo_id.R index dcf2ffa..c8877b5 100644 --- a/R/add_hpo_id.R +++ b/R/add_hpo_id.R @@ -1,29 +1,19 @@ +#' @describeIn add_ add_ #' Add HPO ID column to dataframe #' -#' This adds the HPO term id column to the subset of ewce results data -#' to be plotted -#' in the cell select app. It also checks if it is a valid HPO term id to -#' prevent error and adds -#' a boolean column where TRUE if term is valid. If the HPO Id is not correct, -#' it caused an error in the ontologyPlot package. -#' @inheritParams make_phenos_dataframe -#' @inheritParams make_network_object -#' @returns The subset of EWCE results dataframe with a HPO Id column added. -#' +#' Adds the HPO term ID column "hpo_id". #' @export -#' @importFrom data.table := setkeyv #' @examples #' phenotype_to_genes <- load_phenotype_to_genes() #' phenos <- unique(phenotype_to_genes[,c("hpo_id","hpo_name")]) #' phenos2 <- add_hpo_id(phenos=phenos) add_hpo_id <- function(phenos, hpo = get_hpo(), - phenotype_to_genes = NULL, - verbose = FALSE) { + phenotype_to_genes = NULL) { HPO_term_valid <- hpo_id <- NULL; if(!"hpo_id" %in% names(phenos)){ - messager("Adding HPO IDs.",v=verbose) + messager("Adding HPO IDs.") alt_names <- grep("hpo_id","^id$",names(phenos), value=TRUE, ignore.case = TRUE) if(length(alt_names)>0){ @@ -33,10 +23,10 @@ add_hpo_id <- function(phenos, if(is.null(phenotype_to_genes)) { phenotype_to_genes <- load_phenotype_to_genes() } - phenos <- fix_hpo_ids(dt=phenos, + phenos <- fix_hpo_ids(dat=phenos, phenotype_to_genes=phenotype_to_genes) } - phenos[,HPO_term_valid:=(hpo_id %in% hpo$id)] + phenos[,HPO_term_valid:=(hpo_id %in% hpo@terms)] } return(phenos) } diff --git a/R/add_hpo_synonym.R b/R/add_hpo_synonym.R index 9d5f0cc..5aaa1ef 100644 --- a/R/add_hpo_synonym.R +++ b/R/add_hpo_synonym.R @@ -1,44 +1,44 @@ -#' Add HPO ID synonyms -#' -#' Map HPO IDs to synonynmous IDs from other ontologies -#' (e.g. UMLS, SNOMEDCT_US, MSH, ICD-10) -#' @inheritParams make_network_object -#' @returns A data.table with the HPO ID synonyms in a new column with -#' the same name as \code{to}. -#' -#' @param to The ontology to map to. Default is \code{UMLS}. -#' @param to_col The name of the new column to add the synonyms to. -#' @export -#' @examples -#' phenos <- HPOExplorer::example_phenos() -#' phenos2 <- add_hpo_synonym(phenos) -add_hpo_synonym <- function(phenos, - to="UMLS", - to_col=paste0(to,"_ID"), - hpo=get_hpo(), - verbose=TRUE){ - hpo_id <- NULL; - messager("Adding HPO ID synonyms:",to,v=verbose) - dict <- unlist(hpo$xref) - ## Fix typos in mapping - dict <- gsub("ICD10","ICD-10",dict) - dict <- gsub("ICD9","ICD-10",dict) - ## Get all prefixes - # prefixes <- sort( - # table(stringr::str_split(unname(dict),":",simplify = TRUE)[,1]), - # decreasing = TRUE) - if(!is.null(to)){ - dict <- grep(paste0("^",to,":"),dict, value = TRUE) - phenos[,(to_col):=dict[hpo_id]] - n_mapped <- length(unique(na.omit(phenos[[to_col]]))) - n_total <- length(unique(phenos$hpo_id)) - messager(formatC(n_mapped,big.mark = ","),"/", - formatC(n_total,big.mark = ","), - paste0("(",round(n_mapped/n_total*100,1),"%)"), - "hpo_id mapped to",paste0(to_col,"."), v=verbose - ) - return(phenos) - } else{ - return(dict) - } -} +##' @describeIn add_ add_ +##' Add HPO ID synonyms +##' +##' Map HPO IDs to synonynmous IDs from other ontologies +##' (e.g. UMLS, SNOMEDCT_US, MSH, ICD-10), +##' @returns A data.table with the HPO ID synonyms in a new column with +##' the same name as \code{to}. +##' +##' @param to The ontology to map to. Default is \code{UMLS}. +##' @param to_col The name of the new column to add the synonyms to. +##' @export +##' @examples +##' phenos <- xample_phenos() +##' phenos2 <- add_hpo_synonym(phenos) +#add_hpo_synonym <- function(phenos, +# to="UMLS", +# to_col=paste0(to,"_ID"), +# hpo=get_hpo()){ +# hpo_id <- NULL; +# messager("Adding HPO ID synonyms:",to) +# dict <- unlist(hpo$xref) +# ## Fix typos in mapping +# dict <- gsub("ICD10","ICD-10",dict) +# dict <- gsub("ICD9","ICD-10",dict) +# ## Get all prefixes +# # prefixes <- sort( +# # table(stringr::str_split(unname(dict),":",simplify = TRUE)[,1]), +# # decreasing = TRUE) +# if(!is.null(to)){ +# dict <- grep(paste0("^",to,":"),dict, value = TRUE) +# phenos[,(to_col):=dict[hpo_id]] +# n_mapped <- length(unique(na.omit(phenos[[to_col]]))) +# n_total <- length(unique(phenos$hpo_id)) +# messager(formatC(n_mapped,big.mark = ","),"/", +# formatC(n_total,big.mark = ","), +# paste0("(",round(n_mapped/n_total*100,1),"%)"), +# "hpo_id mapped to",paste0(to_col,"."), v=verbose +# ) +# return(phenos) +# } else{ +# return(dict) +# } +#} +# diff --git a/R/add_info_content.R b/R/add_info_content.R index a253bfb..655eb13 100644 --- a/R/add_info_content.R +++ b/R/add_info_content.R @@ -1,3 +1,4 @@ +#' @describeIn add_ add_ #' Add information content #' #' Add a column containing the information content score for each HPO ID. @@ -6,22 +7,21 @@ #' @returns phenos data.table with extra column #' #' @export -#' @importFrom data.table := -#' @importFrom ontologyIndex get_term_info_content +#' @import simona #' @examples #' phenos <- example_phenos() #' phenos2 <- add_info_content(phenos = phenos) add_info_content <- function(phenos, - hpo = get_hpo(), - verbose = TRUE){ + hpo = get_hpo()){ info_content <- hpo_id <- NULL; if(!"info_content" %in% names(phenos)){ - messager("Adding information_content scores.",v=verbose) - ic_dict <- ontologyIndex::get_term_info_content( - ontology = hpo, - term_sets = phenos$hpo_id, - patch_missing = FALSE) + messager("Adding information_content scores.") + if(!"IC" %in% names(hpo@elementMetadata)){ + simona::mcols(hpo)$IC <- simona::term_IC(hpo) + } + ic_dict <- KGExplorer::get_ontology_dict(ont=hpo, + to = "IC") phenos[,info_content:=ic_dict[hpo_id]] } return(phenos) diff --git a/R/add_medgen.R b/R/add_medgen.R deleted file mode 100644 index 0545a93..0000000 --- a/R/add_medgen.R +++ /dev/null @@ -1,44 +0,0 @@ -add_medgen <- function(phenos, - id_col = "hpo_id", - to=""){ - source_id <- MONDO_ID<-definition<-DEF<-NULL; - - URL <- "https://ftp.ncbi.nlm.nih.gov/pub/medgen/" - def <- data.table::fread(paste0(URL,"csv/MGDEF.csv.gz"), fill = TRUE) - map <- data.table::fread(paste0(URL,"MedGenIDMappings.txt.gz"), - header = TRUE, - key = "source_id") - data.table::setnames(map,"#CUI","CUI") - # sort(table(map$source), decreasing = TRUE) - #### Conform disease_id to MedGen format #### - phenos <- HPOExplorer::make_phenos_dataframe(add_disease_data = FALSE) - phenos <- HPOExplorer::add_mondo(phenos, - id_col = id_col) - phenos[,source_id:=get(id_col)] - phenos <- phenos[!is.na(get(id_col))] - if(id_col=="disease_id"){ - phenos[,source_id:=gsub("OMIM:","",source_id)] - phenos[,source_id:=gsub("ORPHANET:","Orphanet_",source_id)] - phenos[,source_id:=ifelse(source_id %in% unique(map$source_id), source_id, MONDO_ID)] - } - mapped <- data.table::merge.data.table( - phenos,map, - by="source_id", - all.x = TRUE - ) |> - data.table::merge.data.table(def, - by=c("CUI","source"), - all.x = TRUE) - data.table::uniqueN(phenos[!is.na(definition)]$hpo_id) - data.table::uniqueN(grep("^HP:",map$source_id, value = TRUE)) - data.table::uniqueN(def[source=="HPO"]$CUI) - data.table::uniqueN(mapped[!is.na(DEF)]$hpo_id) - HPOExplorer:::report_missing(mapped, - id_col = "disease_id", - report_col = "CUI") - - ### CONCLUSIONS #### - # Medgen has HPO-CUI mappings for 16569 HPO IDs - # However, their definitions data only has definitions for 22 HPO IDs max - # ....not very useful. -} diff --git a/R/add_mondo.R b/R/add_mondo.R index c2f863a..d04d516 100644 --- a/R/add_mondo.R +++ b/R/add_mondo.R @@ -1,60 +1,29 @@ -#' Add MONDO +#' @describeIn add_ add_ +#' Add Mondo metadata +#' +#' Add Mondo metadata (MONDO ID mappings, names, and definitions) for diseases +#' using files from their respective databases: +#' e.g. OMIM, DECIPHER, Orphanet. +#' @inheritParams KGExplorer::map_mondo +#' @inheritDotParams KGExplorer::map_mondo +#' @returns phenos data.table with extra columns. #' -#' Add metadata from \href{https://mondo.monarchinitiative.org/}{MONDO}, -#' including: -#' \itemize{ -#' \item{MONDO_ID: }{MONDO term ID.} -#' \item{MONDO_name: }{MONDO term name.} -#' \item{MONDO_definition: }{MONDO term definition.} -#' } -#' @param id_col ID column to map to MONDO IDs. -#' @inheritParams add_disease -#' @inheritParams data.table::merge.data.table #' @export -#' @importFrom data.table := +#' @importFrom KGExplorer map_mondo #' @examples -#' phenos <- example_phenos() +#' phenos <- load_phenotype_to_genes(3)[seq(1000)] #' phenos2 <- add_mondo(phenos = phenos) add_mondo <- function(phenos, - id_col = "disease_id", - add_definitions = TRUE, - all.x = TRUE, - allow.cartesian = FALSE, - verbose = TRUE){ - - # devoptera::args2vars(add_mondo) - MONDO_definition <- MONDO_name <- MONDO_ID <- NULL; - - if(!all(c("MONDO_ID","MONDO_name","MONDO_definition") %in% names(phenos))){ - phenos <- add_disease(phenos = phenos, - all.x = all.x, - allow.cartesian = allow.cartesian, - add_definitions = add_definitions, - include_mondo = FALSE, - verbose = verbose) - messager("Annotating phenos with MONDO metadata.",v=verbose) - mondo <- get_mondo() - dict <- mondo_dict(mondo = mondo) - - #### Assess missing values #### - # sum(is.na(mondo$name)) - # sum(is.na(mondo$def)) - # xref_ids <- unique(names(unlist(mondo$xref))) - # sum(!xref_ids %in% names(mondo$id)) - # sum(!xref_ids %in% names(mondo$name)) - # sum(!xref_ids %in% names(mondo$def)) - # names(mondo$name)[is.na(unname(mondo$name))] - # sum(is.na(unname(mondo$name))) - # names(unlist(mondo$xref)) + input_col = "disease_id", + map_to = "hpo", + ...) { - ### Add new columns #### - phenos[,MONDO_ID:=dict[get(id_col)]] - phenos[,MONDO_name:=mondo$name[MONDO_ID]] - phenos[,MONDO_definition:=mondo$def[MONDO_ID]] - report_missing(phenos = phenos, - id_col = id_col, - report_col = c("MONDO_ID","MONDO_name","MONDO_definition"), - verbose = verbose) - } + phenos[,(input_col):=gsub("^ORPHA","Orphanet",get(input_col))] + #### Map (skips is output_col already present) #### + phenos <- KGExplorer::map_mondo(dat = phenos, + input_col = input_col, + map_to = map_to, + ...) + phenos[,(input_col):=gsub("^Orphanet","ORPHA",get(input_col))] return(phenos) } diff --git a/R/add_ndisease.R b/R/add_ndisease.R index 96f8bd2..a2d7730 100644 --- a/R/add_ndisease.R +++ b/R/add_ndisease.R @@ -1,3 +1,4 @@ +#' @describeIn add_ add_ #' Add N diseases #' #' Annotate each HPO term with the total number of disease diff --git a/R/add_omop.R b/R/add_omop.R index 5a8ba9b..b580b0a 100644 --- a/R/add_omop.R +++ b/R/add_omop.R @@ -1,52 +1,50 @@ -#' Add MONDO +#' @describeIn add_ add_ +#' Add OMOP #' #' Add metadata from \href{https://mondo.monarchinitiative.org/}{MONDO}, #' including: #' \itemize{ -#' \item{MONDO_ID: }{MONDO term ID.} -#' \item{MONDO_name: }{MONDO term name.} -#' \item{MONDO_definition: }{MONDO term definition.} +#' \item{mondo_id: }{MONDO term ID.} +#' \item{mondo_name: }{MONDO term name.} +#' \item{mondo_def: }{MONDO term definition.} #' } -#' @param id_col ID column to map to MONDO IDs. +#' @param input_col ID column to map to MONDO IDs. #' @param force_new Force a new query to the OARD API instead of #' using pre-downloaded data. -#' @inheritParams add_disease -#' @inheritParams data.table::merge.data.table +#' #' @export -#' @importFrom data.table := +#' @import data.table #' @examples #' phenos <- example_phenos() #' phenos2 <- add_omop(phenos = phenos) add_omop <- function(phenos, - id_col = "hpo_id", + input_col = "hpo_id", all.x = TRUE, allow.cartesian = FALSE, force_new = FALSE, verbose = TRUE){ - - # devoptera::args2vars(add_omop) - if(!id_col %in% names(phenos)){ - stop("id_col not found in phenos.") + if(!input_col %in% names(phenos)){ + stop("input_col not found in phenos.") } if(!all(c("OMOP_ID","OMOP_NAME") %in% names(phenos))){ messager("Annotating phenos with OMOP metadata.",v=verbose) #### Select the correct map #### - if(id_col=="hpo_id" || isTRUE(force_new)){ + if(input_col=="hpo_id" || isTRUE(force_new)){ omop <- pkg_data("hpo_id_to_omop") - } else if(id_col=="disease_id"){ + } else if(input_col=="disease_id"){ omop <- pkg_data("disease_id_to_omop") } else { - omop <- oard_query_api(ids = phenos[[id_col]]) + omop <- KGExplorer::query_oard(ids = phenos[[input_col]]) } phenos2 <- data.table::merge.data.table( phenos, omop, - by=id_col, + by=input_col, all.x = all.x, allow.cartesian = allow.cartesian, ) - report_missing(phenos = phenos2, - id_col = id_col, + report_missing(phenos2, + input_col = input_col, report_col = c("OMOP_ID","OMOP_NAME"), verbose = verbose) return(phenos2) diff --git a/R/add_onset.R b/R/add_onset.R index e684c26..4dad09e 100644 --- a/R/add_onset.R +++ b/R/add_onset.R @@ -1,3 +1,4 @@ +#' @describeIn add_ add_ #' Add age of onset #' #' Add age of onset for each HPO ID. @@ -45,19 +46,15 @@ add_onset <- function(phenos, keep_onsets = NULL, agg_by = NULL, all.x = TRUE, - allow.cartesian = FALSE, - verbose = TRUE){ - - # devoptera::args2vars(add_onset) + allow.cartesian = FALSE){ onset_latest <- onset_score <- onset_name <- NULL; if(!any(c("onset","onset_name","onset_names","onset_score") %in% names(phenos))){ - messager("Annotating phenos with onset.",v=verbose) + messager("Annotating phenos with onset.") phenos <- add_disease(phenos = phenos, all.x = all.x, - allow.cartesian = allow.cartesian, - verbose = verbose) + allow.cartesian = allow.cartesian) hpo_onsets <- pkg_data("hpo_onsets") dict <- hpo_dict(type = "onset") hpo_onsets[,onset_score:=dict[onset_name]] diff --git a/R/add_ont_lvl.R b/R/add_ont_lvl.R index 1df0890..b080e9d 100644 --- a/R/add_ont_lvl.R +++ b/R/add_ont_lvl.R @@ -1,12 +1,12 @@ +#' @describeIn add_ add_ #' Add ontology level #' #' Add the relative ontology level for each HPO ID. #' @param keep_ont_levels Only keep phenotypes at certain \emph{absolute} #' ontology levels to keep. -#' See \link[HPOExplorer]{add_ont_lvl} for details. -#' @inheritParams get_ont_lvls -#' @inheritParams make_network_object -#' @inheritParams data.table::merge.data.table +#' See \link{add_ont_lvl} for details. +#' @inheritDotParams KGExplorer::get_ontology_levels +#' @inheritParams KGExplorer::get_ontology_levels #' @returns phenos data.table with extra column #' #' @export @@ -16,35 +16,27 @@ #' phenos2 <- add_ont_lvl(phenos = phenos) add_ont_lvl <- function(phenos, hpo = get_hpo(), - adjacency = NULL, absolute = TRUE, - exclude_top_lvl = TRUE, keep_ont_levels = NULL, - reverse = TRUE, - verbose = TRUE){ - + ...){ ontLvl_geneCount_ratio <- geneCount <- hpo_id <- ontLvl <- tmp <- NULL; col <- if(isTRUE(absolute)) "ontLvl" else "ontLvl_relative" if(col %in% names(phenos)) return(phenos) if("hpo_id" %in% names(phenos)){ - lvls_dict <- get_ont_lvls(terms = unique(phenos$hpo_id), - hpo = hpo, - adjacency = adjacency, - absolute = absolute, - reverse = reverse, - exclude_top_lvl = exclude_top_lvl, - verbose = verbose) + lvls_dict <- KGExplorer::get_ontology_levels(ont = hpo, + absolute = absolute, + ...) #### Add the new column #### phenos[,tmp:=lvls_dict[hpo_id]] data.table::setnames(phenos,old = "tmp", new = col) #### Compute gene ratio #### if(all(c("ontLvl","geneCount") %in% names(phenos))){ - messager("Computing ontology level / gene count ratio.",v=verbose) + messager("Computing ontology level / gene count ratio.") phenos[,ontLvl_geneCount_ratio:=(ontLvl/geneCount)] } } else { - messager("hpo_id column not found. Cannot add ontology level.",v=verbose) + messager("hpo_id column not found. Cannot add ontology level.") } #### Filter #### if(!is.null(keep_ont_levels)){ diff --git a/R/add_pheno_frequency.R b/R/add_pheno_frequency.R index e73c832..14588e2 100644 --- a/R/add_pheno_frequency.R +++ b/R/add_pheno_frequency.R @@ -1,3 +1,4 @@ +#' @describeIn add_ add_ #' Add phenotype frequency #' #' Add phenotype-level frequency, i.e. @@ -8,8 +9,6 @@ #' Include \code{NA} if you wish to retain phenotypes that #' do not have any frequency data. #' See \link[HPOExplorer]{add_pheno_frequency} for details. -#' @inheritParams make_network_object -#' @inheritParams data.table::merge.data.table #' @returns phenos data.table with extra column #' #' @export @@ -22,18 +21,15 @@ add_pheno_frequency <- function(phenos, pheno_frequency_threshold = NULL, all.x = TRUE, - allow.cartesian = FALSE, - verbose = TRUE){ - # devoptera::args2vars(add_pheno_frequency) + allow.cartesian = FALSE){ pheno_freq_mean <- NULL; new_cols <- c("pheno_freq_min","pheno_freq_max","pheno_freq_mean") if(!all(new_cols %in% names(phenos))){ - messager("Annotating phenotype frequencies.",v=verbose) + messager("Annotating phenotype frequencies.") phenos <- add_disease(phenos = phenos, all.x = all.x, - allow.cartesian = allow.cartesian, - verbose = verbose) + allow.cartesian = allow.cartesian) #### Get precomputed phenotype-disease frequencies #### hpo_frequencies <- pkg_data("hpo_frequencies") # hpo_frequencies <- hpo_frequencies_agg(hpo_frequencies) diff --git a/R/add_prevalence.R b/R/add_prevalence.R index 8854db2..585ba05 100644 --- a/R/add_prevalence.R +++ b/R/add_prevalence.R @@ -1,31 +1,58 @@ +#' @describeIn add_ add_ #' Add prevalence #' #' Add a column containing the prevalence score for each disease ("disease_id") #' or phenotype ("hpo_id"). #' @param method One of "orphanet" or "oard". -#' @param id_col Name of the column containing the disease or phenotype IDs. +#' @param input_col Name of the column containing the disease or phenotype IDs. #' @param drop_na Whether to drop rows with missing prevalence data. -#' @inheritParams make_network_object -#' @returns phenos data.table with extra column #' #' @export #' @examples #' phenos <- example_phenos() #' phenos2 <- add_prevalence(phenos = phenos) add_prevalence <- function(phenos, - id_col="disease_id", + input_col="disease_id", drop_na=TRUE, - verbose=TRUE, method="orphanet"){ - + method <- match.arg(method) + #### Example phenos data + if(is.null(phenos)){ + phenos <- make_phenos_dataframe(add_disease_data = TRUE, + include_mondo = FALSE) + } else { + phenos <- data.table::copy(phenos) + } + phenos <- add_disease(phenos) #### From OARD #### # res <- add_prevalence_oard(phenos$hpo_id[1:100]) #### From ORPHANET #### if(method=="orphanet"){ - phenos2 <- add_prevalence_orphanet(phenos=phenos, - id_col=id_col, - drop_na=drop_na, - verbose=verbose) + mean_prevalence <- NULL; + #### Add MONDO IDs #### + phenos <- add_mondo(phenos = phenos, + input_col = input_col) + #### Get Orphanet data #### + dprev <- KGExplorer:::get_prevalence_orphanet(agg_by=c("mondo_id", + "id","Name")) + phenos2 <- data.table::merge.data.table( + phenos, + dprev, + by="mondo_id", + all.x=TRUE + )[order(-mean_prevalence)] + #### Drop NAs #### + if(isTRUE(drop_na)) phenos2 <- phenos2[!is.na(mean_prevalence),] + #### Report missing #### + add_prevalence_report(phenos = phenos, + phenos2 = phenos2, + input_col = sort( + unique(c(input_col, + "hpo_id", + "disease_id", + "mondo_id") + ) + )) + return(phenos2) } - return(phenos2) } diff --git a/R/add_prevalence_oard.R b/R/add_prevalence_oard.R deleted file mode 100644 index 9d0484d..0000000 --- a/R/add_prevalence_oard.R +++ /dev/null @@ -1,10 +0,0 @@ -add_prevalence_oard <- function(ids){ - - # ids <- unique(phenos$hpo_id)[1:100] - res <- oard_query_api(ids=ids) - res2 <- oard_query_api(ids=res$results$concept_id, - dataset_id=1, - endpoint="frequencies/singleConceptFreq", - concept_prefix="concept_id=") - return(res2) -} diff --git a/R/add_prevalence_orphanet.R b/R/add_prevalence_orphanet.R deleted file mode 100644 index 6176906..0000000 --- a/R/add_prevalence_orphanet.R +++ /dev/null @@ -1,42 +0,0 @@ -add_prevalence_orphanet <- function(phenos=NULL, - id_col="disease_id", - drop_na=TRUE, - verbose=TRUE){ - # devoptera::args2vars(add_prevalence_orphanet) - - mean_prevalence <- NULL; - #### Example phenos data - if(is.null(phenos)){ - phenos <- make_phenos_dataframe(add_disease_data = TRUE, - include_mondo = FALSE, - verbose = verbose) - } else { - phenos <- data.table::copy(phenos) - } - #### Add MONDO IDs #### - phenos <- add_mondo(phenos = phenos, - id_col = id_col, - verbose = verbose) - #### Get Orphanet data #### - dprev <- get_orphanet_epidemiology(agg_by=c("MONDO_ID","id","Name")) - phenos2 <- data.table::merge.data.table( - phenos, - dprev, - by="MONDO_ID", - all.x=TRUE - )[order(-mean_prevalence)] - #### Drop NAs #### - if(isTRUE(drop_na)) phenos2[!is.na(mean_prevalence),] - #### Report missing #### - add_prevalence_report(phenos = phenos, - phenos2 = phenos2, - id_col = sort( - unique(c(id_col, - "hpo_id", - "disease_id", - "MONDO_ID") - ) - ), - verbose = verbose) - return(phenos2) -} diff --git a/R/add_prevalence_report.R b/R/add_prevalence_report.R index dd3e028..d8e7801 100644 --- a/R/add_prevalence_report.R +++ b/R/add_prevalence_report.R @@ -1,11 +1,11 @@ add_prevalence_report <- function(phenos, phenos2, - id_col, + input_col, verbose=TRUE){ mean_prevalence <- NULL; phenos2 <- data.table::copy(phenos2) phenos2 <- phenos2[!is.na(mean_prevalence),] - for(idc in id_col){ + for(idc in input_col){ n_end <- length(unique(phenos2[[idc]])) n_start <- length(unique(phenos[[idc]])) messager( diff --git a/R/add_severity.R b/R/add_severity.R index 40db4d6..7f322db 100644 --- a/R/add_severity.R +++ b/R/add_severity.R @@ -1,3 +1,4 @@ +#' @describeIn add_ add_ #' Add HPO modifiers #' #' Annotate each HPO with modifier terms, including @@ -28,21 +29,17 @@ add_severity <- function(phenos, hpo = get_hpo(), all.x = TRUE, allow.cartesian = FALSE, - severity_threshold = NULL, - verbose = TRUE){ - - # devoptera::args2vars(add_severity) + severity_threshold = NULL){ Severity_score <- NULL; if(!all(c("modifier","modifier_name") %in% names(phenos))){ - messager("Annotating phenos with modifiers",v=verbose) + messager("Annotating phenos with modifiers") phenos <- add_disease(phenos = phenos, all.x = all.x, - allow.cartesian = allow.cartesian, - verbose = verbose) + allow.cartesian = allow.cartesian) hpo_mod <- pkg_data("hpo_modifiers") #### Aggregate to HPO level #### - # hpo_agg <- hpo_modifiers_agg(dt = hpo_mod, + # hpo_agg <- hpo_modifiers_agg(dat = hpo_mod, # by = by) #### Add diseases #### phenos <- data.table::merge.data.table( diff --git a/R/add_tier.R b/R/add_tier.R index 528d21f..cbbb7f1 100644 --- a/R/add_tier.R +++ b/R/add_tier.R @@ -1,3 +1,4 @@ +#' @describeIn add_ add_ #' Add severity Tiers #' #' Add severity Tier for each HPO ID, in accordance with the diff --git a/R/adjacency_matrix.R b/R/adjacency_matrix.R deleted file mode 100644 index c8b7369..0000000 --- a/R/adjacency_matrix.R +++ /dev/null @@ -1,55 +0,0 @@ -#' Adjacency matrix -#' -#' Create adjacency matrix of HPO child-parent relationships. -#' @param terms Character vector of term IDs to include. -#' @param remove_terms Character vector of term IDs to exclude. -#' @param method Method to use to create the adjacency matrix. -#' @inheritParams make_phenos_dataframe -#' @inheritParams ontologyIndex::get_term_descendancy_matrix -#' @returns adjacency matrix -#' -#' @export -#' @importFrom ontologyIndex get_term_descendancy_matrix -#' @examples -#' terms <- get_hpo()$id[seq(1000)] -#' adjacency <- adjacency_matrix(terms = terms) -adjacency_matrix <- function(hpo = get_hpo(), - terms = unique(hpo$id), - remove_terms=grep(":",terms, - invert = TRUE, value = TRUE), - method = "HPOExplorer", - verbose = TRUE) { - #### Filter terms - if(is.null(terms)){ - terms <- hpo$id - } - terms <- terms[!terms %in% remove_terms] - #### Report - messager("Creating adjacency matrix for", - formatC(length(terms),big.mark = ","),"term(s).", - v=verbose) - if(tolower(method)=="ontologyindex"){ - #### ontologyIndex method #### - ## Returns same object but not sure if it's the same in all conditions - adjacency <- ontologyIndex::get_term_descendancy_matrix(ontology = hpo, - terms = terms) *1 - } else { - #### Custom script ##### - terms <- unique(terms) - terms <- terms[terms %in% hpo$id] - if(length(terms)==0) stop("0 valid HPO IDs (terms) provided.") - size <- length(terms) - adjacency <- matrix(nrow = size, - ncol = size, - dimnames = list(terms, terms)) - adjacency[is.na(adjacency)] <- 0 - for (id in terms) { - children <- hpo$children[[id]] - children <- children[children %in% terms] - if(length(children)>0){ - adjacency[id, children] <- 1 - } - } - } - return(adjacency) -} diff --git a/R/annotate_phenos.R b/R/annotate_phenos.R index ac2eb46..33304d5 100644 --- a/R/annotate_phenos.R +++ b/R/annotate_phenos.R @@ -1,21 +1,18 @@ +#' @describeIn main main #' Annotate phenotypes #' #' Annotate phenotypes \link[data.table]{data.table} without various types #' of metadata. -#' @inheritParams make_network_object -#' @inheritParams make_phenos_dataframe -#' #' @export #' @examples #' phenos <- example_phenos() #' phenos2 <- annotate_phenos(phenos) annotate_phenos <- function(phenos, hpo = get_hpo(), - adjacency = NULL, ##### Phenotype metadata #### add_ont_lvl_absolute = TRUE, add_ont_lvl_relative = FALSE, - add_info_contents = FALSE, + add_info_content = TRUE, add_description = TRUE, #### Disease/symptom metadata #### add_disease_data = FALSE, @@ -30,76 +27,58 @@ annotate_phenos <- function(phenos, #### Extra ##### add_hoverboxes = FALSE, columns = list_columns(), - interactive = TRUE, - verbose = TRUE){ + interactive = TRUE){ #### Add ontology levels: absolute #### if(isTRUE(add_ont_lvl_absolute)){ phenos <- add_ont_lvl(phenos = phenos, hpo = hpo, - absolute = TRUE, - verbose = verbose) + absolute = TRUE) } #### Add ontology levels: relative #### if(isTRUE(add_ont_lvl_relative)){ phenos <- add_ont_lvl(phenos = phenos, hpo = hpo, - adjacency = adjacency, - absolute = FALSE, - verbose = verbose) + absolute = FALSE) } #### Add info content #### if(isTRUE(add_info_content)){ phenos <- add_info_content(phenos = phenos, - hpo = hpo, - verbose = verbose) + hpo = hpo) } - phenos <- add_hpo_definition(phenos = phenos, - verbose = verbose) - phenos <- add_ancestor(phenos = phenos, - verbose = verbose) + phenos <- add_hpo_definition(phenos = phenos) + phenos <- add_ancestor(phenos = phenos) if(isTRUE(add_ndiseases)){ - phenos <- add_ndisease(phenos = phenos, - verbose = verbose) + phenos <- add_ndisease(phenos = phenos) } #### Add age of onset #### if(isTRUE(add_onsets)){ - phenos <- add_onset(phenos = phenos, - verbose = verbose) + phenos <- add_onset(phenos = phenos) } #### Add age of death #### if(isTRUE(add_deaths)){ phenos <- add_death(phenos = phenos, - allow.cartesian = TRUE, - verbose = verbose) + allow.cartesian = TRUE) } #### Add Tiers #### if(isTRUE(add_tiers)){ - phenos <- add_tier(phenos = phenos, - verbose = verbose) + phenos <- add_tier(phenos = phenos) } #### Add Severities #### if(isTRUE(add_severities)){ - phenos <- add_severity(phenos = phenos, - verbose = verbose) + phenos <- add_severity(phenos = phenos) } #### Add phenotype-disease freqs #### if(isTRUE(add_pheno_frequencies)){ - phenos <- add_pheno_frequency(phenos = phenos, - verbose = verbose) + phenos <- add_pheno_frequency(phenos = phenos) } #### Add phenotype-disease freqs #### if(isTRUE(add_disease_definitions)){ - phenos <- add_disease_definition(phenos = phenos, - include_mondo = include_mondo, - verbose = verbose) + phenos <- add_mondo(phenos = phenos) } #### Add hoverboxes #### if(isTRUE(add_hoverboxes)){ - phenos <- make_hoverboxes(phenos = phenos, - interactive = interactive, - columns = columns, - verbose = verbose) + phenos <- KGExplorer::add_hoverboxes(g=phenos) } return(phenos) } diff --git a/R/as_ascii.R b/R/as_ascii.R index 4a44e4e..7e0a1b6 100644 --- a/R/as_ascii.R +++ b/R/as_ascii.R @@ -1,14 +1,14 @@ -as_ascii <- function(dt, - cols=names(dt)){ - cols <- cols[cols %in% names(dt)] +as_ascii <- function(dat, + cols=names(dat)){ + cols <- cols[cols %in% names(dat)] func <- function(v){ Encoding(v) <- "latin1" iconv(v, "latin1", "UTF-8") } for(col in cols){ - if(is.character(dt[[col]])){ - dt[[col]] <- func(dt[[col]]) + if(is.character(dat[[col]])){ + dat[[col]] <- func(dat[[col]]) } } - return(dt) + return(dat) } diff --git a/R/clear_cache.R b/R/clear_cache.R deleted file mode 100644 index 976c04c..0000000 --- a/R/clear_cache.R +++ /dev/null @@ -1,26 +0,0 @@ -#' Clear cache -#' -#' Remove all data cached by \pkg{HPOExplorer}. -#' @param save_dir Path to cache directory. -#' @param verbose Print messages. -#' @inheritParams base::unlink -#' @inheritDotParams base::unlink -#' @returns Null. -#' -#' @export -#' @importFrom tools R_user_dir -#' @examples -#' \dontrun{ -#' clear_cache() -#' } -clear_cache <- function(save_dir=tools::R_user_dir(package = "HPOExplorer", - which = "cache"), - recursive=TRUE, - verbose=TRUE, - ... - ){ - - f <- list.files(save_dir,recursive = recursive) - messager("Clearing",length(f),"cached files from:",save_dir,v=verbose) - unlink(x = save_dir,recursive = recursive,...) -} diff --git a/R/convert_ontology.R b/R/convert_ontology.R deleted file mode 100644 index fbc93f4..0000000 --- a/R/convert_ontology.R +++ /dev/null @@ -1,99 +0,0 @@ -#' Convert ontology -#' -#' Convert an \link[ontologyIndex]{ontology_index} to -#' a number of other useful formats. -#' @param ont An \link[ontologyIndex]{ontology_index} object. -#' @param to A character string specifying the format to convert to. -#' @param as_sparse If TRUE, return a sparse matrix where possible. -#' @inheritParams adjacency_matrix -#' @returns An object of the specified format. -#' -#' @export -#' @importFrom ontologyIndex get_term_descendancy_matrix -#' @importFrom stats as.dist hclust cutree -#' @examples -#' ont <- get_hpo() -#' terms <- head(ont$id,100) -#' obj <- convert_ontology(ont=ont, terms=terms, to="igraph_dist_hclust") -convert_ontology <- function(ont=get_hpo(), - terms=unique(ont$id), - remove_terms=grep(":",terms, - invert = TRUE, value = TRUE), - to=c("adjacency", - "descendancy", - "dist", - "dist_hclust", - "dist_hclust_dendrogram", - "clusters", - "igraph", - "igraph_dist", - "igraph_dist_hclust", - "igraph_dist_hclust_dendrogram", - "tidygraph", - "data.frame", - "data.table"), - as_sparse=FALSE){ - # devoptera::args2vars(convert_ontology, reassign = TRUE) - to <- match.arg(to) - if(to=="tidygraph") requireNamespace("tidygraph") - if(to=="igraph") requireNamespace("igraph") - terms <- terms[!terms %in% remove_terms] - - if(to=="adjacency"){ - obj <- adjacency_matrix(hpo = ont, - terms = terms, - remove_terms = remove_terms) - } else if(to=="descendancy"){ - obj <- ontologyIndex::get_term_descendancy_matrix(ontology = ont, - terms = terms) - } else if(to=="dist"){ - adj <- convert_ontology(ont, terms, remove_terms, to="adjacency") - # obj <- stats::dist(adj) ### seems to take forever - obj <- stats::as.dist(abs(adj-max(adj))) - } else if(to=="dist_hclust"){ - d <- convert_ontology(ont, terms, remove_terms, to="dist") - obj <- stats::hclust(d) - } else if(to=="dist_hclust_dendrogram"){ - dh <- convert_ontology(ont, terms, remove_terms, to="dist_hclust") - obj <- stats::as.dendrogram(dh) - }else if(to=="clusters"){ - hc <- convert_ontology(ont, terms, remove_terms, to="hclust") - obj <- stats::cutree(hc,k=10) - } else if(to=="igraph"){ - adj <- convert_ontology(ont, terms, remove_terms, to="adjacency") - obj <- igraph::graph_from_adjacency_matrix(adj) - } else if (to=="igraph_dist"){ - g <- convert_ontology(ont, terms, remove_terms, to="igraph") - obj <- igraph::distances(g) - } else if(to=="igraph_dist_hclust"){ - gd <- convert_ontology(ont, terms, remove_terms, to="igraph_dist") - if(any(is.infinite(gd))) gd[is.infinite(gd)] <- max(gd[!is.infinite(gd)]) - obj <- stats::hclust(stats::as.dist(gd)) - } else if(to=="igraph_dist_hclust_dendrogram"){ - gdh <- convert_ontology(ont, terms, remove_terms, to="igraph_dist_hclust") - obj <- stats::as.dendrogram(gdh) - } else if(to=="tidygraph"){ - g <- convert_ontology(ont, terms, remove_terms, to="igraph") - obj <- tidygraph::as_tbl_graph(g) - } else if(to=="data.frame"){ - obj <- as.data.frame(ont) - if(!is.null(terms)) obj <- obj[terms,] - if(!is.null(remove_terms)) obj <- obj[!obj$id %in% remove_terms,] - } else if(to=="data.table"){ - df <- convert_ontology(ont, terms, remove_terms, to="data.frame") - obj <- data.table::as.data.table(df) - }else { - stop("Unknown conversion type.") - } - #### Convert to sparse #### - if(isTRUE(as_sparse)){ - if(methods::is(obj,"matrix")){ - obj <- Matrix::Matrix(adj, sparse=TRUE) - } - } - ## Report - messager("Converted ontology to:",to, - if(as_sparse) paste0("(sparse)") else NULL - ) - return(obj) -} diff --git a/R/data.R b/R/data.R index 1831299..ea3c714 100644 --- a/R/data.R +++ b/R/data.R @@ -26,8 +26,7 @@ #' \code{ #' annot <- load_phenotype_to_genes(file = "phenotype.hpoa") #' annot <- annot[onset!="",] -#' annot$onset_name <- harmonise_phenotypes(phenotypes = annot$onset, -#' as_hpo_ids = FALSE) +#' annot$onset_name <- map_phenotypes(terms = annot$onset) #' counts <- dplyr::group_by(annot, disease_id) |> #' dplyr::summarise(hpo_ids=length(unique(hpo_id)), #' onsets=length(unique(onset))) @@ -46,10 +45,10 @@ #' #' @description #' HPO severity tiers automatically assigned using -#' \link[HPOExplorer]{assign_tiers}. +#' \link[HPOExplorer]{make_tiers}. #' @source #' \code{ -#' hpo_tiers_auto <- HPOExplorer:::assign_tiers(as_datatable=TRUE) +#' hpo_tiers_auto <- HPOExplorer:::make_tiers(as_datatable=TRUE) #' hpo_tiers_auto <- add_onset(phenos = hpo_tiers_auto) #' #### Filter by onset criterion #### #' ### Tier 1: Shortened life span: infancy @@ -85,7 +84,7 @@ #' annot <- HPOExplorer::load_phenotype_to_genes(3) #' annot <- annot[modifier!=""] #' parse_mod <- function(x){ -#' unique(harmonise_phenotypes(strsplit(x,";")[[1]])) +#' unique(map_phenotypes(strsplit(x,";")[[1]])) #' } #' annot <- annot[,modifier_name:=lapply(modifier,parse_mod)][modifier!="",] #' annot <- annot[,.(modifier_name=unlist(modifier_name)), @@ -119,7 +118,7 @@ #' \code{ #' annot <- load_phenotype_to_genes("phenotype.hpoa") #' hpo_frequencies <- HPOExplorer:::parse_pheno_frequency(annot=annot) -#' hpo_frequencies <- HPOExplorer:::as_ascii(dt=hpo_frequencies) +#' hpo_frequencies <- HPOExplorer:::as_ascii(dat=hpo_frequencies) #' data.table::setcolorder(hpo_frequencies,c("disease_id","hpo_id")) #' usethis::use_data(hpo_frequencies, overwrite = TRUE) #' } @@ -134,9 +133,7 @@ #' Age of Death associated with each disease, and by extension, each phenotype. #' @source #' \code{ -#' terms <- ontologyIndex::get_descendants(ontology = get_hpo(), -#' roots = "HP:0011420", -#' exclude_roots = TRUE) +#' terms <- simona::dag_offspring(get_hpo(), term = "HP:0011420") #' aod <- lapply(stats::setNames(terms, terms), #' function(hpo_id){ #' message("Extracting API data for",hpo_id) @@ -144,8 +141,7 @@ #' }) |> data.table::rbindlist(fill = TRUE, #' use.names = TRUE, #' idcol = "AgeOfDeath") -#' aod$AgeOfDeath_name <- harmonise_phenotypes(phenotypes = aod$AgeOfDeath, -#' as_hpo_ids = FALSE) +#' aod$AgeOfDeath_name <- map_phenotypes(terms = aod$AgeOfDeath) #' #### Convert AoD to numeric scores #### #' dict <- HPOExplorer:::hpo_dict(type="AgeOfDeath") #' aod$AgeOfDeath_score <- dict[aod$AgeOfDeath_name] @@ -167,14 +163,14 @@ #' @source #' \code{ #' hpo <- get_hpo() -#' ids <- unique(hpo$id) +#' ids <- unique(hpo@terms) #' hpo_id_to_omop <- oard_query_api(ids = ids, workers=10) -#' id_col="hpo_id" +#' input_col="hpo_id" #' data.table::setnames(hpo_id_to_omop, #' toupper(gsub("concept_","OMOP_",names(hpo_id_to_omop))) #' ) -#' data.table::setnames(hpo_id_to_omop, "OMOP_CODE",id_col) -#' hpo_id_to_omop <- hpo_id_to_omop[,c(id_col,"OMOP_ID","OMOP_NAME"), +#' data.table::setnames(hpo_id_to_omop, "OMOP_CODE",input_col) +#' hpo_id_to_omop <- hpo_id_to_omop[,c(input_col,"OMOP_ID","OMOP_NAME"), #' with=FALSE] #' usethis::use_data(hpo_id_to_omop, overwrite = TRUE) #' } @@ -190,7 +186,7 @@ #' @source #' \code{ #' annot <- load_phenotype_to_genes(3) -#' id_col <- "disease_id" +#' input_col <- "disease_id" #' ## NOTE: must keep batch_size=1 as the OARD API returns results only for #' ## the IDs it can map. This leads to a mismatch between the input and output #' ## which is exacerbated that the concept_id is automatically converted to @@ -201,8 +197,8 @@ #' data.table::setnames(disease_id_to_omop, #' toupper(gsub("concept_","OMOP_",names(disease_id_to_omop))) #' ) -#' data.table::setnames(disease_id_to_omop,"QUERY",id_col) -#' disease_id_to_omop <- disease_id_to_omop[,c(id_col,"OMOP_ID","OMOP_NAME"), +#' data.table::setnames(disease_id_to_omop,"QUERY",input_col) +#' disease_id_to_omop <- disease_id_to_omop[,c(input_col,"OMOP_ID","OMOP_NAME"), #' with=FALSE] #' usethis::use_data(disease_id_to_omop, overwrite = TRUE) #' } diff --git a/R/example_phenos.R b/R/example_phenos.R index d3bf20b..3844718 100644 --- a/R/example_phenos.R +++ b/R/example_phenos.R @@ -1,3 +1,4 @@ +#' @describeIn main main #' Example phenotypes dataframe #' #' Create a minimal example of a phenos dataframe. @@ -8,6 +9,7 @@ #' @importFrom data.table data.table #' @examples #' phenos <- example_phenos() -example_phenos <- function(i=seq(10)){ - data.table::data.table(hpo_id=grep("^HP:",get_hpo()$id, value = TRUE)[i]) +example_phenos <- function(i=seq(10), + hpo=get_hpo()){ + data.table::data.table(hpo_id=grep("^HP:",hpo@terms, value = TRUE)[i]) } diff --git a/R/filter_descendants.R b/R/filter_descendants.R new file mode 100644 index 0000000..9929cdb --- /dev/null +++ b/R/filter_descendants.R @@ -0,0 +1,39 @@ +#' @describeIn filter_ filter_ +#' Filter descendants +#' +#' Subset a \code{phenos} data.table to only +#' descendants of an ancestor HPO ID term. +#' @param ancestor Phenotype names (or HPO ID) of an ancestor in the +#' \href{https://hpo.jax.org/}{Human Phenotype Ontology}. +#' Only phenotypes that are descendants of this ancestor will be kept. +#' Set to \code{NULL} (default) to skip this filtering step. +#' @returns data.table of phenotypes, with additional columns: +#' "ancestor", "ancestor_id" +#' +#' @export +#' @examples +#' phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay") +#' phenos2 <- filter_descendants(phenos = phenos, +#' ancestor = "Motor delay") +filter_descendants <- function(phenos, + ancestor = NULL, + hpo = get_hpo()){ + + hpo_id <- NULL; + + if(!is.null(ancestor)){ + messager("Subsetting phenotypes to only ancestors of:", + paste(ancestor,collapse = ",")) + ancestor_id <- map_phenotypes(terms = ancestor, + hpo = hpo, + to="id") + all_ids <- simona::dag_offspring(dag = hpo, + term=ancestor_id, + include_self = TRUE ) + phenos <- phenos[hpo_id %in% all_ids, + ][,ancestor:=ancestor][,ancestor_id:=ancestor_id] + messager(formatC(nrow(phenos),big.mark = ","), + "associations remain after filtering.") + } + return(phenos) +} diff --git a/R/fix_hpo_ids.R b/R/fix_hpo_ids.R index b62c489..7fe2498 100644 --- a/R/fix_hpo_ids.R +++ b/R/fix_hpo_ids.R @@ -2,39 +2,36 @@ #' #' Fix missing HPO IDs by inferring their identity from the \code{hpo_name} #' column using several methods. -#' @param dt \link[data.table]{data.table} +#' @param dat \link[data.table]{data.table} #' containing a column named "hpo_name". -#' @inheritParams make_phenos_dataframe #' @returns data.table with the column "hpo_id" #' #' @keywords internal -#' @importFrom data.table setcolorder setkey -#' @importFrom stats setNames -fix_hpo_ids <- function(dt, +fix_hpo_ids <- function(dat, phenotype_to_genes = load_phenotype_to_genes()) { hpo_id <- NULL; - dt$hpo_id <- harmonise_phenotypes(dt$hpo_name, as_hpo_ids = TRUE) + dat$hpo_id <- map_phenotypes(dat$hpo_name, to = "id") dict <- stats::setNames(phenotype_to_genes$hpo_id, phenotype_to_genes$hpo_name) - dt[is.na(hpo_id)]$hpo_id <- dict[dt[is.na(hpo_id)]$hpo_name] + dat[is.na(hpo_id)]$hpo_id <- dict[dat[is.na(hpo_id)]$hpo_name] #### Check if any IDs are still missing #### - missing1 <- sum(is.na(dt$hpo_id)) + missing1 <- sum(is.na(dat$hpo_id)) if(missing1>0){ stp <- paste(missing1,"HPO IDs are still missing.") warning(stp) } - missing2 <- sum(!grepl("HP",dt$hpo_id)) + missing2 <- sum(!grepl("HP",dat$hpo_id)) if(missing2>0){ stp <- paste(missing2,"HPO IDs are still missing.") warning(stp) } - missing3 <- sum(is.na(dt$hpo_name)) + missing3 <- sum(is.na(dat$hpo_name)) if(missing3>0){ stp <- paste(missing3,"hpo_names are still missing.") warning(stp) } - data.table::setcolorder(dt,"hpo_id") - data.table::setkey(dt,"hpo_id") - return(dt) + data.table::setcolorder(dat,"hpo_id") + data.table::setkey(dat,"hpo_id") + return(dat) } diff --git a/R/get_data.R b/R/get_data.R index cde6645..0549823 100644 --- a/R/get_data.R +++ b/R/get_data.R @@ -12,10 +12,7 @@ get_data <- function(file, tag = "latest", repo = "neurogenomics/HPOExplorer", - save_dir = tools::R_user_dir( - package = "HPOExplorer", - which = "cache" - ), + save_dir = KGExplorer::cache_dir(package = "HPOExplorer"), add_version = FALSE, overwrite = TRUE ){ diff --git a/R/get_gencc.R b/R/get_gencc.R deleted file mode 100644 index 752bc34..0000000 --- a/R/get_gencc.R +++ /dev/null @@ -1,74 +0,0 @@ -#' Get GenCC -#' -#' Get phenotype-gene evidence score from the -#' \href{https://thegencc.org/}{Gene Curation Coalition}. -#' Data downloaded from \href{https://search.thegencc.org/download}{here}.\cr -#' \emph{NOTE:} Due to licensing restrictions, a GenCC download does not -#' include OMIM data. OMIM data can be accessed and downloaded through -#' \href{https://omim.org/downloads/}{OMIM}.\cr -#' \emph{NOTE:} GenCC does not currently have any systematic versioning. -#' There for the \code{attr(obj,"version")} attribute is set to the date it was -#' downloaded and cached by \link[HPOExplorer]{get_gencc}. -#' -#' @param dict A named vector of evidence score mappings. -#' See \href{https://thegencc.org/faq.html#validity-termsdelphi-survey}{here} -#' for more information. -#' @inheritParams add_evidence -#' @inheritParams get_data -#' @returns \link[data.table]{data.table} with columns: -#' \itemize{ -#' \item{"disease_id": }{Disease ID.} -#' \item{"gene_symbol": }{Gene symbol.} -#' \item{"evidence_score": }{Evidence score.} -#' } -#' -#' @export -#' @import data.table -#' @importFrom utils download.file -#' @examples -#' d <- get_gencc() -get_gencc <- function(agg_by=c("disease_id", - "gene_symbol"), - dict = c("Definitive"=6,# GENCC:100001 - "Strong"=5, # GENCC:100002 - "Moderate"=4, # GENCC:100003 - "Supportive"=3, # GENCC:100009 - "Limited"=2, # GENCC:100004 - "Disputed Evidence"=1, # GENCC:100005 - "Refuted Evidence"=0, # GENCC:100006 - "No Known Disease Relationship"=0 # GENCC:100008 - ), - save_dir=tools::R_user_dir("HPOExplorer", - which="cache"), - verbose=TRUE){ - # devoptera::args2vars(get_gencc) - - disease_id <- disease_original_curie <- classification_title <- - evidence_score <- NULL; - - messager("Gathering data from GenCC.",v=verbose) - URL <- "https://search.thegencc.org/download/action/submissions-export-csv" - f <- file.path(save_dir,"genCC_submission.csv") - if(!file.exists(f)){ - dir.create(save_dir, showWarnings = FALSE, recursive = TRUE) - utils::download.file(URL, f) - } else { - messager("Importing cached file.",v=verbose) - } - d <- data.table::fread(f) - d[,disease_id:=gsub("^Orphanet","ORPHA",disease_original_curie)] - #### Encode evidence numerically #### - d[,evidence_score:=dict[classification_title]] - #### Aggregate so that there's 1 entry/gene/disease #### - if(!is.null(agg_by)){ - d <- d[,list(evidence_score_min=min(evidence_score, na.rm = TRUE), - evidence_score_max=max(evidence_score, na.rm = TRUE), - evidence_score_mean=mean(evidence_score, na.rm=TRUE)), - by=agg_by] - } - #### Add version #### - attr(d,"version") <- format(file.info(f)$mtime, "%Y-%m-%d") - get_version(obj=d, verbose=verbose) - #### Return #### - return(d) -} diff --git a/R/get_gene_lengths.R b/R/get_gene_lengths.R deleted file mode 100644 index 5492d2b..0000000 --- a/R/get_gene_lengths.R +++ /dev/null @@ -1,42 +0,0 @@ -get_gene_lengths <- function(gene_list, - keep_seqnames = c(seq(22),"X","Y"), - ensembl_version = 75, - verbose = TRUE){ - - requireNamespace("ensembldb") - requireNamespace("AnnotationFilter") - - gene_list <- unique(gene_list) - messager("Gathering metadata for",length(gene_list),"unique genes.", - v=verbose) - if(is.null(keep_seqnames)){ - keep_seqnames <- eval(formals(get_gene_lengths)$keep_seqnames) - } - #### Standardise seqnames #### - keep_seqnames <- unique( - c(tolower(keep_seqnames), - paste0("chr",keep_seqnames), - keep_seqnames) - ) - #### Get gene reference database #### - if(ensembl_version==75){ - #### Outdated ref #### - requireNamespace("EnsDb.Hsapiens.v75") - txdb <- EnsDb.Hsapiens.v75::EnsDb.Hsapiens.v75 - } else { - #### Updated ref #### - requireNamespace("AnnotationHub") - hub <- AnnotationHub::AnnotationHub() - q <- AnnotationHub::query(hub, c("EnsDb",ensembl_version,"sapiens")) - txdb <- q[[length(q)]] - } - #### Set - tx_gr <- ensembldb::genes( - txdb, - columns = c(ensembldb::listColumns(txdb, "gene"), "entrezid"), - filter=AnnotationFilter::AnnotationFilterList( - AnnotationFilter::SymbolFilter(value = gene_list), - AnnotationFilter::SeqNameFilter(value = keep_seqnames) - )) - return(tx_gr) -} diff --git a/R/get_gene_lists.R b/R/get_gene_lists.R index 043a9db..044d694 100644 --- a/R/get_gene_lists.R +++ b/R/get_gene_lists.R @@ -25,10 +25,9 @@ get_gene_lists <- function(phenotypes, # devoptera::args2vars(get_gene_lists) hpo_id <- NULL; - hpo_ids <- harmonise_phenotypes(phenotypes = phenotypes, - hpo = hpo, - as_hpo_ids = TRUE, - verbose = TRUE) + hpo_ids <- map_phenotypes(terms = phenotypes, + hpo = hpo, + to="id") p2g <- phenotype_to_genes[hpo_id %in% hpo_ids] #### Return at data.table ##### if(isFALSE(as_list)) return(p2g) diff --git a/R/get_hpo.R b/R/get_hpo.R index 59d0560..d0b41ee 100644 --- a/R/get_hpo.R +++ b/R/get_hpo.R @@ -1,6 +1,7 @@ +#' @describeIn get_ get_ #' Get Human Phenotype Ontology (HPO) #' -#' Updated version of Human Phenotype Ontology (HPO) from 2023-10-09. +#' Updated version of Human Phenotype Ontology (HPO). #' Created from the OBO files distributed by the HPO project's #' \href{https://github.com/obophenotype/human-phenotype-ontology}{GitHub}. #' @@ -8,26 +9,28 @@ #' Note that the maximum ontology level depth in the 2016 version was 14, #' whereas in the 2023 version the maximum ontology level depth is 16 #' (due to an expansion of the HPO). -#' @source \href{https://bioportal.bioontology.org/ontologies/HP}{BioPortal} -#' \code{ -#' hpo <- HPOExplorer:::make_ontology(upload_tag="latest") -#' } -#' @inheritParams get_data -#' @inheritParams piggyback::pb_download -#' @importFrom tools R_user_dir -#' @returns \link[ontologyIndex]{ontology_index} object. +#' @inheritParams KGExplorer::add_ancestors +#' @inheritDotParams KGExplorer::get_ontology +#' @returns \link[simona]{ontology_DAG} object. #' #' @export +#' @import KGExplorer #' @examples #' hpo <- get_hpo() -get_hpo <- function(save_dir=tools::R_user_dir("HPOExplorer", - which="cache"), - tag = "latest", - overwrite = TRUE){ - #### ontologyIndex data outdated, from 2016. Don't use. #### - # utils::data("hpo",package = "ontologyIndex") - get_data(file = "hp-base.rds", - tag = tag, - overwrite = overwrite, - save_dir = save_dir) +get_hpo <- function(lvl, + add_ancestors = TRUE, + force_new = FALSE, + ...){ + save_dir <- KGExplorer::cache_dir(package = "HPOExplorer") + file <- file.path(save_dir,"hpo.rds") + if(!file.exists(file) || isTRUE(force_new)){ + ont <- KGExplorer::get_ontology(name = "hp", + add_ancestors = add_ancestors, + force_new = force_new, + ...) + saveRDS(ont,file) + } else { + ont <- readRDS(file) + } + return(ont) } diff --git a/R/get_hpo_id_direct.R b/R/get_hpo_id_direct.R index 46a8702..f267289 100644 --- a/R/get_hpo_id_direct.R +++ b/R/get_hpo_id_direct.R @@ -1,16 +1,15 @@ +#' @describeIn get_ get_ #' Get HPO term ID direct #' #' Directly retrieves it from the HPO -#' ontology object (found in the \pkg{ontologyIndex} package \code{data(hpo)}). -#' @param phenotype One or more phenotype names in to get HPO IDs for. -#' @inheritParams make_phenos_dataframe +#' ontology object. #' @returns The HPO ID(s) of phenotype(s) #' #' @export #' @examples -#' phenotype = "Phenotypic abnormality" -#' pheno_abnormality_id <- get_hpo_id_direct(phenotype = phenotype) -get_hpo_id_direct <- function(phenotype, +#' term = "Phenotypic abnormality" +#' pheno_abnormality_id <- get_hpo_id_direct(term = term) +get_hpo_id_direct <- function(term, hpo = get_hpo()) { - return(hpo$id[which(phenotype == hpo$name)]) + return(hpo@terms[which(term == hpo@elementMetadata$name)]) } diff --git a/R/get_kg_monarch.R b/R/get_kg_monarch.R deleted file mode 100644 index 7890f70..0000000 --- a/R/get_kg_monarch.R +++ /dev/null @@ -1,42 +0,0 @@ -#' Get knowledge graph: Monarch -#' -#' -#' Option 1: Use the \href{https://api.monarchinitiative.org/api/}{biolink API} -#' to efficiently extract specific subset of data -#' from the Monarch server. -#' Option 2: Import the entire knowledge graph from the -#' \href{https://data.monarchinitiative.org/monarch-kg/latest/}{Monarch server}. -#' @source \href{https://pubmed.ncbi.nlm.nih.gov/37707514/}{BioThings Explorer} -get_kg_monarch <- function(){ - ### Option 1 #### - # - # con <- neo4r::neo4j_api$new( - # url = "https://api.monarchinitiative.org/api/", - # user = "neo4j", - # password = "plop" - # ) - # jsonlite::read_json("https://api.monarchinitiative.org/api/bioentity/MONDO:0012990") - # neo2R::graphRequest(graph = , - # endpoint = "/bioentity/", - # postText = "MONDO:0012990") - # neo <- neo2R::graphRequest("https://data.monarchinitiative.org/monarch-kg-dev/latest/monarch-kg.neo4j.dump", ) - # top_targets <- add_mondo(top_targets) - - ### Option 2 #### - d <- data.table::fread("https://data.monarchinitiative.org/monarch-kg/latest/monarch-kg-denormalized-edges.tsv.gz") - - sort(unique(d$subject_category)) - - d[subject_category== "biolink:Cell",]$subject_label |> - gsub(pattern="[0-9]+",replacement="") |> - # stringr::str_trunc(20) |> - unique() |> sort() - - d2 <- d[#(subject_category=="biolink:Disease" & object_category=="biolink:PhenotypicFeature") | - (subject_category=="biolink:PhenotypicFeature" & object_category=="biolink:Gene") | - (subject_category=="biolink:Disease" & object_category=="biolink:Gene")] - - #### Use evidence_count as connection weights #### - round(table(d$evidence_count,useNA = "always")/nrow(d)*100,6) - -} diff --git a/R/get_max_ont_lvl.R b/R/get_max_ont_lvl.R deleted file mode 100644 index e30286b..0000000 --- a/R/get_max_ont_lvl.R +++ /dev/null @@ -1,15 +0,0 @@ -get_max_ont_lvl <- function(hpo = get_hpo(), - absolute = TRUE, - exclude_top_lvl=FALSE){ - #### Get the maximum ontology level #### - max_lvl <- 0 - for(i in seq(length(hpo$id))){ - tmp <- get_ont_lvl(term = hpo$id[[i]], - hpo = hpo, - absolute = absolute) - if(tmp > max_lvl) max_lvl <- tmp - } - #### Exclude the top level term "All" (HP:0000001) #### - if(isTRUE(exclude_top_lvl)) max_lvl <- max_lvl-1 - return(max_lvl) -} diff --git a/R/get_metadata_omim.R b/R/get_metadata_omim.R deleted file mode 100644 index d246227..0000000 --- a/R/get_metadata_omim.R +++ /dev/null @@ -1,20 +0,0 @@ -get_metadata_omim <- function(save_dir = file.path( - tools::R_user_dir("HPOExplorer", - which="cache")), - verbose = TRUE){ - Class.ID <- id <- NULL; - - messager("Importing OMIM metadata.",v=verbose) - f <- file.path(save_dir,"OMIM.csv.gz") - if(!file.exists(f)){ - dir.create(save_dir, showWarnings = FALSE, recursive = TRUE) - utils::download.file(paste0( - "https://data.bioontology.org/ontologies/OMIM/download?", - "apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb&download_format=csv" - ), f) - } - meta <- data.table::fread(f) - colnames(meta) <- gsub(" ",".",colnames(meta)) - meta[,id:=paste0("OMIM:",basename(Class.ID))] - return(meta) -} diff --git a/R/get_metadata_orphanet.R b/R/get_metadata_orphanet.R deleted file mode 100644 index 6cb59ae..0000000 --- a/R/get_metadata_orphanet.R +++ /dev/null @@ -1,21 +0,0 @@ -get_metadata_orphanet <- function(save_dir = file.path( - tools::R_user_dir("HPOExplorer", - which="cache")), - verbose = TRUE){ - Class.ID <- id <- NULL; - - options(download.file.method = "curl") - messager("Importing Orphanet metadata.",v=verbose) - f <- file.path(save_dir,"ORDO.csv.gz") - if(!file.exists(f)){ - dir.create(save_dir, showWarnings = FALSE, recursive = TRUE) - utils::download.file(paste0( - "https://data.bioontology.org/ontologies/ORDO/download?", - "apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb&download_format=csv" - ), f) - } - meta <- data.table::fread(f) - colnames(meta) <- gsub(" ",".",colnames(meta)) - meta[,id:=gsub("Orphanet_","ORPHA:",basename(Class.ID))] - return(meta) -} diff --git a/R/get_monarch.R b/R/get_monarch.R deleted file mode 100644 index 6d84282..0000000 --- a/R/get_monarch.R +++ /dev/null @@ -1,58 +0,0 @@ -#' Get Monarch -#' -#' Get key datasets from the Monarch Initiative server. -#' @param file Short name of the file to retrieve. -#' \itemize{ -#' \item{"gene_to_gene"}{Gene-to-gene mappings across species orthologous.} -#' \item{"phenotype_to_gene"}{Phenotype-to-gene mappings for multiple species.} -#' \item{"disease_to_model"}{Disease-to-model mappings for multiple -#' model species.} -#' \item{"phenotype_to_phenotype"}{Phenotype-to-phenotype mappings across -#' species homologs.} -#' } -#' @param file_map Mapping between short name and full name of each file. -#' @param domain Monarch Initiative server domain. -#' @inheritDotParams data.table::fread -#' @returns \link[data.table]{data.table} -#' -#' @export -#' @examples -#' dat <- get_monarch() -get_monarch <- function(file=c("gene_to_gene", - "phenotype_to_gene", - "disease_to_model", - "phenotype_to_phenotype" - ), - file_map=list( - ## Gene-gene homology across 12 species - gene_to_gene= - paste0( - "monarch-kg-dev/latest/tsv/all_associations/", - "gene_to_gene_homology_association.all.tsv.gz" - ), - ## Gene-phenotype associations across 8 species - phenotype_to_gene= - paste0( - "monarch-kg-dev/latest/tsv/all_associations/", - "gene_to_phenotypic_feature_association.all.tsv.gz" - ), - disease_to_model= - paste0( - "latest/tsv/model_associations/", - "model_disease.all.tsv.gz" - ), - phenotype_to_phenotype= - paste0( - "upheno2/current/upheno-release/all/", - "upheno_mapping_all.csv" - ) - ), - domain="https://data.monarchinitiative.org", - ... - ){ - - file <- match.arg(file) - URL <- paste(domain,file_map[[file]], sep="/") - dat <- data.table::fread(URL,...) - return(dat) -} diff --git a/R/get_mondo.R b/R/get_mondo.R deleted file mode 100644 index 33703a7..0000000 --- a/R/get_mondo.R +++ /dev/null @@ -1,28 +0,0 @@ -#' Get Mondo Disease Ontology -#' -#' Import \href{https://mondo.monarchinitiative.org/}{Mondo} as an R object. -#' @source -#' \code{ -#' mondo <- HPOExplorer:::make_ontology(file="mondo-base.obo", -#' repo="monarch-initiative/mondo", -#' upload_tag="latest") -#' } -#' @inheritParams get_data -#' @inheritParams piggyback::pb_download -#' @importFrom tools R_user_dir -#' @returns \link[ontologyIndex]{ontology_index} -#' -#' @export -#' @examples -#' mondo <- get_mondo() -get_mondo <- function(save_dir=tools::R_user_dir("HPOExplorer", - which="cache"), - tag = "latest", - overwrite = TRUE){ - #### ontoProc data outdated, from 2021 Don't use. #### - # mondo <- ontoProc::getMondoOnto() - get_data(file = "mondo-base.rds", - tag = tag, - overwrite = overwrite, - save_dir = save_dir) -} diff --git a/R/get_ont_lvl.R b/R/get_ont_lvl.R deleted file mode 100644 index e7adaf0..0000000 --- a/R/get_ont_lvl.R +++ /dev/null @@ -1,73 +0,0 @@ -#' Get HPO term level -#' -#' In this function, ontology level refers to the number of generations of -#' sub-phenotypes a term has below it in the HPO DAG. For example, the root of -#' the HPO is term "HP:0000001", the longest path to a term with no child terms -#' is 14. In other words there are 14 generations of "is-a" relationships below -#' HP:0000001 and it is therefore at ontology level 14. A term with no -#' sub-phenotypes below it (a leaf node), is at ontology level 0. -#' @inheritParams make_phenos_dataframe -#' @inheritParams make_network_object -#' @inheritParams adjacency_matrix -#' @returns Ontology level of HPO ID. -#' -#' @keywords internal -#' @source -#' \code{ -#' lvl_absolute <- get_ont_lvl(term = "HP:0000001", absolute=TRUE) -#' lvl_relative <- get_ont_lvl(term = "HP:0000001", absolute=FALSE) -#' } -get_ont_lvl <- function(term, - hpo = get_hpo(), - adjacency = NULL, - absolute = TRUE, - verbose = FALSE) { - - messager("Getting ontology level for:", paste(term,collapse = ", "), - v=verbose) - #### Children method #### - if(isTRUE(absolute)){ - children <- unique( - setdiff(unlist(hpo$children[term]), - term) - ) - if (length(children) == 0) { - return(0) - } else { - return(1 + get_ont_lvl(term = children, - hpo = hpo, - absolute = absolute, - verbose = verbose)) #<- recursion.. - } - } else { - #### Parents method #### - if(is.null(adjacency)){ - stp <- "Must supply adjacency when absolute=FALSE." - stop(stp) - } - phenotypes <- rownames(adjacency) - pos_parents <- hpo$parents[term] - paths <- lapply(phenotypes, function(p){ - if (adjacency[p, term] == 1) { - if (p %in% pos_parents) { - 1 + get_ont_lvl(term = p, - adjacency = adjacency, - absolute = absolute, - hpo = hpo) # <- recursion - } - } - }) |> unlist() - #### Check count #### - if (length(paths) == 0) { - return(0) - } else { - parents <- 0 - for (i in seq(length(paths))) { - if (paths[[i]] > parents) { - parents <- paths[[i]] - } - } - } - return(parents) - } -} diff --git a/R/get_ont_lvls.R b/R/get_ont_lvls.R deleted file mode 100644 index bfd2e9d..0000000 --- a/R/get_ont_lvls.R +++ /dev/null @@ -1,77 +0,0 @@ -#' Get ontology level for HPO terms -#' -#' For a given set of HPO terms, get their level -#' within the hierarchically organised HPO ontology. -#' Ontology level can be computed either absolute mode (\code{absolute=TRUE}) -#' where the entire ontology is considered when assigning levels, or -#' relative mode (\code{absolute=FALSE}) where only a subset of the ontology -#' that is connected to a given term is considered when assigning levels. -#' Relative mode can be helpful when trying to make plot where nodes are -#' scaled to the ontology level. -#' @param absolute Make the levels absolute in the sense that they consider -#' the entire HPO ontology (\code{TRUE}). -#' Otherwise, levels will be relative to only the HPO terms that are in -#' the provided subset of \code{terms} AND are directly adjacent (connected) -#' to a given cluster of terms (\code{FALSE}). -#' @param reverse If \code{TRUE}, ontology -#' level numbers with be revered such that the level of the parent terms -#' are larger than the child terms. -#' @param exclude_top_lvl Exclude the top level term of the HPO -#' (i.e. "All" (HP:0000001)) when computing \emph{absolute} levels. -#' This argument is not used when computing \emph{relative} levels. -#' @inheritParams make_phenos_dataframe -#' @inheritParams make_network_object -#' @inheritParams adjacency_matrix -#' @returns A named vector of relative ontology level, -#' where names are HPO Ids and -#' value is relative ontology level. -#' -#' @export -#' @importFrom stats setNames -#' @examples -#' terms <- ontologyIndex::get_descendants(ontology = get_hpo(), -#' roots = "HP:0000002") -#' lvls <- get_ont_lvls(terms = terms) -#' lvls_abs <- get_ont_lvls(terms = terms, absolute=TRUE) -get_ont_lvls <- function(terms, - hpo = get_hpo(), - adjacency = NULL, - absolute = TRUE, - exclude_top_lvl=TRUE, - reverse = TRUE, - verbose = TRUE) { - - terms <- unique(terms) - messager("Getting",if(absolute)"absolute"else"relative", - "ontology level for", - formatC(length(terms),big.mark = ","),"HPO IDs.", v=verbose) - #### Create adjacency matrix if needed #### - if(isFALSE(absolute) & - is.null(adjacency)){ - adjacency <- adjacency_matrix(terms = terms, - hpo = hpo, - verbose = verbose) - } - #### Get ontology levle for each HPO term #### - hierarchy <- lapply(stats::setNames(terms, - terms), - function(term){ - get_ont_lvl(term = term, - hpo = hpo, - adjacency = adjacency, - absolute = absolute, - verbose = FALSE) - }) |> unlist() - #### Reverse levels #### - if (isTRUE(reverse) && - isFALSE(absolute)) { - hierarchy <- max(hierarchy) - hierarchy - } - if(isFALSE(reverse) && - isTRUE(absolute)){ - max_lvl <- get_max_ont_lvl(hpo = hpo, - exclude_top_lvl = exclude_top_lvl) - hierarchy <- max_lvl- hierarchy - } - return(hierarchy) -} diff --git a/R/get_ont_lvls2.R b/R/get_ont_lvls2.R deleted file mode 100644 index db38514..0000000 --- a/R/get_ont_lvls2.R +++ /dev/null @@ -1,49 +0,0 @@ -#' Get relative ontology level for multiple HPO terms -#' -#' This calls the \code{get_ont_lvl} function on all phenotypes in a -#' subset of the HPO. The subset chosen when creating the adjacency from the main -#' adjacency matrix of all phenotypes. So, the phenotypes can be found in the -#' row and column names of adjacency. -#' @param reverse A boolean, if TRUE it will reverse the ontology -#' level numbers so that -#' the parent terms are larger than the child terms. -#' @param absolute Make the levels absolute in the sense that they consider -#' the entire HPO ontology (\code{TRUE}). -#' Otherwise, levels will be relative to only the subset \code{terms} provided -#' (\code{FALSE}). -#' @inheritParams make_phenos_dataframe -#' @inheritParams make_network_object -#' @inheritParams adjacency_matrix -#' @returns A named vector of relative ontology level, -#' where names are HPO Ids and -#' value is relative ontology level. -#' -#' @export -#' @importFrom stats setNames -#' @examples -#' terms <- ontologyIndex::get_descendants(ontology = get_hpo(), -#' roots = "HP:0000002") -#' lvls <- get_ont_lvls2(terms = terms) -get_ont_lvls2 <- function(terms, - hpo = get_hpo(), - adjacency = - adjacency_matrix(terms = terms, - hpo = hpo), - absolute = FALSE, - reverse = TRUE, - verbose = TRUE) { - - messager("Getting ontology level for", - formatC(length(terms),big.mark = ","),"HPO IDs.", v=verbose) - # if(isTRUE(absolute)){ - # adjacency <- adjacency_matrix(hpo = hpo, - # verbose = verbose) - # } - - - - # if (isTRUE(reverse)) { - # hierarchy <- max(hierarchy) - hierarchy - # } - # return(hierarchy) -} diff --git a/R/get_orphanet_epidemiology.R b/R/get_orphanet_epidemiology.R deleted file mode 100644 index e37fd2c..0000000 --- a/R/get_orphanet_epidemiology.R +++ /dev/null @@ -1,79 +0,0 @@ -get_orphanet_epidemiology <- function(agg_by=c("MONDO_ID","id","Name"), - include_mondo=TRUE){ - # https://www.orphadata.com/epidemiology/ - # tmp <- tempfile(fileext = ".xml") - # download.file("https://www.orphadata.com/data/xml/en_product9_prev.xml", - # destfile = tmp) - # xml <- rvest::read_html(tmp) - - # xml <- xml2::read_xml(tmp) - # xmlconvert::xml_to_df(xml, records.xpath = "/html") - # xmls <- xml |> - # rvest::html_nodes("Disorder") |> - # # xml2::xml_find_all(".//Disorder") |> - # xml2::as_list()# |> - # tibble::as_tibble(xmls[[1]]) |> - # tidyr::unnest_longer(col=c(OrphaCode,ExpertLink, - # Name,DisorderType,DisorderGroup), simplify = TRUE) |> - # tidyr::unnest_wider(col=PrevalenceList) |> - # tidyr::unnest_longer(col = c(Source,PrevalenceType,PrevalenceQualification,ValMoy)) - # - # - # nodes <- xml2::xml_find_all(xml, ".//Disorder") - # ## Preview structure - # nodes[1]|> xml2::html_structure() - # # xml2::xml_path()|> - # # xml2::xml_text() - # - # ### Convert each node to a flattened data.table - # xml2::xml_find_all(nodes[1:10], ".//Prevalence") |> - # xml2::as_list() |> - # purrr::flatten() |> - # data.table::as.data.table()# |> - # data.table::transpose() - - - mean_prevalence <- Prevalence.ValMoy <- prevalence_denominator <- MONDO_ID <- - NULL; - ## Open in Excel and convert to CSV first ## - path <- system.file("extdata", "orphanet_epidemiology.csv.gz", - package = "HPOExplorer") - d <- data.table::fread(path) - #### Fix column names #### - names(d) <- trimws(whitespace = "[.]", - gsub("[.]+",".", - gsub("/|#",".", - gsub(paste("/DisorderList", - "/Disorder", - "/PrevalenceList", - sep = "|"), - "", - names(d) - ) - ) - ) - ) |> `names<-`(names(d)) - #### Prepare numeric prevalence data #### - d[,prevalence_numerator:=`Prevalence.ValMoy`] - d$prevalence_denominator <- gsub( - " +","", - stringr::str_split(d$Prevalence.PrevalenceClass.Name," / ", - simplify = TRUE)[,2] - ) |> as.numeric() - d[,prevalence:=prevalence_numerator/prevalence_denominator*100] - - #### Add MONDO ID #### - if(isTRUE(include_mondo)){ - d[,MONDO_ID:=mondo_dict(ids = id)] - } - - if(!is.null(agg_by)){ - ## Compute mean prevalence - dprev <- d[,list(n=.N, mean_prevalence=mean(prevalence, na.rm=TRUE)), - by=agg_by #"Prevalence.PrevalenceType.Name" - ][order(-mean_prevalence)] - return(dprev) - } else { - return(d) - } -} diff --git a/R/get_term_definition_api.R b/R/get_term_definition_api.R index d90d767..f552d71 100644 --- a/R/get_term_definition_api.R +++ b/R/get_term_definition_api.R @@ -4,7 +4,6 @@ #' HPO term. If a \code{line_length} \> 0 is passed to the function, it will add #' newlines every nth word. This can be useful when displaying the description #' in plots with limited space. -#' @param term One or more HPO IDs. #' @returns Character vector of definitions. #' @keywords internal get_term_definition_api <- function(term, diff --git a/R/get_term_definition_data.R b/R/get_term_definition_data.R index df27352..b243652 100644 --- a/R/get_term_definition_data.R +++ b/R/get_term_definition_data.R @@ -1,7 +1,9 @@ get_term_definition_data <- function(term, line_length, hpo = get_hpo()){ - definitions <- hpo$def[term] + dict <- stats::setNames(hpo@elementMetadata$definition, + hpo@elementMetadata$id) + definitions <- dict[term] if (line_length > 0) { definitions <- substr(x=definitions, start=0, stop=line_length) } diff --git a/R/get_upheno.R b/R/get_upheno.R deleted file mode 100644 index 4d29dad..0000000 --- a/R/get_upheno.R +++ /dev/null @@ -1,68 +0,0 @@ -#' Get UPHENO -#' -#' Get data from the \href{https://github.com/obophenotype/upheno}{ -#' Unified Phenotype Ontology (UPHENO)}. -#' -#' @param file Can be one of the following: -#' \itemize{ -#' \item{"ontology"}{Creates an \link[ontologyIndex]{ontologyIndex} R object by -#' importing the OBO file directly from the official -#' \href{https://github.com/obophenotype/upheno}{UPHENO GitHub repository}.} -#' \item{"bestmatches"}{Returns a merged table with the best matches between -#' human and non-human homologous phenotypes (from multiple species). -#' Distributed by the official -#' \href{https://github.com/obophenotype/upheno/tree/master/mappings}{ -#' UPHENO GitHub repository}.} -#' \item{"upheno_mapping"}{Return a merged table with matches bteween human -#' and non-human homologous phenotypes (from multiple species). -#' Distributed by the -#' \href{https://data.monarchinitiative.org/upheno2/current/upheno-release/all/index.html}{ -#' Monarch Initiative server}.} -#' } -#' @import data.table -#' @importFrom stringr str_split -#' @returns \link[ontologyIndex]{ontologyIndex} or -#' \link[data.table]{data.table}. -#' -#' @export -#' @examples -#' upheno <- get_upheno() -get_upheno <- function(file=c("ontology", - "bestmatches", - "upheno_mapping")){ - file <- match.arg(file) - if(file=="ontology"){ - requireNamespace("ontologyIndex") - upheno <- ontologyIndex::get_OBO( - "https://github.com/obophenotype/upheno-dev/raw/master/upheno.obo", - extract_tags = "everything") - return(upheno) - } - if(file=="bestmatches"){ - #### Fuzzy query #### - ## Only between HPO and 3 ontologies - base_url <- "http://purl.obolibrary.org/obo/upheno/mappings/" - URLs <- paste0(base_url, - c("hp-to-zp-bestmatches.tsv", - "hp-to-mp-bestmatches.tsv", - "hp-to-wbphenotype-bestmatches.tsv")) - pheno_map <- lapply(URLs, function(x){ - data.table::fread(x) - }) |> `names<-`(gsub("-bestmatches.tsv","",basename(URLs))) |> - data.table::rbindlist(idcol = "map", fill = TRUE) |> - `names<-`(c("map","id1","label1","id2", - "label2","equivalence_score","subclass_score")) - pheno_map$db2 <- stringr::str_split(pheno_map$id2,":", simplify = TRUE)[,1] - return(pheno_map) - } - if (file=="upheno_mapping"){ - id1 <- id2 <- p1 <- p2 <- NULL; - pheno_map <- get_monarch("phenotype_to_phenotype") - pheno_map[,id1:=gsub("_",":",basename(p1)) - ][,id2:=gsub("_",":",basename(p2))] - pheno_map$db1 <- stringr::str_split(pheno_map$id1,":", simplify = TRUE)[,1] - pheno_map$db2 <- stringr::str_split(pheno_map$id2,":", simplify = TRUE)[,1] - data.table::setkeyv(pheno_map,"id1") - return(pheno_map) - } -} diff --git a/R/get_version.R b/R/get_version.R deleted file mode 100644 index f23d222..0000000 --- a/R/get_version.R +++ /dev/null @@ -1,34 +0,0 @@ -#' Get version -#' -#' Extract the precise version of the HPO release that the data object -#' was built from. All HPO Releases can be found at the official -#' \href{https://github.com/obophenotype/human-phenotype-ontology/releases}{ -#' HPO GitHub Releases page}. -#' @param obj An object from -#' \link[HPOExplorer]{get_hpo} or -#' \link[HPOExplorer]{load_phenotype_to_genes}. -#' @param verbose Print messages. -#' @param return_version Return the version as a character string. -#' @returns HPO Release version a character string. -#' -#' @export -#' @examples -#' obj <- get_hpo() -#' get_version(obj=obj) -get_version <- function(obj, - return_version = FALSE, - verbose = TRUE){ - ## Extract - if(methods::is(obj,"ontology_index")){ - x <- grep("data-version:",attr(obj,"version"),value=TRUE) - v <- paste0("v", - trimws(gsub("data-version:|hp/releases/|/hp-base.owl","",x)) - ) - } else { - v <- attr(obj,"version") - } - ## Print - messager("+ Version:",v,v=verbose) - ## Return - if(isTRUE(return_version)) return(v) -} diff --git a/R/gpt_annot_check.R b/R/gpt_annot_check.R index d52a6e0..939c917 100644 --- a/R/gpt_annot_check.R +++ b/R/gpt_annot_check.R @@ -27,8 +27,8 @@ gpt_annot_check <- function(annot = gpt_annot_read(), # prior_ids <- unique(HPOExplorer::hpo_modifiers$hpo_id) # new_ids <- unique(annot$hpo_id) # length(new_ids)/length(prior_ids) - # length(prior_ids)/length(hpo$id) - # length(new_ids)/length(hpo$id) + # length(prior_ids)/length(hpo@terms) + # length(new_ids)/length(hpo@terms) #### Check annotation consistency #### nm <- names(annot)[!grepl("hpo_name|justification|hpo_id",names(annot), ignore.case = TRUE)] diff --git a/R/harmonise_phenotypes.R b/R/harmonise_phenotypes.R deleted file mode 100644 index b3f6d30..0000000 --- a/R/harmonise_phenotypes.R +++ /dev/null @@ -1,83 +0,0 @@ -#' Harmonise phenotypes -#' -#' Harmonise a mixed vector of phenotype names (e.g. "Focal motor seizure") -#' and HPO IDs (e.g. c("HP:0000002","HP:0000003")). -#' @param phenotypes A character vector of phenotype names and/or HPO IDs. -#' @param as_hpo_ids Return all \code{phenotypes} as HPO IDs (\code{TRUE}), -#' or phenotype names (\code{FALSE}). -#' @param ignore_case Ignore casing when searching for matches. -#' @param keep_order Return a named list of the same length and order -#' as \code{phenotypes}. -#' If \code{FALSE}, return a named list of only the unique \code{phenotypes}, -#' sometimes in a different order. -#' @param validate Remove any phenotype names or IDs that cannot be found in the -#' \code{hpo}. -#' @param invert Invert the keys/values of the dictionary, -#' such that the key becomes the values (and vice versa). -#' @param verbose Print messages. -#' @inheritParams make_phenos_dataframe -#' @returns Character vector -#' -#' @export -#' @importFrom stats setNames -#' @examples -#' phenotypes <- c("Focal motor seizure","HP:0000002","HP:0000003") -#' #### As phenotype names #### -#' pheno_names <- harmonise_phenotypes(phenotypes=phenotypes) -#' #### As HPO IDs #### -#' pheno_ids <- harmonise_phenotypes(phenotypes=phenotypes, as_hpo_ids=TRUE) -harmonise_phenotypes <- function(phenotypes, - hpo = get_hpo(), - as_hpo_ids = FALSE, - validate = TRUE, - ignore_case = TRUE, - keep_order = TRUE, - invert = FALSE, - verbose = TRUE){ - - # devoptera::args2vars(harmonise_phenotypes) - phenotypes_og <- phenotypes - phenotypes <- unique(phenotypes) - hpo_ids <- grep("^HP:|^HP_",phenotypes, - value = TRUE, - ignore.case = ignore_case) - ptypes <- phenotypes[!phenotypes %in% hpo_ids] - #### Validate phenotypes #### - if(isTRUE(as_hpo_ids)){ - messager("Translating all phenotypes to HPO IDs.",v=verbose) - out <- stats::setNames(hpo_ids,hpo_ids) - if(length(ptypes)>0) { - id_dict <- stats::setNames(names(hpo$name), - unname(hpo$name)) - out <- c(out,id_dict[ptypes]) - } - if(isTRUE(validate)){ - out <- out[out %in% unname(hpo$id)] - } - } else { - messager("Translating all phenotypes to names.",v=verbose) - out <- stats::setNames(ptypes,ptypes) - if(length(hpo_ids)>0){ - out <- c(out, hpo$name[hpo_ids]) - } - if(isTRUE(validate)){ - out <- out[out %in% unname(hpo$name)] - } - } - #### Return #### - if(isFALSE(keep_order)){ - messager("+ Returning a dictionary of phenotypes", - "(different order as input).", - v=verbose) - } else { - messager("+ Returning a vector of phenotypes", - "(same order as input).", - v=verbose) - out <- out[phenotypes_og] - } - #### Invert #### - if(isTRUE(invert)){ - out <- stats::setNames(names(out), unname(out)) - } - return(out) -} diff --git a/R/hpo_modifiers_agg.R b/R/hpo_modifiers_agg.R index 376ec14..598eee1 100644 --- a/R/hpo_modifiers_agg.R +++ b/R/hpo_modifiers_agg.R @@ -1,11 +1,11 @@ -hpo_modifiers_agg <- function(dt, +hpo_modifiers_agg <- function(dat, by = c("disease_id","hpo_id")){ modifier <- modifier_name <- disease_name <- disease_name <- disease_id <- Severity_score <- Severity_score_min <- hpo_id <- NULL; by <- by[1] if(by=="hpo_id"){ - dt2 <- dt[,list( + dt2 <- dat[,list( modifier=paste(unique(modifier),collapse=";"), modifier_name=paste(unique(modifier_name),collapse=";"), modifier_count=paste(table(unlist(modifier_name)),collapse=";"), @@ -16,7 +16,7 @@ hpo_modifiers_agg <- function(dt, Severity_score_min=min(Severity_score, na.rm=TRUE) ), by=by] } else if (by=="disease_id"){ - dt2 <- dt[,list( + dt2 <- dat[,list( disease_name=disease_name, modifier=paste(unique(modifier),collapse=";"), modifier_name=paste(unique(modifier_name),collapse=";"), diff --git a/R/hpo_to_matrix.R b/R/hpo_to_matrix.R index fd8aff9..324c645 100644 --- a/R/hpo_to_matrix.R +++ b/R/hpo_to_matrix.R @@ -55,8 +55,7 @@ hpo_to_matrix <- function(terms = NULL, phenotype_to_genes[,dummy:=1] value.var <- "dummy" } else if(grepl("^evidence_score",value.var)){ - phenotype_to_genes <- add_evidence(phenos = phenotype_to_genes, - verbose = verbose) + phenotype_to_genes <- add_evidence(phenos = phenotype_to_genes) } else if(!value.var %in% names(phenotype_to_genes)){ stp <- paste("value.var not found in phenotype_to_genes.") stop(stp) @@ -82,7 +81,10 @@ hpo_to_matrix <- function(terms = NULL, #### Format and return #### if(isTRUE(as_matrix) || isTRUE(run_cor)){ - X <- as.matrix(X_dt[,-meta_vars,with=FALSE])|> `rownames<-`(rn) + X <- dt_to_matrix(dat=X_dt, + omit_cols = meta_vars, + rownames = rn, + as_sparse = FALSE) if(isTRUE(run_cor)){ messager("Computing all parwise correlations.",v=verbose) X_cor <- stats::cor(X, method = method) diff --git a/R/ids_to_mondo.R b/R/ids_to_mondo.R deleted file mode 100644 index 97d738f..0000000 --- a/R/ids_to_mondo.R +++ /dev/null @@ -1,12 +0,0 @@ -mondo_dict <- function(mondo=get_mondo(), - ids=NULL){ - xref <- unlist(mondo$xref) - # xref <- xref[names(xref) %in% names(mondo$id)] - dict <- xref |> invert_dict() - names(dict) <- gsub("^Orphanet","ORPHA",names(dict)) - if(!is.null(ids)) { - return(dict[ids]) - } else { - return(dict) - } -} diff --git a/R/igraph_to_ggnetwork.R b/R/igraph_to_ggnetwork.R deleted file mode 100644 index b494293..0000000 --- a/R/igraph_to_ggnetwork.R +++ /dev/null @@ -1,5 +0,0 @@ -igraph_to_ggnetwork <- function(g){ - g |> - igraph::simplify() |> - ggnetwork::fortify() -} diff --git a/R/igraph_to_plotly.R b/R/igraph_to_plotly.R deleted file mode 100644 index 0fdf6a9..0000000 --- a/R/igraph_to_plotly.R +++ /dev/null @@ -1,69 +0,0 @@ -#' igraph to plotly data -#' -#' Convert an igraph to data for a 3D plotly plot. -#' @param dim Number of dimensions to create layout in. -#' @inheritParams network_3d -#' @returns Named list of data.frames. -#' -#' @keywords internal -#' @importFrom data.table data.table as.data.table .SD := -igraph_to_plotly <- function(g, - layout_func = igraph::layout.fruchterman.reingold, - dim = 3, - seed = 2023, - verbose = TRUE){ - - requireNamespace("igraph") - .SD <- NULL; - - set.seed(seed) - messager("Converting igraph to plotly data.",v=verbose) - layout_coords <- cbind( - name=names(igraph::V(g)), - layout_func(g,dim=dim) |> - data.table::as.data.table()|> - `colnames<-`(c("x","y","z")[seq(dim)]) - ) - # d <- data.table::as.data.table(igraph::as_edgelist(g)) - d <- igraph::as_data_frame(g, what = "both") - # d <- igraph::as_long_data_frame(g) - # gnet <- data.table::data.table( - # igraph_to_ggnetwork(g) - # )[,-c("x","y","xend","yend")] - # if("node_type" %in% names(gnet)){ - # gnet <- unique(gnet[node_type %in% c("hpo_name","ancestor_name")]) - # } - - #### Vertex data #### - #### Merge layout coordinates with vertex metadata #### - vdf <- merge( - layout_coords, - d$vertices, - by = "name", - all = TRUE - ) - ## Make NAs into characters so that they still get assigned a point color - cols <- names(vdf)[sapply(vdf, class) == 'character'] - vdf[, (cols) := lapply(.SD, replace,NA,"NA"),.SDcols=cols] - #### Edge data #### - edf <- data.table::merge.data.table( - data.table::data.table(d$edges), - layout_coords, - by.x="from", - by.y="name" - ) |> data.table::merge.data.table( - data.table::copy(layout_coords) |> - data.table::setnames(c("x","y","z")[seq(dim)], - c("xend","yend","zend")[seq(dim)] - ), - by.x="to", - by.y="name" - ) - - if(!"hpo_name" %in% names(vdf)){ - vdf$hpo_name <- harmonise_phenotypes(phenotypes = vdf$hpo_id, - as_hpo_ids = FALSE) - } - return(list(vertices=vdf, - edges=edf)) -} diff --git a/R/invert_dict.R b/R/invert_dict.R deleted file mode 100644 index 14100f8..0000000 --- a/R/invert_dict.R +++ /dev/null @@ -1,3 +0,0 @@ -invert_dict <- function(dict){ - stats::setNames(names(dict),unname(dict)) -} diff --git a/R/kde_surface.R b/R/kde_surface.R deleted file mode 100644 index 416f842..0000000 --- a/R/kde_surface.R +++ /dev/null @@ -1,57 +0,0 @@ -#' Kernel Density Estimation (KDE) surface -#' -#' Compute a KDE surface based on the density of points along -#' an x- and y-axis. -#' @param xyz A data.frame containing the columns "x" and "y" (required). -#' Can also include a third column "z" (optional). -#' @param extend_kde Extend the limits of the KDE area by setting -#' \code{extend_kde>1}. -#' This makes the "mountains' of the KDE end more smoothly along the edges of -#' the x/y coordinates. -#' @param rescale_z Rescale the z-axis so that the KDE can be positioned -#' below the original data points (when plotting them together). -#' @inheritParams MASS::kde2d -#' @returns -#' \itemize{ -#' \item{\code{x, y}: }{ -#' The x and y coordinates of the grid points, vectors of length n. -#' } -#' \item{\code{z}: }{ -#' An \code{n[1]} by \code{n[2]} matrix of the estimated density: -#' rows correspond to the value of \code{x}, columns to the value of \code{y}. -#' } -#' } -#' -#' @export -#' @examples -#' xyz <- data.frame(x=stats::rnorm(50), -#' y=stats::rnorm(50), -#' z=stats::rnorm(50)) -#' kd <- kde_surface(xyz = xyz) -kde_surface <- function(xyz, - n = 50, - extend_kde = 1, - rescale_z = TRUE){ - # devoptera::args2vars(kde_surface) - requireNamespace("MASS") - requireNamespace("scales") - - kd <- MASS::kde2d(x = xyz$x, - y = xyz$y, - n = n, - lims = c(range(xyz$x), - range(xyz$y) - )*extend_kde) - #### Rescale the z-axis so that the KDE plot is below the points #### - if(isTRUE(rescale_z)){ - if("z" %in% names(xyz)){ - kd$z <- scales::rescale(x = kd$z, - to = c(min(xyz$z*2), - min(xyz$z)-.25)) - } else{ - wrn <- "Cannot rescale_z when z column not present in xyz data.frame." - warning(wrn) - } - } - return(kd) -} diff --git a/R/list_deaths.R b/R/list_deaths.R index 932273d..9d91160 100644 --- a/R/list_deaths.R +++ b/R/list_deaths.R @@ -1,3 +1,4 @@ +#' @describeIn map_ map_ #' List age of death HPO terms #' #' List age of death phenotypes in the HPO. @@ -10,22 +11,19 @@ #' deaths <- list_deaths() list_deaths <- function(hpo = get_hpo(), exclude = FALSE,# c("Miscarriage","Stillbirth","Prenatal death" ) - as_hpo_ids = FALSE, - include_na = TRUE, - verbose = TRUE){ - # devoptera::args2vars(list_opts) - - opts <- harmonise_phenotypes(phenotypes = names(hpo_dict(type = "AgeOfDeath")), - hpo = hpo, - as_hpo_ids = TRUE, - keep_order = FALSE, - invert = TRUE, - verbose = verbose) + to = c("name","id"), + include_na = TRUE){ + to <- match.arg(to) + opts <- map_phenotypes(terms = names(hpo_dict(type = "AgeOfDeath")), + hpo = hpo, + to="id", + keep_order = FALSE, + invert = TRUE) if(!is.null(exclude)){ opts <- opts[!grepl(paste(exclude,collapse = "|"),opts, ignore.case = TRUE)] } - if(isTRUE(as_hpo_ids)){ + if(to=="id"){ ids <- names(opts) if(isTRUE(include_na)){ ids <- c(ids,NA) diff --git a/R/list_onsets.R b/R/list_onsets.R index 5b36850..153cc47 100644 --- a/R/list_onsets.R +++ b/R/list_onsets.R @@ -1,8 +1,8 @@ +#' @describeIn map_ map_ #' List age of onset HPO terms #' #' List age of onset phenotypes in the HPO. #' @param exclude HPO phenotype names to exclude. -#' @param as_hpo_ids Return as a character vector vector HPO IDs only. #' @param include_na Include NA values for onset. #' @inheritParams make_phenos_dataframe #' @returns Named list of HPO IDs. @@ -12,22 +12,20 @@ #' onsets <- list_onsets() list_onsets <- function(hpo = get_hpo(), exclude = FALSE, #c("Antenatal","Fetal","Congenital") - as_hpo_ids = FALSE, + to = c("name","id"), include_na = TRUE, verbose = TRUE){ - # devoptera::args2vars(list_onsets) - - opts <- harmonise_phenotypes(phenotypes = names(hpo_dict(type = "onset")), - hpo = hpo, - as_hpo_ids = TRUE, - keep_order = FALSE, - invert = TRUE, - verbose = verbose) + to <- match.arg(to) + opts <- map_phenotypes(terms = names(hpo_dict(type = "onset")), + hpo = hpo, + to="id", + keep_order = FALSE, + invert = TRUE) if(!is.null(exclude)){ opts <- opts[!grepl(paste(exclude,collapse = "|"),opts, ignore.case = TRUE)] } - if(isTRUE(as_hpo_ids)){ + if(to=="id"){ ids <- names(opts) if(isTRUE(include_na)){ ids <- c(ids,NA) diff --git a/R/load_decipher_genes.R b/R/load_decipher_genes.R deleted file mode 100644 index f0f201b..0000000 --- a/R/load_decipher_genes.R +++ /dev/null @@ -1,27 +0,0 @@ -load_decipher_genes <- function(verbose=TRUE){ - - requireNamespace("R.utils") - disease_name <- disease_name <- ID <- `disease_id` <- NULL; - - messager("Loading DECIPHER genes.",v=verbose) - #### UCSC bigbed file #### - # "https://hgdownload.soe.ucsc.edu/gbdb/hg38/" - - #### EBI annotations file #### - dat <- data.table::fread( - "https://www.ebi.ac.uk/gene2phenotype/downloads/DDG2P.csv.gz") - names(dat) <- gsub(" ","_",names(dat)) - dat[,disease_name:=tolower(disease_name)] - ## Bizarrely, the do not provide their disease IDs in their own database. - ## Have to instead infer from the HPO data - d <- load_phenotype_to_genes("phenotype.hpoa") - d <- d[grepl("DECIPHER",d$disease_id),] - d[,ID:=disease_id][,disease_name:=tolower(disease_name)] - dat <- data.table::merge.data.table(dat, - d[,c("ID","disease_name")], - by.x = "disease_name", - by.y = "disease_name", - all.x = TRUE) - # sum(!is.na(dat$ID)) - return(dat) -} diff --git a/R/load_disease_genes.R b/R/load_disease_genes.R deleted file mode 100644 index d7b1fdb..0000000 --- a/R/load_disease_genes.R +++ /dev/null @@ -1,31 +0,0 @@ -#' Load disease genes -#' -#' Load gene lists associated with each disease phenotype from: -#' \itemize{ -#' \item{DECIPHER} -#' \item{ORPHANET} -#' \item{OMIM} -#' } -#' @param verbose Print messages. -#' @returns data.table -#' -#' @export -#' @importFrom data.table := rbindlist -#' @examples -#' dat <- load_disease_genes() -load_disease_genes <- function(verbose=TRUE){ - # devoptera::args2vars(load_disease_genes) - - #### Import data #### - decipher <- load_decipher_genes(verbose = verbose) - orphanet <- load_orphanet_genes(verbose = verbose) - omim <- load_omim_genes(verbose = verbose) - #### Merge data #### - nms <- c("disease_id","disease_name_og","gene_symbol") - dgenes <- list( - DECIPHER=decipher[,c("ID","disease_name","gene_symbol")] |> `names<-`(nms), - ORPHANET=orphanet[,c("Orphanet_ID","phenotype","gene")] |> `names<-`(nms), - OMIMG=omim[,c("OMIMID","Disease","GeneSym")] |> `names<-`(nms) - ) |> data.table::rbindlist(fill = TRUE,use.names = TRUE, idcol = "Database") - return(dgenes) -} diff --git a/R/load_harmonizome_genes.R b/R/load_harmonizome_genes.R deleted file mode 100644 index e98b2af..0000000 --- a/R/load_harmonizome_genes.R +++ /dev/null @@ -1,11 +0,0 @@ -load_harmonizome_genes <- function(dataset){ - # dataset <- c("omim","clinvar") - dataset <- tolower(dataset[1]) - baseurl <- paste0( - "https://maayanlab.cloud/static/hdfs/harmonizome/data/",dataset - ) - genes <- data.table::fread(paste0(baseurl,"/gene_list_terms.txt.gz")) - terms <- data.table::fread(paste0(baseurl,"/attribute_list_entries.txt.gz")) - dat <- data.table::merge.data.table(terms, genes) - return(dat) -} diff --git a/R/load_omim_genes.R b/R/load_omim_genes.R deleted file mode 100644 index 8be2f6e..0000000 --- a/R/load_omim_genes.R +++ /dev/null @@ -1,12 +0,0 @@ -load_omim_genes <- function(verbose=TRUE){ - - messager("Loading OMIM genes.",v=verbose) - #### From enrichr ##### - ## Outdated file - # dat <- read_enrichr(file = "https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=OMIM_Expanded") - - #### From Harmonizome #### - # j <- jsonlite::fromJSON("https://maayanlab.cloud/Harmonizome/api/1.0/dataset/OMIM+Gene-Disease+Associations") - dat <- load_harmonizome_genes(dataset = "omim") - return(dat) -} diff --git a/R/load_orphanet_genes.R b/R/load_orphanet_genes.R deleted file mode 100644 index 644ac3a..0000000 --- a/R/load_orphanet_genes.R +++ /dev/null @@ -1,35 +0,0 @@ -load_orphanet_genes <- function(verbose=TRUE){ - - Orphanet_ID <- NULL; - messager("Loading Orphanet genes.",v=verbose) - # URL <- "https://hgdownload.soe.ucsc.edu/gbdb/hg38/bbi/orphanet/orphadata.bb" - # bb <- rtracklayer::import(URL) - - #### From Orphanet XML file #### - # (they don't provide the data in any easy-to-use format) - # xml <- xml2::read_xml("https://www.orphadata.com/data/xml/en_product6.xml") - # xml_list <- xml2::as_list(xml) - # df <- lapply(xml_list$JDBOR$DisorderList, function(x){ - # data.table::data.table( - # t(unlist(x[names(x)!="DisorderGeneAssociationList"])), - # t(unlist(x$DisorderGeneAssociationList$DisorderGeneAssociation$Gene[ - # c("Symbol") - # ])), - # t(unlist(x$DisorderGeneAssociationList$DisorderGeneAssociation[ - # c("SourceOfValidation","DisorderGeneAssociationType","DisorderGeneAssociationStatus") - # ])) - # ) - # }) |> data.table::rbindlist(fill = TRUE) - - #### From enrichrR #### - dat <- read_enrichr(file = "https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=Orphanet_Augmented_2021") - splt <- cbind( - ( - data.table::as.data.table( - stringr::str_split(dat$term,"ORPHA:",simplify = TRUE)) |> - `colnames<-`(c("phenotype","Orphanet_ID")) - )[,Orphanet_ID:=paste0("ORPHA:",Orphanet_ID)], - gene=dat$gene - ) - return(splt) -} diff --git a/R/load_phenotype_to_genes.R b/R/load_phenotype_to_genes.R index cffb1ff..1ad0d48 100644 --- a/R/load_phenotype_to_genes.R +++ b/R/load_phenotype_to_genes.R @@ -20,8 +20,9 @@ load_phenotype_to_genes <- function(file = c("phenotype_to_genes.txt", "genes_to_phenotype.txt", "phenotype.hpoa"), save_dir = file.path( - tools::R_user_dir("HPOExplorer", - which="cache"), + KGExplorer::cache_dir( + package = "HPOExplorer" + ), "data"), repo = paste( "obophenotype", @@ -49,7 +50,7 @@ load_phenotype_to_genes <- function(file = c("phenotype_to_genes.txt", !overwrite){ messager("Reading cached RDS file:",file,v=verbose) obj <- readRDS(path_rds) - get_version(obj,verbose=verbose) + KGExplorer::get_version(obj,verbose=verbose) return(obj) } #### Stored in GitHub Releases #### @@ -72,7 +73,7 @@ load_phenotype_to_genes <- function(file = c("phenotype_to_genes.txt", #### Add version #### if(!is.null(attr(f,"version"))){ attr(phenotype_to_genes,"version") <- attr(f,"version") - get_version(phenotype_to_genes,verbose=verbose) + KGExplorer::get_version(phenotype_to_genes, verbose=verbose) } #### Save RDS #### ## This lets up keep track of the version of the data without having to diff --git a/R/make_hoverboxes.R b/R/make_hoverboxes.R deleted file mode 100644 index 30c5355..0000000 --- a/R/make_hoverboxes.R +++ /dev/null @@ -1,69 +0,0 @@ -#' Make hoverboxes -#' -#' A hoverbox is a box of text that shows up when the cursor -#' hovers over something. -#' These can be useful when making interactive network plots -#' of the HPO phenotypes because we can include a hoverbox that gives -#' information and data associated with each phenotype. -#' -#' This function expects a dataframe of with a "hpo_name" column that has the -#' name of each phenotype. It must then include columns -#' for all of the parameters you wish to include in the hoverbox. -#' @inheritParams make_phenos_dataframe -#' @inheritParams make_network_object -#' @inheritParams base::round -#' @inheritParams stringr::str_wrap -#' @returns A nicely formatted string with newlines etc, -#' to be used as a hoverbox. -#' -#' @export -#' @importFrom stats setNames -#' @importFrom data.table := -#' @importFrom stringr str_wrap -#' @examples -#' phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay", -#' add_hoverboxes = FALSE) -#' phenos <- make_hoverboxes(phenos = phenos) -make_hoverboxes <- function(phenos, - columns = list_columns(), - interactive = TRUE, - width = 60, - digits = 3, - verbose = TRUE) { - # templateR:::source_all() - # devoptera::args2vars(make_hoverboxes) - - hover <- hpo_id <- hpo_name <- NULL; - - #### Select sep #### - sep <- if(isTRUE(interactive)) "
" else "\n" - columns <- columns[unname(columns) %in% names(phenos)] - if(length(columns)==0){ - messager("No columns found. Making hoverboxes from hpo_id only.",v=verbose) - phenos[hover:=paste("hpo_id:",hpo_id)] - } else { - messager("Making hoverboxes from:", - paste(shQuote(columns),collapse = ", "),v=verbose) - #### Iterate over phenotypes #### - hoverBoxes <- lapply(stats::setNames( - unique(phenos$hpo_name), - unique(phenos$hpo_name) - ), function(pheno_i){ - lapply(seq(length(columns)), function(i){ - val <- phenos[hpo_name == pheno_i, ][1,][[columns[[i]]]] - val <- if(is.numeric(val)) round(val,digits = digits) else { - paste( - stringr::str_wrap(val, width = width), - collapse = sep - ) - } - paste0("",names(columns)[[i]],"", - ": ",val - ) - }) |> paste(collapse = sep) - }) - #### Assign to each row ##### - phenos$hover <- unlist(hoverBoxes[phenos$hpo_name]) - } - return(phenos) -} diff --git a/R/make_igraph_object.R b/R/make_igraph_object.R index 7a9832c..a5a04fc 100644 --- a/R/make_igraph_object.R +++ b/R/make_igraph_object.R @@ -1,3 +1,4 @@ +#' @describeIn make_ make_ #' Make an \link[igraph]{igraph} object #' #' This uses the network package to coerce the adjacency matrix into a @@ -8,8 +9,6 @@ #' hpo_id. #' @inheritParams make_phenos_dataframe #' @inheritParams make_network_object -#' @inheritParams ggnetwork::fortify.network -#' @inheritDotParams ggnetwork::ggnetwork #' @returns A \link[igraph]{igraph} object. #' #' @export @@ -18,11 +17,7 @@ #' g <- make_igraph_object(phenos = phenos) make_igraph_object <- function(phenos, hpo = get_hpo(), - adjacency = adjacency_matrix( - terms = phenos$hpo_id, - hpo = hpo), colour_var = "fold_change", - add_ont_lvl_absolute = FALSE, cols = list_columns( extra_cols = c( colour_var, @@ -31,27 +26,20 @@ make_igraph_object <- function(phenos, value = TRUE) ) ), - layout = "fruchtermanreingold", - verbose = TRUE, ... ){ - requireNamespace("igraph") - phenoNet <- make_network_object(phenos = phenos, - hpo = hpo, - adjacency = adjacency, - colour_var = colour_var, - add_ont_lvl_absolute = add_ont_lvl_absolute, - cols = cols, - verbose = verbose, - ...) - messager("Creating igraph object.",v=verbose) - vertices <- unique( - phenoNet[,!names(phenoNet) %in% c("x","y","vertex.names","xend","yend")] - ) - rownames(vertices) <- vertices$hpo_id - g <- igraph::graph_from_adjacency_matrix(adjacency) - for(n in names(vertices)){ - igraph::vertex_attr(g,name = n) <- vertices[names(igraph::V(g)),n] + requireNamespace("tidygraph") + g <- KGExplorer::ontology_to(ont = hpo, + terms = phenos$hpo_id, + to = "tbl_graph") + pcols <- intersect(names(phenos), names(cols)) + gcols <- KGExplorer:::get_graph_colnames(g) + cols <- setdiff(pcols, gcols) + if(length(cols)>0){ + phenos <- phenos[,cols,with=FALSE] + g <- g |> tidygraph::activate("nodes") |> + tidygraph::left_join(by=c("name" = "hpo_id"), + phenos) } return(g) } diff --git a/R/make_network_object.R b/R/make_network_object.R index 569c3e0..903b493 100644 --- a/R/make_network_object.R +++ b/R/make_network_object.R @@ -1,3 +1,4 @@ +#' @describeIn make_ make_ #' Make a \link[ggnetwork]{ggnetwork} object #' #' This uses the network package to coerce the adjacency matrix into a @@ -6,15 +7,11 @@ #' #' It expects there to be a column of HPO IDs in the phenos dataframe called #' hpo_id. -#' @param phenos dataframe of phenotypes and values / parameters. -#' @param adjacency An adjacency matrix generated -#' by \link[HPOExplorer]{adjacency_matrix}. #' @param colour_var The column from phenos that you wish #' to map to node colour. #' @param cols Columns to add to metadata of \link[ggnetwork]{ggnetwork} object. -#' @inheritParams make_phenos_dataframe -#' @inheritParams ggnetwork::fortify.network -#' @inheritDotParams ggnetwork::ggnetwork +#' @inheritParams ggnetwork::fortify +#' @inheritDotParams ggnetwork::fortify #' @returns A \link[ggnetwork]{ggnetwork} object. #' #' @export @@ -27,10 +24,6 @@ #' colour_var = "ontLvl_geneCount_ratio") make_network_object <- function(phenos, hpo = get_hpo(), - adjacency = - adjacency_matrix( - terms = phenos$hpo_id, - hpo = hpo), colour_var = "fold_change", add_ont_lvl_absolute = FALSE, cols = list_columns( @@ -41,42 +34,35 @@ make_network_object <- function(phenos, value = TRUE) ) ), - layout = "fruchtermanreingold", - verbose = TRUE, + as=c("ggnetwork","tbl_graph"), ...) { - # devoptera::args2vars(make_network_object, reassign = TRUE) - - messager("Making phenotype network object.",v=verbose) - if("hpo_id" %in% names(phenos)){ - adjacency <- adjacency[unique(phenos$hpo_id), - unique(phenos$hpo_id)] - } - if(!"ontLvl" %in% names(phenos) && - isTRUE(add_ont_lvl_absolute)){ - phenos <- add_ont_lvl(phenos = phenos, - hpo = hpo, - absolute = TRUE, - verbose = verbose) - } - #### Create phenoNet obj #### - messager("Creating ggnetwork object.",v=verbose) - phenoNet <- ggnetwork::ggnetwork(x = adjacency, - arrow.gap = 0, - layout = layout, - ...) - cols <- cols[cols %in% names(phenos)] - for(col in cols){ - #### Add metadata to phenoNet #### - phenoNet <- make_node_data(phenoNet = phenoNet, - phenos = phenos, - phenos_column = col, - verbose = verbose) - } - #### Add number of total edges for each node #### - if("hpo_id" %in% names(phenos)){ - messager("Adding n_edges per node.",v=verbose) - phenoNet$n_edges <- rowSums(adjacency)[phenoNet$vertex.names] - } - phenoNet <- unique(phenoNet) - return(phenoNet) + as <- match.arg(as) + messager("Making phenotype network object.") + if(!"ontLvl" %in% names(phenos) && + isTRUE(add_ont_lvl_absolute)){ + phenos <- add_ont_lvl(phenos = phenos, + hpo = hpo, + absolute = TRUE) + } + #### Create phenoNet obj #### + g <- make_igraph_object(phenos, + hpo = hpo, + colour_var = colour_var, + cols = cols) + #### Return tbl_graph object if requested #### + if(as == "tbl_graph") return(g) + #### Proceed to convert to ggnetwork object #### + messager("Creating ggnetwork object.") + phenoNet <- ggnetwork::fortify(g, + ...) + cols <- cols[cols %in% names(phenos)] + #### No longer needed as make_igraph_object() adds these columns #### + # for(col in cols){ + # #### Add metadata to phenoNet #### + # phenoNet <- make_node_data(phenoNet = phenoNet, + # phenos = phenos, + # phenos_column = col) + # } + phenoNet <- unique(phenoNet) + return(phenoNet) } diff --git a/R/make_node_data.R b/R/make_node_data.R index 821002d..c6b7f2e 100644 --- a/R/make_node_data.R +++ b/R/make_node_data.R @@ -1,19 +1,18 @@ make_node_data <- function(phenoNet, - phenos, - phenos_column, - new_column=phenos_column, - verbose=TRUE) { + phenos, + phenos_column, + new_column=phenos_column) { if(!phenos_column %in% names(phenos)){ messager(paste0("phenos_column=",shQuote(phenos_column)), - "not found in phenos.",v=verbose) + "not found in phenos.") return(phenoNet) } - messager("Adding",paste0("new_column=",shQuote(new_column)),v=verbose) + messager("Adding",paste0("new_column=",shQuote(new_column))) # data.table::setkeyv(phenos,"hpo_id") # phenoNet[[new_column]] <- phenos[phenoNet$vertex.names, ][[phenos_column]] dat <- unique(phenos[,c("hpo_id",phenos_column), with=FALSE]) data_dict <- stats::setNames(dat[[phenos_column]], dat$hpo_id) - phenoNet[[new_column]] <- data_dict[phenoNet$vertex.names] + phenoNet[[new_column]] <- data_dict[phenoNet$name] return(phenoNet) } diff --git a/R/make_ontology.R b/R/make_ontology.R deleted file mode 100644 index 5b77dc1..0000000 --- a/R/make_ontology.R +++ /dev/null @@ -1,58 +0,0 @@ -#' Make an ontologyIndex object -#' -#' Make a \link[ontologyIndex]{ontology_index} -#' object from the Open Biomedical Ontologies (OBO) file. -#' By default, this function uses the Human Phenotype Ontology (HPO) -#' distributed via the official -#' \href{https://github.com/obophenotype/human-phenotype-ontology/releases}{ -#' HPO GitHub Releases}. -#' @param fix_ascii Fix non-ASCII characters in metadata. -#' @inheritParams get_data -#' @inheritParams piggyback::pb_download -#' @inheritParams ontologyIndex::get_OBO -#' @inheritDotParams ontologyIndex::get_OBO -#' @returns \link[ontologyIndex]{ontology_index} object -#' -#' @keywords internal -#' @importFrom ontologyIndex get_OBO -make_ontology <- function(file = "hp-base.obo", - repo = "obophenotype/human-phenotype-ontology", - tag = "latest", - save_dir = tools::R_user_dir( - package = "HPOExplorer", - which = "cache" - ), - extract_tags = "everything", - fix_ascii = TRUE, - overwrite = TRUE, - upload_tag = NULL, - ...){ - - #### Download data #### - f <- get_data(file = file, - repo = repo, - tag = tag, - save_dir = save_dir, - overwrite = overwrite) - #### Import data #### - hpo <- ontologyIndex::get_OBO(file = f, - extract_tags = "everything", - ...) - #### Fix non-ASCII characters in metadata #### - if(fix_ascii){ - func <- function(v){ - Encoding(v) <- "latin1" - iconv(v, "latin1", "UTF-8") - } - attributes(hpo)$version <- func(attributes(hpo)$version) - } - if(!is.null(upload_tag)){ - save_path <- file.path(save_dir,gsub("\\.obo",".rds",file)) - saveRDS(hpo,save_path) - piggyback::pb_upload(file = save_path, - repo = "neurogenomics/HPOExplorer", - overwrite = TRUE, - tag = upload_tag) - } - return(hpo) -} diff --git a/R/make_phenos_dataframe.R b/R/make_phenos_dataframe.R index 6513d50..d9caf6a 100644 --- a/R/make_phenos_dataframe.R +++ b/R/make_phenos_dataframe.R @@ -1,58 +1,22 @@ +#' @describeIn make_ make_ #' Make phenotypes dataframe #' #' Make a dataframe from a subset of the Human Phenotype Ontology. -#' @param hpo Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}. -#' @param phenotype_to_genes Output of -#' \link[HPOExplorer]{load_phenotype_to_genes} mapping phenotypes -#' to gene annotations. -#' @param ancestor The ancestor to get all descendants of. If \code{NULL}, -#' returns the entirely ontology. -#' @param add_ont_lvl_absolute Add the absolute ontology level of each HPO term. -#' See \link[HPOExplorer]{get_ont_lvls} for more details. -#' @param add_ont_lvl_relative Add the relative ontology level of each HPO term. -#' See \link[HPOExplorer]{get_ont_lvls} for more details. -#' @param add_description Whether to get the phenotype descriptions as -#' well (slower). -#' @param add_info_contents Add information content column for each phenotype. -#' @param add_disease_data Add all disease metadata columns. -#' This will expand the data using \code{allow.cartesian=TRUE}. -#' @param add_ndiseases Add the number of diseases per phenotype. -#' @param add_pheno_frequencies Add the frequency of each phenotype in -#' each disease. -#' @param add_tiers Add severity Tiers column using -#' \link[HPOExplorer]{add_tier}. -#' @param add_severities Add severity column using -#' \link[HPOExplorer]{add_severity}. -#' @param add_hoverboxes Add hoverdata with -#' \link[HPOExplorer]{make_hoverboxes}. -#' @param add_onsets Add age of onset columns using -#' \link[HPOExplorer]{add_onset}. -#' @param add_deaths Add age of death columns using -#' \link[HPOExplorer]{add_death}. -#' @param columns A named vector of columns in \code{phenos} -#' to add to the hoverdata via \link[HPOExplorer]{make_hoverboxes}. -#' @param add_disease_definitions Add disease definitions. -#' @param include_mondo Add \href{https://mondo.monarchinitiative.org/}{MONDO} -#' IDs, names, and definitions to each disease. -#' @param verbose Print messages. #' @inheritParams ggnetwork_plot #' @inheritParams make_network_object #' @returns The HPO in dataframe format. #' #' @export -#' @importFrom ontologyIndex get_descendants -#' @importFrom data.table setnames := .N #' @examples #' phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay") make_phenos_dataframe <- function(ancestor = NULL, hpo = get_hpo(), phenotype_to_genes = load_phenotype_to_genes(), - adjacency = NULL, ##### Phenotype metadata #### add_ont_lvl_absolute = TRUE, add_ont_lvl_relative = FALSE, - add_info_contents = FALSE, + add_info_content = FALSE, add_description = TRUE, #### Disease/symptom metadata #### add_disease_data = FALSE, @@ -70,34 +34,30 @@ make_phenos_dataframe <- function(ancestor = NULL, interactive = TRUE, verbose = TRUE ){ - # devoptera::args2vars(make_phenos_dataframe) - # ancestor = "Neurodevelopmental delay" - hpo_id <- . <- NULL; if(!is.null(ancestor)){ - IDx <- get_hpo_id_direct(phenotype = ancestor, + IDx <- get_hpo_id_direct(term = ancestor, hpo = hpo) - IDx_all <- ontologyIndex::get_descendants(ontology = hpo, - roots = IDx, - exclude_roots = FALSE) + IDx_all <- simona::dag_offspring(hpo, + term=IDx, + include_self = TRUE) descendants <- phenotype_to_genes[hpo_id %in% IDx_all,] } else { descendants <- phenotype_to_genes } messager("Extracting data for", formatC(length(unique(descendants$hpo_name)),big.mark = ","), - "descendents.",v=verbose) - messager("Computing gene counts.",v=verbose) + "descendents.") + messager("Computing gene counts.") phenos <- descendants[,.(geneCount=.N), by=c("hpo_id","hpo_name")] #### Annotate phenotypes #### phenos <- annotate_phenos(phenos = phenos, hpo = hpo, - adjacency = adjacency, ##### Phenotype metadata #### add_ont_lvl_absolute = add_ont_lvl_absolute, add_ont_lvl_relative = add_ont_lvl_relative, - add_info_contents = add_info_contents, + add_info_content = add_info_content, add_description = add_description, #### Disease/symptom metadata #### add_disease_data = add_disease_data, @@ -112,7 +72,6 @@ make_phenos_dataframe <- function(ancestor = NULL, #### Extra ##### add_hoverboxes = add_hoverboxes, columns = columns, - interactive = interactive, - verbose = verbose) + interactive = interactive) return(phenos) } diff --git a/R/assign_tiers.R b/R/make_tiers.R similarity index 77% rename from R/assign_tiers.R rename to R/make_tiers.R index 286d7c5..7df6ed5 100644 --- a/R/assign_tiers.R +++ b/R/make_tiers.R @@ -1,4 +1,5 @@ -#' Add severity Tiers (auto) +#' @describeIn make_ make_ +#' Make severity Tiers (auto) #' #' Automatically add severity Tier for each HPO ID, in accordance with the #' rating system provided by @@ -32,21 +33,15 @@ #' @param search_ancestors Inherit Tiers of ancestors. #' @param search_descendants Inherit Tiers of descendants. #' @param as_datatable Return the results as a \link[data.table]{data.table}. -#' @inheritParams make_phenos_dataframe -#' @inheritParams make_network_object #' @returns Tier assignments for each term in \code{terms}. #' Will be returned as either a named vector or a \link[data.table]{data.table}. #' #' @export -#' @importFrom data.table data.table -#' @importFrom utils data -#' @importFrom stats setNames -#' @importFrom ontologyIndex get_ancestors get_descendants #' @examples -#' terms <- get_hpo()$id[seq(10)] -#' tiers <- assign_tiers(terms = terms) -assign_tiers <- function(hpo=get_hpo(), - terms=hpo$id, +#' terms <- get_hpo()@terms[seq(100)] +#' tiers <- make_tiers(terms = terms) +make_tiers <- function(hpo=get_hpo(), + terms=hpo@terms, keyword_sets = list( Tier1=c("intellectual disability","death"), Tier2=c("impaired mobility","malform"), @@ -62,10 +57,7 @@ assign_tiers <- function(hpo=get_hpo(), search_descendants=TRUE, as_datatable=FALSE, verbose=TRUE){ - - # devoptera::args2vars(assign_tiers) - - disease_name <- definition <- NULL; + disease_name <- NULL; #### Gather sets of IDs in the HPO that would qualify #### annot <- load_phenotype_to_genes("phenotype.hpoa") hpo_sets <- lapply(stats::setNames(names(keyword_sets), @@ -76,28 +68,28 @@ assign_tiers <- function(hpo=get_hpo(), #### Matched in the name ##### if(isTRUE(check_names)){ ids1 <- grep(paste(k,collapse = "|"), - hpo$name,value = TRUE, ignore.case = TRUE) + hpo@elementMetadata$name,value = TRUE, ignore.case = TRUE) ids <- c(ids,ids1) } #### Matched in the definition #### if(isTRUE(check_definitions)){ hp_sub <- grep(paste(k,collapse = "|"), - hpo$def, + hpo@elementMetadata$definition, ignore.case = TRUE,value = TRUE) - ids2 <- hpo$name[names(hp_sub)] + ids2 <- hpo@elementMetadata$name[names(hp_sub)] ids <- c(ids,ids2) } #### Matches in the disease association #### if(isTRUE(check_diseases)){ annot_sub <- annot[grepl(paste(k,collapse = "|"), disease_name, ignore.case = TRUE),] - ids3 <- hpo$name[names(hpo$name) %in% annot_sub$hpo_id] + ids3 <- hpo@terms[hpo@terms %in% annot_sub$hpo_id] ids <- c(ids,ids3) } ids <- ids[!duplicated(names(ids))] messager(paste0("keyword_set=",nm,":"), "Found",formatC(length(unique(ids)),big.mark = ","), - "terms matching category.",v=verbose) + "terms matching category.") return(ids) }) #### Remove phenotypes belonging to multiple tiers #### @@ -108,8 +100,7 @@ assign_tiers <- function(hpo=get_hpo(), terms <- unique(terms) messager("Auto-assigning severity Tiers to", formatC(length(terms),big.mark = ","), - "terms.", - v=verbose) + "terms.") tiers <- lapply(stats::setNames(terms, terms), function(term){ @@ -118,17 +109,20 @@ assign_tiers <- function(hpo=get_hpo(), return("direct") } else if(isTRUE(search_descendants) && any(names(h) %in% - ontologyIndex::get_descendants( - ontology = hpo, - roots = term))){ + simona::dag_offspring( + dag = hpo, + term = term) + ) + ){ return("descendant") } else if(isTRUE(search_ancestors) && any(names(h) %in% - ontologyIndex::get_ancestors( - ontology = hpo, - terms = term))){ + simona::dag_ancestors( + dag = hpo, + term = term) + ) + ){ return("ancestor") - } else { NULL } @@ -136,11 +130,11 @@ assign_tiers <- function(hpo=get_hpo(), }) |> unlist() #### Return #### if(isTRUE(as_datatable)){ - dt <- data.table::data.table(hpo_id=names(tiers), - tier_auto=unname(tiers), - key="hpo_id") - dups <- dt$hpo_id[duplicated(dt$hpo_id)] - return(dt) + dat <- data.table::data.table(hpo_id=names(tiers), + tier_auto=unname(tiers), + key="hpo_id") + dups <- dat$hpo_id[duplicated(dat$hpo_id)] + return(dat) } else { return(tiers) } diff --git a/R/map_models.R b/R/map_models.R deleted file mode 100644 index 9690575..0000000 --- a/R/map_models.R +++ /dev/null @@ -1,26 +0,0 @@ -map_models <- function(){ - - #### Get disease models #### - ## Link by disease (MONDO_ID) - models <- get_monarch("disease_to_model") - top_targets <- MultiEWCE::example_targets$top_targets - top_targets_map <- data.table::merge.data.table( - top_targets, - map, - by.x = "hpo_id", - by.y = "id1", - all = FALSE - ) - top_targets_fmap <- data.table::merge.data.table(top_targets, - map2[,-c("hpo_name")], - by="hpo_id") - messager(round(length(unique(top_targets_map$hpo_id)) / - length(unique(top_targets$hpo_id))*100,1), - "% of hpo_ids mapped across non-human ontologies.") - messager(round(length(unique(top_targets_fmap$hpo_id)) / - length(unique(top_targets$hpo_id))*100,1), - "% of hpo_ids mapped across non-human ontologies.") - - - sum(top_targets$hpo_id %in% map2$hpo_id) -} diff --git a/R/map_phenotypes.R b/R/map_phenotypes.R new file mode 100644 index 0000000..ff64ba1 --- /dev/null +++ b/R/map_phenotypes.R @@ -0,0 +1,26 @@ +#' @describeIn map_ map_ +#' Harmonise phenotypes +#' +#' Harmonise a mixed vector of phenotype names (e.g. "Focal motor seizure") +#' and HPO IDs (e.g. c("HP:0000002","HP:0000003")). +#' @returns Character vector +#' +#' @export +#' @import KGExplorer +#' @examples +#' terms <- c("Focal motor seizure","HP:0000002","HP:0000003") +#' #### As phenotype names #### +#' term_names <- map_phenotypes(terms=terms) +#' #### As HPO IDs #### +#' term_ids <- map_phenotypes(terms=terms, to="id") +map_phenotypes <- function(terms, + hpo = get_hpo(), + to=c("name","id"), + keep_order = TRUE, + invert = FALSE){ + KGExplorer::map_ontology_terms(terms = terms, + ont = hpo, + to = to, + keep_order = keep_order, + invert = invert) +} diff --git a/R/map_upheno.R b/R/map_upheno.R deleted file mode 100644 index e5658de..0000000 --- a/R/map_upheno.R +++ /dev/null @@ -1,69 +0,0 @@ -#' Map phenotypes across uPheno -#' -#' Map phenotypes across species within the Unified Phenotype Ontology (uPheno). -#' First, gathers phenotype-phenotype mappings across ontologies. -#' Next, gathers all phenotype-gene associations for each ontology, -#' converts all genes to human HGNC orthologs, and computes the number of -#' overlapping orthologs between all pairs mapped phenotypes. -#' Finally, plots the results as the proportion of intersecting genes between -#' all pairs of phenotypes. -#' @source https://data.monarchinitiative.org/upheno2/current/qc/index -#' @source https://data.monarchinitiative.org/upheno2/current/upheno-release/all/index.html -#' -#' @param save_dir Directory to save cached data. -#' @param pheno_map_method Method to use for mapping phenotypes across ontologies. -#' \itemize{ -#' \item{"upheno"}{Use uPheno's phenotype-to-phenotype mappings. -#' Contains fewer ontologies but with greater coverage of phenotypes.} -#' \item{"monarch"}{Use Monarch's phenotype-to-phenotype mappings. -#' Contains more ontologies but with less coverage of phenotypes. -#' } -#' } -#' @param gene_map_method Method to use for mapping genes across species. -#' \itemize{ -#' \item{"monarch"}{Use Monarch's gene-to-gene mappings.} -#' \item{"orthogene"}{Use \link[orthogene]{convert_orthologs} to generate -#' gene-to-gene mappings.} -#' } -#' @param force_new Force new data to be downloaded and processed. -#' @param subset_db1 Subset of ontologies to include in the plot. -#' @param save_dir Directory to save cached data. -#' @param show_plot Show the plot. -#' @param fill_scores Fill missing scores in the "equivalence_score" and -#' "subclass_score" columns with this value. These columns represent the -#' quality of mapping between two phenotypes on a scale from 0-1. -#' @param terms A subset of HPO IDs to include in the final dataset and plots -#' (e.g. c("HP:0001508","HP:0001507")). -#' @returns A list containing the data and plot. -#' -#' @export -#' @import data.table -#' @importFrom tools R_user_dir -#' @examples -#' res <- map_upheno() -map_upheno <- function(pheno_map_method=c("upheno","monarch"), - gene_map_method=c("monarch","orthogene"), - subset_db1=c("HP"), - terms=NULL, - fill_scores=NULL, - show_plot=TRUE, - force_new=FALSE, - save_dir=tools::R_user_dir(package = "HPOExplorer", - which = "cache")){ - # devoptera::args2vars(map_upheno) - - #### Get data #### - pheno_map_genes_match <- map_upheno_data(pheno_map_method=pheno_map_method, - gene_map_method=gene_map_method, - fill_scores=fill_scores, - force_new = force_new, - terms=terms) - #### Plots #### - plots <- map_upheno_plot(pheno_map_genes_match, - subset_db1=subset_db1) - #### Return #### - return( - list(data=pheno_map_genes_match, - plots=plots) - ) -} diff --git a/R/map_upheno_data.R b/R/map_upheno_data.R deleted file mode 100644 index a666e86..0000000 --- a/R/map_upheno_data.R +++ /dev/null @@ -1,61 +0,0 @@ -#' Map uPheno data -#' -#' Get uPheno cross-species mapping data by: -#' \itemize{ -#' \item{Downloading cross-species phenotype-phenotype mappings.} -#' \item{Downloading within-spceies phenotype-gene mappings, -#' and converting these genes to human orthologs.} -#' \item{Merging the phenotype-phenotype and phenotype-gene mappings.} -#' \item{Filtering out any mappings that do not have a human ortholog within -#' each respective phenotype.} -#' \item{Calculating the proportion of orthologous genes overlapping across -#' the species-specific phenotype-gene mappings.} -#' \item{Iterating the above steps using multiple methods -#' (\code{pheno_map_method}) and concatenating the results together.} -#' } -#' @param keep_nogenes Logical indicating whether to keep mappings that do not -#' have any orthologous genes. -#' @inheritParams map_upheno -#' @returns A data.table containing the mapped data. -#' -#' @export -#' @examples -#' pheno_map_genes_match <- map_upheno_data() -map_upheno_data <- function(pheno_map_method=c("upheno","monarch"), - gene_map_method=c("monarch","orthogene"), - keep_nogenes=FALSE, - fill_scores=NULL, - terms=NULL, - save_dir=tools::R_user_dir(package = "HPOExplorer", - which = "cache"), - force_new=FALSE){ - # devoptera::args2vars(map_upheno_data) - - #### Check for cached data #### - path <- file.path(save_dir,"pheno_map_genes_match.rds") - if(file.exists(path) && - isFALSE(force_new)){ - ## Read from cache - messager("Importing cached data:",path) - pheno_map_genes_match <- readRDS(path) - } else { - ## Create - pheno_map_genes_match <- lapply( - stats::setNames(pheno_map_method, - pheno_map_method), - function(m){ - map_upheno_data_i(pheno_map_method=m, - gene_map_method=gene_map_method, - keep_nogenes=keep_nogenes, - fill_scores=fill_scores, - terms=terms) - }) |> data.table::rbindlist(fill=TRUE, idcol = "pheno_map_method") - ## Save - messager("Caching processed file -->",path) - attr(pheno_map_genes_match,"version") <- format(Sys.Date(), "%Y-%m-%d") - dir.create(save_dir, showWarnings = FALSE, recursive = TRUE) - saveRDS(pheno_map_genes_match,path) - } - ## Return - return(pheno_map_genes_match) -} diff --git a/R/map_upheno_data_i.R b/R/map_upheno_data_i.R deleted file mode 100644 index 575a54b..0000000 --- a/R/map_upheno_data_i.R +++ /dev/null @@ -1,198 +0,0 @@ -map_upheno_data_i <- function(pheno_map_method, - gene_map_method, - keep_nogenes, - fill_scores, - terms){ - hgnc_id <- gene_label1 <- subject <- subject_taxon_label <- - hgnc_label <- n_phenotypes <- n_genes_intersect <- - prop_intersect <- p1 <- p2 <- db1 <- db2 <- id1 <- id2 <- - equivalence_score <- subclass_score <- hgnc_label1 <- - hgnc_label2 <- hgnc_id2 <- gene_id <- gene_label1 <- gene_label2 <- - n_genes_db1 <- object <- - n_genes_db2 <- subject_taxon_label1 <- subject_taxon_label2 <- - phenotype_genotype_score <- equivalence_score <- NULL; - - pheno_map_method <- pheno_map_method[1] - gene_map_method <- gene_map_method[1] - messager(paste0("map_upheno_data: pheno_map_method=", - shQuote(pheno_map_method))) - #### Import data #### - ## Cross-species phenotype map - { - if(pheno_map_method=="upheno"){ - pheno_map <- get_upheno("bestmatches") - pheno_map[,db1:=gsub("*:.*","",basename(id1))] - } else if(pheno_map_method=="monarch"){ - pheno_map <- get_monarch("phenotype_to_phenotype") |> - data.table::setnames(c("label_x","label_y"),c("label1","label2")) - pheno_map[,id1:=gsub("_",":",basename(p1)) - ][,id2:=gsub("_",":",basename(p2))] - pheno_map[,db1:=gsub("*_.*","",basename(p1)) - ][,db2:=gsub("*_.*","",basename(p2))] - pheno_map[,equivalence_score:=NA][,subclass_score:=NA] - } - #### Filter data #### - if(!is.null(terms)){ - pheno_map <- pheno_map[id1 %in% terms,] - if(nrow(pheno_map)==0){ - stop("No terms found in pheno_map") - } - } - } - - ## Gene-phenotype associations across 8 species - { - genes <- get_monarch("phenotype_to_gene") - data.table::setkeyv(genes,"object") - messager("Unique species with genes:", - data.table::uniqueN(genes$subject_taxon_label)) - # genes[,db:=gsub("*:.*","",object)] - ## Create an ID-label map for each phenotype - genes_map <- genes[,c("object","object_label", - "subject_taxon","subject_taxon_label")] |> unique() - genes_map <- genes_map[,.SD[1], by="object" - ][,db:=gsub("*:.*","",object)] - ## Create an db-species map for each Ontology - species_map <- genes_map[,.SD[1], keyby="db"][,.(db,subject_taxon_label)] - } - - ## Gene-gene homology across 12 species - if(gene_map_method=="monarch"){ - { - homol <- get_monarch("gene_to_gene") - messager("Unique species with ortholog:", - data.table::uniqueN(homol$subject_taxon_label)) - ## Subset to only convert human --> non-human - homol <- homol[grep("^HGNC",subject),] |> - data.table::setnames( - c("subject","subject_label","object","object_label"), - c("hgnc_id","hgnc_label","gene_id","gene_label")) - ## Add human-to-human back into map - hhomol <- (homol[,c("hgnc_id","hgnc_label", - "subject_taxon","subject_taxon_label")] |> unique() - )[,gene_id:=hgnc_id][,gene_label:=hgnc_label] - homol <- rbind(hhomol,homol,fill=TRUE) - message("Unique orthologs: ",data.table::uniqueN(homol$gene_id)) - } - ## Orthogene - } else if(gene_map_method=="orthogene"){ - requireNamespace("orthogene") - homol <- lapply(stats::setNames(unique(genes$subject_taxon_label), - unique(genes$subject_taxon_label)), - function(x){ - - input_species <- if(x=="Xenopus laevis"){ - "Xenopus" - } else if(x=="Caenorhabditis elegans"){ - "celegans" - } else { - x - } - orthogene::convert_orthologs( - gene_df = genes[subject_taxon_label==x,]$subject_label|> - unique(), - # gene_input = "subject_label", - gene_output = "columns", - input_species = input_species, - non121_strategy = "kbs", - output_species = "human") - }) |> - data.table::rbindlist(fill=TRUE, idcol = "subject_taxon_label") |> - data.table::setkeyv(c("subject_taxon_label","input_gene")) - data.table::setnames(homol,"input_gene","subject_label") - message("Unique orthologs: ", - data.table::uniqueN( - homol[subject_taxon_label!="Homo sapiens"]$subject_label)) - } - #### Map non-human genes onto human orthologs #### - { - genes_homol <- data.table::merge.data.table( - genes, - homol[,c("hgnc_id","hgnc_label","gene_id","gene_label")], - by.x="subject", - by.y="gene_id") - messager(formatC(nrow(genes_homol),big.mark = ","),"/", - formatC(nrow(genes),big.mark = ","), - "rows remain after cross-species gene mapping.") - messager(data.table::uniqueN(genes_homol$subject_taxon_label),"/", - data.table::uniqueN(genes$subject_taxon_label), - "species remain after cross-species gene mapping.") - } - - #### Map non-human phenotypes onto human phenotypes #### - #### Merge nonhuman ontology genes with human HPO genes #### - { - pheno_map_genes <- data.table::merge.data.table( - pheno_map, - genes_homol, - by.x="id1", - by.y="object", - all.x = keep_nogenes, - allow.cartesian = TRUE) |> - #### Merge nonhuman ontology genes with human HPO genes #### - data.table::merge.data.table( - genes_homol, - by.x="id2", - by.y="object", - all.y = keep_nogenes, - suffixes = c(1,2), - allow.cartesian = TRUE - ) - ## Fill in missing species for those without gene data - pheno_map_genes[ - is.na(subject_taxon_label1), - subject_taxon_label1:=species_map[db1]$subject_taxon_label] - pheno_map_genes[ - is.na(subject_taxon_label2), - subject_taxon_label2:=species_map[db2]$subject_taxon_label] - ## Add gene counts - pheno_map_genes[,n_genes_db1:=data.table::uniqueN(gene_label1), by="id1"] - pheno_map_genes[,n_genes_db2:=data.table::uniqueN(gene_label2), by="id2"] - ## Report - messager(data.table::uniqueN(pheno_map_genes$subject_taxon_label2),"/", - data.table::uniqueN(genes_homol$subject_taxon_label), - "species remain after cross-species phenotype mapping.") - ## Remove - # remove(genes_human,genes_nonhuman,pheno_map) - } - - #### Count the number of overlapping genes - { - if(isFALSE(keep_nogenes)){ - pheno_map_genes_match <- pheno_map_genes[hgnc_label1==hgnc_label2,] - } else { - pheno_map_genes_match <- pheno_map_genes |> data.table::copy() - } - pheno_map_genes_match <- - pheno_map_genes_match[, - list(n_genes_intersect=data.table::uniqueN(hgnc_id2)), - by=c("id1","db1","label1","n_genes_db1", - "id2","db2","label2","n_genes_db2", - "subject_taxon1","subject_taxon_label1", - "subject_taxon2","subject_taxon_label2", - "equivalence_score","subclass_score") - ] |> - data.table::setorderv("n_genes_intersect",-1) - pheno_map_genes_match[,n_phenotypes:=data.table::uniqueN(id1), - by=c("db1","db2", - "subject_taxon1","subject_taxon2", - "subject_taxon_label1","subject_taxon_label2" - )] - pheno_map_genes_match[,prop_intersect:=(n_genes_intersect/n_genes_db1)] - ## Compute a score that captures both the phenotype mapping score and - ## the poportional gene overlap score. - pheno_map_genes_match[,phenotype_genotype_score:=data.table::nafill( - (equivalence_score*prop_intersect)^(1/2),fill = 0)] - # remove(pheno_map_genes) - } - ## Fill missing data - if(!is.null(fill_scores)){ - data.table::setnafill(x = pheno_map_genes_match, - fill = fill_scores, - cols=c("equivalence_score","subclass_score")) - } - ## Check that the number of intersecting nonhuman ontology genes is always - ## less than or equal to the number of total HPO genes. - # pheno_map_genes_match[n_genes>n_genes_hpo,] - return(pheno_map_genes_match) -} diff --git a/R/map_upheno_heatmap.R b/R/map_upheno_heatmap.R deleted file mode 100644 index 9ad5911..0000000 --- a/R/map_upheno_heatmap.R +++ /dev/null @@ -1,97 +0,0 @@ -map_upheno_heatmap <- function(plot_dat, - hpo_ids=NULL, - value.var=c("phenotype_genotype_score", - "prop_intersect", - "equivalence_score", - "subclass_score"), - min_rowsums=NULL, - cluster_from_ontology=FALSE, - save_dir=tempdir(), - height = 15, - width = 10){ - requireNamespace("scKirby") - requireNamespace("ComplexHeatmap") - id1 <- hpo_id <- NULL; - set.seed(2023) - value.var <- match.arg(value.var) - # hpo_ids <- MultiEWCE::example_targets$top_targets$hpo_id - - ### Subset phenotypes - if(!is.null(hpo_ids)) plot_dat <- plot_dat[id1 %in% unique(hpo_ids)] - plot_dat[,hpo_id:=id1][,label1:=gsub(" (HPO)","",label1,fixed = TRUE)] - plot_dat <- add_ancestor(plot_dat) - data.table::setkeyv(plot_dat,"label1") - - ### Plot - X <- data.table::dcast.data.table(plot_dat, - formula = label1 ~ subject_taxon_label2, - fill = 0, - value.var = value.var, - fun.aggregate = mean, - na.rm=TRUE) |> - scKirby::to_sparse() - #### Filter by min_rowsums #### - if(!is.null(min_rowsums)){ - X <- X[Matrix::rowSums(X, na.rm = TRUE)>=min_rowsums,] - } - if(nrow(X)==0) stop("No rows remain after filtering by `min_rowsums`.") - - #### Get clusters from ontology #### - if(isTRUE(cluster_from_ontology)){ - ids <- harmonise_phenotypes(plot_dat$id1,keep_order = FALSE) - ids <- ids[ids %in% rownames(X)] - ## best to do this on the entire HPO, then subset - cluster_rows <- convert_ontology(to="igraph_dist_hclust", - terms = names(ids)) - # leaves <- dendextend::get_leaves_attr(cluster_rows,"label") - # c2 <- dendextend::prune(cluster_rows, - # leaves[!leaves %in% names(ids)], - # keep_branches = FALSE) - ## subset to only those in the heatmap - X <- X[ids,] - } else { - cluster_rows <- TRUE - } - #### Add annotation #### - annot_dat <- plot_dat[rownames(X), - c("label1","n_genes_db1", - "ancestor_name")] |> unique() - col_fun <- colorRamp2::colorRamp2( - seq(min(annot_dat$n_genes_db1), - max(annot_dat$n_genes_db1), - length.out=4), - pals::gnuplot(4)) - la <- ComplexHeatmap::rowAnnotation( - df=data.frame(annot_dat[,-c("label1")], - row.names = annot_dat$label1), - # ?ComplexHeatmap::Legend - show_legend = c(TRUE, FALSE), - annotation_legend_param = list( - n_genes_db1= list(title = "HPO genes", - col_fun = col_fun)) - ) - row_split <- if(isFALSE(cluster_from_ontology)) annot_dat$ancestor_name - #### make heatmap #### - ch <- ComplexHeatmap::Heatmap(matrix = as.matrix(X), - name = value.var, - cluster_rows = cluster_rows, - row_title_rot = 0, - row_names_gp = grid::gpar(fontsize = 7), - row_title_gp = grid::gpar(fontsize = 9), - row_split = row_split, - col = pals::viridis(n = 20), - cluster_columns = FALSE, - left_annotation = la) - # methods::show(ch) - #### Save #### - if(!is.null(save_dir)){ - grDevices::pdf(file = file.path(save_dir, - paste0("map_upheno_heatmap-", - value.var,".pdf")), - height = height, - width = width) - methods::show(ch) - grDevices::dev.off() - } - return(ch) -} diff --git a/R/map_upheno_nn.R b/R/map_upheno_nn.R deleted file mode 100644 index d629cb4..0000000 --- a/R/map_upheno_nn.R +++ /dev/null @@ -1,62 +0,0 @@ -map_upheno_nn <- function(){ - - #### Find nearest neighbors for each HPO term in each non-human ontology #### - Xnonhuman <- data.table::dcast.data.table( - genes_nonhuman, - formula = ortholog_hgnc_label ~ alt_id, - fun.aggregate = length, - ) |> scKirby::to_sparse() - Xhuman <- data.table::dcast.data.table( - genes_human, - formula = hgnc_label ~ hpo_id, - fun.aggregate = length, - ) |> scKirby::to_sparse() - X <- Seurat::RowMergeSparseMatrices(mat1 = Xhuman, Xnonhuman) - obj <- scKirby::process_seurat( - list(data=list(genes=X), - obs=data.frame(genes_map, - row.names = genes_map$object) - ), - nfeatures = 4600 - ) - - dbs <- stringr::str_split(colnames(Xnonhuman),"[:]",simplify = TRUE)[,1] |> - unique() - BPPARAM <- BiocParallel::MulticoreParam(10,progressbar = TRUE) - hpo_ids <- grep("^HP:",rownames(obj@graphs$RNA_snn), value = TRUE) - nei_mat <- BiocParallel::bplapply( - BPPARAM = BPPARAM, - X = stats::setNames(hpo_ids,hpo_ids), - FUN = function(hpo_id){ - lapply(dbs, - function(db){ - obj@graphs$RNA_snn[ - hpo_id, - grepl(db,colnames(obj@graphs$RNA_snn))] |> - sort() |> - tail(10) - }) - }) - dt <- lapply(nei_mat, - function(x) { - lapply(x, - function(y){ - data.table::as.data.table(data.frame(val=y), - keep.rownames = "alt_id") - }) |> - data.table::rbindlist() - }) |> - data.table::rbindlist(idcol = "hpo_id") - - dt_map <- genes_map[dt[val>.5,], on=c("object"="alt_id"), allow.cartesian = TRUE] - dt_map <- genes_map[dt_map, on=c("object"="hpo_id"), allow.cartesian = TRUE] - - # View(dt_map[val>.95]) - # o = order(obj@graphs$RNA_snn, decreasing=TRUE)[1:10] - # mm <- MatrixModels::glm4() - - # res <- phenomix::iterate_lm(xmat = Xhuman, ymat = Xnonhuman, workers = 10) - # orthogene:::sparsity(Xg) - # Xc <- fastcluster::hclust(dist(Xg)) - -} diff --git a/R/map_upheno_plots.R b/R/map_upheno_plots.R deleted file mode 100644 index f5fbdba..0000000 --- a/R/map_upheno_plots.R +++ /dev/null @@ -1,52 +0,0 @@ -#' Map uPheno plot -#' -#' Generate multiple kinds of plots summarising mappings of phenotypes and genes -#' between the HPO and various non-human ontologies within uPheno. -#' @param types A character vector of plot types to generate. -#' @param pheno_map_genes_match uPheno cross-species mapping data generated by -#' \link[HPOExplorer]{map_upheno_data}. -#' @param subset_db1 A character vector of reference ontologies to subset -#' \code{pheno_map_genes_match} by. -#' @returns A named list of \pkg{ggplot2} objects. -#' -#' @export -#' @examples -#' pheno_map_genes_match <- map_upheno_data() -#' upheno_plots <- map_upheno_plot( -#' pheno_map_genes_match = pheno_map_genes_match) -map_upheno_plot <- function(pheno_map_genes_match=NULL, - subset_db1="HP", - types=c("rainplot","scatterplot","heatmap")){ - ## Prepare plot data - n_phenotypes <- db1 <- id1 <-NULL; - { - #### Subset data #### - if(!is.null(subset_db1)) { - plot_dat <- pheno_map_genes_match[db1 %in% subset_db1,] - messager(data.table::uniqueN(plot_dat$subject_taxon_label2),"/", - data.table::uniqueN(pheno_map_genes_match$subject_taxon_label2), - "species remain after filtering by `subset_db1`.") - }else { - plot_dat <- data.table::copy(pheno_map_genes_match) - } - ## Only use one phenotype-phenotype per row - plot_dat <- plot_dat[,.SD[1],by=c("db1","id1","db2","id2")] - ## Recompute n_phenotypes - plot_dat[,n_phenotypes:=data.table::uniqueN(id1), - by=c("db1","db2", - "subject_taxon1","subject_taxon2", - "subject_taxon_label1","subject_taxon_label2" - )] - } - plots <- list() - if("rainplot" %in% types){ - plots[["rainplot"]] <- map_upheno_rainplot(plot_dat = plot_dat) - } - if("scatterplot" %in% types){ - plots[["scatterplot"]] <- map_upheno_scatterplot(plot_dat = plot_dat) - } - if("heatmap" %in% types){ - plots[["heatmap"]] <- map_upheno_heatmap(plot_dat = plot_dat) - } - return(plots) -} diff --git a/R/map_upheno_rainplot.R b/R/map_upheno_rainplot.R deleted file mode 100644 index e3a5ab4..0000000 --- a/R/map_upheno_rainplot.R +++ /dev/null @@ -1,51 +0,0 @@ -map_upheno_rainplot <- function(plot_dat){ - - requireNamespace("ggplot2") - requireNamespace("ggdist") - requireNamespace("tidyquant") - - - ### Plot proportion of intersecting orthologs per ontology #### - ggplot(plot_dat, - aes(x=paste0(subject_taxon_label2, - "\n(n = ",n_phenotypes," phenotypes)"), - y=(n_genes_intersect/n_genes_db1), - fill=factor(db2))) + - facet_grid(db1~., - scales = "free_y", - space = "free_y") + - # add half-violin from {ggdist} package - ggdist::stat_halfeye( - # adjust bandwidth - adjust = 0.5, - # move to the right - justification = -0.1, - # remove the slub interval - .width = 0, - point_colour = NA - ) + - geom_boxplot( - show.legend = FALSE, - width = 0.12, - # removing outliers - outlier.color = NA, - alpha = 0.5 - ) + - # ggdist::stat_dots( - # aes(color=factor(db2)), - # show.legend = FALSE, - # # ploting on left side - # side = "left", - # # adjusting position - # justification = 1.1, - # # adjust grouping (binning) of observations - # # binwidth = 0.25 - # ) + - # Themes and Labels - tidyquant::scale_fill_tq() + - tidyquant::scale_color_tq() + - tidyquant::theme_tq() + - coord_flip() + - labs(x="Species", - fill="Ontology") -} diff --git a/R/map_upheno_scatterplot.R b/R/map_upheno_scatterplot.R deleted file mode 100644 index 81527e1..0000000 --- a/R/map_upheno_scatterplot.R +++ /dev/null @@ -1,11 +0,0 @@ -map_upheno_scatterplot <- function(plot_dat){ - - #### Check if there's a relationship between phenotype mapping scores - # and number of shared genes #### - ggplot(plot_dat, - aes(x=(n_genes_intersect/n_genes_db1), - y=equivalence_score)) + - geom_point() + - facet_grid(rows = "subject_taxon_label2") + - geom_smooth() -} diff --git a/R/mondo_to_matrix.R b/R/mondo_to_matrix.R deleted file mode 100644 index 0e3d54c..0000000 --- a/R/mondo_to_matrix.R +++ /dev/null @@ -1,48 +0,0 @@ -mondo_to_matrix <- function(species=c("Homo sapiens")){ - - subject_taxon_label <- evidence_score <- evidence <- object_definition <- - object <- NULL; - - #### Prepare data #### - dat <- data.table::fread(paste0( - "https://data.monarchinitiative.org/latest/tsv/", - "all_associations/gene_disease.all.tsv.gz" - )) - dat[,evidence_score:=sapply(evidence, - function(x){length(strsplit(x,"|")[[1]])})] - dat <- dat[subject_taxon_label %in% species] - mondo <- get_mondo() - dat[,object_definition:=mondo$def[object]] - #### Make matrix #### - X_dt <- data.table::dcast.data.table(data = dat, - formula = "subject_label ~ object", - value.var = "evidence_score", - fun.aggregate = sum, - fill = 0) - X <- as.matrix(X_dt[,-1], rownames = X_dt[[1]]) |> - methods::as("sparseMatrix") - #### Make col meta.data #### - col_data <- dat[,c("subject_taxon", - "subject_taxon_label", - "object", - "object_label", - "object_definition")] |> - unique() |> - data.frame() - rownames(col_data) <- col_data$object - - # obj <- scNLP::seurat_pipeline(obj = X, - # meta.data = col_data, - # vars.to.regress = "nFeature_RNA", - # nfeatures = NULL) - # tfidf_plt <- scNLP::plot_tfidf(object = obj, - # label_var = "object_definition") - # dp <- Seurat::DimPlot(obj, group.by = "object_label") + Seurat::NoLegend() - # plotly::ggplotly(dp) - # fp <- Seurat::FeaturePlot(obj, features = "n_genes") - - return(list(X=X, - col_data=col_data, - row_data=dat)) - -} diff --git a/R/network_3d.R b/R/network_3d.R deleted file mode 100644 index a64ae4b..0000000 --- a/R/network_3d.R +++ /dev/null @@ -1,231 +0,0 @@ -#' 3D network -#' -#' Plot a subset of the HPO as a 3D network. -#' @param g \link[igraph]{igraph} object -#' generated by \link[HPOExplorer]{make_igraph_object}. -#' @param layout_func Layout function for the graph. -#' @param node_color_var Variable in the vertex metadata to color nodes by. -#' @param edge_color_var Variable in the edge metadata to color edges by. -#' @param text_color_var Variable in the node metadata to color text by. -#' @param node_symbol_var Variable in the vertex metadata to shape nodes by. -#' @param node_opacity Node opacity. -#' @param edge_opacity Edge opacity. -#' @param node_palette Color palette function for the nodes/points. -#' @param edge_palette Color palette function for the edges/lines. -#' @param kde_palette Color palette function for the KDE plot. -#' @param add_kde Add a kernel density estimation (KDE) plot -#' below the 3D scatter plot (i.e. the "mountains" beneath the points). -#' @param extend_kde Extend the area that the KDE plot covers. -#' @param bg_color Plot background color. -#' @param add_labels Add phenotype name labels to each point. -#' @param keep_grid Keep all grid lines and axis labels. -#' @param aspectmode The proportions of the 3D plot. See the -#' \href{https://plotly.com/python/reference/layout/scene/#layout-scene-aspectmode}{ -#' plotly documentation site} for details. -#' @param hover_width Maximum width of the hover text. -#' @param label_width Maximum width of the label text. -#' @param seed Random seed to enable reproducibility. -#' @param showlegend Show node fill legend. -#' @param show_plot Print the plot after it's been generated. -#' @param save_path Path to save interactive plot to -#' as a self-contained HTML file. -#' @param verbose Print messages. -#' @returns A 3D interactive \link[plotly]{plotly} object. -#' -#' @source -#' \href{https://www.r-bloggers.com/2013/09/network-visualization-part-4-3d-networks/}{ -#' R bloggers} -#' @source -#' \href{https://community.plotly.com/t/is-it-possible-to-connect-scatters-in-3d-scatter-plot/}{ -#' Plotly: Connect points in 3D plot} -#' @source \href{https://plotly.com/r/line-charts/}{ -#' Plotly: Connect points in 2D plot} -#' @source \href{https://plotly.com/r/3d-line-plots/}{ -#' Plotly: 3D lines plots} -#' @source \href{https://plotly.com/python/v3/3d-network-graph/}{ -#' Plotly: 3D network plot} -#' -#' @export -#' @importFrom stringr str_wrap -#' @importFrom methods show -#' @examples -#' phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay") -#' g <- make_igraph_object(phenos = phenos) -#' plt <- network_3d(g=g, show_plot=FALSE) -network_3d <- function(g, - layout_func = igraph::layout.fruchterman.reingold, - node_color_var = "ontLvl", - edge_color_var = "zend", - text_color_var = node_color_var, - node_symbol_var = "ancestor_name", - node_palette = pals::kovesi.cyclic_mrybm_35_75_c68_s25, - edge_palette = pals::kovesi.cyclic_mrybm_35_75_c68_s25, - node_opacity = .75, - edge_opacity = .5, - kde_palette = pals::gnuplot, - add_kde = TRUE, - extend_kde = 1.5, - bg_color = kde_palette(6)[1], - add_labels = FALSE, - keep_grid = FALSE, - aspectmode = 'cube', - hover_width=80, - label_width=80, - seed = 2023, - showlegend = TRUE, - show_plot = TRUE, - save_path = tempfile(fileext = "network_3d.html"), - verbose = TRUE){ - # devoptera::args2vars(network_3d) - # anc <- "Abnormality of the nervous system" - # phenos <- make_phenos_dataframe(ancestor = anc)[ancestor_name==anc,] - # g <- make_igraph_object(phenos = phenos) - # g <- readRDS("~/Downloads/priotised_igraph.rds") - requireNamespace("plotly") - requireNamespace("pals") - requireNamespace("htmlwidgets") - label <- hpo_name <- NULL; - - #### Convert igraph to plotly data #### - d <- igraph_to_plotly(g = g, - layout_func = layout_func, - dim = 3, - seed = seed, - verbose = verbose) - vdf <- d$vertices - vdf[,label:=gsub("\n","
", - stringr::str_wrap(hpo_name, width = label_width))] - edf <- d$edges - remove(d) - #### Create paths and nodes #### - fig <- - plotly::plot_ly() |> - plotly::add_paths(data = edf, - x = ~xend, - y = ~yend, - z = ~zend, - xend = ~xend, - yend = ~yend, - # zend = ~zend, - color = ~get(edge_color_var), - colors = rev(edge_palette(50)), - line = list(shape = "spline"), - type = "scatter3d", - name = "edge", - opacity = edge_opacity, - mode = "lines", - hoverinfo = "none", - showlegend = FALSE, - inherit = FALSE) |> - plotly::add_markers(data = vdf, - x = ~x, - y = ~y, - z = ~z, - symbol = ~stringr::str_wrap( - get(node_symbol_var), - width = label_width - ), - # size = ~ontLvl, - color = ~ get(node_color_var), - colors = ( - node_palette(length( - unique(vdf[[node_color_var]]) - )) - ), - marker = list( - line = list( - color = bg_color - ) - ), - hovertext = ~ stringr::str_wrap( - paste( - paste0('name: ',name), - paste0('hpo_id: ',hpo_id), - paste0('hpo_name: ',hpo_name), - paste0('ontLvl: ',ontLvl), - paste0('ancestor_name: ',ancestor_name), - sep = "
"), - width = hover_width - ), - opacity = node_opacity, - showlegend = showlegend, - type = "scatter3d", - mode = "markers") - #### Add KDE #### - if(isTRUE(add_kde)){ - kd <- kde_surface(xyz = vdf, - extend_kde = extend_kde) - fig <- fig |> - plotly::add_surface(data = kd, - x = ~x, - y = ~y, - z = ~z, - opacity = 1, - hoverinfo = "none", - colorscale = list( - seq(0,1,length.out=6), - kde_palette(n = 6) - ), - showlegend = FALSE, - inherit = FALSE) - } - #### Add text #### - if(isTRUE(add_labels)){ - fig <- fig |> - plotly::add_text(data = vdf, - x = ~x, - y = ~y, - z = ~z, - text = ~label, - color = ~ get(text_color_var), - colors = ( - node_palette(length( - unique(vdf[[text_color_var]]) - )) - ), - # textfont = list(color="rbga(255,255,255,.8"), - hoverinfo = "none", - inherit = FALSE, - showlegend = FALSE) - } - #### Add layout #### - scene <- lapply(stats::setNames(c("xaxis","yaxis","zaxis"), - c("xaxis","yaxis","zaxis")), - function(x){list(showgrid = keep_grid, - showline = keep_grid, - showspikes = keep_grid, - zeroline = keep_grid, - title = list( - text=if(isTRUE(keep_grid)){ - x - } else {""} - ), - showticklabels = keep_grid)} - ) - scene$aspectmode <- aspectmode - fig <- fig |> - plotly::layout( - hoverlabel = list(align = "left"), - plot_bgcolor = bg_color, - paper_bgcolor = bg_color, - scene = scene - # showlegend = FALSE - ) |> - plotly::colorbar(title=edge_color_var) - # file <- file.path("~/Downloads","hpo_plotly.png") - # plotly::export(p = fig, - # file = file, - # selenium = RSelenium::rsDriver(browser = "chrome")) - - # reticulate::py_install(packages = "kaleido",) - # plotly::save_image(p = fig,file = file, width = 10, height =10) - if(isTRUE(show_plot)) methods::show(fig) - if(!is.null(save_path)) { - messager("Saving interactive plot -->",save_path,v=verbose) - dir.create(dirname(save_path),showWarnings = FALSE, recursive = TRUE) - htmlwidgets::saveWidget(widget = fig, - file = save_path, - selfcontained = TRUE) - } - return(fig) -} diff --git a/R/oard_query_api.R b/R/oard_query_api.R deleted file mode 100644 index b5b07f9..0000000 --- a/R/oard_query_api.R +++ /dev/null @@ -1,49 +0,0 @@ -oard_query_api <- function(ids, - concept_prefix="q=", - domain="https://rare.cohd.io/api/", - dataset_id=NULL, - endpoint="vocabulary/findConceptByAny", - batch_size=100, - workers=1, - verbose=TRUE){ - # devoptera::args2vars(oard_query_api) - requireNamespace("BiocParallel") - - ids <- na.omit(unique(ids)) - messager("Querying OARD API for",formatC(length(ids),big.mark = ","), - "IDs.",v=verbose) - batches <- split(ids, ceiling(seq_along(ids)/batch_size)) - oard_query_api_i <- function(batch, - ...){ - URL <- paste0(paste0(domain,endpoint),"?", - ifelse(is.null(dataset_id), - "", - paste0("dataset_id=",dataset_id,"&") - ), - if(!is.null(batch)){ - paste0(concept_prefix, - htmltools::urlEncodePath( - paste(batch,collapse = ";") - ) - ) - } else { - "" - } - ) - # message(URL) - res <- httr::GET(URL) - cont <- httr::content(res, as = "text", encoding = "UTF-8") - js <- jsonlite::fromJSON(cont, simplifyDataFrame = TRUE) - dt <- js$results - if(length(dt)==0) return(dt) - if(nrow(dt)==length(js$parameter$q)) dt$query <- js$parameter$q - return(dt) - } - BiocParallel::register(BiocParallel::MulticoreParam(workers=workers, - progressbar = TRUE)) - RES <- BiocParallel::bplapply(batches, - oard_query_api_i, - match.call()) |> - data.table::rbindlist(idcol = "batch", fill = TRUE) - return(RES) -} diff --git a/R/parse_pheno_frequency.R b/R/parse_pheno_frequency.R index 7bff400..e286ad0 100644 --- a/R/parse_pheno_frequency.R +++ b/R/parse_pheno_frequency.R @@ -5,28 +5,28 @@ parse_pheno_frequency <- function(annot){ freq_dt <- lapply(seq(nrow(annot)), function(i){ f <- annot[i,]$frequency if(grepl("^HP",f)){ - dt <- data.table::data.table( + dat <- data.table::data.table( pheno_freq_name = get_freq_dict()[f], pheno_freq_min = get_freq_dict(type="min")[f], pheno_freq_max = get_freq_dict(type="max")[f]) } else if(grepl("/",f)){ splt <- as.numeric(strsplit(f,"/")[[1]]) val <- splt[[1]] / splt[[2]]*100 - dt <- data.table::data.table( + dat <- data.table::data.table( pheno_freq_name="ratio", pheno_freq_min=val, pheno_freq_max=val) } else if(grepl("[%]",f)){ val <- as.numeric(gsub("[%]","",f)) - dt <- data.table::data.table( + dat <- data.table::data.table( pheno_freq_name="percentage", pheno_freq_min=val, pheno_freq_max=val) } else { return(NULL) } - dt[,hpo_id:=annot[i,]$hpo_id][,disease_id:=annot[i,]$disease_id] - return(dt) + dat[,hpo_id:=annot[i,]$hpo_id][,disease_id:=annot[i,]$disease_id] + return(dat) }) |> data.table::rbindlist(fill=TRUE) freq_dt$pheno_freq_mean <- rowMeans( freq_dt[,c("pheno_freq_min","pheno_freq_max")], na.rm = TRUE diff --git a/R/per_branch_plot.R b/R/per_branch_plot.R index d158678..97005d7 100644 --- a/R/per_branch_plot.R +++ b/R/per_branch_plot.R @@ -17,54 +17,60 @@ #' @returns ggplot object #' #' @export -#' @import ggplot2 -#' @importFrom ontologyIndex get_descendants #' @examples -#' ## Selecting child terms of -#' ## "Abnormality of the nervous system" as background branches #' plt <- per_branch_plot( #' highlighted_branches = "Abnormality of nervous system physiology", #' ancestor = "Abnormality of the nervous system") per_branch_plot <- function(highlighted_branches, ancestor, hpo = get_hpo(), - background_branches = hpo$children[ - hpo$id[match(ancestor, hpo$name)] - ][[1]], + background_branches = simona::dag_children( + hpo, + term = map_phenotypes(terms = ancestor, + hpo = hpo, + to = "id") + ), phenotype_to_genes = load_phenotype_to_genes()) { - - # templateR:::source_all() - # devoptera::args2vars(per_branch_plot) - requireNamespace("ggplot2") - n_phenos <- branch <- target <- NULL; - highlighted_branches_ids <- hpo$id[match(highlighted_branches, hpo$name)] + highlighted_branches_ids <- map_phenotypes(terms = highlighted_branches, + hpo = hpo, + to = "id", + keep_order = FALSE) #### Count phenotypes per branch #### phenos_per_branch <- lapply(background_branches, function(b){ - n <- length(ontologyIndex::get_descendants(ontology = hpo, - roots = b)) + n <- length(simona::dag_offspring(hpo,term = b)) if (b %in% highlighted_branches_ids) { target_branch <- "target" } else { target_branch <- "other" } - data.table::data.table("branch" = hpo$name[b], + data.table::data.table("branch_id" = b, + "branch_name"=map_phenotypes(terms = b, + hpo = hpo, + to = "name"), "n_phenos" = n, "target" = target_branch) }) |> data.table::rbindlist() data.table::setkeyv(phenos_per_branch, "n_phenos") - phenos_per_branch$branch <- factor(x = phenos_per_branch$branch, - levels = unique(phenos_per_branch$branch), - ordered = TRUE) + phenos_per_branch$branch_name <- factor( + x = phenos_per_branch$branch_name, + levels = unique(phenos_per_branch$branch_name), + ordered = TRUE) + #### Plot #### plt <- ggplot(phenos_per_branch, - aes_string(x = "n_phenos", y = "branch", - color = "target", fill = "target")) + + aes(x = !!sym("n_phenos"), + y = !!sym("branch_name"), + color = !!sym("target"), + fill = !!sym("target") + ) + )+ geom_col(width = .5) + labs(x="Descendants (n)", y="hpo_name", fill="Group", color="Group") + theme_bw() + #### Return #### return(plt) } diff --git a/R/phenos_to_granges.R b/R/phenos_to_granges.R index f74bde4..a5c86cd 100644 --- a/R/phenos_to_granges.R +++ b/R/phenos_to_granges.R @@ -6,7 +6,7 @@ #' The resulting object will contain genes (and gene metadata) for all #' genes associated with each phenotypes. #' @param as_datatable Return as a \link[data.table]{data.table}. -#' @param keep_seqnames Chromosomes to keep. +#' @param keep_chr Chromosomes to keep. #' @inheritParams add_genes #' @inheritParams make_network_object #' @inheritParams make_phenos_dataframe @@ -23,7 +23,7 @@ phenos_to_granges <- function(phenos = NULL, phenotype_to_genes = load_phenotype_to_genes(), hpo = get_hpo(), - keep_seqnames = c(seq(22),"X","Y"), + keep_chr = c(seq(22),"X","Y"), by = c("hpo_id","disease_id"), gene_col = "intersection", split.field = "hpo_id", @@ -42,12 +42,10 @@ phenos_to_granges <- function(phenos = NULL, hpo = hpo, by = by, gene_col = gene_col, - allow.cartesian = allow.cartesian, - verbose = verbose) + allow.cartesian = allow.cartesian) #### Get gene lengths ##### - gr <- get_gene_lengths(gene_list = phenos$gene_symbol, - keep_seqnames = keep_seqnames, - verbose = verbose) + gr <- KGExplorer::get_gene_lengths(genes = phenos$gene_symbol, + keep_chr = keep_chr) #### Merge in gene length data #### gr_dt <- data.table::merge.data.table( phenos, diff --git a/R/read_enrichr.R b/R/read_enrichr.R deleted file mode 100644 index 8b5bd35..0000000 --- a/R/read_enrichr.R +++ /dev/null @@ -1,7 +0,0 @@ -read_enrichr <- function(file){ - lines <- readLines(file) - lapply(strsplit(lines,"\t"), function(l){ - data.table::data.table(term=l[1], - gene=l[-c(1,2)]) - }) |> data.table::rbindlist(fill = TRUE) -} diff --git a/R/report_missing.R b/R/report_missing.R index 1f09289..1b9d74a 100644 --- a/R/report_missing.R +++ b/R/report_missing.R @@ -1,12 +1,12 @@ -report_missing <- function(phenos, - id_col, +report_missing <- function(dat, + input_col, report_col, verbose=TRUE){ - report_missing_i <- function(phenos, - id_col, + report_missing_i <- function(dat, + input_col, report_col, verbose=TRUE){ - phenos2 <- phenos[,c(id_col,report_col),with=FALSE] |> unique() + phenos2 <- dat[,c(input_col,report_col),with=FALSE] |> unique() n_missing <- sum(is.na(phenos2[[report_col]])) n_total <- nrow(phenos2) messager(formatC(n_missing,big.mark = ","), @@ -16,8 +16,8 @@ report_missing <- function(phenos, report_col,"missing.",v=verbose) } for(rc in report_col){ - report_missing_i(phenos = phenos, - id_col = id_col, + report_missing_i(dat = dat, + input_col = input_col, report_col = rc, verbose = verbose) } diff --git a/R/search_hpo.R b/R/search_hpo.R index 1b67a95..52aaa64 100644 --- a/R/search_hpo.R +++ b/R/search_hpo.R @@ -11,7 +11,6 @@ #' @returns Named list of HPO IDs. #' #' @export -#' @importFrom ontologyIndex get_descendants #' @examples #' query_hits <- search_hpo() search_hpo <- function(hpo = get_hpo(), @@ -38,24 +37,24 @@ search_hpo <- function(hpo = get_hpo(), include_descendants=TRUE, include_ancestors=FALSE, verbose=TRUE){ - # devoptera::args2vars(search_hpo) + name <- NULL; query_hits <- lapply(queries, function(q){ terms <- grep(paste(q,collapse = "|"), - hpo$name, + hpo@elementMetadata$name, ignore.case = TRUE, value = TRUE) + dat <- subset(hpo@elementMetadata, name %in% terms) res <- c() if(isTRUE(include_descendants)){ res <- c(res, - ontologyIndex::get_descendants(ontology = hpo, - roots = names(terms), - exclude_roots = FALSE) + simona::dag_offspring(hpo, + term = dat$id) ) } if(isTRUE(include_ancestors)){ res <- c(res, - ontologyIndex::get_ancestors(ontology = hpo, - terms = names(terms)) + simona::dag_ancestors(hpo, + term = dat$id) ) } return(unique(res)) diff --git a/R/subset_descendants.R b/R/subset_descendants.R deleted file mode 100644 index bfb0006..0000000 --- a/R/subset_descendants.R +++ /dev/null @@ -1,46 +0,0 @@ -#' Subset descendants -#' -#' Subset a \code{phenos} datatable to only -#' descendants of an ancestor HPO ID term. -#' @param ancestor Phenotype names (or HPO ID) of an ancestor in the -#' \href{https://hpo.jax.org/}{Human Phenotype Ontology}. -#' Only phenotypes that are descendants of this ancestor will be kept. -#' Set to \code{NULL} (default) to skip this filtering step. -#' @inheritParams make_phenos_dataframe -#' @inheritParams make_network_object -#' @inheritParams harmonise_phenotypes -#' @returns data.table of phenotypes, with additional columns: -#' "ancestor", "ancestor_id" -#' -#' @export -#' @importFrom ontologyIndex get_descendants -#' @importFrom data.table := -#' @examples -#' phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay") -#' phenos2 <- subset_descendants(phenos = phenos, -#' ancestor = "Motor delay") -subset_descendants <- function(phenos, - ancestor = NULL, - hpo = get_hpo(), - ignore_case = TRUE, - verbose = TRUE){ - - hpo_id <- NULL; - - if(!is.null(ancestor)){ - messager("Subsetting phenotypes to only ancestors of:", - paste(ancestor,collapse = ","),v=verbose) - ancestor_id <- harmonise_phenotypes(phenotypes = ancestor, - hpo = hpo, - as_hpo_ids = TRUE, - ignore_case = ignore_case) - all_ids <- ontologyIndex::get_descendants(ontology = hpo, - roots = ancestor_id, - exclude_roots = FALSE) - phenos <- phenos[hpo_id %in% all_ids, - ][,ancestor:=ancestor][,ancestor_id:=ancestor_id] - messager(formatC(nrow(phenos),big.mark = ","), - "associations remain after filtering.",v=verbose) - } - return(phenos) -} diff --git a/R/unlist_col.R b/R/unlist_col.R index d3e1ec0..3f58163 100644 --- a/R/unlist_col.R +++ b/R/unlist_col.R @@ -1,13 +1,13 @@ #' Unlist column #' #' Unlist a column in a \link[data.table]{data.table}. -#' @param dt \link[data.table]{data.table} +#' @param dat \link[data.table]{data.table} #' @param col Column name. #' @returns \link[data.table]{data.table} #' #' @keywords internal -unlist_col <- function(dt, +unlist_col <- function(dat, col){ - dt[rep(dt[,.I],lengths(get(col))) - ][, eval(col) := unlist(dt[[col]])][] + dat[rep(dat[,.I],lengths(get(col))) + ][, eval(col) := unlist(dat[[col]])][] } diff --git a/inst/.DS_Store b/inst/.DS_Store index aa49b00..368127d 100644 Binary files a/inst/.DS_Store and b/inst/.DS_Store differ diff --git a/inst/extdata/.DS_Store b/inst/extdata/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/inst/extdata/.DS_Store differ diff --git a/inst/extdata/orphanet_epidemiology.csv.gz b/inst/extdata/orphanet_epidemiology.csv.gz deleted file mode 100644 index 580368d..0000000 Binary files a/inst/extdata/orphanet_epidemiology.csv.gz and /dev/null differ diff --git a/inst/hex/hexSticker.Rmd b/inst/hex/hexSticker.Rmd index ef23e9f..2047054 100644 --- a/inst/hex/hexSticker.Rmd +++ b/inst/hex/hexSticker.Rmd @@ -2,7 +2,7 @@ title: "hexSticker" date: "

Updated: `r format( Sys.Date(), '%b-%d-%Y')`

" output: - BiocStyle::html_document + rmarkdown::html_document vignette: > %\VignetteIndexEntry{hexSticker} %\VignetteEngine{knitr::rmarkdown} diff --git a/man/add_.Rd b/man/add_.Rd new file mode 100644 index 0000000..f38d171 --- /dev/null +++ b/man/add_.Rd @@ -0,0 +1,594 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/_docs.R, R/add_ancestor.R, R/add_death.R, +% R/add_disease.R, R/add_disease_genes.R, R/add_evidence.R, +% R/add_gene_frequency.R, R/add_genes.R, R/add_hpo_definition.R, +% R/add_hpo_id.R, R/add_info_content.R, R/add_mondo.R, R/add_ndisease.R, +% R/add_omop.R, R/add_onset.R, R/add_ont_lvl.R, R/add_pheno_frequency.R, +% R/add_prevalence.R, R/add_severity.R, R/add_tier.R +\name{add_} +\alias{add_} +\alias{add_ancestor} +\alias{add_death} +\alias{add_disease} +\alias{add_disease_genes} +\alias{add_evidence} +\alias{add_gene_frequency} +\alias{add_genes} +\alias{add_hpo_definition} +\alias{add_hpo_id} +\alias{add_info_content} +\alias{add_mondo} +\alias{add_ndisease} +\alias{add_omop} +\alias{add_onset} +\alias{add_ont_lvl} +\alias{add_pheno_frequency} +\alias{add_prevalence} +\alias{add_severity} +\alias{add_tier} +\title{Add functions} +\usage{ +add_ancestor(phenos, lvl = 3, hpo = get_hpo(), remove_descendants = NULL) + +add_death( + phenos, + keep_deaths = NULL, + all.x = TRUE, + allow.cartesian = FALSE, + agg_by = NULL +) + +add_disease( + phenos, + extra_cols = NULL, + all.x = TRUE, + allow.cartesian = FALSE, + add_definitions = FALSE +) + +add_disease_genes(phenos, all.x = TRUE, verbose = TRUE) + +add_evidence( + phenos, + keep_evidence = NULL, + all.x = TRUE, + allow.cartesian = FALSE, + agg_by = c("disease_id", "gene_symbol"), + default_score = 1 +) + +add_gene_frequency( + phenotype_to_genes = load_phenotype_to_genes(), + gene_frequency_threshold = NULL, + all.x = TRUE, + verbose = TRUE +) + +add_genes( + phenos = NULL, + phenotype_to_genes = load_phenotype_to_genes(), + hpo = get_hpo(), + by = c("hpo_id", "disease_id"), + gene_col = "gene_symbol", + all.x = FALSE, + allow.cartesian = FALSE +) + +add_hpo_definition( + phenos, + line_length = FALSE, + use_api = FALSE, + verbose = TRUE +) + +add_hpo_id(phenos, hpo = get_hpo(), phenotype_to_genes = NULL) + +add_info_content(phenos, hpo = get_hpo()) + +add_mondo(phenos, input_col = "disease_id", map_to = "hpo", ...) + +add_ndisease( + phenos, + pheno_ndiseases_threshold = NULL, + all.x = TRUE, + allow.cartesian = FALSE, + verbose = TRUE +) + +add_omop( + phenos, + input_col = "hpo_id", + all.x = TRUE, + allow.cartesian = FALSE, + force_new = FALSE, + verbose = TRUE +) + +add_onset( + phenos, + keep_onsets = NULL, + agg_by = NULL, + all.x = TRUE, + allow.cartesian = FALSE +) + +add_ont_lvl( + phenos, + hpo = get_hpo(), + absolute = TRUE, + keep_ont_levels = NULL, + ... +) + +add_pheno_frequency( + phenos, + pheno_frequency_threshold = NULL, + all.x = TRUE, + allow.cartesian = FALSE +) + +add_prevalence( + phenos, + input_col = "disease_id", + drop_na = TRUE, + method = "orphanet" +) + +add_severity( + phenos, + hpo = get_hpo(), + all.x = TRUE, + allow.cartesian = FALSE, + severity_threshold = NULL +) + +add_tier( + phenos, + all.x = TRUE, + include_disease_characteristics = TRUE, + auto_assign = TRUE, + hpo = get_hpo(), + keep_tiers = NULL, + verbose = TRUE +) +} +\arguments{ +\item{phenos}{A data.table containing HPO IDs and other metadata.} + +\item{lvl}{How many levels deep into the ontology to get ancestors from. +For example: +\itemize{ +\item{1: }{"All"} +\item{2: }{"Phenotypic abnormality"} +\item{3: }{"Abnormality of the nervous system"} +\item{4: }{"Abnormality of nervous system physiology"} +\item{5: }{"Neurodevelopmental abnormality" or "Behavioral abnormality"} +}} + +\item{hpo}{Human Phenotype Ontology object, +loaded from \link[KGExplorer]{get_ontology}.} + +\item{remove_descendants}{Remove HPO terms that are descendants of a given +ancestral HPO term. Ancestral terms be provided as a character vector of +phenotype names (e.g. \code{c("Clinical course")}), +HPO IDs (e.g. \code{"HP:0031797" }) or a mixture of the two. +See \link{add_ancestor} for details.} + +\item{keep_deaths}{The age of death associated with each HPO ID to keep. +If >1 age of death is associated with the term, +only the earliest age is considered. +See \link{add_death} for details.} + +\item{all.x}{logical; if \code{TRUE}, then extra rows will be added to the +output, one for each row in \code{x} that has no matching row in \code{y}. +These rows will have 'NA's in those columns that are usually filled with values +from \code{y}. The default is \code{FALSE}, so that only rows with data from both +\code{x} and \code{y} are included in the output.} + +\item{allow.cartesian}{See \code{allow.cartesian} in \code{\link[data.table]{[.data.table}}.} + +\item{agg_by}{Column to aggregate age of onset metadata by.} + +\item{extra_cols}{Extra metadata columns from the"phenotype.hpoa" +annotations file to include. +See +\href{https://hpo-annotation-qc.readthedocs.io/en/latest/annotationFormat.html}{ +here for column descriptions}.} + +\item{add_definitions}{Add disease definitions using \link{add_mondo}.} + +\item{verbose}{Print messages.} + +\item{keep_evidence}{The evidence scores of each gene-disease +association to keep.} + +\item{default_score}{Default evidence score to +apply to gene-disease associations that are present in the HPO annotations +but don't have evidence scores in the GenCC annotations.} + +\item{phenotype_to_genes}{Output of +\link{load_phenotype_to_genes} mapping phenotypes +to gene annotations.} + +\item{gene_frequency_threshold}{Only keep genes with frequency +above the set threshold. Frequency ranges from 0-100 where 100 is +a gene that occurs 100\% of the time in a given phenotype. +Include \code{NA} if you wish to retain genes that +do not have any frequency data. +See \link{add_gene_frequency} for details.} + +\item{by}{A vector of shared column names in \code{x} and \code{y} to merge on. +This defaults to the shared key columns between the two tables. +If \code{y} has no key columns, this defaults to the key of \code{x}.} + +\item{gene_col}{Name of the gene column.} + +\item{line_length}{The number of desired words per line \} + +\item{use_api}{Get definitions from the HPO API, +as opposed to a static local dataset.} + +\item{input_col}{Name of the column containing the disease or phenotype IDs.} + +\item{map_to}{Mapping outputs to include +(from Mondo IDs to another database's IDs).} + +\item{...}{ + Arguments passed on to \code{\link[KGExplorer:map_]{KGExplorer::map_mondo}}, \code{\link[KGExplorer:get_]{KGExplorer::get_ontology_levels}} + \describe{ + \item{\code{dat}}{\link[data.table]{data.table} with genes.} + \item{\code{output_col}}{Column name of output IDs.} + \item{\code{to}}{Character vector of database(s) to map IDs to. +When not \code{"mondo"}, can supply multiple alternative databases to map to + (e.g. \code{c("OMIM","Orphanet","DECIPHER")}).} + \item{\code{map_types}}{Mapping types to include.} + \item{\code{top_n}}{Top number of mappings to return per \code{top_by} grouping. +Set to \code{NULL} to skip this step.} + \item{\code{add_name}}{Logical, if TRUE, add mondo name column.} + \item{\code{save_dir}}{Directory to save cached data.} + \item{\code{ont}}{An ontology of class \link[simona]{ontology_DAG}.} + \item{\code{terms}}{Term IDs to include. Can alternatively be an integer, +which will be used to randomly sample N terms from the data.} + \item{\code{remove_terms}}{Character vector of term IDs to exclude.} + \item{\code{reverse}}{If \code{TRUE}, ontology +level numbers with be revered such that the level of the parent terms +are larger than the child terms.} + }} + +\item{pheno_ndiseases_threshold}{Filter phenotypes by the +maximum number of diseases they are associated with.} + +\item{force_new}{Force a new query to the OARD API instead of +using pre-downloaded data.} + +\item{keep_onsets}{The age of onset associated with each HPO ID to keep. +If >1 age of onset is associated with the term, +only the earliest age is considered. +See \link[HPOExplorer]{add_onset} for details.} + +\item{absolute}{Make the levels absolute in the sense that they consider +the entire ontology (\code{TRUE}). +Otherwise, levels will be relative to only the terms that are in + the provided subset of \code{terms} AND are directly adjacent (connected) + to a given cluster of terms (\code{FALSE}).} + +\item{keep_ont_levels}{Only keep phenotypes at certain \emph{absolute} +ontology levels to keep. +See \link{add_ont_lvl} for details.} + +\item{pheno_frequency_threshold}{Only keep phenotypes with frequency +above the set threshold. Frequency ranges from 0-100 where 100 is +a phenotype that occurs 100\% of the time in all associated diseases. +Include \code{NA} if you wish to retain phenotypes that +do not have any frequency data. +See \link[HPOExplorer]{add_pheno_frequency} for details.} + +\item{drop_na}{Whether to drop rows with missing prevalence data.} + +\item{method}{One of "orphanet" or "oard".} + +\item{severity_threshold}{Only keep phenotypes with a mean +severity score (averaged across multiple associated diseases) below the +set threshold. The severity score ranges from 1-4 where 1 is the MOST severe. +Include \code{NA} if you wish to retain phenotypes that +do not have any severity score.} + +\item{include_disease_characteristics}{Include \code{phenotypes} that +are also high-level \code{include_disease_characteristics}.} + +\item{auto_assign}{Automatically assing HPO IDs to Tiers by conducting +regex searches for keywords that appear in the term name, +or the names of its descendants or ancestors.} + +\item{keep_tiers}{Tiers from \link[HPOExplorer]{hpo_tiers} to keep. +Include \code{NA} if you wish to retain phenotypes that +do not have any Tier assignment.} +} +\value{ +Annotated data. + +phenos data.table with extra columns: +\itemize{ +\item{"AgeOfDeath": }{AgeOfDeath HPO IDs of disease phenotypes associated +with the target hpo_id phenotype.} +\item{"AgeOfDeath_names": }{AgeOfDeath HPO names of disease phenotypes +associated with the target hpo_id phenotype.} +\item{"AgeOfDeath_counts": }{The number of times each term in +"AgeOfDeath_names" appears across associated disease phenotypes.} +\item{"AgeOfDeath_score_mean": }{Mean age of death score.} +\item{"AgeOfDeath_score_min": }{Minimum age of death score.} +\item{"AgeOfDeath_top": }{The most common age of death term.} +\item{"AgeOfDeath_earliest": }{The earliest age of death.} +\item{"AgeOfDeath_latest": }{The latest age of death.} +} + +phenos data.table with extra columns: +\itemize{ +\item{"evidence_score_min": }{Minimum evidence score.} +\item{"evidence_score_max": }{Maximum evidence score.} +\item{"evidence_score_mean": }{Mean evidence score.} +} + +phenos data.table with extra column + +A named vector of HPO term descriptions. + +phenos data.table with extra column + +phenos data.table with extra columns. + +phenos data.table with extra columns + +phenos data.table with extra columns: +\itemize{ +\item{"onset": }{onset HPO IDs of disease phenotypes associated +with the target hpo_id phenotype.} +\item{"onset_names": }{onset HPO names of disease phenotypes associated +with the target hpo_id phenotype.} +\item{"onset_counts": }{The number of times each term in +"onset_names" appears across associated disease phenotypes.} +\item{"onset_score_mean": }{Mean onset score.} +\item{"onset_score_min": }{Minimum onset score.} +\item{"onset_top": }{The most common onset term.} +\item{"onset_earliest": }{The earliest age of onset.} +\item{"onset_latest": }{The latest age of onset.} +} + +phenos data.table with extra column + +phenos data.table with extra column + +phenos data.table with extra columns + +phenos data.table with extra column +} +\description{ +Functions to add metadata to data.table objects. +} +\section{Functions}{ +\itemize{ +\item \code{add_ancestor()}: add_ +Add ancestor + +Assign each HPO ID to the higher-order ancestral term that it is part of. + +\item \code{add_death()}: add_ +Add age of death + +Add age of death for each HPO ID. +AgeOfDeath IDs and assigned "AgeOfDeath_score" values: +\itemize{ +\item{HP:0005268 }{"Miscarriage" (AgeOfDeath_score=1)} +\item{HP:0003826 }{"Stillbirth" (AgeOfDeath_score=1)} +\item{HP:0034241 }{"Prenatal death" (AgeOfDeath_score=1)} +\item{HP:0003811 }{"Neonatal death" (AgeOfDeath_score=2)} +\item{HP:0001522 }{"Death in infancy" (AgeOfDeath_score=3)} +\item{HP:0003819 }{"Death in childhood" (AgeOfDeath_score=4)} +\item{HP:0011421 }{"Death in adolescence" (AgeOfDeath_score=5)} +\item{HP:0100613 }{"Death in early adulthood" (AgeOfDeath_score=6)} +\item{HP:0033764 }{"Death in middle age" (AgeOfDeath_score=7)} +\item{HP:0033763 }{"Death in adulthood" (AgeOfDeath_score=7)} +\item{HP:0033765 }{"Death in late adulthood" (AgeOfDeath_score=8)} +} + +\item \code{add_disease()}: add_ +Add diseases + +Annotate each HPO term with diseases that they are associated with. + +\item \code{add_disease_genes()}: add_ +Add disease genes + +Add genes that overlap between an HPO ID and an associated phenotype. + +\item \code{add_evidence()}: add_ +Add evidence + +Add the strength of evidence supporting each gene-disease association. +\href{https://thegencc.org/faq.html#validity-termsdelphi-survey}{ +Delphi survey} evidence classification IDs and +assigned "evidence_score" values: +\itemize{ +\item{GENCC:100001 }{"Definitive" (evidence_score=6)} +\item{GENCC:100002 }{"Strong" (evidence_score=5)} +\item{GENCC:100003 }{"Moderate" (evidence_score=4)} +\item{GENCC:100009 }{"Supportive" (evidence_score=3)} +\item{GENCC:100004 }{"Limited" (evidence_score=2)} +\item{GENCC:100005 }{"Disputed Evidence" (evidence_score=1)} +\item{GENCC:100006 }{"Refuted Evidence" (evidence_score=0)} +\item{GENCC:100008 }{"No Known Disease Relationship" (evidence_score=0)} +} + +\item \code{add_gene_frequency()}: add_ +Add gene frequency + +Add gene-level frequency, i.e. how often mutations in a given gene +are associated with a given phenotype. +Numeric frequency columns are on a 0-100\% scale. + +\item \code{add_genes()}: add_ +Add genes + +Add genes associated with each phenotype +(in the context of a particular disease). + +\item \code{add_hpo_definition()}: add_ +Get term definition + +This function accesses the HPO API to get a description/definition of an +HPO term. If a \code{line_length} \> 0 is passed to the function, it will add +newlines every nth word. This can be useful when displaying the description +in plots with limited space. + +\item \code{add_hpo_id()}: add_ +Add HPO ID column to dataframe + +Adds the HPO term ID column "hpo_id". + +\item \code{add_info_content()}: add_ +Add information content + +Add a column containing the information content score for each HPO ID. + +\item \code{add_mondo()}: add_ +Add Mondo metadata + +Add Mondo metadata (MONDO ID mappings, names, and definitions) for diseases +using files from their respective databases: +e.g. OMIM, DECIPHER, Orphanet. + +\item \code{add_ndisease()}: add_ +Add N diseases + +Annotate each HPO term with the total number of disease +they are associated with. + +\item \code{add_omop()}: add_ +Add OMOP + +Add metadata from \href{https://mondo.monarchinitiative.org/}{MONDO}, +including: +\itemize{ +\item{mondo_id: }{MONDO term ID.} +\item{mondo_name: }{MONDO term name.} +\item{mondo_def: }{MONDO term definition.} +} + +\item \code{add_onset()}: add_ +Add age of onset + +Add age of onset for each HPO ID. +onset IDs and assigned "onset_score" values: +\itemize{ +\item{HP:0011461 }{"Fetal onset" (onset_score=1)} +\item{HP:0030674 }{"Antenatal onset" (onset_score=2)} +\item{HP:0003577 }{"Congenital onset" (onset_score=3)} +\item{HP:0003623 }{"Neonatal onset" (onset_score=4)} +\item{HP:0003593 }{"Infantile onset" (onset_score=5)} +\item{HP:0011463 }{"Childhood onset" (onset_score=6)} +\item{HP:0003621 }{"Juvenile onset" (onset_score=7)} +\item{HP:0011462 }{"Young adult onset" (onset_score=8)} +\item{HP:0003581 }{"Adult onset" (onset_score=9)} +\item{HP:0003596 }{"Middle age onset" (onset_score=10)} +\item{HP:0003584 }{"Late onset" (onset_score=11)} +} + +\item \code{add_ont_lvl()}: add_ +Add ontology level + +Add the relative ontology level for each HPO ID. + +\item \code{add_pheno_frequency()}: add_ +Add phenotype frequency + +Add phenotype-level frequency, i.e. +how often a phenotype occurs in a given disease. + +\item \code{add_prevalence()}: add_ +Add prevalence + +Add a column containing the prevalence score for each disease ("disease_id") +or phenotype ("hpo_id"). + +\item \code{add_severity()}: add_ +Add HPO modifiers + +Annotate each HPO with modifier terms, including +(but not limited to) progression and severity ratings. +In order of increasing severity: +\itemize{ +\item{HP:0012825 }{"Mild" (Severity_score=4)} +\item{HP:0012827 }{"Borderline" (Severity_score=3)} +\item{HP:0012828 }{"Severe" (Severity_score=2)} +\item{HP:0012829}{"Profound" (Severity_score=1)} +} + +\item \code{add_tier()}: add_ +Add severity Tiers + +Add severity Tier for each HPO ID, in accordance with the +rating system provided by +\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4262393/}{ +Lazarin et al (2014)}. +In order of increasing severity: +\itemize{ +\item{Tier 4 }{Reduced fertility} +\item{Tier 3 }{Sensory impairment: vision, +Immunodeficiency/cancer, +Sensory impairment: hearing, +Sensory impairment: touch, other (including pain), +Mental illness, +Dysmorphic features} +\item{Tier 2 }{Shortened life span: premature adulthood, +Impaired mobility, +Internal physical malformation} +\item{Tier 1 }{Shortened life span: infancy, +Shortened life span: childhood/adolescence, +Intellectual disability} +} + +}} +\examples{ +phenos <- example_phenos() +phenos2 <- add_ancestor(phenos = phenos, lvl=5) +phenos <- example_phenos() +phenos2 <- add_death(phenos = phenos) +phenos <- example_phenos() +phenos2 <- add_disease(phenos = phenos) +\dontrun{ +phenos <- load_phenotype_to_genes() +phenos2 <- add_severity(phenos = phenos) +} +phenos <- load_phenotype_to_genes() +phenos2 <- add_evidence(phenos = phenos) +phenotype_to_genes <- load_phenotype_to_genes()[seq(1000),] +phenos2 <- add_gene_frequency(phenotype_to_genes = phenotype_to_genes) +phenos <- example_phenos() +phenos2 <- add_genes(phenos = phenos) +phenos <- example_phenos() +phenos2 <- add_hpo_definition(phenos = phenos) +phenotype_to_genes <- load_phenotype_to_genes() +phenos <- unique(phenotype_to_genes[,c("hpo_id","hpo_name")]) +phenos2 <- add_hpo_id(phenos=phenos) +phenos <- example_phenos() +phenos2 <- add_info_content(phenos = phenos) +phenos <- load_phenotype_to_genes(3)[seq(1000)] +phenos2 <- add_mondo(phenos = phenos) +phenos <- example_phenos() +phenos2 <- add_ndisease(phenos = phenos) +phenos <- example_phenos() +phenos2 <- add_omop(phenos = phenos) +phenos <- example_phenos() +phenos2 <- add_onset(phenos = phenos) +phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay") +phenos2 <- add_ont_lvl(phenos = phenos) +phenos <- example_phenos() +phenos2 <- add_pheno_frequency(phenos = phenos) +phenos <- example_phenos() +phenos2 <- add_prevalence(phenos = phenos) +phenos <- example_phenos() +phenos2 <- add_severity(phenos = phenos) +phenos <- example_phenos() +phenos2 <- add_tier(phenos = phenos) +} +\concept{add_} diff --git a/man/add_ancestor.Rd b/man/add_ancestor.Rd deleted file mode 100644 index 0f0b562..0000000 --- a/man/add_ancestor.Rd +++ /dev/null @@ -1,47 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_ancestor.R -\name{add_ancestor} -\alias{add_ancestor} -\title{Add ancestor} -\usage{ -add_ancestor( - phenos, - lvl = 3, - hpo = get_hpo(), - remove_descendants = NULL, - verbose = TRUE -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{lvl}{How many levels deep into the ontology to get ancestors from. -For example: -\itemize{ -\item{1: }{"All"} -\item{2: }{"Phenotypic abnormality"} -\item{3: }{"Abnormality of the nervous system"} -\item{4: }{"Abnormality of nervous system physiology"} -\item{5: }{"Neurodevelopmental abnormality" or "Behavioral abnormality"} -}} - -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{remove_descendants}{Remove HPO terms that are descendants of a given -ancestral HPO term. Ancestral terms be provided as a character vector of -phenotype names (e.g. \code{c("Clinical course")}), -HPO IDs (e.g. \code{"HP:0031797" }) or a mixture of the two. -See \link[HPOExplorer]{add_ancestor} for details.} - -\item{verbose}{Print messages.} -} -\value{ -phenos data.table with extra column -} -\description{ -Assign each HPO ID to the higher-order ancestral term that it is part of. -} -\examples{ -phenos <- example_phenos() -phenos2 <- add_ancestor(phenos = phenos, lvl=5) -} diff --git a/man/add_death.Rd b/man/add_death.Rd deleted file mode 100644 index 1e247f5..0000000 --- a/man/add_death.Rd +++ /dev/null @@ -1,72 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_death.R -\name{add_death} -\alias{add_death} -\title{Add age of death} -\usage{ -add_death( - phenos, - keep_deaths = NULL, - all.x = TRUE, - allow.cartesian = FALSE, - agg_by = NULL, - verbose = TRUE -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{keep_deaths}{The age of death associated with each HPO ID to keep. -If >1 age of death is associated with the term, -only the earliest age is considered. -See \link[HPOExplorer]{add_death} for details.} - -\item{all.x}{logical; if \code{TRUE}, then extra rows will be added to the -output, one for each row in \code{x} that has no matching row in \code{y}. -These rows will have 'NA's in those columns that are usually filled with values -from \code{y}. The default is \code{FALSE}, so that only rows with data from both -\code{x} and \code{y} are included in the output.} - -\item{allow.cartesian}{See \code{allow.cartesian} in \code{\link[data.table]{[.data.table}}.} - -\item{agg_by}{Column to aggregate age of death metadata by.} - -\item{verbose}{Print messages.} -} -\value{ -phenos data.table with extra columns: -\itemize{ -\item{"AgeOfDeath": }{AgeOfDeath HPO IDs of disease phenotypes associated -with the target hpo_id phenotype.} -\item{"AgeOfDeath_names": }{AgeOfDeath HPO names of disease phenotypes -associated with the target hpo_id phenotype.} -\item{"AgeOfDeath_counts": }{The number of times each term in -"AgeOfDeath_names" appears across associated disease phenotypes.} -\item{"AgeOfDeath_score_mean": }{Mean age of death score.} -\item{"AgeOfDeath_score_min": }{Minimum age of death score.} -\item{"AgeOfDeath_top": }{The most common age of death term.} -\item{"AgeOfDeath_earliest": }{The earliest age of death.} -\item{"AgeOfDeath_latest": }{The latest age of death.} -} -} -\description{ -Add age of death for each HPO ID. -AgeOfDeath IDs and assigned "AgeOfDeath_score" values: -\itemize{ -\item{HP:0005268 }{"Miscarriage" (AgeOfDeath_score=1)} -\item{HP:0003826 }{"Stillbirth" (AgeOfDeath_score=1)} -\item{HP:0034241 }{"Prenatal death" (AgeOfDeath_score=1)} -\item{HP:0003811 }{"Neonatal death" (AgeOfDeath_score=2)} -\item{HP:0001522 }{"Death in infancy" (AgeOfDeath_score=3)} -\item{HP:0003819 }{"Death in childhood" (AgeOfDeath_score=4)} -\item{HP:0011421 }{"Death in adolescence" (AgeOfDeath_score=5)} -\item{HP:0100613 }{"Death in early adulthood" (AgeOfDeath_score=6)} -\item{HP:0033764 }{"Death in middle age" (AgeOfDeath_score=7)} -\item{HP:0033763 }{"Death in adulthood" (AgeOfDeath_score=7)} -\item{HP:0033765 }{"Death in late adulthood" (AgeOfDeath_score=8)} -} -} -\examples{ -phenos <- example_phenos() -phenos2 <- add_death(phenos = phenos) -} diff --git a/man/add_disease.Rd b/man/add_disease.Rd deleted file mode 100644 index e16c491..0000000 --- a/man/add_disease.Rd +++ /dev/null @@ -1,51 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_disease.R -\name{add_disease} -\alias{add_disease} -\title{Add diseases} -\usage{ -add_disease( - phenos, - extra_cols = NULL, - include_mondo = TRUE, - all.x = TRUE, - allow.cartesian = FALSE, - add_definitions = FALSE, - verbose = TRUE -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{extra_cols}{Extra metadata columns from the"phenotype.hpoa" -annotations file to include. -See -\href{https://hpo-annotation-qc.readthedocs.io/en/latest/annotationFormat.html}{ -here for column descriptions}.} - -\item{include_mondo}{Add IDs/names/definitions from MONDO via -\link[HPOExplorer]{add_mondo}.} - -\item{all.x}{logical; if \code{TRUE}, then extra rows will be added to the -output, one for each row in \code{x} that has no matching row in \code{y}. -These rows will have 'NA's in those columns that are usually filled with values -from \code{y}. The default is \code{FALSE}, so that only rows with data from both -\code{x} and \code{y} are included in the output.} - -\item{allow.cartesian}{See \code{allow.cartesian} in \code{\link[data.table]{[.data.table}}.} - -\item{add_definitions}{Add disease definitions using -\link[HPOExplorer]{add_disease_definition}.} - -\item{verbose}{Print messages.} -} -\value{ -phenos data.table with extra columns -} -\description{ -Annotate each HPO term with diseases that they are associated with. -} -\examples{ -phenos <- example_phenos() -phenos2 <- add_disease(phenos = phenos) -} diff --git a/man/add_disease_definition.Rd b/man/add_disease_definition.Rd deleted file mode 100644 index e278985..0000000 --- a/man/add_disease_definition.Rd +++ /dev/null @@ -1,47 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_disease_definition.R -\name{add_disease_definition} -\alias{add_disease_definition} -\title{Add disease definition} -\usage{ -add_disease_definition( - phenos, - cols = c("Definitions", "Preferred.Label"), - id_col = "disease_id", - save_dir = file.path(tools::R_user_dir("HPOExplorer", which = "cache")), - include_mondo = TRUE, - all.x = TRUE, - verbose = TRUE -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{cols}{Metadata columns to include.} - -\item{id_col}{Name of the disease ID column.} - -\item{save_dir}{Directory to save metadata files to.} - -\item{include_mondo}{Add IDs/names/definitions from MONDO via -\link[HPOExplorer]{add_mondo}.} - -\item{all.x}{logical; if \code{TRUE}, then extra rows will be added to the -output, one for each row in \code{x} that has no matching row in \code{y}. -These rows will have 'NA's in those columns that are usually filled with values -from \code{y}. The default is \code{FALSE}, so that only rows with data from both -\code{x} and \code{y} are included in the output.} - -\item{verbose}{Print messages.} -} -\value{ -phenos data.table with extra columns. -} -\description{ -Add metadata for diseases using files from their respective databases: -OMIM, DECIPHER, Orphanet. -} -\examples{ -phenos <- load_phenotype_to_genes(3)[seq(1000)] -phenos2 <- add_disease_definition(phenos = phenos) -} diff --git a/man/add_disease_genes.Rd b/man/add_disease_genes.Rd deleted file mode 100644 index c11ed68..0000000 --- a/man/add_disease_genes.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_disease_genes.R -\name{add_disease_genes} -\alias{add_disease_genes} -\title{Add disease genes} -\usage{ -add_disease_genes(phenos, all.x = TRUE, verbose = TRUE) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{all.x}{logical; if \code{TRUE}, then extra rows will be added to the -output, one for each row in \code{x} that has no matching row in \code{y}. -These rows will have 'NA's in those columns that are usually filled with values -from \code{y}. The default is \code{FALSE}, so that only rows with data from both -\code{x} and \code{y} are included in the output.} - -\item{verbose}{Print messages.} -} -\value{ -phenos data.table with extra columns -} -\description{ -Add genes that overlap between an HPO ID and an associated phenotype. -} -\examples{ -\dontrun{ -phenos <- load_phenotype_to_genes() -phenos2 <- add_severity(phenos = phenos) -} -} diff --git a/man/add_evidence.Rd b/man/add_evidence.Rd deleted file mode 100644 index 6e4b7da..0000000 --- a/man/add_evidence.Rd +++ /dev/null @@ -1,66 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_evidence.R -\name{add_evidence} -\alias{add_evidence} -\title{Add evidence} -\usage{ -add_evidence( - phenos, - keep_evidence = NULL, - all.x = TRUE, - allow.cartesian = FALSE, - agg_by = c("disease_id", "gene_symbol"), - default_score = 1, - verbose = TRUE -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{keep_evidence}{The evidence scores of each gene-disease -association to keep.} - -\item{all.x}{logical; if \code{TRUE}, then extra rows will be added to the -output, one for each row in \code{x} that has no matching row in \code{y}. -These rows will have 'NA's in those columns that are usually filled with values -from \code{y}. The default is \code{FALSE}, so that only rows with data from both -\code{x} and \code{y} are included in the output.} - -\item{allow.cartesian}{See \code{allow.cartesian} in \code{\link[data.table]{[.data.table}}.} - -\item{agg_by}{Column to aggregate metadata by.} - -\item{default_score}{Default evidence score to -apply to gene-disease associations that are present in the HPO annotations -but don't have evidence scores in the GenCC annotations.} - -\item{verbose}{Print messages.} -} -\value{ -phenos data.table with extra columns: -\itemize{ -\item{"evidence_score_min": }{Minimum evidence score.} -\item{"evidence_score_max": }{Maximum evidence score.} -\item{"evidence_score_mean": }{Mean evidence score.} -} -} -\description{ -Add the strength of evidence supporting each gene-disease association. -\href{https://thegencc.org/faq.html#validity-termsdelphi-survey}{ -Delphi survey} evidence classification IDs and -assigned "evidence_score" values: -\itemize{ -\item{GENCC:100001 }{"Definitive" (evidence_score=6)} -\item{GENCC:100002 }{"Strong" (evidence_score=5)} -\item{GENCC:100003 }{"Moderate" (evidence_score=4)} -\item{GENCC:100009 }{"Supportive" (evidence_score=3)} -\item{GENCC:100004 }{"Limited" (evidence_score=2)} -\item{GENCC:100005 }{"Disputed Evidence" (evidence_score=1)} -\item{GENCC:100006 }{"Refuted Evidence" (evidence_score=0)} -\item{GENCC:100008 }{"No Known Disease Relationship" (evidence_score=0)} -} -} -\examples{ -phenos <- load_phenotype_to_genes() -phenos2 <- add_evidence(phenos = phenos) -} diff --git a/man/add_gene_frequency.Rd b/man/add_gene_frequency.Rd deleted file mode 100644 index 2d669b0..0000000 --- a/man/add_gene_frequency.Rd +++ /dev/null @@ -1,45 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_gene_frequency.R -\name{add_gene_frequency} -\alias{add_gene_frequency} -\title{Add gene frequency} -\usage{ -add_gene_frequency( - phenotype_to_genes = load_phenotype_to_genes(), - gene_frequency_threshold = NULL, - all.x = TRUE, - verbose = TRUE -) -} -\arguments{ -\item{phenotype_to_genes}{Output of -\link[HPOExplorer]{load_phenotype_to_genes} mapping phenotypes -to gene annotations.} - -\item{gene_frequency_threshold}{Only keep genes with frequency -above the set threshold. Frequency ranges from 0-100 where 100 is -a gene that occurs 100\% of the time in a given phenotype. -Include \code{NA} if you wish to retain genes that -do not have any frequency data. -See \link[HPOExplorer]{add_gene_frequency} for details.} - -\item{all.x}{logical; if \code{TRUE}, then extra rows will be added to the -output, one for each row in \code{x} that has no matching row in \code{y}. -These rows will have 'NA's in those columns that are usually filled with values -from \code{y}. The default is \code{FALSE}, so that only rows with data from both -\code{x} and \code{y} are included in the output.} - -\item{verbose}{Print messages.} -} -\value{ -phenos data.table with extra column -} -\description{ -Add gene-level frequency, i.e. how often mutations in a given gene -are associated with a given phenotype. -Numeric frequency columns are on a 0-100\% scale. -} -\examples{ -phenotype_to_genes <- load_phenotype_to_genes()[seq(1000),] -phenos2 <- add_gene_frequency(phenotype_to_genes = phenotype_to_genes) -} diff --git a/man/add_genes.Rd b/man/add_genes.Rd deleted file mode 100644 index c11864f..0000000 --- a/man/add_genes.Rd +++ /dev/null @@ -1,50 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_genes.R -\name{add_genes} -\alias{add_genes} -\title{Add genes} -\usage{ -add_genes( - phenos = NULL, - phenotype_to_genes = load_phenotype_to_genes(), - hpo = get_hpo(), - by = c("hpo_id", "disease_id"), - gene_col = "gene_symbol", - all.x = FALSE, - allow.cartesian = FALSE, - verbose = TRUE -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{phenotype_to_genes}{Output of -\link[HPOExplorer]{load_phenotype_to_genes} mapping phenotypes -to gene annotations.} - -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{by}{A vector of shared column names in \code{x} and \code{y} to merge on. -This defaults to the shared key columns between the two tables. -If \code{y} has no key columns, this defaults to the key of \code{x}.} - -\item{gene_col}{Name of the gene column.} - -\item{all.x}{logical; if \code{TRUE}, then extra rows will be added to the -output, one for each row in \code{x} that has no matching row in \code{y}. -These rows will have 'NA's in those columns that are usually filled with values -from \code{y}. The default is \code{FALSE}, so that only rows with data from both -\code{x} and \code{y} are included in the output.} - -\item{allow.cartesian}{See \code{allow.cartesian} in \code{\link[data.table]{[.data.table}}.} - -\item{verbose}{Print messages.} -} -\description{ -Add genes associated with each phenotype -(in the context of a particular disease). -} -\examples{ -phenos <- example_phenos() -phenos2 <- add_genes(phenos = phenos) -} diff --git a/man/add_hpo_definition.Rd b/man/add_hpo_definition.Rd deleted file mode 100644 index ec057ee..0000000 --- a/man/add_hpo_definition.Rd +++ /dev/null @@ -1,36 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_hpo_definition.R -\name{add_hpo_definition} -\alias{add_hpo_definition} -\title{Get term definition} -\usage{ -add_hpo_definition( - phenos, - line_length = FALSE, - use_api = FALSE, - verbose = TRUE -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{line_length}{The number of desired words per line \} - -\item{use_api}{Get definitions from the HPO API, -as opposed to a static local dataset.} - -\item{verbose}{Print messages.} -} -\value{ -A named vector of HPO term descriptions. -} -\description{ -This function accesses the HPO API to get a description/definition of an -HPO term. If a \code{line_length} \> 0 is passed to the function, it will add -newlines every nth word. This can be useful when displaying the description -in plots with limited space. -} -\examples{ -phenos <- example_phenos() -phenos2 <- add_hpo_definition(phenos = phenos) -} diff --git a/man/add_hpo_id.Rd b/man/add_hpo_id.Rd deleted file mode 100644 index 3185612..0000000 --- a/man/add_hpo_id.Rd +++ /dev/null @@ -1,35 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_hpo_id.R -\name{add_hpo_id} -\alias{add_hpo_id} -\title{Add HPO ID column to dataframe} -\usage{ -add_hpo_id(phenos, hpo = get_hpo(), phenotype_to_genes = NULL, verbose = FALSE) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{phenotype_to_genes}{Output of -\link[HPOExplorer]{load_phenotype_to_genes} mapping phenotypes -to gene annotations.} - -\item{verbose}{Print messages.} -} -\value{ -The subset of EWCE results dataframe with a HPO Id column added. -} -\description{ -This adds the HPO term id column to the subset of ewce results data -to be plotted -in the cell select app. It also checks if it is a valid HPO term id to -prevent error and adds -a boolean column where TRUE if term is valid. If the HPO Id is not correct, -it caused an error in the ontologyPlot package. -} -\examples{ -phenotype_to_genes <- load_phenotype_to_genes() -phenos <- unique(phenotype_to_genes[,c("hpo_id","hpo_name")]) -phenos2 <- add_hpo_id(phenos=phenos) -} diff --git a/man/add_hpo_synonym.Rd b/man/add_hpo_synonym.Rd deleted file mode 100644 index ea5a0a4..0000000 --- a/man/add_hpo_synonym.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_hpo_synonym.R -\name{add_hpo_synonym} -\alias{add_hpo_synonym} -\title{Add HPO ID synonyms} -\usage{ -add_hpo_synonym( - phenos, - to = "UMLS", - to_col = paste0(to, "_ID"), - hpo = get_hpo(), - verbose = TRUE -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{to}{The ontology to map to. Default is \code{UMLS}.} - -\item{to_col}{The name of the new column to add the synonyms to.} - -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{verbose}{Print messages.} -} -\value{ -A data.table with the HPO ID synonyms in a new column with -the same name as \code{to}. -} -\description{ -Map HPO IDs to synonynmous IDs from other ontologies -(e.g. UMLS, SNOMEDCT_US, MSH, ICD-10) -} -\examples{ -phenos <- HPOExplorer::example_phenos() -phenos2 <- add_hpo_synonym(phenos) -} diff --git a/man/add_info_content.Rd b/man/add_info_content.Rd deleted file mode 100644 index c5f1a50..0000000 --- a/man/add_info_content.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_info_content.R -\name{add_info_content} -\alias{add_info_content} -\title{Add information content} -\usage{ -add_info_content(phenos, hpo = get_hpo(), verbose = TRUE) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{verbose}{Print messages.} -} -\value{ -phenos data.table with extra column -} -\description{ -Add a column containing the information content score for each HPO ID. -} -\examples{ -phenos <- example_phenos() -phenos2 <- add_info_content(phenos = phenos) -} diff --git a/man/add_mondo.Rd b/man/add_mondo.Rd deleted file mode 100644 index 7cb4206..0000000 --- a/man/add_mondo.Rd +++ /dev/null @@ -1,46 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_mondo.R -\name{add_mondo} -\alias{add_mondo} -\title{Add MONDO} -\usage{ -add_mondo( - phenos, - id_col = "disease_id", - add_definitions = TRUE, - all.x = TRUE, - allow.cartesian = FALSE, - verbose = TRUE -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{id_col}{ID column to map to MONDO IDs.} - -\item{add_definitions}{Add disease definitions using -\link[HPOExplorer]{add_disease_definition}.} - -\item{all.x}{logical; if \code{TRUE}, then extra rows will be added to the -output, one for each row in \code{x} that has no matching row in \code{y}. -These rows will have 'NA's in those columns that are usually filled with values -from \code{y}. The default is \code{FALSE}, so that only rows with data from both -\code{x} and \code{y} are included in the output.} - -\item{allow.cartesian}{See \code{allow.cartesian} in \code{\link[data.table]{[.data.table}}.} - -\item{verbose}{Print messages.} -} -\description{ -Add metadata from \href{https://mondo.monarchinitiative.org/}{MONDO}, -including: -\itemize{ -\item{MONDO_ID: }{MONDO term ID.} -\item{MONDO_name: }{MONDO term name.} -\item{MONDO_definition: }{MONDO term definition.} -} -} -\examples{ -phenos <- example_phenos() -phenos2 <- add_mondo(phenos = phenos) -} diff --git a/man/add_ndisease.Rd b/man/add_ndisease.Rd deleted file mode 100644 index 5b0acfd..0000000 --- a/man/add_ndisease.Rd +++ /dev/null @@ -1,41 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_ndisease.R -\name{add_ndisease} -\alias{add_ndisease} -\title{Add N diseases} -\usage{ -add_ndisease( - phenos, - pheno_ndiseases_threshold = NULL, - all.x = TRUE, - allow.cartesian = FALSE, - verbose = TRUE -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{pheno_ndiseases_threshold}{Filter phenotypes by the -maximum number of diseases they are associated with.} - -\item{all.x}{logical; if \code{TRUE}, then extra rows will be added to the -output, one for each row in \code{x} that has no matching row in \code{y}. -These rows will have 'NA's in those columns that are usually filled with values -from \code{y}. The default is \code{FALSE}, so that only rows with data from both -\code{x} and \code{y} are included in the output.} - -\item{allow.cartesian}{See \code{allow.cartesian} in \code{\link[data.table]{[.data.table}}.} - -\item{verbose}{Print messages.} -} -\value{ -phenos data.table with extra columns -} -\description{ -Annotate each HPO term with the total number of disease -they are associated with. -} -\examples{ -phenos <- example_phenos() -phenos2 <- add_ndisease(phenos = phenos) -} diff --git a/man/add_omop.Rd b/man/add_omop.Rd deleted file mode 100644 index 5e138ad..0000000 --- a/man/add_omop.Rd +++ /dev/null @@ -1,46 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_omop.R -\name{add_omop} -\alias{add_omop} -\title{Add MONDO} -\usage{ -add_omop( - phenos, - id_col = "hpo_id", - all.x = TRUE, - allow.cartesian = FALSE, - force_new = FALSE, - verbose = TRUE -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{id_col}{ID column to map to MONDO IDs.} - -\item{all.x}{logical; if \code{TRUE}, then extra rows will be added to the -output, one for each row in \code{x} that has no matching row in \code{y}. -These rows will have 'NA's in those columns that are usually filled with values -from \code{y}. The default is \code{FALSE}, so that only rows with data from both -\code{x} and \code{y} are included in the output.} - -\item{allow.cartesian}{See \code{allow.cartesian} in \code{\link[data.table]{[.data.table}}.} - -\item{force_new}{Force a new query to the OARD API instead of -using pre-downloaded data.} - -\item{verbose}{Print messages.} -} -\description{ -Add metadata from \href{https://mondo.monarchinitiative.org/}{MONDO}, -including: -\itemize{ -\item{MONDO_ID: }{MONDO term ID.} -\item{MONDO_name: }{MONDO term name.} -\item{MONDO_definition: }{MONDO term definition.} -} -} -\examples{ -phenos <- example_phenos() -phenos2 <- add_omop(phenos = phenos) -} diff --git a/man/add_onset.Rd b/man/add_onset.Rd deleted file mode 100644 index 23f4ab0..0000000 --- a/man/add_onset.Rd +++ /dev/null @@ -1,72 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_onset.R -\name{add_onset} -\alias{add_onset} -\title{Add age of onset} -\usage{ -add_onset( - phenos, - keep_onsets = NULL, - agg_by = NULL, - all.x = TRUE, - allow.cartesian = FALSE, - verbose = TRUE -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{keep_onsets}{The age of onset associated with each HPO ID to keep. -If >1 age of onset is associated with the term, -only the earliest age is considered. -See \link[HPOExplorer]{add_onset} for details.} - -\item{agg_by}{Column to aggregate age of onset metadata by.} - -\item{all.x}{logical; if \code{TRUE}, then extra rows will be added to the -output, one for each row in \code{x} that has no matching row in \code{y}. -These rows will have 'NA's in those columns that are usually filled with values -from \code{y}. The default is \code{FALSE}, so that only rows with data from both -\code{x} and \code{y} are included in the output.} - -\item{allow.cartesian}{See \code{allow.cartesian} in \code{\link[data.table]{[.data.table}}.} - -\item{verbose}{Print messages.} -} -\value{ -phenos data.table with extra columns: -\itemize{ -\item{"onset": }{onset HPO IDs of disease phenotypes associated -with the target hpo_id phenotype.} -\item{"onset_names": }{onset HPO names of disease phenotypes associated -with the target hpo_id phenotype.} -\item{"onset_counts": }{The number of times each term in -"onset_names" appears across associated disease phenotypes.} -\item{"onset_score_mean": }{Mean onset score.} -\item{"onset_score_min": }{Minimum onset score.} -\item{"onset_top": }{The most common onset term.} -\item{"onset_earliest": }{The earliest age of onset.} -\item{"onset_latest": }{The latest age of onset.} -} -} -\description{ -Add age of onset for each HPO ID. -onset IDs and assigned "onset_score" values: -\itemize{ -\item{HP:0011461 }{"Fetal onset" (onset_score=1)} -\item{HP:0030674 }{"Antenatal onset" (onset_score=2)} -\item{HP:0003577 }{"Congenital onset" (onset_score=3)} -\item{HP:0003623 }{"Neonatal onset" (onset_score=4)} -\item{HP:0003593 }{"Infantile onset" (onset_score=5)} -\item{HP:0011463 }{"Childhood onset" (onset_score=6)} -\item{HP:0003621 }{"Juvenile onset" (onset_score=7)} -\item{HP:0011462 }{"Young adult onset" (onset_score=8)} -\item{HP:0003581 }{"Adult onset" (onset_score=9)} -\item{HP:0003596 }{"Middle age onset" (onset_score=10)} -\item{HP:0003584 }{"Late onset" (onset_score=11)} -} -} -\examples{ -phenos <- example_phenos() -phenos2 <- add_onset(phenos = phenos) -} diff --git a/man/add_ont_lvl.Rd b/man/add_ont_lvl.Rd deleted file mode 100644 index 5c02607..0000000 --- a/man/add_ont_lvl.Rd +++ /dev/null @@ -1,55 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_ont_lvl.R -\name{add_ont_lvl} -\alias{add_ont_lvl} -\title{Add ontology level} -\usage{ -add_ont_lvl( - phenos, - hpo = get_hpo(), - adjacency = NULL, - absolute = TRUE, - exclude_top_lvl = TRUE, - keep_ont_levels = NULL, - reverse = TRUE, - verbose = TRUE -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{adjacency}{An adjacency matrix generated -by \link[HPOExplorer]{adjacency_matrix}.} - -\item{absolute}{Make the levels absolute in the sense that they consider -the entire HPO ontology (\code{TRUE}). -Otherwise, levels will be relative to only the HPO terms that are in -the provided subset of \code{terms} AND are directly adjacent (connected) -to a given cluster of terms (\code{FALSE}).} - -\item{exclude_top_lvl}{Exclude the top level term of the HPO -(i.e. "All" (HP:0000001)) when computing \emph{absolute} levels. -This argument is not used when computing \emph{relative} levels.} - -\item{keep_ont_levels}{Only keep phenotypes at certain \emph{absolute} -ontology levels to keep. -See \link[HPOExplorer]{add_ont_lvl} for details.} - -\item{reverse}{If \code{TRUE}, ontology -level numbers with be revered such that the level of the parent terms -are larger than the child terms.} - -\item{verbose}{Print messages.} -} -\value{ -phenos data.table with extra column -} -\description{ -Add the relative ontology level for each HPO ID. -} -\examples{ -phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay") -phenos2 <- add_ont_lvl(phenos = phenos) -} diff --git a/man/add_pheno_frequency.Rd b/man/add_pheno_frequency.Rd deleted file mode 100644 index 9305c49..0000000 --- a/man/add_pheno_frequency.Rd +++ /dev/null @@ -1,45 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_pheno_frequency.R -\name{add_pheno_frequency} -\alias{add_pheno_frequency} -\title{Add phenotype frequency} -\usage{ -add_pheno_frequency( - phenos, - pheno_frequency_threshold = NULL, - all.x = TRUE, - allow.cartesian = FALSE, - verbose = TRUE -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{pheno_frequency_threshold}{Only keep phenotypes with frequency -above the set threshold. Frequency ranges from 0-100 where 100 is -a phenotype that occurs 100\% of the time in all associated diseases. -Include \code{NA} if you wish to retain phenotypes that -do not have any frequency data. -See \link[HPOExplorer]{add_pheno_frequency} for details.} - -\item{all.x}{logical; if \code{TRUE}, then extra rows will be added to the -output, one for each row in \code{x} that has no matching row in \code{y}. -These rows will have 'NA's in those columns that are usually filled with values -from \code{y}. The default is \code{FALSE}, so that only rows with data from both -\code{x} and \code{y} are included in the output.} - -\item{allow.cartesian}{See \code{allow.cartesian} in \code{\link[data.table]{[.data.table}}.} - -\item{verbose}{Print messages.} -} -\value{ -phenos data.table with extra column -} -\description{ -Add phenotype-level frequency, i.e. -how often a phenotype occurs in a given disease. -} -\examples{ -phenos <- example_phenos() -phenos2 <- add_pheno_frequency(phenos = phenos) -} diff --git a/man/add_prevalence.Rd b/man/add_prevalence.Rd deleted file mode 100644 index 6f7ca57..0000000 --- a/man/add_prevalence.Rd +++ /dev/null @@ -1,36 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_prevalence.R -\name{add_prevalence} -\alias{add_prevalence} -\title{Add prevalence} -\usage{ -add_prevalence( - phenos, - id_col = "disease_id", - drop_na = TRUE, - verbose = TRUE, - method = "orphanet" -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{id_col}{Name of the column containing the disease or phenotype IDs.} - -\item{drop_na}{Whether to drop rows with missing prevalence data.} - -\item{verbose}{Print messages.} - -\item{method}{One of "orphanet" or "oard".} -} -\value{ -phenos data.table with extra column -} -\description{ -Add a column containing the prevalence score for each disease ("disease_id") -or phenotype ("hpo_id"). -} -\examples{ -phenos <- example_phenos() -phenos2 <- add_prevalence(phenos = phenos) -} diff --git a/man/add_severity.Rd b/man/add_severity.Rd deleted file mode 100644 index 44d3328..0000000 --- a/man/add_severity.Rd +++ /dev/null @@ -1,54 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_severity.R -\name{add_severity} -\alias{add_severity} -\title{Add HPO modifiers} -\usage{ -add_severity( - phenos, - hpo = get_hpo(), - all.x = TRUE, - allow.cartesian = FALSE, - severity_threshold = NULL, - verbose = TRUE -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{all.x}{logical; if \code{TRUE}, then extra rows will be added to the -output, one for each row in \code{x} that has no matching row in \code{y}. -These rows will have 'NA's in those columns that are usually filled with values -from \code{y}. The default is \code{FALSE}, so that only rows with data from both -\code{x} and \code{y} are included in the output.} - -\item{allow.cartesian}{See \code{allow.cartesian} in \code{\link[data.table]{[.data.table}}.} - -\item{severity_threshold}{Only keep phenotypes with a mean -severity score (averaged across multiple associated diseases) below the -set threshold. The severity score ranges from 1-4 where 1 is the MOST severe. -Include \code{NA} if you wish to retain phenotypes that -do not have any severity score.} - -\item{verbose}{Print messages.} -} -\value{ -phenos data.table with extra columns -} -\description{ -Annotate each HPO with modifier terms, including -(but not limited to) progression and severity ratings. -In order of increasing severity: -\itemize{ -\item{HP:0012825 }{"Mild" (Severity_score=4)} -\item{HP:0012827 }{"Borderline" (Severity_score=3)} -\item{HP:0012828 }{"Severe" (Severity_score=2)} -\item{HP:0012829}{"Profound" (Severity_score=1)} -} -} -\examples{ -phenos <- example_phenos() -phenos2 <- add_severity(phenos = phenos) -} diff --git a/man/add_tier.Rd b/man/add_tier.Rd deleted file mode 100644 index eb347d6..0000000 --- a/man/add_tier.Rd +++ /dev/null @@ -1,69 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_tier.R -\name{add_tier} -\alias{add_tier} -\title{Add severity Tiers} -\usage{ -add_tier( - phenos, - all.x = TRUE, - include_disease_characteristics = TRUE, - auto_assign = TRUE, - hpo = get_hpo(), - keep_tiers = NULL, - verbose = TRUE -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{all.x}{logical; if \code{TRUE}, then extra rows will be added to the -output, one for each row in \code{x} that has no matching row in \code{y}. -These rows will have 'NA's in those columns that are usually filled with values -from \code{y}. The default is \code{FALSE}, so that only rows with data from both -\code{x} and \code{y} are included in the output.} - -\item{include_disease_characteristics}{Include \code{phenotypes} that -are also high-level \code{include_disease_characteristics}.} - -\item{auto_assign}{Automatically assing HPO IDs to Tiers by conducting -regex searches for keywords that appear in the term name, -or the names of its descendants or ancestors.} - -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{keep_tiers}{Tiers from \link[HPOExplorer]{hpo_tiers} to keep. -Include \code{NA} if you wish to retain phenotypes that -do not have any Tier assignment.} - -\item{verbose}{Print messages.} -} -\value{ -phenos data.table with extra column -} -\description{ -Add severity Tier for each HPO ID, in accordance with the -rating system provided by -\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4262393/}{ -Lazarin et al (2014)}. -In order of increasing severity: -\itemize{ -\item{Tier 4 }{Reduced fertility} -\item{Tier 3 }{Sensory impairment: vision, -Immunodeficiency/cancer, -Sensory impairment: hearing, -Sensory impairment: touch, other (including pain), -Mental illness, -Dysmorphic features} -\item{Tier 2 }{Shortened life span: premature adulthood, -Impaired mobility, -Internal physical malformation} -\item{Tier 1 }{Shortened life span: infancy, -Shortened life span: childhood/adolescence, -Intellectual disability} -} -} -\examples{ -phenos <- example_phenos() -phenos2 <- add_tier(phenos = phenos) -} diff --git a/man/adjacency_matrix.Rd b/man/adjacency_matrix.Rd deleted file mode 100644 index 5533342..0000000 --- a/man/adjacency_matrix.Rd +++ /dev/null @@ -1,35 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/adjacency_matrix.R -\name{adjacency_matrix} -\alias{adjacency_matrix} -\title{Adjacency matrix} -\usage{ -adjacency_matrix( - hpo = get_hpo(), - terms = unique(hpo$id), - remove_terms = grep(":", terms, invert = TRUE, value = TRUE), - method = "HPOExplorer", - verbose = TRUE -) -} -\arguments{ -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{terms}{Character vector of term IDs to include.} - -\item{remove_terms}{Character vector of term IDs to exclude.} - -\item{method}{Method to use to create the adjacency matrix.} - -\item{verbose}{Print messages.} -} -\value{ -adjacency matrix -} -\description{ -Create adjacency matrix of HPO child-parent relationships. -} -\examples{ -terms <- get_hpo()$id[seq(1000)] -adjacency <- adjacency_matrix(terms = terms) -} diff --git a/man/assign_tiers.Rd b/man/assign_tiers.Rd deleted file mode 100644 index 6881458..0000000 --- a/man/assign_tiers.Rd +++ /dev/null @@ -1,77 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/assign_tiers.R -\name{assign_tiers} -\alias{assign_tiers} -\title{Add severity Tiers (auto)} -\usage{ -assign_tiers( - hpo = get_hpo(), - terms = hpo$id, - keyword_sets = list(Tier1 = c("intellectual disability", "death"), Tier2 = - c("impaired mobility", "malform"), Tier3 = c("sight", "vision", "immunodeficien", - "cancer", "hearing", "touch", "pain ", " pain", "mental illness", "dysmorphic"), - Tier4 = c("fertility")), - check_names = TRUE, - check_definitions = TRUE, - check_diseases = FALSE, - search_ancestors = TRUE, - search_descendants = TRUE, - as_datatable = FALSE, - verbose = TRUE -) -} -\arguments{ -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{terms}{A subset of HPO IDs to assign Tiers to.} - -\item{keyword_sets}{A named list of regex queries to be used when searching -for phenotypes that have matching metadata.} - -\item{check_names}{When regexsearching for matching terms, -check the phenotype names.} - -\item{check_definitions}{When regex searching for matching terms, -check the phenotype definition.} - -\item{check_diseases}{When regex searching for matching terms, -check the names of diseases associated with the phenotype.} - -\item{search_ancestors}{Inherit Tiers of ancestors.} - -\item{search_descendants}{Inherit Tiers of descendants.} - -\item{as_datatable}{Return the results as a \link[data.table]{data.table}.} - -\item{verbose}{Print messages.} -} -\value{ -Tier assignments for each term in \code{terms}. -Will be returned as either a named vector or a \link[data.table]{data.table}. -} -\description{ -Automatically add severity Tier for each HPO ID, in accordance with the -rating system provided by -\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4262393/}{ -Lazarin et al (2014)}. -In order of increasing severity: -\itemize{ -\item{Tier 4 }{Reduced fertility} -\item{Tier 3 }{Sensory impairment: vision, -Immunodeficiency/cancer, -Sensory impairment: hearing, -Sensory impairment: touch, other (including pain), -Mental illness, -Dysmorphic features} -\item{Tier 2 }{Shortened life span: premature adulthood, -Impaired mobility, -Internal physical malformation} -\item{Tier 1 }{Shortened life span: infancy, -Shortened life span: childhood/adolescence, -Intellectual disability} -} -} -\examples{ -terms <- get_hpo()$id[seq(10)] -tiers <- assign_tiers(terms = terms) -} diff --git a/man/clear_cache.Rd b/man/clear_cache.Rd deleted file mode 100644 index d053c03..0000000 --- a/man/clear_cache.Rd +++ /dev/null @@ -1,42 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/clear_cache.R -\name{clear_cache} -\alias{clear_cache} -\title{Clear cache} -\usage{ -clear_cache( - save_dir = tools::R_user_dir(package = "HPOExplorer", which = "cache"), - recursive = TRUE, - verbose = TRUE, - ... -) -} -\arguments{ -\item{save_dir}{Path to cache directory.} - -\item{recursive}{logical. Should directories be deleted recursively?} - -\item{verbose}{Print messages.} - -\item{...}{ - Arguments passed on to \code{\link[base:unlink]{base::unlink}} - \describe{ - \item{\code{x}}{a character vector with the names of the file(s) or - directories to be deleted.} - \item{\code{force}}{logical. Should permissions be changed (if possible) to - allow the file or directory to be removed?} - \item{\code{expand}}{logical. Should wildcards (see \sQuote{Details} below) and - tilde (see \code{\link[base]{path.expand}}) be expanded?} - }} -} -\value{ -Null. -} -\description{ -Remove all data cached by \pkg{HPOExplorer}. -} -\examples{ -\dontrun{ -clear_cache() -} -} diff --git a/man/convert_ontology.Rd b/man/convert_ontology.Rd deleted file mode 100644 index 0619099..0000000 --- a/man/convert_ontology.Rd +++ /dev/null @@ -1,39 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convert_ontology.R -\name{convert_ontology} -\alias{convert_ontology} -\title{Convert ontology} -\usage{ -convert_ontology( - ont = get_hpo(), - terms = unique(ont$id), - remove_terms = grep(":", terms, invert = TRUE, value = TRUE), - to = c("adjacency", "descendancy", "dist", "dist_hclust", "dist_hclust_dendrogram", - "clusters", "igraph", "igraph_dist", "igraph_dist_hclust", - "igraph_dist_hclust_dendrogram", "tidygraph", "data.frame", "data.table"), - as_sparse = FALSE -) -} -\arguments{ -\item{ont}{An \link[ontologyIndex]{ontology_index} object.} - -\item{terms}{Character vector of term IDs to include.} - -\item{remove_terms}{Character vector of term IDs to exclude.} - -\item{to}{A character string specifying the format to convert to.} - -\item{as_sparse}{If TRUE, return a sparse matrix where possible.} -} -\value{ -An object of the specified format. -} -\description{ -Convert an \link[ontologyIndex]{ontology_index} to -a number of other useful formats. -} -\examples{ -ont <- get_hpo() -terms <- head(ont$id,100) -obj <- convert_ontology(ont=ont, terms=terms, to="igraph_dist_hclust") -} diff --git a/man/disease_id_to_omop.Rd b/man/disease_id_to_omop.Rd index 9cb7006..2a7bd6b 100644 --- a/man/disease_id_to_omop.Rd +++ b/man/disease_id_to_omop.Rd @@ -10,7 +10,7 @@ An object of class \code{data.table} (inherits from \code{data.frame}) with 5354 \source{ \code{ annot <- load_phenotype_to_genes(3) - id_col <- "disease_id" + input_col <- "disease_id" ## NOTE: must keep batch_size=1 as the OARD API returns results only for ## the IDs it can map. This leads to a mismatch between the input and output ## which is exacerbated that the concept_id is automatically converted to @@ -21,8 +21,8 @@ An object of class \code{data.table} (inherits from \code{data.frame}) with 5354 data.table::setnames(disease_id_to_omop, toupper(gsub("concept_","OMOP_",names(disease_id_to_omop))) ) - data.table::setnames(disease_id_to_omop,"QUERY",id_col) - disease_id_to_omop <- disease_id_to_omop[,c(id_col,"OMOP_ID","OMOP_NAME"), + data.table::setnames(disease_id_to_omop,"QUERY",input_col) + disease_id_to_omop <- disease_id_to_omop[,c(input_col,"OMOP_ID","OMOP_NAME"), with=FALSE] usethis::use_data(disease_id_to_omop, overwrite = TRUE) } diff --git a/man/example_phenos.Rd b/man/example_phenos.Rd deleted file mode 100644 index 602e240..0000000 --- a/man/example_phenos.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/example_phenos.R -\name{example_phenos} -\alias{example_phenos} -\title{Example phenotypes dataframe} -\usage{ -example_phenos(i = seq(10)) -} -\arguments{ -\item{i}{Indices of HPO IDs to use.} -} -\value{ -phenotype data.table -} -\description{ -Create a minimal example of a phenos dataframe. -} -\examples{ -phenos <- example_phenos() -} diff --git a/man/filter_.Rd b/man/filter_.Rd new file mode 100644 index 0000000..48c1d92 --- /dev/null +++ b/man/filter_.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/_docs.R, R/filter_descendants.R +\name{filter_} +\alias{filter_} +\alias{filter_descendants} +\title{Filter functions} +\usage{ +filter_descendants(phenos, ancestor = NULL, hpo = get_hpo()) +} +\arguments{ +\item{phenos}{A data.table containing HPO IDs and other metadata.} + +\item{ancestor}{Phenotype names (or HPO ID) of an ancestor in the +\href{https://hpo.jax.org/}{Human Phenotype Ontology}. +Only phenotypes that are descendants of this ancestor will be kept. +Set to \code{NULL} (default) to skip this filtering step.} + +\item{hpo}{Human Phenotype Ontology object, +loaded from \link[KGExplorer]{get_ontology}.} +} +\value{ +Filtered data. + +data.table of phenotypes, with additional columns: +"ancestor", "ancestor_id" +} +\description{ +Functions to filter data.table objects. +} +\section{Functions}{ +\itemize{ +\item \code{filter_descendants()}: filter_ +Filter descendants + +Subset a \code{phenos} data.table to only +descendants of an ancestor HPO ID term. + +}} +\examples{ +phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay") +phenos2 <- filter_descendants(phenos = phenos, + ancestor = "Motor delay") +} +\concept{filter_} diff --git a/man/fix_hpo_ids.Rd b/man/fix_hpo_ids.Rd index a3d6560..cd903f4 100644 --- a/man/fix_hpo_ids.Rd +++ b/man/fix_hpo_ids.Rd @@ -4,15 +4,11 @@ \alias{fix_hpo_ids} \title{Fix missing HPO IDs} \usage{ -fix_hpo_ids(dt, phenotype_to_genes = load_phenotype_to_genes()) +fix_hpo_ids(dat, phenotype_to_genes = load_phenotype_to_genes()) } \arguments{ -\item{dt}{\link[data.table]{data.table} +\item{dat}{\link[data.table]{data.table} containing a column named "hpo_name".} - -\item{phenotype_to_genes}{Output of -\link[HPOExplorer]{load_phenotype_to_genes} mapping phenotypes -to gene annotations.} } \value{ data.table with the column "hpo_id" diff --git a/man/get_.Rd b/man/get_.Rd new file mode 100644 index 0000000..2dfbbdd --- /dev/null +++ b/man/get_.Rd @@ -0,0 +1,95 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/_docs.R, R/get_hpo.R, R/get_hpo_id_direct.R +\name{get_} +\alias{get_} +\alias{get_hpo} +\alias{get_hpo_id_direct} +\title{Get functions} +\usage{ +get_hpo(lvl, force_new = FALSE, ...) + +get_hpo_id_direct(term, hpo = get_hpo()) +} +\arguments{ +\item{lvl}{How many levels deep into the ontology to get ancestors from. +For example: +\itemize{ +\item{1: }{"All"} +\item{2: }{"Phenotypic abnormality"} +\item{3: }{"Abnormality of the nervous system"} +\item{4: }{"Abnormality of nervous system physiology"} +\item{5: }{"Neurodevelopmental abnormality" or "Behavioral abnormality"} +}} + +\item{force_new}{Force a new download or creation of a data resource.} + +\item{...}{ + Arguments passed on to \code{\link[KGExplorer:get_]{KGExplorer::get_ontology}} + \describe{ + \item{\code{save_dir}}{Directory to save a file to.} + \item{\code{name}}{\itemize{ +\item{"mondo"}{ +Import the \href{https://mondo.monarchinitiative.org/}{Mondo} ontology. +\href{https://github.com/monarch-initiative/mondo/release}{ +GitHub.}}. +\item{"hpo"}{ +Import the \href{https://hpo.jax.org/app/}{Human Phenotype Ontology}. +\href{https://github.com/obophenotype/human-phenotype-ontology/release}{ +GitHub.}} +}} + \item{\code{terms}}{Term IDs to include. Can alternatively be an integer, +which will be used to randomly sample N terms from the data.} + \item{\code{filetype}}{File type to search for.} + \item{\code{method}}{Compute ontology levels using: +\itemize{ + \item{"height" (default)} \link[simona]{dag_height}. + \item{"depth"} \link[simona]{dag_depth}. +}} + \item{\code{add_metadata}}{Add metadata to the resulting ontology object.} + \item{\code{add_ancestors}}{Add ancestors for each term.} + \item{\code{add_n_edges}}{Add the number of edges (connections) for each term.} + \item{\code{add_ontology_levels}}{Add the ontology level for each term.} + }} + +\item{term}{One or more ontology IDs.} + +\item{hpo}{Human Phenotype Ontology object, +loaded from \link[KGExplorer]{get_ontology}.} +} +\value{ +Data. + +\link[simona]{ontology_DAG} object. + +The HPO ID(s) of phenotype(s) +} +\description{ +Functions to get data objects and extract elements. +} +\section{Functions}{ +\itemize{ +\item \code{get_hpo()}: get_ +Get Human Phenotype Ontology (HPO) + +Updated version of Human Phenotype Ontology (HPO). +Created from the OBO files distributed by the HPO project's +\href{https://github.com/obophenotype/human-phenotype-ontology}{GitHub}. + +By comparison, the \code{hpo} data from \pkg{ontologyIndex} is from 2016. +Note that the maximum ontology level depth in the 2016 version was 14, +whereas in the 2023 version the maximum ontology level depth is 16 +(due to an expansion of the HPO). + +\item \code{get_hpo_id_direct()}: get_ +Get HPO term ID direct + +Directly retrieves it from the HPO +ontology object. + +}} +\examples{ +hpo <- get_hpo() +term = "Phenotypic abnormality" +pheno_abnormality_id <- get_hpo_id_direct(term = term) +} +\concept{get_} diff --git a/man/get_data.Rd b/man/get_data.Rd index 2b911cf..708f5b2 100644 --- a/man/get_data.Rd +++ b/man/get_data.Rd @@ -8,7 +8,7 @@ get_data( file, tag = "latest", repo = "neurogenomics/HPOExplorer", - save_dir = tools::R_user_dir(package = "HPOExplorer", which = "cache"), + save_dir = KGExplorer::cache_dir(package = "HPOExplorer"), add_version = FALSE, overwrite = TRUE ) diff --git a/man/get_gencc.Rd b/man/get_gencc.Rd deleted file mode 100644 index 4e7c155..0000000 --- a/man/get_gencc.Rd +++ /dev/null @@ -1,47 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_gencc.R -\name{get_gencc} -\alias{get_gencc} -\title{Get GenCC} -\usage{ -get_gencc( - agg_by = c("disease_id", "gene_symbol"), - dict = c(Definitive = 6, Strong = 5, Moderate = 4, Supportive = 3, Limited = 2, - `Disputed Evidence` = 1, `Refuted Evidence` = 0, `No Known Disease Relationship` = 0), - save_dir = tools::R_user_dir("HPOExplorer", which = "cache"), - verbose = TRUE -) -} -\arguments{ -\item{agg_by}{Column to aggregate metadata by.} - -\item{dict}{A named vector of evidence score mappings. -See \href{https://thegencc.org/faq.html#validity-termsdelphi-survey}{here} -for more information.} - -\item{save_dir}{Directory to save data to.} - -\item{verbose}{Print messages.} -} -\value{ -\link[data.table]{data.table} with columns: -\itemize{ -\item{"disease_id": }{Disease ID.} -\item{"gene_symbol": }{Gene symbol.} -\item{"evidence_score": }{Evidence score.} -} -} -\description{ -Get phenotype-gene evidence score from the -\href{https://thegencc.org/}{Gene Curation Coalition}. -Data downloaded from \href{https://search.thegencc.org/download}{here}.\cr -\emph{NOTE:} Due to licensing restrictions, a GenCC download does not -include OMIM data. OMIM data can be accessed and downloaded through -\href{https://omim.org/downloads/}{OMIM}.\cr -\emph{NOTE:} GenCC does not currently have any systematic versioning. -There for the \code{attr(obj,"version")} attribute is set to the date it was -downloaded and cached by \link[HPOExplorer]{get_gencc}. -} -\examples{ -d <- get_gencc() -} diff --git a/man/get_gene_lists.Rd b/man/get_gene_lists.Rd index f30996c..aad0a20 100644 --- a/man/get_gene_lists.Rd +++ b/man/get_gene_lists.Rd @@ -17,10 +17,11 @@ get_gene_lists( (e.g. "Phenotypic abnormality") \.} \item{phenotype_to_genes}{Output of -\link[HPOExplorer]{load_phenotype_to_genes} mapping phenotypes +\link{load_phenotype_to_genes} mapping phenotypes to gene annotations.} -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} +\item{hpo}{Human Phenotype Ontology object, +loaded from \link[KGExplorer]{get_ontology}.} \item{as_list}{Return as a named list instead of a data.table.} diff --git a/man/get_hpo.Rd b/man/get_hpo.Rd deleted file mode 100644 index 7985c1f..0000000 --- a/man/get_hpo.Rd +++ /dev/null @@ -1,43 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_hpo.R -\name{get_hpo} -\alias{get_hpo} -\title{Get Human Phenotype Ontology (HPO)} -\source{ -\href{https://bioportal.bioontology.org/ontologies/HP}{BioPortal} -\code{ -hpo <- HPOExplorer:::make_ontology(upload_tag="latest") -} -} -\usage{ -get_hpo( - save_dir = tools::R_user_dir("HPOExplorer", which = "cache"), - tag = "latest", - overwrite = TRUE -) -} -\arguments{ -\item{save_dir}{Directory to save data to.} - -\item{tag}{tag for the GitHub release to which this data should be attached.} - -\item{overwrite}{Should any local files of the same name be overwritten? -default \code{TRUE}.} -} -\value{ -\link[ontologyIndex]{ontology_index} object. -} -\description{ -Updated version of Human Phenotype Ontology (HPO) from 2023-10-09. -Created from the OBO files distributed by the HPO project's -\href{https://github.com/obophenotype/human-phenotype-ontology}{GitHub}. -} -\details{ -By comparison, the \code{hpo} data from \pkg{ontologyIndex} is from 2016. -Note that the maximum ontology level depth in the 2016 version was 14, -whereas in the 2023 version the maximum ontology level depth is 16 -(due to an expansion of the HPO). -} -\examples{ -hpo <- get_hpo() -} diff --git a/man/get_hpo_id_direct.Rd b/man/get_hpo_id_direct.Rd deleted file mode 100644 index 2d5dd15..0000000 --- a/man/get_hpo_id_direct.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_hpo_id_direct.R -\name{get_hpo_id_direct} -\alias{get_hpo_id_direct} -\title{Get HPO term ID direct} -\usage{ -get_hpo_id_direct(phenotype, hpo = get_hpo()) -} -\arguments{ -\item{phenotype}{One or more phenotype names in to get HPO IDs for.} - -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} -} -\value{ -The HPO ID(s) of phenotype(s) -} -\description{ -Directly retrieves it from the HPO -ontology object (found in the \pkg{ontologyIndex} package \code{data(hpo)}). -} -\examples{ -phenotype = "Phenotypic abnormality" -pheno_abnormality_id <- get_hpo_id_direct(phenotype = phenotype) -} diff --git a/man/get_kg_monarch.Rd b/man/get_kg_monarch.Rd deleted file mode 100644 index 1a10159..0000000 --- a/man/get_kg_monarch.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_kg_monarch.R -\name{get_kg_monarch} -\alias{get_kg_monarch} -\title{Get knowledge graph: Monarch} -\source{ -\href{https://pubmed.ncbi.nlm.nih.gov/37707514/}{BioThings Explorer} -} -\usage{ -get_kg_monarch() -} -\description{ -Option 1: Use the \href{https://api.monarchinitiative.org/api/}{biolink API} -to efficiently extract specific subset of data -from the Monarch server. -Option 2: Import the entire knowledge graph from the -\href{https://data.monarchinitiative.org/monarch-kg/latest/}{Monarch server}. -} diff --git a/man/get_monarch.Rd b/man/get_monarch.Rd deleted file mode 100644 index 014f18c..0000000 --- a/man/get_monarch.Rd +++ /dev/null @@ -1,85 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_monarch.R -\name{get_monarch} -\alias{get_monarch} -\title{Get Monarch} -\usage{ -get_monarch( - file = c("gene_to_gene", "phenotype_to_gene", "disease_to_model", - "phenotype_to_phenotype"), - file_map = list(gene_to_gene = paste0("monarch-kg-dev/latest/tsv/all_associations/", - "gene_to_gene_homology_association.all.tsv.gz"), phenotype_to_gene = - paste0("monarch-kg-dev/latest/tsv/all_associations/", - "gene_to_phenotypic_feature_association.all.tsv.gz"), disease_to_model = - paste0("latest/tsv/model_associations/", "model_disease.all.tsv.gz"), - phenotype_to_phenotype = paste0("upheno2/current/upheno-release/all/", - "upheno_mapping_all.csv")), - domain = "https://data.monarchinitiative.org", - ... -) -} -\arguments{ -\item{file}{Short name of the file to retrieve. -\itemize{ -\item{"gene_to_gene"}{Gene-to-gene mappings across species orthologous.} -\item{"phenotype_to_gene"}{Phenotype-to-gene mappings for multiple species.} -\item{"disease_to_model"}{Disease-to-model mappings for multiple -model species.} -\item{"phenotype_to_phenotype"}{Phenotype-to-phenotype mappings across -species homologs.} -}} - -\item{file_map}{Mapping between short name and full name of each file.} - -\item{domain}{Monarch Initiative server domain.} - -\item{...}{ - Arguments passed on to \code{\link[data.table:fread]{data.table::fread}} - \describe{ - \item{\code{input}}{ A single character string. The value is inspected and deferred to either \code{file=} (if no \\n present), \code{text=} (if at least one \\n is present) or \code{cmd=} (if no \\n is present, at least one space is present, and it isn't a file name). Exactly one of \code{input=}, \code{file=}, \code{text=}, or \code{cmd=} should be used in the same call. } - \item{\code{text}}{ The input data itself as a character vector of one or more lines, for example as returned by \code{readLines()}. } - \item{\code{cmd}}{ A shell command that pre-processes the file; e.g. \code{fread(cmd=paste("grep",word,"filename"))}. See Details. } - \item{\code{sep}}{ The separator between columns. Defaults to the character in the set \code{[,\\t |;:]} that separates the sample of rows into the most number of lines with the same number of fields. Use \code{NULL} or \code{""} to specify no separator; i.e. each line a single character column like \code{base::readLines} does.} - \item{\code{sep2}}{ The separator \emph{within} columns. A \code{list} column will be returned where each cell is a vector of values. This is much faster using less working memory than \code{strsplit} afterwards or similar techniques. For each column \code{sep2} can be different and is the first character in the same set above [\code{,\\t |;}], other than \code{sep}, that exists inside each field outside quoted regions in the sample. NB: \code{sep2} is not yet implemented. } - \item{\code{nrows}}{ The maximum number of rows to read. Unlike \code{read.table}, you do not need to set this to an estimate of the number of rows in the file for better speed because that is already automatically determined by \code{fread} almost instantly using the large sample of lines. \code{nrows=0} returns the column names and typed empty columns determined by the large sample; useful for a dry run of a large file or to quickly check format consistency of a set of files before starting to read any of them. } - \item{\code{header}}{ Does the first data line contain column names? Defaults according to whether every non-empty field on the first data line is type character. If so, or TRUE is supplied, any empty column names are given a default name. } - \item{\code{na.strings}}{ A character vector of strings which are to be interpreted as \code{NA} values. By default, \code{",,"} for columns of all types, including type \code{character} is read as \code{NA} for consistency. \code{,"",} is unambiguous and read as an empty string. To read \code{,NA,} as \code{NA}, set \code{na.strings="NA"}. To read \code{,,} as blank string \code{""}, set \code{na.strings=NULL}. When they occur in the file, the strings in \code{na.strings} should not appear quoted since that is how the string literal \code{,"NA",} is distinguished from \code{,NA,}, for example, when \code{na.strings="NA"}. } - \item{\code{stringsAsFactors}}{ Convert all character columns to factors? } - \item{\code{verbose}}{ Be chatty and report timings? } - \item{\code{skip}}{ If 0 (default) start on the first line and from there finds the first row with a consistent number of columns. This automatically avoids irregular header information before the column names row. \code{skip>0} means ignore the first \code{skip} rows manually. \code{skip="string"} searches for \code{"string"} in the file (e.g. a substring of the column names row) and starts on that line (inspired by read.xls in package gdata). } - \item{\code{select}}{ A vector of column names or numbers to keep, drop the rest. \code{select} may specify types too in the same way as \code{colClasses}; i.e., a vector of \code{colname=type} pairs, or a \code{list} of \code{type=col(s)} pairs. In all forms of \code{select}, the order that the columns are specified determines the order of the columns in the result. } - \item{\code{drop}}{ Vector of column names or numbers to drop, keep the rest. } - \item{\code{colClasses}}{ As in \code{\link[utils:read.table]{utils::read.csv}}; i.e., an unnamed vector of types corresponding to the columns in the file, or a named vector specifying types for a subset of the columns by name. The default, \code{NULL} means types are inferred from the data in the file. Further, \code{data.table} supports a named \code{list} of vectors of column names \emph{or numbers} where the \code{list} names are the class names; see examples. The \code{list} form makes it easier to set a batch of columns to be a particular class. When column numbers are used in the \code{list} form, they refer to the column number in the file not the column number after \code{select} or \code{drop} has been applied. - If type coercion results in an error, introduces \code{NA}s, or would result in loss of accuracy, the coercion attempt is aborted for that column with warning and the column's type is left unchanged. If you really desire data loss (e.g. reading \code{3.14} as \code{integer}) you have to truncate such columns afterwards yourself explicitly so that this is clear to future readers of your code. - } - \item{\code{integer64}}{ "integer64" (default) reads columns detected as containing integers larger than 2^31 as type \code{bit64::integer64}. Alternatively, \code{"double"|"numeric"} reads as \code{utils::read.csv} does; i.e., possibly with loss of precision and if so silently. Or, "character". } - \item{\code{dec}}{ The decimal separator as in \code{utils::read.csv}. If not "." (default) then usually ",". See details. } - \item{\code{col.names}}{ A vector of optional names for the variables (columns). The default is to use the header column if present or detected, or if not "V" followed by the column number. This is applied after \code{check.names} and before \code{key} and \code{index}. } - \item{\code{check.names}}{default is \code{FALSE}. If \code{TRUE} then the names of the variables in the \code{data.table} are checked to ensure that they are syntactically valid variable names. If necessary they are adjusted (by \code{\link{make.names}}) so that they are, and also to ensure that there are no duplicates.} - \item{\code{encoding}}{ default is \code{"unknown"}. Other possible options are \code{"UTF-8"} and \code{"Latin-1"}. Note: it is not used to re-encode the input, rather enables handling of encoded strings in their native encoding. } - \item{\code{quote}}{ By default (\code{"\\""}), if a field starts with a double quote, \code{fread} handles embedded quotes robustly as explained under \code{Details}. If it fails, then another attempt is made to read the field \emph{as is}, i.e., as if quotes are disabled. By setting \code{quote=""}, the field is always read as if quotes are disabled. It is not expected to ever need to pass anything other than \\"\\" to quote; i.e., to turn it off. } - \item{\code{strip.white}}{ default is \code{TRUE}. Strips leading and trailing whitespaces of unquoted fields. If \code{FALSE}, only header trailing spaces are removed. } - \item{\code{fill}}{logical (default is \code{FALSE}). If \code{TRUE} then in case the rows have unequal length, blank fields are implicitly filled.} - \item{\code{blank.lines.skip}}{\code{logical}, default is \code{FALSE}. If \code{TRUE} blank lines in the input are ignored.} - \item{\code{key}}{Character vector of one or more column names which is passed to \code{\link[data.table]{setkey}}. It may be a single comma separated string such as \code{key="x,y,z"}, or a vector of names such as \code{key=c("x","y","z")}. Only valid when argument \code{data.table=TRUE}. Where applicable, this should refer to column names given in \code{col.names}. } - \item{\code{index}}{ Character vector or list of character vectors of one or more column names which is passed to \code{\link[data.table]{setindexv}}. As with \code{key}, comma-separated notation like \code{index="x,y,z"} is accepted for convenience. Only valid when argument \code{data.table=TRUE}. Where applicable, this should refer to column names given in \code{col.names}. } - \item{\code{showProgress}}{ \code{TRUE} displays progress on the console if the ETA is greater than 3 seconds. It is produced in fread's C code where the very nice (but R level) txtProgressBar and tkProgressBar are not easily available. } - \item{\code{data.table}}{ TRUE returns a \code{data.table}. FALSE returns a \code{data.frame}. The default for this argument can be changed with \code{options(datatable.fread.datatable=FALSE)}.} - \item{\code{nThread}}{The number of threads to use. Experiment to see what works best for your data on your hardware.} - \item{\code{logical01}}{If TRUE a column containing only 0s and 1s will be read as logical, otherwise as integer.} - \item{\code{keepLeadingZeros}}{If TRUE a column containing numeric data with leading zeros will be read as character, otherwise leading zeros will be removed and converted to numeric.} - \item{\code{yaml}}{ If \code{TRUE}, \code{fread} will attempt to parse (using \code{\link[yaml]{yaml.load}}) the top of the input as YAML, and further to glean parameters relevant to improving the performance of \code{fread} on the data itself. The entire YAML section is returned as parsed into a \code{list} in the \code{yaml_metadata} attribute. See \code{Details}. } - \item{\code{autostart}}{ Deprecated and ignored with warning. Please use \code{skip} instead. } - \item{\code{tmpdir}}{ Directory to use as the \code{tmpdir} argument for any \code{tempfile} calls, e.g. when the input is a URL or a shell command. The default is \code{tempdir()} which can be controlled by setting \code{TMPDIR} before starting the R session; see \code{\link[base:tempfile]{base::tempdir}}. } - \item{\code{tz}}{ Relevant to datetime values which have no Z or UTC-offset at the end, i.e. \emph{unmarked} datetime, as written by \code{\link[utils:write.table]{utils::write.csv}}. The default \code{tz="UTC"} reads unmarked datetime as UTC POSIXct efficiently. \code{tz=""} reads unmarked datetime as type character (slowly) so that \code{as.POSIXct} can interpret (slowly) the character datetimes in local timezone; e.g. by using \code{"POSIXct"} in \code{colClasses=}. Note that \code{fwrite()} by default writes datetime in UTC including the final Z and therefore \code{fwrite}'s output will be read by \code{fread} consistently and quickly without needing to use \code{tz=} or \code{colClasses=}. If the \code{TZ} environment variable is set to \code{"UTC"} (or \code{""} on non-Windows where unset vs `""` is significant) then the R session's timezone is already UTC and \code{tz=""} will result in unmarked datetimes being read as UTC POSIXct. For more information, please see the news items from v1.13.0 and v1.14.0. } - }} -} -\value{ -\link[data.table]{data.table} -} -\description{ -Get key datasets from the Monarch Initiative server. -} -\examples{ -dat <- get_monarch() -} diff --git a/man/get_mondo.Rd b/man/get_mondo.Rd deleted file mode 100644 index 75809be..0000000 --- a/man/get_mondo.Rd +++ /dev/null @@ -1,36 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_mondo.R -\name{get_mondo} -\alias{get_mondo} -\title{Get Mondo Disease Ontology} -\source{ -\code{ -mondo <- HPOExplorer:::make_ontology(file="mondo-base.obo", - repo="monarch-initiative/mondo", - upload_tag="latest") -} -} -\usage{ -get_mondo( - save_dir = tools::R_user_dir("HPOExplorer", which = "cache"), - tag = "latest", - overwrite = TRUE -) -} -\arguments{ -\item{save_dir}{Directory to save data to.} - -\item{tag}{tag for the GitHub release to which this data should be attached.} - -\item{overwrite}{Should any local files of the same name be overwritten? -default \code{TRUE}.} -} -\value{ -\link[ontologyIndex]{ontology_index} -} -\description{ -Import \href{https://mondo.monarchinitiative.org/}{Mondo} as an R object. -} -\examples{ -mondo <- get_mondo() -} diff --git a/man/get_ont_lvl.Rd b/man/get_ont_lvl.Rd deleted file mode 100644 index e36d1cd..0000000 --- a/man/get_ont_lvl.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_ont_lvl.R -\name{get_ont_lvl} -\alias{get_ont_lvl} -\title{Get HPO term level} -\source{ -\code{ -lvl_absolute <- get_ont_lvl(term = "HP:0000001", absolute=TRUE) -lvl_relative <- get_ont_lvl(term = "HP:0000001", absolute=FALSE) -} -} -\usage{ -get_ont_lvl( - term, - hpo = get_hpo(), - adjacency = NULL, - absolute = TRUE, - verbose = FALSE -) -} -\arguments{ -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{adjacency}{An adjacency matrix generated -by \link[HPOExplorer]{adjacency_matrix}.} - -\item{verbose}{Print messages.} -} -\value{ -Ontology level of HPO ID. -} -\description{ -In this function, ontology level refers to the number of generations of -sub-phenotypes a term has below it in the HPO DAG. For example, the root of -the HPO is term "HP:0000001", the longest path to a term with no child terms -is 14. In other words there are 14 generations of "is-a" relationships below -HP:0000001 and it is therefore at ontology level 14. A term with no -sub-phenotypes below it (a leaf node), is at ontology level 0. -} -\keyword{internal} diff --git a/man/get_ont_lvls.Rd b/man/get_ont_lvls.Rd deleted file mode 100644 index 5bc6a29..0000000 --- a/man/get_ont_lvls.Rd +++ /dev/null @@ -1,61 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_ont_lvls.R -\name{get_ont_lvls} -\alias{get_ont_lvls} -\title{Get ontology level for HPO terms} -\usage{ -get_ont_lvls( - terms, - hpo = get_hpo(), - adjacency = NULL, - absolute = TRUE, - exclude_top_lvl = TRUE, - reverse = TRUE, - verbose = TRUE -) -} -\arguments{ -\item{terms}{Character vector of term IDs to include.} - -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{adjacency}{An adjacency matrix generated -by \link[HPOExplorer]{adjacency_matrix}.} - -\item{absolute}{Make the levels absolute in the sense that they consider -the entire HPO ontology (\code{TRUE}). -Otherwise, levels will be relative to only the HPO terms that are in -the provided subset of \code{terms} AND are directly adjacent (connected) -to a given cluster of terms (\code{FALSE}).} - -\item{exclude_top_lvl}{Exclude the top level term of the HPO -(i.e. "All" (HP:0000001)) when computing \emph{absolute} levels. -This argument is not used when computing \emph{relative} levels.} - -\item{reverse}{If \code{TRUE}, ontology -level numbers with be revered such that the level of the parent terms -are larger than the child terms.} - -\item{verbose}{Print messages.} -} -\value{ -A named vector of relative ontology level, -where names are HPO Ids and -value is relative ontology level. -} -\description{ -For a given set of HPO terms, get their level -within the hierarchically organised HPO ontology. -Ontology level can be computed either absolute mode (\code{absolute=TRUE}) -where the entire ontology is considered when assigning levels, or -relative mode (\code{absolute=FALSE}) where only a subset of the ontology -that is connected to a given term is considered when assigning levels. -Relative mode can be helpful when trying to make plot where nodes are -scaled to the ontology level. -} -\examples{ -terms <- ontologyIndex::get_descendants(ontology = get_hpo(), - roots = "HP:0000002") -lvls <- get_ont_lvls(terms = terms) -lvls_abs <- get_ont_lvls(terms = terms, absolute=TRUE) -} diff --git a/man/get_ont_lvls2.Rd b/man/get_ont_lvls2.Rd deleted file mode 100644 index a530992..0000000 --- a/man/get_ont_lvls2.Rd +++ /dev/null @@ -1,50 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_ont_lvls2.R -\name{get_ont_lvls2} -\alias{get_ont_lvls2} -\title{Get relative ontology level for multiple HPO terms} -\usage{ -get_ont_lvls2( - terms, - hpo = get_hpo(), - adjacency = adjacency_matrix(terms = terms, hpo = hpo), - absolute = FALSE, - reverse = TRUE, - verbose = TRUE -) -} -\arguments{ -\item{terms}{Character vector of term IDs to include.} - -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{adjacency}{An adjacency matrix generated -by \link[HPOExplorer]{adjacency_matrix}.} - -\item{absolute}{Make the levels absolute in the sense that they consider -the entire HPO ontology (\code{TRUE}). -Otherwise, levels will be relative to only the subset \code{terms} provided -(\code{FALSE}).} - -\item{reverse}{A boolean, if TRUE it will reverse the ontology -level numbers so that -the parent terms are larger than the child terms.} - -\item{verbose}{Print messages.} -} -\value{ -A named vector of relative ontology level, -where names are HPO Ids and -value is relative ontology level. -} -\description{ -This calls the \code{get_ont_lvl} function on all phenotypes in a -subset of the HPO. The subset chosen when creating the adjacency from the main -adjacency matrix of all phenotypes. So, the phenotypes can be found in the -row and column names of adjacency. -} -\examples{ -terms <- ontologyIndex::get_descendants(ontology = get_hpo(), - roots = "HP:0000002") -lvls <- get_ont_lvls2(terms = terms) -} diff --git a/man/get_term_definition_api.Rd b/man/get_term_definition_api.Rd index 2dc207c..78c2e5d 100644 --- a/man/get_term_definition_api.Rd +++ b/man/get_term_definition_api.Rd @@ -6,9 +6,6 @@ \usage{ get_term_definition_api(term, line_length = FALSE) } -\arguments{ -\item{term}{One or more HPO IDs.} -} \value{ Character vector of definitions. } diff --git a/man/get_upheno.Rd b/man/get_upheno.Rd deleted file mode 100644 index 1386e0c..0000000 --- a/man/get_upheno.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_upheno.R -\name{get_upheno} -\alias{get_upheno} -\title{Get UPHENO} -\usage{ -get_upheno(file = c("ontology", "bestmatches", "upheno_mapping")) -} -\arguments{ -\item{file}{Can be one of the following: -\itemize{ -\item{"ontology"}{Creates an \link[ontologyIndex]{ontologyIndex} R object by -importing the OBO file directly from the official -\href{https://github.com/obophenotype/upheno}{UPHENO GitHub repository}.} -\item{"bestmatches"}{Returns a merged table with the best matches between -human and non-human homologous phenotypes (from multiple species). -Distributed by the official -\href{https://github.com/obophenotype/upheno/tree/master/mappings}{ -UPHENO GitHub repository}.} -\item{"upheno_mapping"}{Return a merged table with matches bteween human -and non-human homologous phenotypes (from multiple species). -Distributed by the -\href{https://data.monarchinitiative.org/upheno2/current/upheno-release/all/index.html}{ -Monarch Initiative server}.} -}} -} -\value{ -\link[ontologyIndex]{ontologyIndex} or -\link[data.table]{data.table}. -} -\description{ -Get data from the \href{https://github.com/obophenotype/upheno}{ -Unified Phenotype Ontology (UPHENO)}. -} -\examples{ -upheno <- get_upheno() -} diff --git a/man/get_version.Rd b/man/get_version.Rd deleted file mode 100644 index 0ed0bb3..0000000 --- a/man/get_version.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_version.R -\name{get_version} -\alias{get_version} -\title{Get version} -\usage{ -get_version(obj, return_version = FALSE, verbose = TRUE) -} -\arguments{ -\item{obj}{An object from -\link[HPOExplorer]{get_hpo} or -\link[HPOExplorer]{load_phenotype_to_genes}.} - -\item{return_version}{Return the version as a character string.} - -\item{verbose}{Print messages.} -} -\value{ -HPO Release version a character string. -} -\description{ -Extract the precise version of the HPO release that the data object -was built from. All HPO Releases can be found at the official -\href{https://github.com/obophenotype/human-phenotype-ontology/releases}{ -HPO GitHub Releases page}. -} -\examples{ -obj <- get_hpo() -get_version(obj=obj) -} diff --git a/man/gpt_annot_plot.Rd b/man/gpt_annot_plot.Rd index fb192b9..8284eb9 100644 --- a/man/gpt_annot_plot.Rd +++ b/man/gpt_annot_plot.Rd @@ -18,13 +18,13 @@ gpt_annot_plot( \item{keep_ont_levels}{Only keep phenotypes at certain \emph{absolute} ontology levels to keep. -See \link[HPOExplorer]{add_ont_lvl} for details.} +See \link{add_ont_lvl} for details.} \item{remove_descendants}{Remove HPO terms that are descendants of a given ancestral HPO term. Ancestral terms be provided as a character vector of phenotype names (e.g. \code{c("Clinical course")}), HPO IDs (e.g. \code{"HP:0031797" }) or a mixture of the two. -See \link[HPOExplorer]{add_ancestor} for details.} +See \link{add_ancestor} for details.} \item{top_n}{Top number of most severe phenotypes to plot in heatmap.} diff --git a/man/harmonise_phenotypes.Rd b/man/harmonise_phenotypes.Rd deleted file mode 100644 index 1be50b3..0000000 --- a/man/harmonise_phenotypes.Rd +++ /dev/null @@ -1,54 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/harmonise_phenotypes.R -\name{harmonise_phenotypes} -\alias{harmonise_phenotypes} -\title{Harmonise phenotypes} -\usage{ -harmonise_phenotypes( - phenotypes, - hpo = get_hpo(), - as_hpo_ids = FALSE, - validate = TRUE, - ignore_case = TRUE, - keep_order = TRUE, - invert = FALSE, - verbose = TRUE -) -} -\arguments{ -\item{phenotypes}{A character vector of phenotype names and/or HPO IDs.} - -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{as_hpo_ids}{Return all \code{phenotypes} as HPO IDs (\code{TRUE}), -or phenotype names (\code{FALSE}).} - -\item{validate}{Remove any phenotype names or IDs that cannot be found in the -\code{hpo}.} - -\item{ignore_case}{Ignore casing when searching for matches.} - -\item{keep_order}{Return a named list of the same length and order -as \code{phenotypes}. -If \code{FALSE}, return a named list of only the unique \code{phenotypes}, -sometimes in a different order.} - -\item{invert}{Invert the keys/values of the dictionary, -such that the key becomes the values (and vice versa).} - -\item{verbose}{Print messages.} -} -\value{ -Character vector -} -\description{ -Harmonise a mixed vector of phenotype names (e.g. "Focal motor seizure") -and HPO IDs (e.g. c("HP:0000002","HP:0000003")). -} -\examples{ -phenotypes <- c("Focal motor seizure","HP:0000002","HP:0000003") -#### As phenotype names #### -pheno_names <- harmonise_phenotypes(phenotypes=phenotypes) -#### As HPO IDs #### -pheno_ids <- harmonise_phenotypes(phenotypes=phenotypes, as_hpo_ids=TRUE) -} diff --git a/man/hpo_deaths.Rd b/man/hpo_deaths.Rd index 8640b07..9d277e7 100644 --- a/man/hpo_deaths.Rd +++ b/man/hpo_deaths.Rd @@ -9,9 +9,7 @@ data.table } \source{ \code{ -terms <- ontologyIndex::get_descendants(ontology = get_hpo(), - roots = "HP:0011420", - exclude_roots = TRUE) +terms <- simona::dag_offspring(get_hpo(), term = "HP:0011420") aod <- lapply(stats::setNames(terms, terms), function(hpo_id){ message("Extracting API data for",hpo_id) @@ -19,8 +17,7 @@ aod <- lapply(stats::setNames(terms, terms), }) |> data.table::rbindlist(fill = TRUE, use.names = TRUE, idcol = "AgeOfDeath") -aod$AgeOfDeath_name <- harmonise_phenotypes(phenotypes = aod$AgeOfDeath, - as_hpo_ids = FALSE) +aod$AgeOfDeath_name <- map_phenotypes(terms = aod$AgeOfDeath) #### Convert AoD to numeric scores #### dict <- HPOExplorer:::hpo_dict(type="AgeOfDeath") aod$AgeOfDeath_score <- dict[aod$AgeOfDeath_name] diff --git a/man/hpo_frequencies.Rd b/man/hpo_frequencies.Rd index 5a938e5..63c545f 100644 --- a/man/hpo_frequencies.Rd +++ b/man/hpo_frequencies.Rd @@ -13,7 +13,7 @@ data.table \code{ annot <- load_phenotype_to_genes("phenotype.hpoa") hpo_frequencies <- HPOExplorer:::parse_pheno_frequency(annot=annot) -hpo_frequencies <- HPOExplorer:::as_ascii(dt=hpo_frequencies) +hpo_frequencies <- HPOExplorer:::as_ascii(dat=hpo_frequencies) data.table::setcolorder(hpo_frequencies,c("disease_id","hpo_id")) usethis::use_data(hpo_frequencies, overwrite = TRUE) } diff --git a/man/hpo_id_to_omop.Rd b/man/hpo_id_to_omop.Rd index 9d9bac0..df25c64 100644 --- a/man/hpo_id_to_omop.Rd +++ b/man/hpo_id_to_omop.Rd @@ -10,14 +10,14 @@ An object of class \code{data.table} (inherits from \code{data.frame}) with 1642 \source{ \code{ hpo <- get_hpo() - ids <- unique(hpo$id) + ids <- unique(hpo@terms) hpo_id_to_omop <- oard_query_api(ids = ids, workers=10) - id_col="hpo_id" + input_col="hpo_id" data.table::setnames(hpo_id_to_omop, toupper(gsub("concept_","OMOP_",names(hpo_id_to_omop))) ) - data.table::setnames(hpo_id_to_omop, "OMOP_CODE",id_col) - hpo_id_to_omop <- hpo_id_to_omop[,c(id_col,"OMOP_ID","OMOP_NAME"), + data.table::setnames(hpo_id_to_omop, "OMOP_CODE",input_col) + hpo_id_to_omop <- hpo_id_to_omop[,c(input_col,"OMOP_ID","OMOP_NAME"), with=FALSE] usethis::use_data(hpo_id_to_omop, overwrite = TRUE) } diff --git a/man/hpo_modifiers.Rd b/man/hpo_modifiers.Rd index 360b4e2..c04862c 100644 --- a/man/hpo_modifiers.Rd +++ b/man/hpo_modifiers.Rd @@ -16,7 +16,7 @@ data.table annot <- HPOExplorer::load_phenotype_to_genes(3) annot <- annot[modifier!=""] parse_mod <- function(x){ - unique(harmonise_phenotypes(strsplit(x,";")[[1]])) + unique(map_phenotypes(strsplit(x,";")[[1]])) } annot <- annot[,modifier_name:=lapply(modifier,parse_mod)][modifier!="",] annot <- annot[,.(modifier_name=unlist(modifier_name)), diff --git a/man/hpo_onsets.Rd b/man/hpo_onsets.Rd index b27cb58..1c1b73f 100644 --- a/man/hpo_onsets.Rd +++ b/man/hpo_onsets.Rd @@ -11,8 +11,7 @@ data.table \code{ annot <- load_phenotype_to_genes(file = "phenotype.hpoa") annot <- annot[onset!="",] -annot$onset_name <- harmonise_phenotypes(phenotypes = annot$onset, - as_hpo_ids = FALSE) +annot$onset_name <- map_phenotypes(terms = annot$onset) counts <- dplyr::group_by(annot, disease_id) |> dplyr::summarise(hpo_ids=length(unique(hpo_id)), onsets=length(unique(onset))) diff --git a/man/hpo_tiers_auto.Rd b/man/hpo_tiers_auto.Rd index ebdd10a..761a19e 100644 --- a/man/hpo_tiers_auto.Rd +++ b/man/hpo_tiers_auto.Rd @@ -9,7 +9,7 @@ data.table } \source{ \code{ -hpo_tiers_auto <- HPOExplorer:::assign_tiers(as_datatable=TRUE) +hpo_tiers_auto <- HPOExplorer:::make_tiers(as_datatable=TRUE) hpo_tiers_auto <- add_onset(phenos = hpo_tiers_auto) #### Filter by onset criterion #### ### Tier 1: Shortened life span: infancy @@ -28,6 +28,6 @@ data("hpo_tiers_auto") } \description{ HPO severity tiers automatically assigned using -\link[HPOExplorer]{assign_tiers}. +\link[HPOExplorer]{make_tiers}. } \keyword{datasets} diff --git a/man/hpo_to_matrix.Rd b/man/hpo_to_matrix.Rd index 5e6a930..86bd940 100644 --- a/man/hpo_to_matrix.Rd +++ b/man/hpo_to_matrix.Rd @@ -23,7 +23,7 @@ hpo_to_matrix( Set to \code{NULL} (default) to include all terms.} \item{phenotype_to_genes}{Output of -\link[HPOExplorer]{load_phenotype_to_genes} mapping phenotypes +\link{load_phenotype_to_genes} mapping phenotypes to gene annotations.} \item{formula}{A formula of the form LHS ~ RHS to cast, see Details.} diff --git a/man/igraph_to_plotly.Rd b/man/igraph_to_plotly.Rd deleted file mode 100644 index eb9c5ee..0000000 --- a/man/igraph_to_plotly.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/igraph_to_plotly.R -\name{igraph_to_plotly} -\alias{igraph_to_plotly} -\title{igraph to plotly data} -\usage{ -igraph_to_plotly( - g, - layout_func = igraph::layout.fruchterman.reingold, - dim = 3, - seed = 2023, - verbose = TRUE -) -} -\arguments{ -\item{g}{\link[igraph]{igraph} object -generated by \link[HPOExplorer]{make_igraph_object}.} - -\item{layout_func}{Layout function for the graph.} - -\item{dim}{Number of dimensions to create layout in.} - -\item{seed}{Random seed to enable reproducibility.} - -\item{verbose}{Print messages.} -} -\value{ -Named list of data.frames. -} -\description{ -Convert an igraph to data for a 3D plotly plot. -} -\keyword{internal} diff --git a/man/kde_surface.Rd b/man/kde_surface.Rd deleted file mode 100644 index 4d2f47b..0000000 --- a/man/kde_surface.Rd +++ /dev/null @@ -1,46 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/kde_surface.R -\name{kde_surface} -\alias{kde_surface} -\title{Kernel Density Estimation (KDE) surface} -\usage{ -kde_surface(xyz, n = 50, extend_kde = 1, rescale_z = TRUE) -} -\arguments{ -\item{xyz}{A data.frame containing the columns "x" and "y" (required). -Can also include a third column "z" (optional).} - -\item{n}{ -Number of grid points in each direction. Can be scalar or a length-2 -integer vector. -} - -\item{extend_kde}{Extend the limits of the KDE area by setting -\code{extend_kde>1}. -This makes the "mountains' of the KDE end more smoothly along the edges of -the x/y coordinates.} - -\item{rescale_z}{Rescale the z-axis so that the KDE can be positioned -below the original data points (when plotting them together).} -} -\value{ -\itemize{ -\item{\code{x, y}: }{ -The x and y coordinates of the grid points, vectors of length n. -} -\item{\code{z}: }{ -An \code{n[1]} by \code{n[2]} matrix of the estimated density: -rows correspond to the value of \code{x}, columns to the value of \code{y}. -} -} -} -\description{ -Compute a KDE surface based on the density of points along -an x- and y-axis. -} -\examples{ -xyz <- data.frame(x=stats::rnorm(50), - y=stats::rnorm(50), - z=stats::rnorm(50)) -kd <- kde_surface(xyz = xyz) -} diff --git a/man/list_columns.Rd b/man/list_columns.Rd index da7d33b..b9f0108 100644 --- a/man/list_columns.Rd +++ b/man/list_columns.Rd @@ -8,7 +8,7 @@ list_columns(columns = NULL, add_gene_cols = FALSE, extra_cols = NULL) } \arguments{ \item{columns}{A named vector of columns in \code{phenos} -to add to the hoverdata via \link[HPOExplorer]{make_hoverboxes}.} +to add to the hoverdata via \link[KGExplorer]{add_hoverboxes}.} \item{add_gene_cols}{Add columns related to genes.} diff --git a/man/list_deaths.Rd b/man/list_deaths.Rd deleted file mode 100644 index d85cdf8..0000000 --- a/man/list_deaths.Rd +++ /dev/null @@ -1,34 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/list_deaths.R -\name{list_deaths} -\alias{list_deaths} -\title{List age of death HPO terms} -\usage{ -list_deaths( - hpo = get_hpo(), - exclude = FALSE, - as_hpo_ids = FALSE, - include_na = TRUE, - verbose = TRUE -) -} -\arguments{ -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{exclude}{HPO phenotype names to exclude.} - -\item{as_hpo_ids}{Return as a character vector vector HPO IDs only.} - -\item{include_na}{Include NA values for onset.} - -\item{verbose}{Print messages.} -} -\value{ -Named list of HPO IDs. -} -\description{ -List age of death phenotypes in the HPO. -} -\examples{ -deaths <- list_deaths() -} diff --git a/man/list_onsets.Rd b/man/list_onsets.Rd deleted file mode 100644 index 38b2cd4..0000000 --- a/man/list_onsets.Rd +++ /dev/null @@ -1,34 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/list_onsets.R -\name{list_onsets} -\alias{list_onsets} -\title{List age of onset HPO terms} -\usage{ -list_onsets( - hpo = get_hpo(), - exclude = FALSE, - as_hpo_ids = FALSE, - include_na = TRUE, - verbose = TRUE -) -} -\arguments{ -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{exclude}{HPO phenotype names to exclude.} - -\item{as_hpo_ids}{Return as a character vector vector HPO IDs only.} - -\item{include_na}{Include NA values for onset.} - -\item{verbose}{Print messages.} -} -\value{ -Named list of HPO IDs. -} -\description{ -List age of onset phenotypes in the HPO. -} -\examples{ -onsets <- list_onsets() -} diff --git a/man/load_disease_genes.Rd b/man/load_disease_genes.Rd deleted file mode 100644 index 42992d7..0000000 --- a/man/load_disease_genes.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/load_disease_genes.R -\name{load_disease_genes} -\alias{load_disease_genes} -\title{Load disease genes} -\usage{ -load_disease_genes(verbose = TRUE) -} -\arguments{ -\item{verbose}{Print messages.} -} -\value{ -data.table -} -\description{ -Load gene lists associated with each disease phenotype from: -\itemize{ -\item{DECIPHER} -\item{ORPHANET} -\item{OMIM} -} -} -\examples{ -dat <- load_disease_genes() -} diff --git a/man/load_phenotype_to_genes.Rd b/man/load_phenotype_to_genes.Rd index 213e86c..40b4426 100644 --- a/man/load_phenotype_to_genes.Rd +++ b/man/load_phenotype_to_genes.Rd @@ -9,7 +9,7 @@ \usage{ load_phenotype_to_genes( file = c("phenotype_to_genes.txt", "genes_to_phenotype.txt", "phenotype.hpoa"), - save_dir = file.path(tools::R_user_dir("HPOExplorer", which = "cache"), "data"), + save_dir = file.path(KGExplorer::cache_dir(package = "HPOExplorer"), "data"), repo = paste("obophenotype", "human-phenotype-ontology", sep = "/"), tag = "latest", overwrite = FALSE, diff --git a/man/annotate_phenos.Rd b/man/main.Rd similarity index 63% rename from man/annotate_phenos.Rd rename to man/main.Rd index 0817a6b..56f851a 100644 --- a/man/annotate_phenos.Rd +++ b/man/main.Rd @@ -1,16 +1,18 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/annotate_phenos.R -\name{annotate_phenos} +% Please edit documentation in R/_docs.R, R/annotate_phenos.R, +% R/example_phenos.R +\name{main} +\alias{main} \alias{annotate_phenos} -\title{Annotate phenotypes} +\alias{example_phenos} +\title{Main functions} \usage{ annotate_phenos( phenos, hpo = get_hpo(), - adjacency = NULL, add_ont_lvl_absolute = TRUE, add_ont_lvl_relative = FALSE, - add_info_contents = FALSE, + add_info_content = TRUE, add_description = TRUE, add_disease_data = FALSE, add_ndiseases = add_disease_data, @@ -23,25 +25,24 @@ annotate_phenos( include_mondo = FALSE, add_hoverboxes = FALSE, columns = list_columns(), - interactive = TRUE, - verbose = TRUE + interactive = TRUE ) + +example_phenos(i = seq(10), hpo = get_hpo()) } \arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} +\item{phenos}{A data.table containing HPO IDs and other metadata.} -\item{adjacency}{An adjacency matrix generated -by \link[HPOExplorer]{adjacency_matrix}.} +\item{hpo}{Human Phenotype Ontology object, +loaded from \link[KGExplorer]{get_ontology}.} \item{add_ont_lvl_absolute}{Add the absolute ontology level of each HPO term. -See \link[HPOExplorer]{get_ont_lvls} for more details.} +See \link[KGExplorer]{get_ontology_levels} for more details.} \item{add_ont_lvl_relative}{Add the relative ontology level of each HPO term. -See \link[HPOExplorer]{get_ont_lvls} for more details.} +See \link[KGExplorer]{get_ontology_levels} for more details.} -\item{add_info_contents}{Add information content column for each phenotype.} +\item{add_info_content}{Add information content column for each phenotype.} \item{add_description}{Whether to get the phenotype descriptions as well (slower).} @@ -72,20 +73,40 @@ each disease.} IDs, names, and definitions to each disease.} \item{add_hoverboxes}{Add hoverdata with -\link[HPOExplorer]{make_hoverboxes}.} +\link[KGExplorer]{add_hoverboxes}.} \item{columns}{A named vector of columns in \code{phenos} -to add to the hoverdata via \link[HPOExplorer]{make_hoverboxes}.} +to add to the hoverdata via \link[KGExplorer]{add_hoverboxes}.} + +\item{interactive}{Make the plot interactive.} -\item{interactive}{Make the plot interactive with \link[plotly]{ggplotly}.} +\item{i}{Indices of HPO IDs to use.} +} +\value{ +Data. -\item{verbose}{Print messages.} +phenotype data.table } \description{ +Main functions. +} +\section{Functions}{ +\itemize{ +\item \code{annotate_phenos()}: main +Annotate phenotypes + Annotate phenotypes \link[data.table]{data.table} without various types of metadata. -} + +\item \code{example_phenos()}: main +Example phenotypes dataframe + +Create a minimal example of a phenos dataframe. + +}} \examples{ phenos <- example_phenos() phenos2 <- annotate_phenos(phenos) +phenos <- example_phenos() } +\concept{main} diff --git a/man/make_.Rd b/man/make_.Rd new file mode 100644 index 0000000..599f041 --- /dev/null +++ b/man/make_.Rd @@ -0,0 +1,240 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/_docs.R, R/make_igraph_object.R, +% R/make_network_object.R, R/make_phenos_dataframe.R, R/make_tiers.R +\name{make_} +\alias{make_} +\alias{make_igraph_object} +\alias{make_network_object} +\alias{make_phenos_dataframe} +\alias{make_tiers} +\title{Make functions} +\usage{ +make_igraph_object( + phenos, + hpo = get_hpo(), + colour_var = "fold_change", + cols = list_columns(extra_cols = c(colour_var, grep("_count$|_values$", names(phenos), + value = TRUE))), + ... +) + +make_network_object( + phenos, + hpo = get_hpo(), + colour_var = "fold_change", + add_ont_lvl_absolute = FALSE, + cols = list_columns(extra_cols = c(colour_var, grep("_count$|_values$", names(phenos), + value = TRUE))), + as = c("ggnetwork", "tbl_graph"), + ... +) + +make_phenos_dataframe( + ancestor = NULL, + hpo = get_hpo(), + phenotype_to_genes = load_phenotype_to_genes(), + add_ont_lvl_absolute = TRUE, + add_ont_lvl_relative = FALSE, + add_info_content = FALSE, + add_description = TRUE, + add_disease_data = FALSE, + add_ndiseases = add_disease_data, + add_onsets = add_disease_data, + add_deaths = add_disease_data, + add_pheno_frequencies = add_disease_data, + add_tiers = add_disease_data, + add_severities = add_disease_data, + add_disease_definitions = add_disease_data, + include_mondo = FALSE, + add_hoverboxes = FALSE, + columns = list_columns(), + interactive = TRUE, + verbose = TRUE +) + +make_tiers( + hpo = get_hpo(), + terms = hpo@terms, + keyword_sets = list(Tier1 = c("intellectual disability", "death"), Tier2 = + c("impaired mobility", "malform"), Tier3 = c("sight", "vision", "immunodeficien", + "cancer", "hearing", "touch", "pain ", " pain", "mental illness", "dysmorphic"), + Tier4 = c("fertility")), + check_names = TRUE, + check_definitions = TRUE, + check_diseases = FALSE, + search_ancestors = TRUE, + search_descendants = TRUE, + as_datatable = FALSE, + verbose = TRUE +) +} +\arguments{ +\item{phenos}{A data.table containing HPO IDs and other metadata.} + +\item{hpo}{Human Phenotype Ontology object, +loaded from \link[KGExplorer]{get_ontology}.} + +\item{colour_var}{The column from phenos that you wish +to map to node colour.} + +\item{cols}{Columns to add to metadata of \link[ggnetwork]{ggnetwork} object.} + +\item{...}{ + Arguments passed on to \code{\link[ggnetwork:fortify]{ggnetwork::fortify}} + \describe{ + \item{\code{}}{} + }} + +\item{add_ont_lvl_absolute}{Add the absolute ontology level of each HPO term. +See \link[KGExplorer]{get_ontology_levels} for more details.} + +\item{as}{R object class to return output as.} + +\item{ancestor}{The ancestor to get all descendants of. If \code{NULL}, +returns the entirely ontology.} + +\item{phenotype_to_genes}{Output of +\link{load_phenotype_to_genes} mapping phenotypes +to gene annotations.} + +\item{add_ont_lvl_relative}{Add the relative ontology level of each HPO term. +See \link[KGExplorer]{get_ontology_levels} for more details.} + +\item{add_info_content}{Add information content column for each phenotype.} + +\item{add_description}{Whether to get the phenotype descriptions as +well (slower).} + +\item{add_disease_data}{Add all disease metadata columns. +This will expand the data using \code{allow.cartesian=TRUE}.} + +\item{add_ndiseases}{Add the number of diseases per phenotype.} + +\item{add_onsets}{Add age of onset columns using +\link[HPOExplorer]{add_onset}.} + +\item{add_deaths}{Add age of death columns using +\link[HPOExplorer]{add_death}.} + +\item{add_pheno_frequencies}{Add the frequency of each phenotype in +each disease.} + +\item{add_tiers}{Add severity Tiers column using +\link[HPOExplorer]{add_tier}.} + +\item{add_severities}{Add severity column using +\link[HPOExplorer]{add_severity}.} + +\item{add_disease_definitions}{Add disease definitions.} + +\item{include_mondo}{Add \href{https://mondo.monarchinitiative.org/}{MONDO} +IDs, names, and definitions to each disease.} + +\item{add_hoverboxes}{Add hoverdata with +\link[KGExplorer]{add_hoverboxes}.} + +\item{columns}{A named vector of columns in \code{phenos} +to add to the hoverdata via \link[KGExplorer]{add_hoverboxes}.} + +\item{interactive}{Make the plot interactive.} + +\item{verbose}{Print messages.} + +\item{terms}{A subset of HPO IDs to assign Tiers to.} + +\item{keyword_sets}{A named list of regex queries to be used when searching +for phenotypes that have matching metadata.} + +\item{check_names}{When regexsearching for matching terms, +check the phenotype names.} + +\item{check_definitions}{When regex searching for matching terms, +check the phenotype definition.} + +\item{check_diseases}{When regex searching for matching terms, +check the names of diseases associated with the phenotype.} + +\item{search_ancestors}{Inherit Tiers of ancestors.} + +\item{search_descendants}{Inherit Tiers of descendants.} + +\item{as_datatable}{Return the results as a \link[data.table]{data.table}.} +} +\value{ +R object. + +A \link[igraph]{igraph} object. + +A \link[ggnetwork]{ggnetwork} object. + +The HPO in dataframe format. + +Tier assignments for each term in \code{terms}. +Will be returned as either a named vector or a \link[data.table]{data.table}. +} +\description{ +Functions to make complex R objects. +} +\section{Functions}{ +\itemize{ +\item \code{make_igraph_object()}: make_ +Make an \link[igraph]{igraph} object + +This uses the network package to coerce the adjacency matrix into a +network object. It also adds the fold change, label, +and relative ontology level parameters to each node in the network. + +It expects there to be a column of HPO IDs in the phenos dataframe called +hpo_id. + +\item \code{make_network_object()}: make_ +Make a \link[ggnetwork]{ggnetwork} object + +This uses the network package to coerce the adjacency matrix into a +network object. It also adds the fold change, label, +and relative ontology level parameters to each node in the network. + +It expects there to be a column of HPO IDs in the phenos dataframe called +hpo_id. + +\item \code{make_phenos_dataframe()}: make_ +Make phenotypes dataframe + +Make a dataframe from a subset of the Human Phenotype Ontology. + +\item \code{make_tiers()}: make_ +Make severity Tiers (auto) + +Automatically add severity Tier for each HPO ID, in accordance with the +rating system provided by +\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4262393/}{ +Lazarin et al (2014)}. +In order of increasing severity: +\itemize{ +\item{Tier 4 }{Reduced fertility} +\item{Tier 3 }{Sensory impairment: vision, +Immunodeficiency/cancer, +Sensory impairment: hearing, +Sensory impairment: touch, other (including pain), +Mental illness, +Dysmorphic features} +\item{Tier 2 }{Shortened life span: premature adulthood, +Impaired mobility, +Internal physical malformation} +\item{Tier 1 }{Shortened life span: infancy, +Shortened life span: childhood/adolescence, +Intellectual disability} +} + +}} +\examples{ +phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay") +g <- make_igraph_object(phenos = phenos) +phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay") +phenoNet <- make_network_object(phenos = phenos, + colour_var = "ontLvl_geneCount_ratio") +phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay") +terms <- get_hpo()@terms[seq(100)] +tiers <- make_tiers(terms = terms) +} +\concept{make_} diff --git a/man/make_hoverboxes.Rd b/man/make_hoverboxes.Rd deleted file mode 100644 index eca1831..0000000 --- a/man/make_hoverboxes.Rd +++ /dev/null @@ -1,54 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/make_hoverboxes.R -\name{make_hoverboxes} -\alias{make_hoverboxes} -\title{Make hoverboxes} -\usage{ -make_hoverboxes( - phenos, - columns = list_columns(), - interactive = TRUE, - width = 60, - digits = 3, - verbose = TRUE -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{columns}{A named vector of columns in \code{phenos} -to add to the hoverdata via \link[HPOExplorer]{make_hoverboxes}.} - -\item{interactive}{Make the plot interactive with \link[plotly]{ggplotly}.} - -\item{width}{Positive integer giving target line width (in number of -characters). A width less than or equal to 1 will put each word on its -own line.} - -\item{digits}{integer indicating the number of decimal places - (\code{round}) or significant digits (\code{signif}) to be used. - For \code{round}, negative values are allowed (see \sQuote{Details}).} - -\item{verbose}{Print messages.} -} -\value{ -A nicely formatted string with newlines etc, -to be used as a hoverbox. -} -\description{ -A hoverbox is a box of text that shows up when the cursor -hovers over something. -These can be useful when making interactive network plots -of the HPO phenotypes because we can include a hoverbox that gives -information and data associated with each phenotype. -} -\details{ -This function expects a dataframe of with a "hpo_name" column that has the -name of each phenotype. It must then include columns -for all of the parameters you wish to include in the hoverbox. -} -\examples{ -phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay", - add_hoverboxes = FALSE) -phenos <- make_hoverboxes(phenos = phenos) -} diff --git a/man/make_igraph_object.Rd b/man/make_igraph_object.Rd deleted file mode 100644 index 36d3dd3..0000000 --- a/man/make_igraph_object.Rd +++ /dev/null @@ -1,68 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/make_igraph_object.R -\name{make_igraph_object} -\alias{make_igraph_object} -\title{Make an \link[igraph]{igraph} object} -\usage{ -make_igraph_object( - phenos, - hpo = get_hpo(), - adjacency = adjacency_matrix(terms = phenos$hpo_id, hpo = hpo), - colour_var = "fold_change", - add_ont_lvl_absolute = FALSE, - cols = list_columns(extra_cols = c(colour_var, grep("_count$|_values$", names(phenos), - value = TRUE))), - layout = "fruchtermanreingold", - verbose = TRUE, - ... -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{adjacency}{An adjacency matrix generated -by \link[HPOExplorer]{adjacency_matrix}.} - -\item{colour_var}{The column from phenos that you wish -to map to node colour.} - -\item{add_ont_lvl_absolute}{Add the absolute ontology level of each HPO term. -See \link[HPOExplorer]{get_ont_lvls} for more details.} - -\item{cols}{Columns to add to metadata of \link[ggnetwork]{ggnetwork} object.} - -\item{layout}{a network layout supplied by \code{\link[sna]{gplot.layout}}, -such as \code{"fruchtermanreingold"} (the default), or a two-column matrix -with as many rows as there are nodes in the network, in which case the -matrix is used as nodes coordinates.} - -\item{verbose}{Print messages.} - -\item{...}{ - Arguments passed on to \code{\link[ggnetwork:ggnetwork]{ggnetwork::ggnetwork}} - \describe{ - \item{\code{x}}{an object of class \code{\link[network]{network}} or -\code{\link[igraph:igraph-package]{igraph}}, or any object that can be -coerced to that class, such as an adjacency or incidence matrix, or an -edge list: see \code{\link[network]{edgeset.constructors}} and -\code{\link[network]{network}} for details.} - }} -} -\value{ -A \link[igraph]{igraph} object. -} -\description{ -This uses the network package to coerce the adjacency matrix into a -network object. It also adds the fold change, label, -and relative ontology level parameters to each node in the network. -} -\details{ -It expects there to be a column of HPO IDs in the phenos dataframe called -hpo_id. -} -\examples{ -phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay") -g <- make_igraph_object(phenos = phenos) -} diff --git a/man/make_network_object.Rd b/man/make_network_object.Rd deleted file mode 100644 index 98860c7..0000000 --- a/man/make_network_object.Rd +++ /dev/null @@ -1,69 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/make_network_object.R -\name{make_network_object} -\alias{make_network_object} -\title{Make a \link[ggnetwork]{ggnetwork} object} -\usage{ -make_network_object( - phenos, - hpo = get_hpo(), - adjacency = adjacency_matrix(terms = phenos$hpo_id, hpo = hpo), - colour_var = "fold_change", - add_ont_lvl_absolute = FALSE, - cols = list_columns(extra_cols = c(colour_var, grep("_count$|_values$", names(phenos), - value = TRUE))), - layout = "fruchtermanreingold", - verbose = TRUE, - ... -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{adjacency}{An adjacency matrix generated -by \link[HPOExplorer]{adjacency_matrix}.} - -\item{colour_var}{The column from phenos that you wish -to map to node colour.} - -\item{add_ont_lvl_absolute}{Add the absolute ontology level of each HPO term. -See \link[HPOExplorer]{get_ont_lvls} for more details.} - -\item{cols}{Columns to add to metadata of \link[ggnetwork]{ggnetwork} object.} - -\item{layout}{a network layout supplied by \code{\link[sna]{gplot.layout}}, -such as \code{"fruchtermanreingold"} (the default), or a two-column matrix -with as many rows as there are nodes in the network, in which case the -matrix is used as nodes coordinates.} - -\item{verbose}{Print messages.} - -\item{...}{ - Arguments passed on to \code{\link[ggnetwork:ggnetwork]{ggnetwork::ggnetwork}} - \describe{ - \item{\code{x}}{an object of class \code{\link[network]{network}} or -\code{\link[igraph:igraph-package]{igraph}}, or any object that can be -coerced to that class, such as an adjacency or incidence matrix, or an -edge list: see \code{\link[network]{edgeset.constructors}} and -\code{\link[network]{network}} for details.} - }} -} -\value{ -A \link[ggnetwork]{ggnetwork} object. -} -\description{ -This uses the network package to coerce the adjacency matrix into a -network object. It also adds the fold change, label, -and relative ontology level parameters to each node in the network. -} -\details{ -It expects there to be a column of HPO IDs in the phenos dataframe called -hpo_id. -} -\examples{ -phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay") -phenoNet <- make_network_object(phenos = phenos, - colour_var = "ontLvl_geneCount_ratio") -} diff --git a/man/make_ontology.Rd b/man/make_ontology.Rd deleted file mode 100644 index fea3b67..0000000 --- a/man/make_ontology.Rd +++ /dev/null @@ -1,54 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/make_ontology.R -\name{make_ontology} -\alias{make_ontology} -\title{Make an ontologyIndex object} -\usage{ -make_ontology( - file = "hp-base.obo", - repo = "obophenotype/human-phenotype-ontology", - tag = "latest", - save_dir = tools::R_user_dir(package = "HPOExplorer", which = "cache"), - extract_tags = "everything", - fix_ascii = TRUE, - overwrite = TRUE, - upload_tag = NULL, - ... -) -} -\arguments{ -\item{file}{name or vector of names of files to be downloaded. If \code{NULL}, -all assets attached to the release will be downloaded.} - -\item{repo}{Repository name in format "owner/repo". Defaults to \code{guess_repo()}.} - -\item{tag}{tag for the GitHub release to which this data should be attached.} - -\item{save_dir}{Directory to save data to.} - -\item{extract_tags}{Character value: either "minimal" or "everything", determining whether to extract only the properties of terms which are required to run functions in the package - i.e. \code{"id", "name", "parents", "children"} and \code{"ancestors"} - or extract all properties provided in the file. Term properties are named in the resulting \code{ontology_index} as their corresponding tags in the OBO file (except \code{"parents"}, \code{"children"} and \code{"ancestors"} which are appended with \code{"_OBO"} to avoid clashing with standard \code{ontology_index} properties. Defaults to \code{"minimal"}.} - -\item{fix_ascii}{Fix non-ASCII characters in metadata.} - -\item{overwrite}{Should any local files of the same name be overwritten? -default \code{TRUE}.} - -\item{...}{ - Arguments passed on to \code{\link[ontologyIndex:get_ontology]{ontologyIndex::get_OBO}} - \describe{ - \item{\code{propagate_relationships}}{Character vector of relations} - \item{\code{merge_equivalent_terms}}{Logical value determining whether terms that are marked \code{"equivalent_to"} a target term should be merged, retaining properties of the target term when the property should have one value, e.g. the term ID and name. Defaults to \code{TRUE}.} - }} -} -\value{ -\link[ontologyIndex]{ontology_index} object -} -\description{ -Make a \link[ontologyIndex]{ontology_index} -object from the Open Biomedical Ontologies (OBO) file. -By default, this function uses the Human Phenotype Ontology (HPO) -distributed via the official -\href{https://github.com/obophenotype/human-phenotype-ontology/releases}{ -HPO GitHub Releases}. -} -\keyword{internal} diff --git a/man/make_phenos_dataframe.Rd b/man/make_phenos_dataframe.Rd deleted file mode 100644 index fefd80c..0000000 --- a/man/make_phenos_dataframe.Rd +++ /dev/null @@ -1,98 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/make_phenos_dataframe.R -\name{make_phenos_dataframe} -\alias{make_phenos_dataframe} -\title{Make phenotypes dataframe} -\usage{ -make_phenos_dataframe( - ancestor = NULL, - hpo = get_hpo(), - phenotype_to_genes = load_phenotype_to_genes(), - adjacency = NULL, - add_ont_lvl_absolute = TRUE, - add_ont_lvl_relative = FALSE, - add_info_contents = FALSE, - add_description = TRUE, - add_disease_data = FALSE, - add_ndiseases = add_disease_data, - add_onsets = add_disease_data, - add_deaths = add_disease_data, - add_pheno_frequencies = add_disease_data, - add_tiers = add_disease_data, - add_severities = add_disease_data, - add_disease_definitions = add_disease_data, - include_mondo = FALSE, - add_hoverboxes = FALSE, - columns = list_columns(), - interactive = TRUE, - verbose = TRUE -) -} -\arguments{ -\item{ancestor}{The ancestor to get all descendants of. If \code{NULL}, -returns the entirely ontology.} - -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{phenotype_to_genes}{Output of -\link[HPOExplorer]{load_phenotype_to_genes} mapping phenotypes -to gene annotations.} - -\item{adjacency}{An adjacency matrix generated -by \link[HPOExplorer]{adjacency_matrix}.} - -\item{add_ont_lvl_absolute}{Add the absolute ontology level of each HPO term. -See \link[HPOExplorer]{get_ont_lvls} for more details.} - -\item{add_ont_lvl_relative}{Add the relative ontology level of each HPO term. -See \link[HPOExplorer]{get_ont_lvls} for more details.} - -\item{add_info_contents}{Add information content column for each phenotype.} - -\item{add_description}{Whether to get the phenotype descriptions as -well (slower).} - -\item{add_disease_data}{Add all disease metadata columns. -This will expand the data using \code{allow.cartesian=TRUE}.} - -\item{add_ndiseases}{Add the number of diseases per phenotype.} - -\item{add_onsets}{Add age of onset columns using -\link[HPOExplorer]{add_onset}.} - -\item{add_deaths}{Add age of death columns using -\link[HPOExplorer]{add_death}.} - -\item{add_pheno_frequencies}{Add the frequency of each phenotype in -each disease.} - -\item{add_tiers}{Add severity Tiers column using -\link[HPOExplorer]{add_tier}.} - -\item{add_severities}{Add severity column using -\link[HPOExplorer]{add_severity}.} - -\item{add_disease_definitions}{Add disease definitions.} - -\item{include_mondo}{Add \href{https://mondo.monarchinitiative.org/}{MONDO} -IDs, names, and definitions to each disease.} - -\item{add_hoverboxes}{Add hoverdata with -\link[HPOExplorer]{make_hoverboxes}.} - -\item{columns}{A named vector of columns in \code{phenos} -to add to the hoverdata via \link[HPOExplorer]{make_hoverboxes}.} - -\item{interactive}{Make the plot interactive with \link[plotly]{ggplotly}.} - -\item{verbose}{Print messages.} -} -\value{ -The HPO in dataframe format. -} -\description{ -Make a dataframe from a subset of the Human Phenotype Ontology. -} -\examples{ -phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay") -} diff --git a/man/map_.Rd b/man/map_.Rd new file mode 100644 index 0000000..48b7087 --- /dev/null +++ b/man/map_.Rd @@ -0,0 +1,99 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/_docs.R, R/list_deaths.R, R/list_onsets.R, +% R/map_phenotypes.R +\name{map_} +\alias{map_} +\alias{list_deaths} +\alias{list_onsets} +\alias{map_phenotypes} +\title{Map functions} +\usage{ +list_deaths( + hpo = get_hpo(), + exclude = FALSE, + to = c("name", "id"), + include_na = TRUE +) + +list_onsets( + hpo = get_hpo(), + exclude = FALSE, + to = c("name", "id"), + include_na = TRUE, + verbose = TRUE +) + +map_phenotypes( + terms, + hpo = get_hpo(), + to = c("name", "id"), + keep_order = TRUE, + invert = FALSE +) +} +\arguments{ +\item{hpo}{Human Phenotype Ontology object, +loaded from \link[KGExplorer]{get_ontology}.} + +\item{exclude}{HPO phenotype names to exclude.} + +\item{to}{Character vector of database(s) to map IDs to. +When not \code{"mondo"}, can supply multiple alternative databases to map to + (e.g. \code{c("OMIM","Orphanet","DECIPHER")}).} + +\item{include_na}{Include NA values for onset.} + +\item{verbose}{Print messages.} + +\item{terms}{A subset of HPO IDs to include in the final dataset and plots +(e.g. c("HP:0001508","HP:0001507")).} + +\item{keep_order}{Return a named list of the same length and order +as \code{terms}. +If \code{FALSE}, return a named list of only the unique \code{terms}, +sometimes in a different order.} + +\item{invert}{Invert the keys/values of the dictionary, +such that the key becomes the values (and vice versa).} +} +\value{ +Mapped data. + +Named list of HPO IDs. + +Named list of HPO IDs. + +Character vector +} +\description{ +Functions to map IDs. +} +\section{Functions}{ +\itemize{ +\item \code{list_deaths()}: map_ +List age of death HPO terms + +List age of death phenotypes in the HPO. + +\item \code{list_onsets()}: map_ +List age of onset HPO terms + +List age of onset phenotypes in the HPO. + +\item \code{map_phenotypes()}: map_ +Harmonise phenotypes + +Harmonise a mixed vector of phenotype names (e.g. "Focal motor seizure") +and HPO IDs (e.g. c("HP:0000002","HP:0000003")). + +}} +\examples{ +deaths <- list_deaths() +onsets <- list_onsets() +terms <- c("Focal motor seizure","HP:0000002","HP:0000003") +#### As phenotype names #### +term_names <- map_phenotypes(terms=terms) +#### As HPO IDs #### +term_ids <- map_phenotypes(terms=terms, to="id") +} +\concept{map_} diff --git a/man/map_upheno.Rd b/man/map_upheno.Rd deleted file mode 100644 index f35bfb1..0000000 --- a/man/map_upheno.Rd +++ /dev/null @@ -1,69 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/map_upheno.R -\name{map_upheno} -\alias{map_upheno} -\title{Map phenotypes across uPheno} -\source{ -https://data.monarchinitiative.org/upheno2/current/qc/index - -https://data.monarchinitiative.org/upheno2/current/upheno-release/all/index.html -} -\usage{ -map_upheno( - pheno_map_method = c("upheno", "monarch"), - gene_map_method = c("monarch", "orthogene"), - subset_db1 = c("HP"), - terms = NULL, - fill_scores = NULL, - show_plot = TRUE, - force_new = FALSE, - save_dir = tools::R_user_dir(package = "HPOExplorer", which = "cache") -) -} -\arguments{ -\item{pheno_map_method}{Method to use for mapping phenotypes across ontologies. -\itemize{ -\item{"upheno"}{Use uPheno's phenotype-to-phenotype mappings. -Contains fewer ontologies but with greater coverage of phenotypes.} -\item{"monarch"}{Use Monarch's phenotype-to-phenotype mappings. -Contains more ontologies but with less coverage of phenotypes. -} -}} - -\item{gene_map_method}{Method to use for mapping genes across species. -\itemize{ -\item{"monarch"}{Use Monarch's gene-to-gene mappings.} -\item{"orthogene"}{Use \link[orthogene]{convert_orthologs} to generate -gene-to-gene mappings.} -}} - -\item{subset_db1}{Subset of ontologies to include in the plot.} - -\item{terms}{A subset of HPO IDs to include in the final dataset and plots -(e.g. c("HP:0001508","HP:0001507")).} - -\item{fill_scores}{Fill missing scores in the "equivalence_score" and -"subclass_score" columns with this value. These columns represent the -quality of mapping between two phenotypes on a scale from 0-1.} - -\item{show_plot}{Show the plot.} - -\item{force_new}{Force new data to be downloaded and processed.} - -\item{save_dir}{Directory to save cached data.} -} -\value{ -A list containing the data and plot. -} -\description{ -Map phenotypes across species within the Unified Phenotype Ontology (uPheno). -First, gathers phenotype-phenotype mappings across ontologies. -Next, gathers all phenotype-gene associations for each ontology, -converts all genes to human HGNC orthologs, and computes the number of -overlapping orthologs between all pairs mapped phenotypes. -Finally, plots the results as the proportion of intersecting genes between -all pairs of phenotypes. -} -\examples{ -res <- map_upheno() -} diff --git a/man/map_upheno_data.Rd b/man/map_upheno_data.Rd deleted file mode 100644 index 2d9e4e6..0000000 --- a/man/map_upheno_data.Rd +++ /dev/null @@ -1,68 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/map_upheno_data.R -\name{map_upheno_data} -\alias{map_upheno_data} -\title{Map uPheno data} -\usage{ -map_upheno_data( - pheno_map_method = c("upheno", "monarch"), - gene_map_method = c("monarch", "orthogene"), - keep_nogenes = FALSE, - fill_scores = NULL, - terms = NULL, - save_dir = tools::R_user_dir(package = "HPOExplorer", which = "cache"), - force_new = FALSE -) -} -\arguments{ -\item{pheno_map_method}{Method to use for mapping phenotypes across ontologies. -\itemize{ -\item{"upheno"}{Use uPheno's phenotype-to-phenotype mappings. -Contains fewer ontologies but with greater coverage of phenotypes.} -\item{"monarch"}{Use Monarch's phenotype-to-phenotype mappings. -Contains more ontologies but with less coverage of phenotypes. -} -}} - -\item{gene_map_method}{Method to use for mapping genes across species. -\itemize{ -\item{"monarch"}{Use Monarch's gene-to-gene mappings.} -\item{"orthogene"}{Use \link[orthogene]{convert_orthologs} to generate -gene-to-gene mappings.} -}} - -\item{keep_nogenes}{Logical indicating whether to keep mappings that do not -have any orthologous genes.} - -\item{fill_scores}{Fill missing scores in the "equivalence_score" and -"subclass_score" columns with this value. These columns represent the -quality of mapping between two phenotypes on a scale from 0-1.} - -\item{terms}{A subset of HPO IDs to include in the final dataset and plots -(e.g. c("HP:0001508","HP:0001507")).} - -\item{save_dir}{Directory to save cached data.} - -\item{force_new}{Force new data to be downloaded and processed.} -} -\value{ -A data.table containing the mapped data. -} -\description{ -Get uPheno cross-species mapping data by: -\itemize{ -\item{Downloading cross-species phenotype-phenotype mappings.} -\item{Downloading within-spceies phenotype-gene mappings, -and converting these genes to human orthologs.} -\item{Merging the phenotype-phenotype and phenotype-gene mappings.} -\item{Filtering out any mappings that do not have a human ortholog within -each respective phenotype.} -\item{Calculating the proportion of orthologous genes overlapping across -the species-specific phenotype-gene mappings.} -\item{Iterating the above steps using multiple methods -(\code{pheno_map_method}) and concatenating the results together.} -} -} -\examples{ -pheno_map_genes_match <- map_upheno_data() -} diff --git a/man/map_upheno_plot.Rd b/man/map_upheno_plot.Rd deleted file mode 100644 index 7ca8dd3..0000000 --- a/man/map_upheno_plot.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/map_upheno_plots.R -\name{map_upheno_plot} -\alias{map_upheno_plot} -\title{Map uPheno plot} -\usage{ -map_upheno_plot( - pheno_map_genes_match = NULL, - subset_db1 = "HP", - types = c("rainplot", "scatterplot", "heatmap") -) -} -\arguments{ -\item{pheno_map_genes_match}{uPheno cross-species mapping data generated by -\link[HPOExplorer]{map_upheno_data}.} - -\item{subset_db1}{A character vector of reference ontologies to subset -\code{pheno_map_genes_match} by.} - -\item{types}{A character vector of plot types to generate.} -} -\value{ -A named list of \pkg{ggplot2} objects. -} -\description{ -Generate multiple kinds of plots summarising mappings of phenotypes and genes -between the HPO and various non-human ontologies within uPheno. -} -\examples{ -pheno_map_genes_match <- map_upheno_data() -upheno_plots <- map_upheno_plot( - pheno_map_genes_match = pheno_map_genes_match) -} diff --git a/man/network_3d.Rd b/man/network_3d.Rd deleted file mode 100644 index 0c4497e..0000000 --- a/man/network_3d.Rd +++ /dev/null @@ -1,114 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/network_3d.R -\name{network_3d} -\alias{network_3d} -\title{3D network} -\source{ -\href{https://www.r-bloggers.com/2013/09/network-visualization-part-4-3d-networks/}{ -R bloggers} - -\href{https://community.plotly.com/t/is-it-possible-to-connect-scatters-in-3d-scatter-plot/}{ -Plotly: Connect points in 3D plot} - -\href{https://plotly.com/r/line-charts/}{ -Plotly: Connect points in 2D plot} - -\href{https://plotly.com/r/3d-line-plots/}{ -Plotly: 3D lines plots} - -\href{https://plotly.com/python/v3/3d-network-graph/}{ -Plotly: 3D network plot} -} -\usage{ -network_3d( - g, - layout_func = igraph::layout.fruchterman.reingold, - node_color_var = "ontLvl", - edge_color_var = "zend", - text_color_var = node_color_var, - node_symbol_var = "ancestor_name", - node_palette = pals::kovesi.cyclic_mrybm_35_75_c68_s25, - edge_palette = pals::kovesi.cyclic_mrybm_35_75_c68_s25, - node_opacity = 0.75, - edge_opacity = 0.5, - kde_palette = pals::gnuplot, - add_kde = TRUE, - extend_kde = 1.5, - bg_color = kde_palette(6)[1], - add_labels = FALSE, - keep_grid = FALSE, - aspectmode = "cube", - hover_width = 80, - label_width = 80, - seed = 2023, - showlegend = TRUE, - show_plot = TRUE, - save_path = tempfile(fileext = "network_3d.html"), - verbose = TRUE -) -} -\arguments{ -\item{g}{\link[igraph]{igraph} object -generated by \link[HPOExplorer]{make_igraph_object}.} - -\item{layout_func}{Layout function for the graph.} - -\item{node_color_var}{Variable in the vertex metadata to color nodes by.} - -\item{edge_color_var}{Variable in the edge metadata to color edges by.} - -\item{text_color_var}{Variable in the node metadata to color text by.} - -\item{node_symbol_var}{Variable in the vertex metadata to shape nodes by.} - -\item{node_palette}{Color palette function for the nodes/points.} - -\item{edge_palette}{Color palette function for the edges/lines.} - -\item{node_opacity}{Node opacity.} - -\item{edge_opacity}{Edge opacity.} - -\item{kde_palette}{Color palette function for the KDE plot.} - -\item{add_kde}{Add a kernel density estimation (KDE) plot -below the 3D scatter plot (i.e. the "mountains" beneath the points).} - -\item{extend_kde}{Extend the area that the KDE plot covers.} - -\item{bg_color}{Plot background color.} - -\item{add_labels}{Add phenotype name labels to each point.} - -\item{keep_grid}{Keep all grid lines and axis labels.} - -\item{aspectmode}{The proportions of the 3D plot. See the -\href{https://plotly.com/python/reference/layout/scene/#layout-scene-aspectmode}{ -plotly documentation site} for details.} - -\item{hover_width}{Maximum width of the hover text.} - -\item{label_width}{Maximum width of the label text.} - -\item{seed}{Random seed to enable reproducibility.} - -\item{showlegend}{Show node fill legend.} - -\item{show_plot}{Print the plot after it's been generated.} - -\item{save_path}{Path to save interactive plot to -as a self-contained HTML file.} - -\item{verbose}{Print messages.} -} -\value{ -A 3D interactive \link[plotly]{plotly} object. -} -\description{ -Plot a subset of the HPO as a 3D network. -} -\examples{ -phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay") -g <- make_igraph_object(phenos = phenos) -plt <- network_3d(g=g, show_plot=FALSE) -} diff --git a/man/per_branch_plot.Rd b/man/per_branch_plot.Rd index 24f96aa..b607e08 100644 --- a/man/per_branch_plot.Rd +++ b/man/per_branch_plot.Rd @@ -8,7 +8,8 @@ per_branch_plot( highlighted_branches, ancestor, hpo = get_hpo(), - background_branches = hpo$children[hpo$id[match(ancestor, hpo$name)]][[1]], + background_branches = simona::dag_children(hpo, term = map_phenotypes(terms = ancestor, + hpo = hpo, to = "id")), phenotype_to_genes = load_phenotype_to_genes() ) } @@ -19,13 +20,14 @@ to highlight \\>} \item{ancestor}{The ancestor to get all descendants of. If \code{NULL}, returns the entirely ontology.} -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} +\item{hpo}{Human Phenotype Ontology object, +loaded from \link[KGExplorer]{get_ontology}.} \item{background_branches}{HPO Ids of all branches to be shown (including highlighted) \\>} \item{phenotype_to_genes}{Output of -\link[HPOExplorer]{load_phenotype_to_genes} mapping phenotypes +\link{load_phenotype_to_genes} mapping phenotypes to gene annotations.} } \value{ @@ -44,8 +46,6 @@ Note that the highlighted branches must be also present in the background branches. } \examples{ -## Selecting child terms of -## "Abnormality of the nervous system" as background branches plt <- per_branch_plot( highlighted_branches = "Abnormality of nervous system physiology", ancestor = "Abnormality of the nervous system") diff --git a/man/phenos_to_granges.Rd b/man/phenos_to_granges.Rd index b9e2101..9f8281f 100644 --- a/man/phenos_to_granges.Rd +++ b/man/phenos_to_granges.Rd @@ -8,7 +8,7 @@ phenos_to_granges( phenos = NULL, phenotype_to_genes = load_phenotype_to_genes(), hpo = get_hpo(), - keep_seqnames = c(seq(22), "X", "Y"), + keep_chr = c(seq(22), "X", "Y"), by = c("hpo_id", "disease_id"), gene_col = "intersection", split.field = "hpo_id", @@ -18,15 +18,16 @@ phenos_to_granges( ) } \arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} +\item{phenos}{A data.table containing HPO IDs and other metadata.} \item{phenotype_to_genes}{Output of -\link[HPOExplorer]{load_phenotype_to_genes} mapping phenotypes +\link{load_phenotype_to_genes} mapping phenotypes to gene annotations.} -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} +\item{hpo}{Human Phenotype Ontology object, +loaded from \link[KGExplorer]{get_ontology}.} -\item{keep_seqnames}{Chromosomes to keep.} +\item{keep_chr}{Chromosomes to keep.} \item{by}{A vector of shared column names in \code{x} and \code{y} to merge on. This defaults to the shared key columns between the two tables. diff --git a/man/search_hpo.Rd b/man/search_hpo.Rd index af6866f..a21b638 100644 --- a/man/search_hpo.Rd +++ b/man/search_hpo.Rd @@ -19,7 +19,8 @@ search_hpo( ) } \arguments{ -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} +\item{hpo}{Human Phenotype Ontology object, +loaded from \link[KGExplorer]{get_ontology}.} \item{queries}{A named list of character vectors. Each vector includes a phenoptype name (or a substring) diff --git a/man/subset_descendants.Rd b/man/subset_descendants.Rd deleted file mode 100644 index 894a5c6..0000000 --- a/man/subset_descendants.Rd +++ /dev/null @@ -1,41 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/subset_descendants.R -\name{subset_descendants} -\alias{subset_descendants} -\title{Subset descendants} -\usage{ -subset_descendants( - phenos, - ancestor = NULL, - hpo = get_hpo(), - ignore_case = TRUE, - verbose = TRUE -) -} -\arguments{ -\item{phenos}{dataframe of phenotypes and values / parameters.} - -\item{ancestor}{Phenotype names (or HPO ID) of an ancestor in the -\href{https://hpo.jax.org/}{Human Phenotype Ontology}. -Only phenotypes that are descendants of this ancestor will be kept. -Set to \code{NULL} (default) to skip this filtering step.} - -\item{hpo}{Human Phenotype Ontology object, loaded from \pkg{ontologyIndex}.} - -\item{ignore_case}{Ignore casing when searching for matches.} - -\item{verbose}{Print messages.} -} -\value{ -data.table of phenotypes, with additional columns: -"ancestor", "ancestor_id" -} -\description{ -Subset a \code{phenos} datatable to only -descendants of an ancestor HPO ID term. -} -\examples{ -phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay") -phenos2 <- subset_descendants(phenos = phenos, - ancestor = "Motor delay") -} diff --git a/man/unlist_col.Rd b/man/unlist_col.Rd index 0f0698e..7a7c79a 100644 --- a/man/unlist_col.Rd +++ b/man/unlist_col.Rd @@ -4,10 +4,10 @@ \alias{unlist_col} \title{Unlist column} \usage{ -unlist_col(dt, col) +unlist_col(dat, col) } \arguments{ -\item{dt}{\link[data.table]{data.table}} +\item{dat}{\link[data.table]{data.table}} \item{col}{Column name.} } diff --git a/tests/testthat/test-add_hpo_id.R b/tests/testthat/test-add_hpo_id.R index c152ac2..7ac555a 100644 --- a/tests/testthat/test-add_hpo_id.R +++ b/tests/testthat/test-add_hpo_id.R @@ -7,8 +7,8 @@ test_that("add_hpo_id works", { ## Several terms were omitted from later versions of the HPO ## due to becoming obsolete. testthat::expect_lte(sum(phenos2$hpo_id!=phenos2$hpo_id), 4) - mismatched_phenos <- harmonise_phenotypes( - phenotypes = phenos2$ID[phenos2$hpo_id!=phenos2$hpo_id] + mismatched_phenos <- map_phenotypes( + terms = phenos2$ID[phenos2$hpo_id!=phenos2$hpo_id] ) testthat::expect_true( all(grepl("^obsolete",mismatched_phenos, ignore.case = TRUE)) diff --git a/tests/testthat/test-add_omop.R b/tests/testthat/test-add_omop.R index f562804..ce1175d 100644 --- a/tests/testthat/test-add_omop.R +++ b/tests/testthat/test-add_omop.R @@ -3,7 +3,7 @@ test_that("add_omop works", { #### Using hpo_id #### phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay") phenos2 <- add_omop(phenos = phenos, - id_col = "hpo_id") + input_col = "hpo_id") testthat::expect_true(all(!is.na(phenos2$OMOP_ID))) testthat::expect_true(all(!is.na(phenos2$OMOP_NAME))) @@ -11,7 +11,7 @@ test_that("add_omop works", { phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay", add_disease_data = TRUE) phenos2 <- add_omop(phenos = phenos, - id_col = "disease_id") + input_col = "disease_id") testthat::expect_gte(sum(!is.na(phenos2$OMOP_ID)),1200) testthat::expect_gte(sum(!is.na(phenos2$OMOP_NAME)),1200) }) diff --git a/tests/testthat/test-add_ont_lvl.R b/tests/testthat/test-add_ont_lvl.R index 0008d24..3a156b9 100644 --- a/tests/testthat/test-add_ont_lvl.R +++ b/tests/testthat/test-add_ont_lvl.R @@ -1,6 +1,7 @@ test_that("add_ont_lvl works", { - phenos <- data.table::data.table(hpo_id=get_hpo()$id[1:10]) + hp <- get_hpo() + phenos <- data.table::data.table(hpo_id=hp@terms[1:10]) #### Absolute #### phenos2 <- add_ont_lvl(phenos = data.table::copy(phenos)) cols <- c("ontLvl") diff --git a/tests/testthat/test-as_ascii.R b/tests/testthat/test-as_ascii.R index 2f18230..cd14dab 100644 --- a/tests/testthat/test-as_ascii.R +++ b/tests/testthat/test-as_ascii.R @@ -1,5 +1,5 @@ test_that("as_ascii works", { - dt <- HPOExplorer:::as_ascii(dt = HPOExplorer::hpo_deaths) - testthat::expect_true(methods::is(dt,"data.table")) + dat <- HPOExplorer:::as_ascii(dat = HPOExplorer::hpo_deaths) + testthat::expect_true(methods::is(dat,"data.table")) }) diff --git a/tests/testthat/test-assign_tiers.R b/tests/testthat/test-assign_tiers.R deleted file mode 100644 index 973f74f..0000000 --- a/tests/testthat/test-assign_tiers.R +++ /dev/null @@ -1,6 +0,0 @@ -test_that("assign_tiers works", { - - terms <- get_hpo()$id[seq(10)] - tiers <- assign_tiers(terms = terms) - testthat::expect_true(all(sort(unique(tiers))==paste0("Tier",seq(4)))) -}) diff --git a/tests/testthat/test-clear_cache.R b/tests/testthat/test-clear_cache.R deleted file mode 100644 index 9984ebb..0000000 --- a/tests/testthat/test-clear_cache.R +++ /dev/null @@ -1,11 +0,0 @@ -test_that("clear_cache works", { - - dat <- load_phenotype_to_genes() - f <- file.path(tools::R_user_dir("HPOExplorer", - which="cache"), - "data", - "phenotype_to_genes.rds") - testthat::expect_true(file.exists(f)) - clear_cache() - testthat::expect_false(file.exists(f)) -}) diff --git a/tests/testthat/test-subset_descendants.R b/tests/testthat/test-filter_descendants.R similarity index 71% rename from tests/testthat/test-subset_descendants.R rename to tests/testthat/test-filter_descendants.R index 96ddee1..48a5fc6 100644 --- a/tests/testthat/test-subset_descendants.R +++ b/tests/testthat/test-filter_descendants.R @@ -1,7 +1,7 @@ -test_that("subset_descendants works", { +test_that("filter_descendants works", { phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay") - phenos2 <- subset_descendants(phenos = phenos, + phenos2 <- filter_descendants(phenos = phenos, ancestor = "Motor delay") testthat::expect_equal(nrow(phenos),23) testthat::expect_equal(nrow(phenos2),9) diff --git a/tests/testthat/test-get_hpo.R b/tests/testthat/test-get_hpo.R index ac514a7..ede8aa8 100644 --- a/tests/testthat/test-get_hpo.R +++ b/tests/testthat/test-get_hpo.R @@ -4,5 +4,5 @@ test_that("get_hpo works", { hpo <- get_hpo() items <- ls() testthat::expect_true("hpo" %in% ls()) - testthat::expect_true(methods::is(hpo,"ontology_index")) + testthat::expect_true(methods::is(hpo,"ontology_DAG")) }) diff --git a/tests/testthat/test-get_ont_lvls.R b/tests/testthat/test-get_ont_lvls.R deleted file mode 100644 index 00f1901..0000000 --- a/tests/testthat/test-get_ont_lvls.R +++ /dev/null @@ -1,44 +0,0 @@ -test_that("get_ont_lvls works", { - - hpo <- get_hpo() - parents <- grep("^HP",unique(hpo$id), value = TRUE)[seq(2)] - childs <- unlist(unname(hpo$children[parents])) - - terms1 <- c(parents,childs) - #### Using only immediate children #### - lvls1 <- get_ont_lvls(terms = terms1, absolute = TRUE) - testthat::expect_length(lvls1, length(terms1)) - testthat::expect_equal(min(lvls1),1) - testthat::expect_equal(max(lvls1),16) ## Increased from 14 to 16 in 2023 version - - #### Using all descendants #### - terms2 <- ontologyIndex::get_descendants(ontology = hpo, - roots = parents[[2]], - exclude_roots = FALSE) - lvls2 <- get_ont_lvls(terms = terms2) - testthat::expect_length(lvls2, length(terms2)) - testthat::expect_equal(min(lvls2),0) - testthat::expect_equal(max(lvls2),5) - - #### Using relative levels #### - lvls3 <- get_ont_lvls(terms = terms1, absolute = FALSE) - testthat::expect_length(lvls3, length(terms1)) - testthat::expect_equal(min(lvls3),0) - testthat::expect_equal(max(lvls3),1) - - lvls4 <- get_ont_lvls(terms = terms1, - absolute = TRUE, - exclude_top_lvl = FALSE, - reverse = FALSE) - testthat::expect_length(lvls4, length(terms1)) - testthat::expect_equal(min(lvls4),0) - testthat::expect_equal(max(lvls4),15) - - #### Visual confirmation of correct hierarchy #### - # ontologyPlot::onto_plot(ontology = hpo, - # label = terms1, - # terms = terms1) - # ontologyPlot::onto_plot(ontology = hpo, - # label = terms2, - # terms = terms2) -}) diff --git a/tests/testthat/test-ggnetwork_plot.R b/tests/testthat/test-ggnetwork_plot.R index 0b631fa..067543a 100644 --- a/tests/testthat/test-ggnetwork_plot.R +++ b/tests/testthat/test-ggnetwork_plot.R @@ -9,7 +9,7 @@ test_that("ggnetwork_plot works", { phenoNet <- make_network_object(phenos = phenos, colour_var = "ontLvl_geneCount_ratio") testthat::expect_true(methods::is(phenoNet,"data.frame")) - testthat::expect_equal(nrow(phenoNet),45) + testthat::expect_equal(nrow(phenoNet),71) #### ggnetwork_plot #### plt <- ggnetwork_plot(phenoNet = phenoNet, diff --git a/tests/testthat/test-list_onsets.R b/tests/testthat/test-list_onsets.R index 46fa144..a6ce97a 100644 --- a/tests/testthat/test-list_onsets.R +++ b/tests/testthat/test-list_onsets.R @@ -4,7 +4,6 @@ test_that("list_onsets works", { testthat::expect_length(onsets,12) onsets <- list_onsets(exclude = c("Antenatal","Fetal"), - as_hpo_ids = FALSE, include_na = FALSE) testthat::expect_length(onsets,9) }) diff --git a/tests/testthat/test-make_tiers.R b/tests/testthat/test-make_tiers.R new file mode 100644 index 0000000..3bf4b73 --- /dev/null +++ b/tests/testthat/test-make_tiers.R @@ -0,0 +1,6 @@ +test_that("make_tiers works", { + + terms <- get_hpo()@terms[seq(100)] + tiers <- make_tiers(terms = terms) + testthat::expect_true(all(sort(unique(tiers))==paste0("Tier",seq(4)))) +}) diff --git a/tests/testthat/test-map_upheno.R b/tests/testthat/test-map_upheno.R deleted file mode 100644 index 956c9db..0000000 --- a/tests/testthat/test-map_upheno.R +++ /dev/null @@ -1,21 +0,0 @@ -test_that("map_upheno works", { - - run_tests <- function(res){ - testthat::expect_true(methods::is(res$data,"data.table")) - for(x in res$plots){ - testthat::expect_true(methods::is(x,"gg") || - methods::is(x,"Heatmap")) - } - } - - terms <- example_phenos()$hpo_id - res <- map_upheno(force_new = TRUE, - terms = terms) - run_tests(res) - - #### Use cached data and filter by HPO terms - example_phenos <- example_phenos() - res <- map_upheno(force_new = FALSE, - terms = terms) - run_tests(res) -}) diff --git a/tests/testthat/test-network_3d.R b/tests/testthat/test-plot_graph_3d.R similarity index 54% rename from tests/testthat/test-network_3d.R rename to tests/testthat/test-plot_graph_3d.R index 91d24e1..cacd921 100644 --- a/tests/testthat/test-network_3d.R +++ b/tests/testthat/test-plot_graph_3d.R @@ -1,10 +1,12 @@ -test_that("network_3d works", { +test_that("plot_graph_3d works", { - phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay") + phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay")[seq(1,100),] g <- make_igraph_object(phenos = phenos) - plt <- network_3d(g=g) + plt <- suppressWarnings( + KGExplorer::plot_graph_3d(g=g, show_plot = FALSE) + ) testthat::expect_true(methods::is(g,"igraph")) - testthat::expect_length(g,23) + testthat::expect_length(g,34) testthat::expect_true(methods::is(plt,"plotly")) }) diff --git a/vignettes/HPOExplorer.Rmd b/vignettes/HPOExplorer.Rmd index 521b40d..58d5f90 100644 --- a/vignettes/HPOExplorer.Rmd +++ b/vignettes/HPOExplorer.Rmd @@ -3,7 +3,7 @@ title: "HPOExplorer" author: "

Authors: Robert Gordon-Smith, Brian M. Schilder, Nathan G .Skene

" date: "

Most recent update: `r format( Sys.Date(), '%b-%d-%Y')`

" output: - BiocStyle::html_document: + rmarkdown::html_document vignette: > %\VignetteIndexEntry{HPOExplorer} %\VignetteEngine{knitr::rmarkdown} @@ -27,7 +27,9 @@ library(HPOExplorer) The `HPOExplorer` package provides useful functions for working with the [Human Phenotype Ontology (HPO)](https://hpo.jax.org/app/). -The main HPO ontology object comes with the [`ontologyIndex`](https://CRAN.R-project.org/package=ontologyIndex) R package. +The main HPO ontology object comes with the +[`simona`](https://bioconductor.org/packages/release/bioc/html/simona.html) +R package. This tutorial will focus on creating an interactive network plot of a subset of the HPO, where disease descriptions and results from your