diff --git a/NAMESPACE b/NAMESPACE index 7dd8b040..b85b6eac 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -169,6 +169,7 @@ export(obo_parser) export(oma_code) export(oma_organisms) export(oma_pairwise) +export(oma_pairwise_genesymbols) export(omnipath) export(omnipath_cache_autoclean) export(omnipath_cache_clean) diff --git a/R/id_mapping.R b/R/id_mapping.R index bb29847d..a7293c3d 100644 --- a/R/id_mapping.R +++ b/R/id_mapping.R @@ -396,6 +396,7 @@ translate_ids <- function( .nse_ensure_str(!!enquo(organism)) %>% {`if`(. == 'organism', organism, .)} %>% ncbi_taxid + id_cols <- names(ids) id_types <- unlist(ids) from_col <- id_cols[1] diff --git a/R/oma.R b/R/oma.R index a228b6bf..37ff886d 100644 --- a/R/oma.R +++ b/R/oma.R @@ -153,6 +153,67 @@ oma_pairwise <- function( } +#' Orthologous pairs of gene symbols between two organisms +#' +#' The Orthologous Matrix (OMA), a resource of orthologous relationships +#' between genes, doesn't provide gene symbols, the identifier preferred in +#' many bioinformatics pipelines. Hence this function wraps +#' \code{\link{oma_pairwise}} by translating the identifiers used in OMA to +#' gene symbols. Items that cannot be translated to `id_type` (but present +#' in the data with their internal OMA IDs) are removed by \code{\link{oma_pairwise}}. +#' Then, in this function we translate the identifiers to gene symbols. +#' +#' @param organism_a Name or identifier of an organism. +#' @param organism_b Name or identifier of another organism. +#' @param id_type The gene or protein identifier to use in the table. For a +#' list of supported ID types see `omnipath.env$id_types$oma`. These are +#' the identifiers that will be translated to gene symbols. 
+#' @param mappings Character vector: control ambiguous mappings: \itemize{ +#' \item{1:1 - unambiguous} +#' \item{1:m - one-to-many} +#' \item{n:1 - many-to-one} +#' \item{n:m - many-to-many} +#' } +#' @param only_ids Logical: include only the two identifier columns, not the +#' mapping type and the orthology group columns. +#' +#' @return A data frame with orthologous gene pairs; rows with NA in either identifier column are dropped. +#' +#' @examples +#' oma_pairwise_genesymbols("human", "mouse") +#' +#' @importFrom magrittr %>% +#' @importFrom rlang exec !!! !! := sym +#' @importFrom dplyr filter +#' @export +oma_pairwise_genesymbols <- function( + organism_a = 'human', + organism_b = 'mouse', + id_type = 'uniprot', + mappings = c('1:1', '1:m', 'n:1', 'n:m'), + only_ids = TRUE +) { + + .slow_doctest() + + environment() %>% # the arguments of this call, forwarded by name to oma_pairwise + as.list %>% + exec(oma_pairwise, !!!.) %>% + translate_ids( + id_organism_a := !!sym(id_type), + id_organism_a := genesymbol, + organism = !!sym(as.character(organism_a)) # sym() accepts only strings, so coerce e.g. numeric IDs first + ) %>% + translate_ids( + id_organism_b := !!sym(id_type), + id_organism_b := genesymbol, + organism = !!sym(as.character(organism_b)) + ) %>% + filter(!is.na(id_organism_a) & !is.na(id_organism_b)) + +} + + #' OMA identifier type from synonyms and lower case version #' #' @param id_type Character: a synonym or a lower case version of an OMA diff --git a/man/oma_pairwise_genesymbols.Rd b/man/oma_pairwise_genesymbols.Rd new file mode 100644 index 00000000..583183fd --- /dev/null +++ b/man/oma_pairwise_genesymbols.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/oma.R +\name{oma_pairwise_genesymbols} +\alias{oma_pairwise_genesymbols} +\title{Orthologous pairs of gene symbols between two organisms} +\usage{ +oma_pairwise_genesymbols( + organism_a = "human", + organism_b = "mouse", + id_type = "uniprot", + mappings = c("1:1", "1:m", "n:1", "n:m"), + only_ids = TRUE +) +} +\arguments{ +\item{organism_a}{Name or identifier of an organism.} + +\item{organism_b}{Name or identifier of another organism.} + 
+\item{id_type}{The gene or protein identifier to use in the table. For a +list of supported ID types see `omnipath.env$id_types$oma`. These are +the identifiers that will be translated to gene symbols.} + +\item{mappings}{Character vector: control ambiguous mappings: \itemize{ + \item{1:1 - unambiguous} + \item{1:m - one-to-many} + \item{n:1 - many-to-one} + \item{n:m - many-to-many} +}} + +\item{only_ids}{Logical: include only the two identifier columns, not the +mapping type and the orthology group columns.} +} +\value{ +A data frame with orthologous gene pairs; rows with NA in either identifier column are dropped. +} +\description{ +The Orthologous Matrix (OMA), a resource of orthologous relationships +between genes, doesn't provide gene symbols, the identifier preferred in +many bioinformatics pipelines. Hence this function wraps +\code{\link{oma_pairwise}} by translating the identifiers used in OMA to +gene symbols. Items that cannot be translated to `id_type` (but present +in the data with their internal OMA IDs) are removed by \code{\link{oma_pairwise}}. +Then, in this function we translate the identifiers to gene symbols. +} +\examples{ +oma_pairwise_genesymbols("human", "mouse") + +}