diff --git a/tools/isolib/isolib.R b/tools/isolib/isolib.R index 7804e15b..4354ea36 100644 --- a/tools/isolib/isolib.R +++ b/tools/isolib/isolib.R @@ -3,23 +3,43 @@ library(Spectra) library(MsBackendMsp) library(MetaboCoreUtils) library(readr) +library(tidyselect) -#' @param args A list of command line arguments. -main <- function() { - data(isotopes) - data(adducts) +parse_args <- function() { args <- commandArgs(trailingOnly = TRUE) + compound_table <- read_tsv( file = args[1], col_types = "ccd", - col_select = tidyselect::all_of(c("name", "formula")) | tidyselect::any_of("rt") + col_select = all_of(c("name", "formula")) | any_of("rt") ) - adducts_to_use <- c(unlist(strsplit(args[2], ",", fixed = TRUE))) - chemforms <- compound_table$formula - chemforms <- check_chemform(isotopes, chemforms)[, 2] + parsed <- list( + compound_table = compound_table, + adducts_to_use = c(unlist(strsplit(args[2], ",", fixed = TRUE))), + threshold = as.numeric(args[3]), + append_adducts = args[4], + append_isotopes = args[5], + out_format = args[6], + outfile = args[7] + ) + return(parsed) +} +generate_isotope_spectra <- function(compound_table, + adducts_to_use, + append_adducts, + threshold) { + data(isotopes) + data(adducts) + + monoisotopic <- isotopes |> + dplyr::group_by(element) |> + dplyr::slice_max(abundance, n = 1) |> + dplyr::filter(!stringr::str_detect(element, "\\[|\\]")) + + chemforms <- check_chemform(isotopes, compound_table$formula)[, 2] spectra <- data.frame() for (current in adducts_to_use) { @@ -32,12 +52,19 @@ main <- function() { merged_chemforms <- mergeform(multiplied_chemforms, adduct$Formula_add) } - charge_string <- paste0(if (adduct$Charge > 0) "+" else "-", if (abs(adduct$Charge) > 1) abs(adduct$Charge) else "") + charge_string <- paste0( + if (adduct$Charge > 0) "+" else "-", + if (abs(adduct$Charge) > 1) abs(adduct$Charge) else "" + ) adduct_string <- paste0("[", adduct$Name, "]", charge_string) precursor_mz <- calculateMass(multiplied_chemforms) + adduct$Mass - if (args[4] == TRUE) { - names <- paste(compound_table$name, paste0("(", adduct$Name, ")"), sep = " ") + if (append_adducts == TRUE) { + names <- paste( + compound_table$name, + paste0("(", adduct$Name, ")"), + sep = " " + ) } else { names <- compound_table$name } @@ -60,26 +87,94 @@ main <- function() { isotopes = isotopes, chemforms = merged_chemforms, charge = adduct$Charge, - threshold = as.numeric(args[3]), + threshold = threshold, ) mzs <- list() intensities <- list() + isos <- list() + for (i in seq_along(patterns)) { mzs <- append(mzs, list(patterns[[i]][, 1])) intensities <- append(intensities, list(patterns[[i]][, 2])) + + # select all columns which describe the elemental composition + # remove all 12C, 35Cl etc. + # remove isotopes which don't occur + compositions <- as.data.frame(patterns[[i]][, -c(1, 2)]) |> + dplyr::select(-tidyselect::any_of(monoisotopic$isotope)) |> + dplyr::select_if(~ !all(. == 0)) + + # combine elemental composition into single string + compositions <- compositions |> + dplyr::rowwise() |> + dplyr::mutate(isotopes = paste( + purrr::map2_chr( + names(compositions), + dplyr::c_across(everything()), + ~ paste(.x, .y, sep = ":") + ), + collapse = ", " + )) |> + dplyr::ungroup() |> + dplyr::select(isotopes) + isos <- append(isos, list(compositions$isotopes)) } spectra_df$mz <- mzs spectra_df$intensity <- intensities + spectra_df$isotopes <- isos spectra <- rbind(spectra, spectra_df) } + return(spectra) +} + +write_to_msp <- function(spectra, file) { + sps <- Spectra(dplyr::select(spectra, -isotopes)) + export(sps, MsBackendMsp(), file = file) +} - sps <- Spectra(spectra) - export(sps, MsBackendMsp(), file = args[5]) +write_to_table <- function(spectra, file, append_isotopes) { + entries <- spectra |> + dplyr::rowwise() |> + dplyr::mutate(peaks = paste(unlist(mz), collapse = ";")) |> + dplyr::mutate(isos = paste(unlist(isotopes), collapse = ";")) + result <- tidyr::separate_longer_delim( + entries, + all_of(c("peaks", "isos")), + ";" + ) + result <- result |> + dplyr::select(-c("mz", "intensity", "isotopes")) |> + dplyr::rename(mz = peaks, isotopes = isos, rt = retention_time) + + if (append_isotopes) { + result <- result |> + dplyr::mutate(result, + full_formula = paste0(formula, " (", isotopes, ")") + ) |> + dplyr::select(-all_of(c("formula", "isotopes"))) |> + dplyr::rename(formula = full_formula) |> + dplyr::relocate(formula, .after = name) + } + readr::write_tsv(result, file = file) +} + +main <- function() { + args <- parse_args() + spectra <- generate_isotope_spectra( + args$compound_table, + args$adducts_to_use, + args$append_adducts, + args$threshold + ) + + if (args$out_format == "msp") { + write_to_msp(spectra, args$outfile) + } else if (args$out_format == "tabular") { + write_to_table(spectra, args$outfile, args$append_isotopes) + } } -# Get the command line arguments -args <- commandArgs(trailingOnly = TRUE) # Call the main function main() diff --git a/tools/isolib/isolib.xml b/tools/isolib/isolib.xml index af83e572..22821394 100644 --- a/tools/isolib/isolib.xml +++ b/tools/isolib/isolib.xml @@ -1,5 +1,5 @@ - - create an isotopic pattern library for given compounds and adducts + + create an isotopic pattern library for given compounds and adducts based on enviPat bioconductor-msbackendmsp r-envipat r-readr + r-tidyr + r-stringr + r-purrr @@ -46,9 +60,25 @@ + + + + + + + + + + + - + + + + + + @@ -60,6 +90,18 @@ + + + + + + + + + + + +