Skip to content

Commit

Permalink
Merge pull request #599 from hechth/isolib_add_csv
Browse files Browse the repository at this point in the history
isolib: added option to export to tabular
  • Loading branch information
hechth authored Nov 1, 2024
2 parents 7d98b78 + d74b0da commit e21ab1b
Show file tree
Hide file tree
Showing 4 changed files with 646 additions and 20 deletions.
127 changes: 111 additions & 16 deletions tools/isolib/isolib.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,43 @@ library(Spectra)
library(MsBackendMsp)
library(MetaboCoreUtils)
library(readr)
library(tidyselect)

#' Command line arguments are read via commandArgs(); no parameters are taken.
main <- function() {
data(isotopes)
data(adducts)

# Read the trailing command line arguments and bundle them into a named list.
#
# Positional arguments, as consumed below:
#   1: path of a tab-separated compound table; the columns "name" and
#      "formula" are selected, "rt" is selected when it is present
#   2: comma-separated adduct names
#   3: threshold (converted to numeric)
#   4: append_adducts flag (kept as the raw string)
#   5: append_isotopes flag (kept as the raw string)
#   6: output format
#   7: output file path
#
# Returns a list with the elements compound_table, adducts_to_use, threshold,
# append_adducts, append_isotopes, out_format and outfile.
#
# NOTE(review): this listing looks like a rendered diff - the two consecutive
# col_select lines and the adducts_to_use / chemforms assignments ahead of the
# list appear to be superseded lines; confirm against the actual file.
parse_args <- function() {
args <- commandArgs(trailingOnly = TRUE)

compound_table <- read_tsv(
file = args[1],
col_types = "ccd",
col_select = tidyselect::all_of(c("name", "formula")) | tidyselect::any_of("rt")
col_select = all_of(c("name", "formula")) | any_of("rt")
)
adducts_to_use <- c(unlist(strsplit(args[2], ",", fixed = TRUE)))

chemforms <- compound_table$formula
chemforms <- check_chemform(isotopes, chemforms)[, 2]
parsed <- list(
compound_table = compound_table,
adducts_to_use = c(unlist(strsplit(args[2], ",", fixed = TRUE))),
threshold = as.numeric(args[3]),
append_adducts = args[4],
append_isotopes = args[5],
out_format = args[6],
outfile = args[7]
)
return(parsed)
}

generate_isotope_spectra <- function(compound_table,
adducts_to_use,
append_adducts,
threshold) {
data(isotopes)
data(adducts)

monoisotopic <- isotopes |>
dplyr::group_by(element) |>
dplyr::slice_max(abundance, n = 1) |>
dplyr::filter(!stringr::str_detect(element, "\\[|\\]"))

chemforms <- check_chemform(isotopes, compound_table$formula)[, 2]
spectra <- data.frame()

for (current in adducts_to_use) {
Expand All @@ -32,12 +52,19 @@ main <- function() {
merged_chemforms <- mergeform(multiplied_chemforms, adduct$Formula_add)
}

charge_string <- paste0(if (adduct$Charge > 0) "+" else "-", if (abs(adduct$Charge) > 1) abs(adduct$Charge) else "")
charge_string <- paste0(
if (adduct$Charge > 0) "+" else "-",
if (abs(adduct$Charge) > 1) abs(adduct$Charge) else ""
)
adduct_string <- paste0("[", adduct$Name, "]", charge_string)
precursor_mz <- calculateMass(multiplied_chemforms) + adduct$Mass

if (args[4] == TRUE) {
names <- paste(compound_table$name, paste0("(", adduct$Name, ")"), sep = " ")
if (append_adducts == TRUE) {
names <- paste(
compound_table$name,
paste0("(", adduct$Name, ")"),
sep = " "
)
} else {
names <- compound_table$name
}
Expand All @@ -60,26 +87,94 @@ main <- function() {
isotopes = isotopes,
chemforms = merged_chemforms,
charge = adduct$Charge,
threshold = as.numeric(args[3]),
threshold = threshold,
)

mzs <- list()
intensities <- list()
isos <- list()

for (i in seq_along(patterns)) {
mzs <- append(mzs, list(patterns[[i]][, 1]))
intensities <- append(intensities, list(patterns[[i]][, 2]))

# select all columns which describe the elemental composition
# remove all 12C, 35Cl etc.
# remove isotopes which don't occur
compositions <- as.data.frame(patterns[[i]][, -c(1, 2)]) |>
dplyr::select(-tidyselect::any_of(monoisotopic$isotope)) |>
dplyr::select_if(~ !all(. == 0))

# combine elemental composition into single string
compositions <- compositions |>
dplyr::rowwise() |>
dplyr::mutate(isotopes = paste(
purrr::map2_chr(
names(compositions),
dplyr::c_across(everything()),
~ paste(.x, .y, sep = ":")
),
collapse = ", "
)) |>
dplyr::ungroup() |>
dplyr::select(isotopes)
isos <- append(isos, list(compositions$isotopes))
}

spectra_df$mz <- mzs
spectra_df$intensity <- intensities
spectra_df$isotopes <- isos
spectra <- rbind(spectra, spectra_df)
}
return(spectra)
}

#' Write the spectra to an MSP file.
#'
#' The `isotopes` column is dropped, the remaining columns are wrapped in a
#' `Spectra` object, and that object is exported with the `MsBackendMsp`
#' backend.
#'
#' @param spectra Data frame of spectra that includes an `isotopes` column.
#' @param file Path of the MSP file to write.
write_to_msp <- function(spectra, file) {
  without_isotopes <- dplyr::select(spectra, -isotopes)
  msp_spectra <- Spectra(without_isotopes)
  export(msp_spectra, MsBackendMsp(), file = file)
}

sps <- Spectra(spectra)
export(sps, MsBackendMsp(), file = args[5])
#' Write the spectra to a tab-separated file with one row per isotopic peak.
#'
#' Every spectrum row is expanded so that each m/z value and its matching
#' isotope composition string end up on their own row. The list columns
#' `mz`, `intensity` and `isotopes` are removed from the output; the expanded
#' values are written as `mz` and `isotopes`.
#'
#' @param spectra Data frame with the list columns `mz`, `intensity` and
#'   `isotopes`, plus `name` and `formula`. A `retention_time` column is
#'   renamed to `rt` when it is present.
#' @param file Path of the output file.
#' @param append_isotopes Logical, or a string such as "TRUE"/"FALSE" as
#'   received from the command line. When true, the isotope composition is
#'   appended to the formula as "formula (isotopes)" and the separate
#'   `isotopes` column is dropped.
#' @return The value returned by `readr::write_tsv()`.
write_to_table <- function(spectra, file, append_isotopes) {
  # Join the values of each list-column entry into one ";"-separated string.
  join_values <- function(column) {
    vapply(
      column,
      function(values) paste(unlist(values), collapse = ";"),
      character(1),
      USE.NAMES = FALSE
    )
  }

  # Vectorised over the list columns, so no rowwise grouping is left behind.
  entries <- dplyr::mutate(
    dplyr::as_tibble(spectra),
    peaks = join_values(mz),
    isos = join_values(isotopes)
  )

  # Split both joined columns in parallel: one output row per peak.
  result <- tidyr::separate_longer_delim(
    entries,
    tidyselect::all_of(c("peaks", "isos")),
    delim = ";"
  )

  # The retention time column is only renamed when it exists, because "rt"
  # is an optional column of the input compound table.
  result <- result |>
    dplyr::select(-tidyselect::all_of(c("mz", "intensity", "isotopes"))) |>
    dplyr::rename(mz = peaks, isotopes = isos) |>
    dplyr::rename(tidyselect::any_of(c(rt = "retention_time")))

  # The flag may arrive as a command line string, so coerce it explicitly.
  if (isTRUE(as.logical(append_isotopes))) {
    result <- result |>
      dplyr::mutate(
        full_formula = paste0(formula, " (", isotopes, ")")
      ) |>
      dplyr::select(-tidyselect::all_of(c("formula", "isotopes"))) |>
      dplyr::rename(formula = full_formula) |>
      dplyr::relocate(formula, .after = name)
  }
  readr::write_tsv(result, file = file)
}

#' Script entry point.
#'
#' Parses the command line arguments, generates the isotope spectra for the
#' requested compounds and adducts, and writes them in the requested format.
#' An output format other than "msp" or "tabular" raises an error instead of
#' finishing without writing any output.
main <- function() {
  args <- parse_args()
  spectra <- generate_isotope_spectra(
    compound_table = args$compound_table,
    adducts_to_use = args$adducts_to_use,
    append_adducts = args$append_adducts,
    threshold = args$threshold
  )

  if (args$out_format == "msp") {
    write_to_msp(spectra, args$outfile)
  } else if (args$out_format == "tabular") {
    write_to_table(spectra, args$outfile, args$append_isotopes)
  } else {
    # Fail loudly: otherwise the script would exit without an output file.
    stop(
      "Unsupported output format: ", args$out_format,
      " (expected \"msp\" or \"tabular\")",
      call. = FALSE
    )
  }
}

# Get the command line arguments
# NOTE(review): main() obtains its arguments through parse_args(), which calls
# commandArgs() itself, so this global `args` looks unused in the visible
# code - confirm nothing else reads it before removing.
args <- commandArgs(trailingOnly = TRUE)
# Call the main function
main()
50 changes: 46 additions & 4 deletions tools/isolib/isolib.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<tool id="isolib" name="isolib" version="1.0.1+galaxy0" profile="21.09">
<description>create an isotopic pattern library for given compounds and adducts</description>
<tool id="isolib" name="isolib" version="2.6+galaxy0" profile="21.09">
<description>create an isotopic pattern library for given compounds and adducts based on enviPat</description>
<creator>
<person
givenName="Helge"
Expand All @@ -20,9 +20,23 @@
<requirement type="package" version="1.6.0">bioconductor-msbackendmsp</requirement>
<requirement type="package" version="2.6">r-envipat</requirement>
<requirement type="package" version="2.1.5">r-readr</requirement>
<requirement type="package" version="1.3.1">r-tidyr</requirement>
<requirement type="package" version="1.5.1">r-stringr</requirement>
<requirement type="package" version="1.0.2">r-purrr</requirement>
</requirements>
<command detect_errors="exit_code"><![CDATA[
Rscript '${__tool_directory__}/isolib.R' '${input_file}' '${ionization.adducts}' '${threshold}' '${append_adduct}' '${isotope_library}'
Rscript '${__tool_directory__}/isolib.R'
'${input_file}'
'${ionization.adducts}'
'${threshold}'
'${append_adduct}'
#if $formatting.out_format == "tabular"
'${formatting.append_isotopes}'
#else
'FALSE'
#end if
'${formatting.out_format}'
'${isotope_library}'
]]></command>
<inputs>
<param name="input_file" type="data" format="tabular" label="Table with input compounds"/>
Expand All @@ -46,9 +60,25 @@
</conditional>
<param name="threshold" type="float" min="0" max="100" value="1" label="Threshold" help="Probability threshold to use as cutoff for isotopic pattern distribution - this can be used to remove low abundant peaks and improve computation performance." />
<param name="append_adduct" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Append adduct to compound name" help="Append the adduct string to the compound name for easy identification." />
<conditional name="formatting">
<param name="out_format" type="select" label="Output Format" help="Choose the output format, either MSP or Tabular">
<option value="tabular">tabular</option>
<option value="msp" selected="true">msp</option>
</param>
<when value="tabular">
<param name="append_isotopes" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Append isotopes to formula" help="Append the isotopic composition to the formula for easy identification." />
</when>
<when value="msp">
</when>
</conditional>
</inputs>
<outputs>
<data format="msp" name="isotope_library"/>
<data format="msp" name="isotope_library">
<change_format>
<when input="formatting.out_format" value="msp" format="msp" />
<when input="formatting.out_format" value="tabular" format="tabular" />
</change_format>
</data>
</outputs>

<tests>
Expand All @@ -60,6 +90,18 @@
<param name="input_file" value="markers_no_rt.tsv"/>
<output name="isotope_library" file="test1.msp"/>
</test>
<test>
<param name="input_file" value="lc_markers_neg.tsv"/>
<param name="out_format" value="tabular"/>
<param name="append_isotopes" value="TRUE" />
<output name="isotope_library" file="test2.tabular"/>
</test>
<test>
<param name="input_file" value="lc_markers_neg.tsv"/>
<param name="out_format" value="tabular"/>
<param name="adducts" value="M-H,2M-H"/>
<output name="isotope_library" file="test3.tabular"/>
</test>
</tests>
<help><![CDATA[
This tool computes isotopic patterns for given compounds and adduct forms.
Expand Down
Loading

0 comments on commit e21ab1b

Please sign in to comment.