Skip to content

Commit

Permalink
Merge pull request #36 from UMCUGenetics/release/v2.2.0
Browse files Browse the repository at this point in the history
Release/v2.2.0 (DIMS)
  • Loading branch information
ALuesink authored Jun 18, 2024
2 parents 2fa7abc + 9e114c4 commit 972a97e
Show file tree
Hide file tree
Showing 125 changed files with 8,333 additions and 0 deletions.
94 changes: 94 additions & 0 deletions DIMS/AddOnFunctions/10-collectSamplesFilled_extraoutput.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/usr/bin/Rscript

.libPaths(new = "/hpc/local/CentOS7/dbg_mz/R_libs/3.2.2")

# load required packages
# none

# define parameters
cmd_args <- commandArgs(trailingOnly = TRUE)
for (arg in cmd_args) cat(" ", arg, "\n")

outdir <- cmd_args[1]
scanmode <- cmd_args[2]
normalization <- cmd_args[3]
scripts <- cmd_args[4]
z_score <- as.numeric(cmd_args[5])
ppm <- as.numeric(cmd_args[6])

#outdir <- "/Users/nunen/Documents/Metab/processed/test_old"
#scanmode <- "negative"
#normalization <- "disabled"
#scripts <- "/Users/nunen/Documents/Metab/DIMS/scripts"
#db <- "/Users/nunen/Documents/Metab/DIMS/db/HMDB_add_iso_corrNaCl_withIS_withC5OH.RData"
#z_score <- 0

object.files = list.files(paste(outdir, "9-samplePeaksFilled", sep="/"), full.names=TRUE, pattern=scanmode)
outlist.tot=NULL
for (i in 1:length(object.files)) {
load(object.files[i])
print(print(object.files[i]))
outlist.tot = rbind(outlist.tot, final.outlist.idpat3)
}

source(paste(scripts, "AddOnFunctions/sourceDir.R", sep="/"))
sourceDir(paste(scripts, "AddOnFunctions", sep="/"))

# remove duplicates
outlist.tot = mergeDuplicatedRows(outlist.tot)

# sort on mass
outlist.tot = outlist.tot[order(outlist.tot[,"mzmed.pgrp"]),]

# normalization
load(paste0(outdir, "/repl.pattern.",scanmode,".RData"))

if (normalization != "disabled") {
outlist.tot = normalization_2.1(outlist.tot, fileName, names(repl.pattern.filtered), on=normalization, assi_label="assi_HMDB")
}

if (z_score == 1) {
outlist.stats = statistics_z(outlist.tot, sortCol=NULL, adducts=FALSE)
nr.removed.samples=length(which(repl.pattern.filtered[]=="character(0)"))
order.index.int=order(colnames(outlist.stats)[8:(length(repl.pattern.filtered)-nr.removed.samples+7)])
outlist.stats.more = cbind(outlist.stats[,1:7],
outlist.stats[,(length(repl.pattern.filtered)-nr.removed.samples+8):(length(repl.pattern.filtered)-nr.removed.samples+8+6)],
outlist.stats[,8:(length(repl.pattern.filtered)-nr.removed.samples+7)][order.index.int],
outlist.stats[,(length(repl.pattern.filtered)-nr.removed.samples+5+10):ncol(outlist.stats)])

tmp.index=grep("_Zscore", colnames(outlist.stats.more), fixed = TRUE)
tmp.index.order=order(colnames(outlist.stats.more[,tmp.index]))
tmp = outlist.stats.more[,tmp.index[tmp.index.order]]
outlist.stats.more=outlist.stats.more[,-tmp.index]
outlist.stats.more=cbind(outlist.stats.more,tmp)
outlist.tot = outlist.stats.more
}

# filter identified compounds
index.1=which((outlist.tot[,"assi_HMDB"]!="") & (!is.na(outlist.tot[,"assi_HMDB"])))
index.2=which((outlist.tot[,"iso_HMDB"]!="") & (!is.na(outlist.tot[,"iso_HMDB"])))
index=union(index.1,index.2)
outlist.ident = outlist.tot[index,]
outlist.not.ident = outlist.tot[-index,]

if (z_score == 1) {
outlist.ident$ppmdev=as.numeric(outlist.ident$ppmdev)
outlist.ident <- outlist.ident[which(outlist.ident["ppmdev"] >= -ppm & outlist.ident["ppmdev"] <= ppm),]
}
# NAs in theormz_noise <======================================================================= uitzoeken!!!
outlist.ident$theormz_noise[which(is.na(outlist.ident$theormz_noise))] = 0
outlist.ident$theormz_noise=as.numeric(outlist.ident$theormz_noise)
outlist.ident$theormz_noise[which(is.na(outlist.ident$theormz_noise))] = 0
outlist.ident$theormz_noise=as.numeric(outlist.ident$theormz_noise)

save(outlist.not.ident, outlist.ident, file=paste(outdir, "/outlist_identified_", scanmode, ".RData", sep=""))

# Extra output in Excel-readable format:
remove_columns <- c("fq.best", "fq.worst", "mzmin.pgrp", "mzmax.pgrp")
remove_colindex <- which(colnames(outlist.ident) %in% remove_columns)
outlist.ident <- outlist.ident[ , -remove_colindex]
write.table(outlist.ident, file=paste0(outdir, "/outlist_identified_", scanmode, ".txt"), sep="\t", row.names = FALSE)
remove_colindex <- which(colnames(outlist.not.ident) %in% remove_columns)
outlist.not.ident <- outlist.not.ident[ , -remove_colindex]
write.table(outlist.not.ident, file=paste0(outdir, "/outlist_not_identified_", scanmode, ".txt"), sep="\t", row.names = FALSE)

10 changes: 10 additions & 0 deletions DIMS/AddOnFunctions/add_lab_id_and_onderzoeksnummer.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
add_lab_id_and_onderzoeksnummer <- function(df_metabs_helix) {
# Split patient number into labnummer and Onderzoeksnummer
for (row in 1:nrow(df_metabs_helix)) {
df_metabs_helix[row,"labnummer"] <- gsub("^P|\\.[0-9]*", "", df_metabs_helix[row,"Patient"])
labnummer_split <- strsplit(as.character(df_metabs_helix[row, "labnummer"]), "M")[[1]]
df_metabs_helix[row, "Onderzoeksnummer"] <- paste0("MB", labnummer_split[1], "/", labnummer_split[2])
}

return(df_metabs_helix)
}
16 changes: 16 additions & 0 deletions DIMS/AddOnFunctions/checkOverlap.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
checkOverlap <- function(range1,range2){
if (length(intersect(range1,range2))==2) {
# Overlap
# message("Overlap, smaller range is used")
if (length(range1) >= length(range2)){
range1=range1[-length(range1)]
} else {
range2=range2[-1]
}
} else if (length(intersect(range1,range2))==3){
# message("Overlap, smaller range is used")
range1=range1[-length(range1)]
range2=range2[-1]
}
return(list("range1"=range1,"range2"=range2))
}
4 changes: 4 additions & 0 deletions DIMS/AddOnFunctions/check_same_samplename.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# function to test whether intensity and Z-score columns match
check_same_samplename <- function(int_col_name, zscore_col_name) {
paste0(int_col_name, "_Zscore") == zscore_col_name
}
116 changes: 116 additions & 0 deletions DIMS/AddOnFunctions/create_violin_plots.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
create_violin_plots <- function(pdf_dir, pt_name, metab_perpage, top_metab_pt=NULL) {

# set parameters for plots
plot_height <- 9.6
plot_width <- 6
fontsize <- 1
nr_plots_perpage <- 20
circlesize <- 0.8
colors_4plot <- c("#22E4AC", "#00B0F0", "#504FFF","#A704FD","#F36265","#DA0641")
# green blue blue/purple purple orange red

# patient plots, create the PDF device
pt_name_sub <- pt_name
suffix <- ""
if (grepl("Diagnostics", pdf_dir) & is_diagnostic_patient(pt_name)) {
prefix <- "MB"
suffix <- "_DIMS_PL_DIAG"
# substitute P and M in P2020M00001 into right format for Helix
pt_name_sub <- gsub("[PM]", "", pt_name)
pt_name_sub <- gsub("\\..*", "", pt_name_sub)
} else if (grepl("Diagnostics", pdf_dir)) {
prefix <- "Dx_"
} else if (grepl("IEM", pdf_dir)) {
prefix <- "IEM_"
} else {
prefix <- "R_"
}

pdf(paste0(pdf_dir, "/", prefix, pt_name_sub, suffix, ".pdf"),
onefile = TRUE,
width = plot_width,
height = plot_height)

# page headers:
page_headers <- names(metab_perpage)

# put table into PDF file, if not empty
if (!is.null(dim(top_metab_pt))) {
plot.new()
# get the names and numbers in the table aligned
table_theme <- ttheme_default(core = list(fg_params = list(hjust=0, x=0.05, fontsize=6)),
colhead = list(fg_params = list(fontsize=8, fontface="bold")))
grid.table(top_metab_pt, theme = table_theme, rows = NULL)
# g <- tableGrob(top_metab_pt)
# grid.draw(g)
text(x=0.45, y=1.02, paste0("Top deviating metabolites for patient: ", pt_name), font=1, cex=1)
}

# violin plots
for (page_index in 1:length(metab_perpage)) {
# extract list of metabolites to plot on a page
metab_list_2plot <- metab_perpage[[page_index]]
# extract original data for patient of interest (pt_name) before cut-offs
pt_list_2plot_orig <- metab_list_2plot[which(metab_list_2plot$variable == pt_name), ]
# cut off Z-scores higher than 20 or lower than -5 (for nicer plots)
metab_list_2plot$value[metab_list_2plot$value > 20] <- 20
metab_list_2plot$value[metab_list_2plot$value < -5] <- -5
# extract data for patient of interest (pt_name)
pt_list_2plot <- metab_list_2plot[which(metab_list_2plot$variable == pt_name), ]
# restore original Z-score before cut-off, for showing Z-scores in PDF
pt_list_2plot$value_orig <- pt_list_2plot_orig$value
# remove patient of interest (pt_name) from list; violins will be made up of controls and other patients
metab_list_2plot <- metab_list_2plot[-which(metab_list_2plot$variable == pt_name), ]
# subtitle per page
sub_perpage <- gsub("_", " ", page_headers[page_index])
# for IEM plots, put subtitle on two lines
sub_perpage <- gsub("probability", "\nprobability", sub_perpage)
# add size parameter for showing Z-score of patient per metabolite
Z_size <- rep(3, nrow(pt_list_2plot))
# set size to 0 if row is empty
Z_size[is.na(pt_list_2plot$value)] <- 0

# draw violin plot. shape=22 gives square for patient of interest
ggplot_object <- ggplot(metab_list_2plot, aes(x=value, y=HMDB_name)) +
theme(axis.text.y=element_text(size=rel(fontsize)), plot.caption = element_text(size=rel(fontsize))) +
xlim(-5, 20) +
geom_violin(scale="width") +
geom_point(data = pt_list_2plot, aes(color=value), size = 3.5*circlesize, shape=22, fill="white") +
scale_fill_gradientn(colors = colors_4plot, values = NULL, space = "Lab", na.value = "grey50", guide = "colourbar", aesthetics = "colour") +
# add Z-score value for patient of interest at x=16
geom_text(data = pt_list_2plot, aes(16, label = paste0("Z=", round(value_orig, 2))), hjust = "left", vjust = +0.2, size = Z_size) +
# add labels. Use font Courier to get all the plots in the same location.
labs(x = "Z-scores", y = "Metabolites", subtitle = sub_perpage, color = "z-score") +
theme(axis.text.y = element_text(family = "Courier", size=6)) +
# do not show legend
theme(legend.position="none") +
# add title
ggtitle(label = paste0("Results for patient ", pt_name)) +
# labs(x = "Z-scores", y = "Metabolites", title = paste0("Results for patient ", pt_name), subtitle = sub_perpage, color = "z-score") +
# add vertical lines
geom_vline(xintercept = 2, col = "grey", lwd = 0.5, lty=2) +
geom_vline(xintercept = -2, col = "grey", lwd = 0.5, lty=2)

suppressWarnings(print(ggplot_object))

}

# add explanation of violin plots, version number etc.
# plot.new()
plot(NA, xlim=c(0,5), ylim=c(0,5), bty='n', xaxt='n', yaxt='n', xlab='', ylab='')
if (length(explanation) > 0) {
text(0.2, 5, explanation[1], pos=4, cex=0.8)
for (line_index in 2:length(explanation)) {
text_y_position <- 5 - (line_index*0.2)
text(-0.2, text_y_position, explanation[line_index], pos=4, cex=0.5)
}
# full_explanation <- paste(explanation[2:length(explanation)], sep=" \n")
# text(0.2, 4, full_explanation, pos=4, cex=0.6)
#explanation_grob=textGrob(apply(full_explanation, 2, paste, collapse="\n"))
#grid.arrange(explanation_grob)
}

# close the PDF file
dev.off()

}
6 changes: 6 additions & 0 deletions DIMS/AddOnFunctions/elementInfo.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
elementInfo <- function(name, elements = NULL) { # from Rdisop function .getElement
if (!is.list(elements) || length(elements)==0 ) {
elements <- initializePSE() }
if (name=="CH3OH+H"){rex<-"^CH3OH\\+H$"}else{rex <- paste ("^",name,"$", sep="")}
elements [[grep (rex, sapply (elements, function(x) {x$name}))]]
}
13 changes: 13 additions & 0 deletions DIMS/AddOnFunctions/export.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
export <- function(peaklist, plotdir, adducts, control_label, case_label, patients, sub, fileName){
# peaklist = outlist.adducts
# adducts=TRUE
# control_label="C"
# case_label="P"
# patients = getPatients(outlist.adducts)
# sub=3000

# peaklist = statistics_z_4export(as.data.frame(peaklist), plotdir, patients, adducts, control_label, case_label)

# generateExcelFile(peaklist, file.path(plotdir), imageNum=2, fileName, subName=c("","_box"), sub, adducts)

}
24 changes: 24 additions & 0 deletions DIMS/AddOnFunctions/findPeaks.Gauss.HPC.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
### fit Gaussian estimate mean and integrate to obtain intensity
findPeaks.Gauss.HPC <- function(plist, breaks.fwhm, int.factor, scale, resol, outdir, scanmode, plot, thresh, width, height) {
sampname <- colnames(plist)[1]

range <- as.vector(plist)
names(range) <- rownames(plist)

values <- list("mean"=NULL, "area"=NULL, "nr"=NULL, "min"=NULL, "max"=NULL, "qual"=NULL, "spikes"=0)

values <- searchMZRange(range, values, int.factor, scale, resol, outdir, sampname, scanmode, plot, width, height, thresh)

outlist.persample <- NULL
outlist.persample <- cbind("samplenr"=values$nr, "mzmed.pkt"=values$mean, "fq"=values$qual, "mzmin.pkt"=values$min, "mzmax.pkt"=values$max, "height.pkt"=values$area)

index <- which(outlist.persample[ ,"height.pkt"]==0)
if (length(index) > 0) {
outlist.persample <- outlist.persample[-index,]
}

# save(outlist.persample, file=paste(outdir, paste(sampname, "_", scanmode, ".RData", sep=""), sep="/"))
save(outlist.persample, file=paste("./", sampname, "_", scanmode, ".RData", sep=""))

cat(paste("There were", values$spikes, "spikes!"))
}
Loading

0 comments on commit 972a97e

Please sign in to comment.