diff --git a/DIMS/AddOnFunctions/10-collectSamplesFilled_extraoutput.R b/DIMS/AddOnFunctions/10-collectSamplesFilled_extraoutput.R
new file mode 100644
index 0000000..21ccdce
--- /dev/null
+++ b/DIMS/AddOnFunctions/10-collectSamplesFilled_extraoutput.R
@@ -0,0 +1,94 @@
+#!/usr/bin/Rscript
+
+.libPaths(new = "/hpc/local/CentOS7/dbg_mz/R_libs/3.2.2")
+
+# load required packages 
+# none 
+
+# define parameters 
+cmd_args <- commandArgs(trailingOnly = TRUE)
+for (arg in cmd_args) cat("  ", arg, "\n")
+
+outdir <- cmd_args[1]
+scanmode <- cmd_args[2]
+normalization <- cmd_args[3]
+scripts <- cmd_args[4]
+z_score <- as.numeric(cmd_args[5])
+ppm <- as.numeric(cmd_args[6])
+
+#outdir <- "/Users/nunen/Documents/Metab/processed/test_old"
+#scanmode <- "negative"
+#normalization <- "disabled"
+#scripts <- "/Users/nunen/Documents/Metab/DIMS/scripts"
+#db <- "/Users/nunen/Documents/Metab/DIMS/db/HMDB_add_iso_corrNaCl_withIS_withC5OH.RData"
+#z_score <- 0
+
+object.files = list.files(paste(outdir, "9-samplePeaksFilled", sep="/"), full.names=TRUE, pattern=scanmode)
+outlist.tot=NULL
+for (i in 1:length(object.files)) {
+  load(object.files[i])
+  print(print(object.files[i]))
+  outlist.tot = rbind(outlist.tot, final.outlist.idpat3)
+}
+
+source(paste(scripts, "AddOnFunctions/sourceDir.R", sep="/"))
+sourceDir(paste(scripts, "AddOnFunctions", sep="/"))
+
+# remove duplicates
+outlist.tot = mergeDuplicatedRows(outlist.tot)
+
+# sort on mass
+outlist.tot = outlist.tot[order(outlist.tot[,"mzmed.pgrp"]),]
+
+# normalization
+load(paste0(outdir, "/repl.pattern.",scanmode,".RData"))
+
+if (normalization != "disabled") {
+  outlist.tot = normalization_2.1(outlist.tot, fileName, names(repl.pattern.filtered), on=normalization, assi_label="assi_HMDB")
+}
+
+if (z_score == 1) {
+  outlist.stats = statistics_z(outlist.tot, sortCol=NULL, adducts=FALSE)
+  nr.removed.samples=length(which(repl.pattern.filtered[]=="character(0)"))
+  order.index.int=order(colnames(outlist.stats)[8:(length(repl.pattern.filtered)-nr.removed.samples+7)])
+  outlist.stats.more = cbind(outlist.stats[,1:7],
+                             outlist.stats[,(length(repl.pattern.filtered)-nr.removed.samples+8):(length(repl.pattern.filtered)-nr.removed.samples+8+6)],
+                             outlist.stats[,8:(length(repl.pattern.filtered)-nr.removed.samples+7)][order.index.int],
+                             outlist.stats[,(length(repl.pattern.filtered)-nr.removed.samples+5+10):ncol(outlist.stats)])
+  
+  tmp.index=grep("_Zscore", colnames(outlist.stats.more), fixed = TRUE)
+  tmp.index.order=order(colnames(outlist.stats.more[,tmp.index]))
+  tmp = outlist.stats.more[,tmp.index[tmp.index.order]]
+  outlist.stats.more=outlist.stats.more[,-tmp.index]
+  outlist.stats.more=cbind(outlist.stats.more,tmp)
+  outlist.tot = outlist.stats.more
+}
+
+# filter identified compounds
+index.1=which((outlist.tot[,"assi_HMDB"]!="") & (!is.na(outlist.tot[,"assi_HMDB"])))
+index.2=which((outlist.tot[,"iso_HMDB"]!="") & (!is.na(outlist.tot[,"iso_HMDB"])))
+index=union(index.1,index.2)
+outlist.ident = outlist.tot[index,]
+outlist.not.ident = outlist.tot[-index,]
+
+if (z_score == 1) {
+  outlist.ident$ppmdev=as.numeric(outlist.ident$ppmdev)
+  outlist.ident <- outlist.ident[which(outlist.ident["ppmdev"] >= -ppm & outlist.ident["ppmdev"] <= ppm),]
+}
+# NAs in theormz_noise <======================================================================= uitzoeken!!!
+outlist.ident$theormz_noise[which(is.na(outlist.ident$theormz_noise))] = 0
+outlist.ident$theormz_noise=as.numeric(outlist.ident$theormz_noise)
+outlist.ident$theormz_noise[which(is.na(outlist.ident$theormz_noise))] = 0
+outlist.ident$theormz_noise=as.numeric(outlist.ident$theormz_noise)
+
+save(outlist.not.ident, outlist.ident, file=paste(outdir, "/outlist_identified_", scanmode, ".RData", sep=""))
+
+# Extra output in Excel-readable format:
+remove_columns <- c("fq.best", "fq.worst", "mzmin.pgrp", "mzmax.pgrp")
+remove_colindex <- which(colnames(outlist.ident) %in% remove_columns)
+outlist.ident <- outlist.ident[ , -remove_colindex]
+write.table(outlist.ident, file=paste0(outdir, "/outlist_identified_", scanmode, ".txt"), sep="\t", row.names = FALSE)
+remove_colindex <- which(colnames(outlist.not.ident) %in% remove_columns)
+outlist.not.ident <- outlist.not.ident[ , -remove_colindex]
+write.table(outlist.not.ident, file=paste0(outdir, "/outlist_not_identified_", scanmode, ".txt"), sep="\t", row.names = FALSE)
+
diff --git a/DIMS/AddOnFunctions/add_lab_id_and_onderzoeksnummer.R b/DIMS/AddOnFunctions/add_lab_id_and_onderzoeksnummer.R
new file mode 100644
index 0000000..9d91a55
--- /dev/null
+++ b/DIMS/AddOnFunctions/add_lab_id_and_onderzoeksnummer.R
@@ -0,0 +1,10 @@
+add_lab_id_and_onderzoeksnummer <- function(df_metabs_helix) {
+  # Split patient number into labnummer and Onderzoeksnummer
+  for (row in 1:nrow(df_metabs_helix)) {
+    df_metabs_helix[row,"labnummer"] <- gsub("^P|\\.[0-9]*", "", df_metabs_helix[row,"Patient"])
+    labnummer_split <- strsplit(as.character(df_metabs_helix[row, "labnummer"]), "M")[[1]]
+    df_metabs_helix[row, "Onderzoeksnummer"] <- paste0("MB", labnummer_split[1], "/", labnummer_split[2])
+  }
+  
+  return(df_metabs_helix)
+}
diff --git a/DIMS/AddOnFunctions/checkOverlap.R b/DIMS/AddOnFunctions/checkOverlap.R
new file mode 100644
index 0000000..f7ef818
--- /dev/null
+++ b/DIMS/AddOnFunctions/checkOverlap.R
@@ -0,0 +1,16 @@
+checkOverlap <- function(range1,range2){
+  if (length(intersect(range1,range2))==2) {
+    # Overlap
+    # message("Overlap, smaller range is used")
+    if (length(range1) >= length(range2)){
+      range1=range1[-length(range1)]  
+    } else {
+      range2=range2[-1]
+    }
+  } else if (length(intersect(range1,range2))==3){
+    # message("Overlap, smaller range is used")
+    range1=range1[-length(range1)]  
+    range2=range2[-1]
+  }
+  return(list("range1"=range1,"range2"=range2))
+}
diff --git a/DIMS/AddOnFunctions/check_same_samplename.R b/DIMS/AddOnFunctions/check_same_samplename.R
new file mode 100644
index 0000000..16ae9cf
--- /dev/null
+++ b/DIMS/AddOnFunctions/check_same_samplename.R
@@ -0,0 +1,4 @@
+# function to test whether intensity and Z-score columns match
+check_same_samplename <- function(int_col_name, zscore_col_name) {
+  paste0(int_col_name, "_Zscore") == zscore_col_name
+}
diff --git a/DIMS/AddOnFunctions/create_violin_plots.R b/DIMS/AddOnFunctions/create_violin_plots.R
new file mode 100644
index 0000000..df398be
--- /dev/null
+++ b/DIMS/AddOnFunctions/create_violin_plots.R
@@ -0,0 +1,116 @@
+create_violin_plots <- function(pdf_dir, pt_name, metab_perpage, top_metab_pt=NULL) {
+  
+  # set parameters for plots
+  plot_height <- 9.6 
+  plot_width <- 6
+  fontsize <- 1 
+  nr_plots_perpage <- 20
+  circlesize <- 0.8 
+  colors_4plot <- c("#22E4AC", "#00B0F0", "#504FFF","#A704FD","#F36265","#DA0641")
+  #                   green     blue      blue/purple purple    orange    red
+  
+  # patient plots, create the PDF device
+  pt_name_sub <- pt_name
+  suffix <- ""
+  if (grepl("Diagnostics", pdf_dir) & is_diagnostic_patient(pt_name)) {
+    prefix <- "MB"
+    suffix <- "_DIMS_PL_DIAG"
+    # substitute P and M in P2020M00001 into right format for Helix
+    pt_name_sub <- gsub("[PM]", "", pt_name)
+    pt_name_sub <- gsub("\\..*", "", pt_name_sub)
+  } else if (grepl("Diagnostics", pdf_dir)) {
+    prefix <- "Dx_"
+  } else if (grepl("IEM", pdf_dir)) {
+    prefix <- "IEM_"
+  } else {
+    prefix <- "R_"
+  }
+  
+  pdf(paste0(pdf_dir, "/", prefix, pt_name_sub, suffix, ".pdf"),
+      onefile = TRUE,
+      width = plot_width,
+      height = plot_height)
+  
+  # page headers:
+  page_headers <- names(metab_perpage)
+  
+  # put table into PDF file, if not empty
+  if (!is.null(dim(top_metab_pt))) {
+    plot.new()
+    # get the names and numbers in the table aligned
+    table_theme <- ttheme_default(core = list(fg_params = list(hjust=0, x=0.05, fontsize=6)),
+                                  colhead = list(fg_params = list(fontsize=8, fontface="bold")))
+    grid.table(top_metab_pt, theme = table_theme, rows = NULL)
+    # g <- tableGrob(top_metab_pt)
+    # grid.draw(g)
+    text(x=0.45, y=1.02, paste0("Top deviating metabolites for patient: ", pt_name), font=1, cex=1)
+  }
+  
+  # violin plots
+  for (page_index in 1:length(metab_perpage)) {
+    # extract list of metabolites to plot on a page
+    metab_list_2plot <- metab_perpage[[page_index]]
+    # extract original data for patient of interest (pt_name) before cut-offs
+    pt_list_2plot_orig <- metab_list_2plot[which(metab_list_2plot$variable == pt_name), ]
+    # cut off Z-scores higher than 20 or lower than -5 (for nicer plots)
+    metab_list_2plot$value[metab_list_2plot$value >  20] <-  20
+    metab_list_2plot$value[metab_list_2plot$value <  -5] <-  -5
+    # extract data for patient of interest (pt_name)
+    pt_list_2plot <- metab_list_2plot[which(metab_list_2plot$variable == pt_name), ]
+    # restore original Z-score before cut-off, for showing Z-scores in PDF
+    pt_list_2plot$value_orig <- pt_list_2plot_orig$value
+    # remove patient of interest (pt_name) from list; violins will be made up of controls and other patients
+    metab_list_2plot <- metab_list_2plot[-which(metab_list_2plot$variable == pt_name), ]
+    # subtitle per page
+    sub_perpage <- gsub("_", " ", page_headers[page_index])
+    # for IEM plots, put subtitle on two lines
+    sub_perpage <- gsub("probability", "\nprobability", sub_perpage)
+    # add size parameter for showing Z-score of patient per metabolite
+    Z_size <- rep(3, nrow(pt_list_2plot))
+    # set size to 0 if row is empty
+    Z_size[is.na(pt_list_2plot$value)] <- 0
+    
+    # draw violin plot. shape=22 gives square for patient of interest
+    ggplot_object <- ggplot(metab_list_2plot, aes(x=value, y=HMDB_name)) +
+      theme(axis.text.y=element_text(size=rel(fontsize)), plot.caption = element_text(size=rel(fontsize))) +
+      xlim(-5, 20) +
+      geom_violin(scale="width") +
+      geom_point(data = pt_list_2plot, aes(color=value), size = 3.5*circlesize, shape=22, fill="white") +
+      scale_fill_gradientn(colors = colors_4plot, values = NULL, space = "Lab", na.value = "grey50", guide = "colourbar", aesthetics = "colour") +
+      # add Z-score value for patient of interest at x=16
+      geom_text(data = pt_list_2plot, aes(16, label = paste0("Z=", round(value_orig, 2))), hjust = "left", vjust = +0.2, size = Z_size) +
+      # add labels. Use font Courier to get all the plots in the same location.
+      labs(x = "Z-scores", y = "Metabolites", subtitle = sub_perpage, color = "z-score") + 
+      theme(axis.text.y = element_text(family = "Courier", size=6)) +
+      # do not show legend
+      theme(legend.position="none") +
+      # add title 
+      ggtitle(label = paste0("Results for patient ", pt_name)) + 
+      # labs(x = "Z-scores", y = "Metabolites", title = paste0("Results for patient ", pt_name), subtitle = sub_perpage, color = "z-score") + 
+      # add vertical lines
+      geom_vline(xintercept = 2, col = "grey", lwd = 0.5, lty=2) +
+      geom_vline(xintercept = -2, col = "grey", lwd = 0.5, lty=2)
+    
+    suppressWarnings(print(ggplot_object))
+    
+  }
+  
+  # add explanation of violin plots, version number etc.
+  # plot.new()
+  plot(NA, xlim=c(0,5), ylim=c(0,5), bty='n', xaxt='n', yaxt='n', xlab='', ylab='')
+  if (length(explanation) > 0) {
+    text(0.2, 5, explanation[1], pos=4, cex=0.8)
+    for (line_index in 2:length(explanation)) {
+      text_y_position <- 5 - (line_index*0.2)
+      text(-0.2, text_y_position, explanation[line_index], pos=4, cex=0.5)
+    }
+    # full_explanation <- paste(explanation[2:length(explanation)], sep="  \n")
+    # text(0.2, 4, full_explanation, pos=4, cex=0.6)
+    #explanation_grob=textGrob(apply(full_explanation, 2, paste, collapse="\n"))
+    #grid.arrange(explanation_grob)
+  }
+  
+  # close the PDF file
+  dev.off()
+  
+}
diff --git a/DIMS/AddOnFunctions/elementInfo.R b/DIMS/AddOnFunctions/elementInfo.R
new file mode 100644
index 0000000..18eaeff
--- /dev/null
+++ b/DIMS/AddOnFunctions/elementInfo.R
@@ -0,0 +1,6 @@
+elementInfo <- function(name, elements = NULL) { # from Rdisop function .getElement  
+  if (!is.list(elements) || length(elements)==0 ) {
+    elements <- initializePSE() }
+  if (name=="CH3OH+H"){rex<-"^CH3OH\\+H$"}else{rex <- paste ("^",name,"$", sep="")}
+  elements [[grep (rex, sapply (elements, function(x) {x$name}))]]
+}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/export.R b/DIMS/AddOnFunctions/export.R
new file mode 100644
index 0000000..cab79a0
--- /dev/null
+++ b/DIMS/AddOnFunctions/export.R
@@ -0,0 +1,13 @@
+export <- function(peaklist, plotdir, adducts, control_label, case_label, patients, sub, fileName){
+  # peaklist = outlist.adducts
+  # adducts=TRUE
+  # control_label="C"
+  # case_label="P"
+  # patients = getPatients(outlist.adducts)
+  # sub=3000
+  
+  # peaklist = statistics_z_4export(as.data.frame(peaklist), plotdir, patients, adducts, control_label, case_label)
+  
+  # generateExcelFile(peaklist, file.path(plotdir), imageNum=2, fileName, subName=c("","_box"), sub, adducts)
+  
+}
diff --git a/DIMS/AddOnFunctions/findPeaks.Gauss.HPC.R b/DIMS/AddOnFunctions/findPeaks.Gauss.HPC.R
new file mode 100644
index 0000000..c00aa92
--- /dev/null
+++ b/DIMS/AddOnFunctions/findPeaks.Gauss.HPC.R
@@ -0,0 +1,24 @@
+### fit Gaussian estimate mean and integrate to obtain intensity
+findPeaks.Gauss.HPC <- function(plist, breaks.fwhm, int.factor, scale, resol, outdir, scanmode, plot, thresh, width, height) {
+  sampname <- colnames(plist)[1]
+  
+  range <- as.vector(plist)
+  names(range) <- rownames(plist)
+  
+  values <- list("mean"=NULL, "area"=NULL, "nr"=NULL, "min"=NULL, "max"=NULL, "qual"=NULL, "spikes"=0)
+  
+  values <- searchMZRange(range, values, int.factor, scale, resol, outdir, sampname, scanmode, plot, width, height, thresh)
+  
+  outlist.persample <- NULL
+  outlist.persample <- cbind("samplenr"=values$nr, "mzmed.pkt"=values$mean, "fq"=values$qual, "mzmin.pkt"=values$min, "mzmax.pkt"=values$max, "height.pkt"=values$area)
+
+  index <- which(outlist.persample[ ,"height.pkt"]==0)
+  if (length(index) > 0) {
+    outlist.persample <- outlist.persample[-index,]
+  }
+  
+  # save(outlist.persample, file=paste(outdir, paste(sampname, "_", scanmode, ".RData", sep=""), sep="/"))
+  save(outlist.persample, file=paste("./", sampname, "_", scanmode, ".RData", sep=""))
+  
+  cat(paste("There were", values$spikes, "spikes!"))
+}
diff --git a/DIMS/AddOnFunctions/fit1Peak.R b/DIMS/AddOnFunctions/fit1Peak.R
new file mode 100644
index 0000000..b09ce20
--- /dev/null
+++ b/DIMS/AddOnFunctions/fit1Peak.R
@@ -0,0 +1,120 @@
+fit1Peak <- function(x2,x,y,index,scale,resol,plot,FQ,useBounds) {
+  #FQ=FQ1
+  
+  if (length(y)<3){
+    message("Range to small, no fit possible!")
+  } else {
+    
+    if ((length(y)==4)) {
+      mu = weighted.mean(x,y)
+      sigma = getSD(x,y)
+      fitP = fitG_2(x,y,sigma,mu,scale,useBounds)
+    } else {
+      
+      if ((length(x) - length(index)) < 2) {
+        range1=c((length(x)-4):length(x))
+      } else if (length(index) < 2) {
+        range1=c(1:5)
+      } else {
+        range1=c(index[1]-2,index[1]-1,index[1],index[1]+1,index[1]+2)
+      }
+      
+      if (range1[1]==0) range1=range1[-1]
+      
+      # remove NA
+      if (length(which(is.na(y[range1])))!=0) range1=range1[-which(is.na(y[range1]))]
+      
+      mu = weighted.mean(x[range1],y[range1])
+      sigma = getSD(x[range1],y[range1])
+      fitP = fitG_2(x,y,sigma,mu,scale,useBounds)
+    }
+    
+    p2 = fitP$par
+    
+    #fq_new = abs(sum(y) - sum(p2[2]*dnorm(x,p2[1],sigma)))/sum(y)
+    fq_new = getFitQuality(x,y,p2[1],p2[1],resol,p2[2],sigma)$fq_new
+    
+    if (plot & (fq_new < FQ)) lines(x2,p2[2]*dnorm(x2,p2[1],sigma), col="green")
+    
+    scale_new = 1.2*scale
+    # cat(fq_new)
+    
+    if (fq_new > FQ) { # <=== bad fit?
+      # optimize scaling factor
+      fq = 0
+      scale = 0
+      
+      if (sum(y)>sum(p2[2]*dnorm(x,p2[1],sigma))){
+        while ((round(fq, digits = 3) != round(fq_new, digits = 3)) & (scale_new<10000))  {
+          fq = fq_new
+          scale = scale_new
+          
+          #cat(scale)
+          fitP = fitG_2(x,y,sigma,mu,scale,useBounds)
+          p2 = fitP$par
+          
+          #fq_new = abs(sum(y) - sum(p2[2]*dnorm(x,p2[1],sigma)))/sum(y)
+          fq_new = getFitQuality(x,y,p2[1],p2[1],resol,p2[2],sigma)$fq_new
+          scale_new=1.2*scale
+          
+          if (plot & (fq_new < FQ)) lines(x2,p2[2]*dnorm(x2,p2[1],sigma), col="green")
+          #           cat(paste("fq_new: ", fq_new))
+          #           cat(paste("scale_new: ", scale_new))
+          #           (round(fq, digits = 4) != round(fq_new, digits = 4))
+        }
+      } else {
+        while ((round(fq, digits = 3) != round(fq_new, digits = 3)) & (scale_new<10000)) {
+          fq = fq_new
+          scale = scale_new
+          
+          # cat(scale)
+          fitP = fitG_2(x,y,sigma,mu,scale,useBounds)
+          p2 = fitP$par
+          
+          #fq_new = abs(sum(y) - sum(p2[2]*dnorm(x,p2[1],sigma)))/sum(y)
+          fq_new = getFitQuality(x,y,p2[1],p2[1],resol,p2[2],sigma)$fq_new
+          scale_new=0.8*scale
+          
+          if (plot & (fq_new < FQ)) lines(x2,p2[2]*dnorm(x2,p2[1],sigma), col="green")
+          # cat(paste("fq_new: ", fq_new))
+        }
+      }
+      
+      if (fq < fq_new) {
+        #         cat(paste("fq_new: ", fq_new))
+        #         cat(paste("fq: ", fq))
+        #         cat(paste("scale_new: ", scale_new))
+        #         cat(paste("scale: ", scale))
+        
+        fitP = fitG_2(x,y,sigma,mu,scale,useBounds)
+        p2 = fitP$par
+        fq_new = fq
+        # cat(paste("==> fq_new: ", fq_new))
+        if (plot & (fq_new < FQ)) lines(x2,p2[2]*dnorm(x2,p2[1],sigma), col="dark green")
+        
+      }
+    }
+    
+    if (plot & (fq_new < FQ)) {
+      # plot ###################
+      #lines(x2,p2[2]*dnorm(x2,p2[1],sigma), col="green")
+      fwhm = getFwhm(p2[1],resol)
+      half_max = max(p2[2]*dnorm(x2,p2[1],sigma))*0.5
+      lines(c(p2[1] - 0.5*fwhm, p2[1] + 0.5*fwhm),c(half_max,half_max),col="orange")
+      abline(v = p2[1], col="green")
+      h=c(paste("mean =", p2[1], sep=" "),
+          paste("fq =", fq_new, sep=" "))
+      legend("topright", legend=h)
+      ##########################
+      
+      
+      #       abline(v = x[6], col="red")
+      #       fwhm = getFwhm(x[6])
+      #       abline(v =x[6] + 0.6*fwhm, col="red")
+      #       abline(v =x[6] - 0.6*fwhm, col="red")
+      
+    }
+  }
+  
+  return(list("mean"=p2[1], "scale"=p2[2], "sigma"=sigma, "qual"=fq_new))
+}
diff --git a/DIMS/AddOnFunctions/fit2G.R b/DIMS/AddOnFunctions/fit2G.R
new file mode 100644
index 0000000..9dc76dd
--- /dev/null
+++ b/DIMS/AddOnFunctions/fit2G.R
@@ -0,0 +1,43 @@
+fit2G_2 <- function(x,y,sig1,sig2,mu1,scale1,mu2,scale2,useBounds){
+  
+  f = function(p){
+    d = p[2]*dnorm(x,mean=p[1],sd=sig1) + p[4]*dnorm(x,mean=p[3],sd=sig2)
+    sum((d-y)^2)
+  }
+  
+  if (useBounds){
+    lower = c(x[1],0,x[1],0)
+    upper = c(x[length(x)],Inf,x[length(x)],Inf)
+    
+    if (is.null(mu2) && is.null(scale2) && is.null(sig2)){
+      sig2=sig1
+      optim(c(as.numeric(mu1),
+              as.numeric(scale1),
+              as.numeric(mu1),
+              as.numeric(scale1)),
+            f,control=list(maxit=10000),method="L-BFGS-B",lower=lower,upper=upper)
+    } else {
+      optim(c(as.numeric(mu1),
+              as.numeric(scale1),
+              as.numeric(mu2),
+              as.numeric(scale2)),
+            f,control=list(maxit=10000),method="L-BFGS-B",lower=lower,upper=upper)
+    }
+    
+  } else {
+    if (is.null(mu2) && is.null(scale2) && is.null(sig2)){
+      sig2=sig1
+      optim(c(as.numeric(mu1),
+              as.numeric(scale1),
+              as.numeric(mu1),
+              as.numeric(scale1)),
+            f,control=list(maxit=10000))
+    } else{
+      optim(c(as.numeric(mu1),
+              as.numeric(scale1),
+              as.numeric(mu2),
+              as.numeric(scale2)),
+            f,control=list(maxit=10000))
+    }  
+  }
+}
diff --git a/DIMS/AddOnFunctions/fit2peaks.R b/DIMS/AddOnFunctions/fit2peaks.R
new file mode 100644
index 0000000..4d9c64d
--- /dev/null
+++ b/DIMS/AddOnFunctions/fit2peaks.R
@@ -0,0 +1,100 @@
+fit2peaks <- function(x2,x,y,index,scale,resol,useBounds=FALSE,plot=FALSE,FQ,int.factor){
+  
+  peak.mean = NULL
+  peak.area = NULL
+  peak.scale = NULL
+  peak.sigma = NULL
+  
+  range1=c(index[1]-2,index[1]-1,index[1],index[1]+1,index[1]+2)
+  if (range1[1]==0) range1=range1[-1]
+  
+  range2=c(index[2]-2,index[2]-1,index[2],index[2]+1,index[2]+2)
+  
+  if (length(x)<range2[length(range2)]) range2=range2[-length(range2)]
+  
+  range1=checkOverlap(range1,range2)[[1]]
+  range2=checkOverlap(range1,range2)[[2]]
+  
+  # check for negative or 0
+  remove=which(range1<1)
+  if (length(remove)>0) range1=range1[-remove]
+  remove=which(range2<1)
+  if (length(remove)>0) range2=range2[-remove]
+  
+  # remove NA
+  if (length(which(is.na(y[range1])))!=0) range1=range1[-which(is.na(y[range1]))]
+  if (length(which(is.na(y[range2])))!=0) range2=range2[-which(is.na(y[range2]))]
+  
+  mu1 = weighted.mean(x[range1],y[range1])
+  sigma1 = getSD(x[range1],y[range1])
+  
+  #   message(paste("fit2peaks mu =>", mu1))
+  #   message(paste("fit2peaks sigma =>", sigma1))
+  #   message(paste("fit2peaks scale =>", scale))
+  
+  fitP = fitG_2(x[range1],y[range1],sigma1,mu1,scale,useBounds)
+  p = fitP$par
+  
+  mu2 = weighted.mean(x[range2],y[range2])
+  sigma2 = getSD(x[range2],y[range2])
+  fitP = fitG_2(x[range2],y[range2],sigma2,mu2,scale,useBounds)
+  p2 = fitP$par
+  
+  fit2P = fit2G_2(x, y, sigma1, sigma2, p[1], p[2], p2[1], p2[2],useBounds)
+  p3 = fit2P$par
+  
+  if (is.null(sigma2)) sigma2=sigma1
+  
+  
+  # plot ###################
+  sumFit2 = (p3[2]*dnorm(x2,p3[1],sigma1))+(p3[4]*dnorm(x2,p3[3],sigma2))
+  sumFit = (p3[2]*dnorm(x,p3[1],sigma1))+(p3[4]*dnorm(x,p3[3],sigma2))
+  fq=getFitQuality(x,y,sort(c(p3[1],p3[3]))[1],sort(c(p3[1],p3[3]))[2],resol,sumFit=sumFit)$fq_new
+  
+  fwhm = getFwhm(p3[1],resol)
+  half_max = max(p3[2]*dnorm(x2,p3[1],sigma1))*0.5
+  if (plot & (fq < FQ)) lines(c(p3[1] - 0.5*fwhm, p3[1] + 0.5*fwhm),c(half_max,half_max),col="orange")
+  if (plot & (fq < FQ)) lines(x2,p3[4]*dnorm(x2,p3[3],sigma2),col="grey")
+  if (plot & (fq < FQ)) abline(v = p3[3], col="grey")
+  
+  fwhm = getFwhm(p3[3],resol)
+  half_max = max(p3[4]*dnorm(x2,p3[3],sigma2))*0.5
+  if (plot & (fq < FQ)) lines(c(p3[3] - 0.5*fwhm, p3[3] + 0.5*fwhm),c(half_max,half_max),col="orange")
+  
+  if (plot & (fq < FQ)) lines(x2,sumFit2,col="black")
+  
+  if (plot & (fq < FQ)) lines(x2,p3[2]*dnorm(x2,p3[1],sigma1),col="grey")
+  if (plot & (fq < FQ)) abline(v = p3[1], col="grey")
+  
+  
+  h2=c(paste("mean =", p3[1], sep=" "),
+       paste("mean =", p3[3], sep=" "),
+       paste("fq =", fq, sep=" "))
+  
+  if (plot & (fq < FQ)) legend("topright", legend=h2)
+  ##########################
+  
+  #  lines(x2,p3[4]*dnorm(x2,p3[3],sigma2),col="red")
+  #   area1 = sum(p3[2]*dnorm(x2,p3[1],sigma1))
+  #   area2 = sum(p3[4]*dnorm(x2,p3[3],sigma2))
+  
+  #   area1 = max(p3[2]*dnorm(x2,p3[1],sigma1))
+  #   area2 = max(p3[4]*dnorm(x2,p3[3],sigma2))
+  
+  area1 = getArea(p3[1],resol,p3[2],sigma1,int.factor)
+  area2 = getArea(p3[3],resol,p3[4],sigma2,int.factor)
+  
+  peak.area = c(peak.area, area1)
+  peak.area = c(peak.area, area2)
+  
+  peak.mean = c(peak.mean, p3[1])
+  peak.mean = c(peak.mean, p3[3])
+  
+  peak.scale = c(peak.scale, p3[2])
+  peak.scale = c(peak.scale, p3[4])
+  
+  peak.sigma = c(peak.sigma, sigma1)
+  peak.sigma = c(peak.sigma, sigma2)
+  
+  return(list("mean"=peak.mean, "scale"=peak.scale, "sigma"=peak.sigma, "area"=peak.area, "qual"=fq))
+}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/fit3G.R b/DIMS/AddOnFunctions/fit3G.R
new file mode 100644
index 0000000..05543c5
--- /dev/null
+++ b/DIMS/AddOnFunctions/fit3G.R
@@ -0,0 +1,18 @@
+fit3G_2 <- function(x,y,sig1,sig2,sig3,mu1,scale1,mu2,scale2,mu3,scale3,useBounds){
+  
+  f = function(p){
+    d = p[2]*dnorm(x,mean=p[1],sd=sig1) + p[4]*dnorm(x,mean=p[3],sd=sig2) + p[6]*dnorm(x,mean=p[5],sd=sig3)
+    sum((d-y)^2)
+  }
+  
+  if (useBounds){
+    lower = c(x[1],0,x[1],0,x[1],0)
+    upper = c(x[length(x)],Inf,x[length(x)],Inf,x[length(x)],Inf)
+    
+    optim(c(mu1,scale1,mu2,scale2,mu3,scale3),f,control=list(maxit=10000),method="L-BFGS-B",lower=lower,upper=upper)
+    
+  } else {
+    optim(c(mu1,scale1,mu2,scale2,mu3,scale3),f,control=list(maxit=10000))
+  }
+  
+}
diff --git a/DIMS/AddOnFunctions/fit3peaks.R b/DIMS/AddOnFunctions/fit3peaks.R
new file mode 100644
index 0000000..e373f14
--- /dev/null
+++ b/DIMS/AddOnFunctions/fit3peaks.R
@@ -0,0 +1,122 @@
+fit3peaks <- function(x2,x,y,index,scale,resol,useBounds=FALSE,plot=FALSE,FQ,int.factor){
+  
+  peak.mean = NULL
+  peak.area = NULL
+  peak.scale = NULL
+  peak.sigma = NULL
+  
+  range1=c(index[1]-2,index[1]-1,index[1],index[1]+1,index[1]+2)
+  range2=c(index[2]-2,index[2]-1,index[2],index[2]+1,index[2]+2)
+  range3=c(index[3]-2,index[3]-1,index[3],index[3]+1,index[3]+2)
+  
+  remove=which(range1<1)
+  if (length(remove)>0) {
+    range1=range1[-remove]
+  }
+  remove=which(range2<1)
+  if (length(remove)>0) {
+    range2=range2[-remove]
+  }
+  
+  if (length(x)<range3[length(range3)]) range3=range3[-length(range3)]
+  
+  range1=checkOverlap(range1,range2)[[1]]
+  range2=checkOverlap(range1,range2)[[2]]
+  range2=checkOverlap(range2,range3)[[1]]
+  range3=checkOverlap(range2,range3)[[2]]
+  
+  # check for negative or 0
+  remove=which(range1<1)
+  if (length(remove)>0) range1=range1[-remove]
+  remove=which(range2<1)
+  if (length(remove)>0) range2=range2[-remove]
+  remove=which(range3<1)
+  if (length(remove)>0) range3=range3[-remove]
+  
+  # remove NA
+  if (length(which(is.na(y[range1])))!=0) range1=range1[-which(is.na(y[range1]))]
+  if (length(which(is.na(y[range2])))!=0) range2=range2[-which(is.na(y[range2]))]
+  if (length(which(is.na(y[range3])))!=0) range3=range3[-which(is.na(y[range3]))]
+  
+  mu1 = weighted.mean(x[range1],y[range1])
+  sigma1 = getSD(x[range1],y[range1])
+  fitP = fitG_2(x[range1],y[range1],sigma1,mu1,scale,useBounds)
+  p = fitP$par
+  
+  mu2 = weighted.mean(x[range2],y[range2])
+  sigma2 = getSD(x[range2],y[range2])
+  fitP = fitG_2(x[range2],y[range2],sigma2,mu2,scale,useBounds)
+  p2 = fitP$par
+  
+  mu3 = weighted.mean(x[range3],y[range3])
+  sigma3 = getSD(x[range3],y[range3])
+  fitP = fitG_2(x[range3],y[range3],sigma3,mu3,scale,useBounds)
+  p3 = fitP$par
+  
+  fit3P = fit3G_2(x, y, sigma1, sigma2, sigma3, p[1], p[2], p2[1], p2[2], p3[1], p3[2], useBounds)
+  p4 = fit3P$par
+  
+  # plot ##############################
+  sumFit2 = (p4[2]*dnorm(x2,p4[1],sigma1))+(p4[4]*dnorm(x2,p4[3],sigma2))+(p4[6]*dnorm(x2,p4[5],sigma3))
+  sumFit = (p4[2]*dnorm(x,p4[1],sigma1))+(p4[4]*dnorm(x,p4[3],sigma2))+(p4[6]*dnorm(x,p4[5],sigma3))
+  fq=getFitQuality(x,y,sort(c(p4[1],p4[3],p4[5]))[1],sort(c(p4[1],p4[3],p4[5]))[3],resol,sumFit=sumFit)$fq_new
+  
+  if (plot & (fq < FQ)) lines(x2,p4[2]*dnorm(x2,p4[1],sigma1),col="yellow")
+  if (plot & (fq < FQ)) abline(v = p4[1], col="yellow")
+  fwhm = getFwhm(p4[1],resol)
+  half_max = max(p4[2]*dnorm(x2,p4[1],sigma1))*0.5
+  if (plot & (fq < FQ)) lines(c(p4[1] - 0.5*fwhm, p4[1] + 0.5*fwhm),c(half_max,half_max),col="orange")
+  
+  if (plot & (fq < FQ)) lines(x2,p4[4]*dnorm(x2,p4[3],sigma2),col="yellow")
+  if (plot & (fq < FQ)) abline(v = p4[3], col="yellow")
+  fwhm = getFwhm(p4[3],resol)
+  half_max = max(p4[4]*dnorm(x2,p4[3],sigma2))*0.5
+  if (plot & (fq < FQ)) lines(c(p4[3] - 0.5*fwhm, p4[3] + 0.5*fwhm),c(half_max,half_max),col="orange")
+  
+  if (plot & (fq < FQ)) lines(x2,p4[6]*dnorm(x2,p4[5],sigma3),col="yellow")
+  if (plot & (fq < FQ)) abline(v = p4[5], col="yellow")
+  fwhm = getFwhm(p4[5],resol)
+  half_max = max(p4[6]*dnorm(x2,p4[5],sigma3))*0.5
+  if (plot & (fq < FQ)) lines(c(p4[5] - 0.5*fwhm, p4[5] + 0.5*fwhm),c(half_max,half_max),col="orange")
+  
+  if (plot & (fq < FQ)) lines(x2,sumFit2,col="red")
+  
+  h2=c(paste("mean =", p4[1], sep=" "),
+       paste("mean =", p4[3], sep=" "),
+       paste("mean =", p4[5], sep=" "),
+       paste("fq =", fq, sep=" "))
+  
+  if (plot & (fq < FQ)) legend("topright", legend=h2)
+  #########################################
+  
+  #   area1 = sum(p4[2]*dnorm(x2,p4[1],sigma1))
+  #   area2 = sum(p4[4]*dnorm(x2,p4[3],sigma2))
+  #   area3 = sum(p4[6]*dnorm(x2,p4[5],sigma3))
+  
+  #   area1 = max(p4[2]*dnorm(x2,p4[1],sigma1))
+  #   area2 = max(p4[4]*dnorm(x2,p4[3],sigma2))
+  #   area3 = max(p4[6]*dnorm(x2,p4[5],sigma3))
+  
+  area1 = getArea(p4[1],resol,p4[2],sigma1,int.factor)
+  area2 = getArea(p4[3],resol,p4[4],sigma2,int.factor)
+  area3 = getArea(p4[5],resol,p4[6],sigma3,int.factor)
+  
+  peak.area = c(peak.area, area1)
+  peak.area = c(peak.area, area2)
+  peak.area = c(peak.area, area3)
+  
+  peak.mean = c(peak.mean, p4[1])
+  peak.mean = c(peak.mean, p4[3])
+  peak.mean = c(peak.mean, p4[5])
+  
+  peak.scale = c(peak.scale, p4[2])
+  peak.scale = c(peak.scale, p4[4])
+  peak.scale = c(peak.scale, p4[6])
+  
+  peak.sigma = c(peak.sigma, sigma1)
+  peak.sigma = c(peak.sigma, sigma2)
+  peak.sigma = c(peak.sigma, sigma3)
+  
+  return(list("mean"=peak.mean, "scale"=peak.scale, "sigma"=peak.sigma, "area"=peak.area, "qual"=fq))
+  
+}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/fit4G.R b/DIMS/AddOnFunctions/fit4G.R
new file mode 100644
index 0000000..6cad63d
--- /dev/null
+++ b/DIMS/AddOnFunctions/fit4G.R
@@ -0,0 +1,18 @@
+fit4G_2 <- function(x,y,sig1,sig2,sig3,sig4,mu1,scale1,mu2,scale2,mu3,scale3,mu4,scale4,useBounds){
+  
+  f = function(p){
+    d = p[2]*dnorm(x,mean=p[1],sd=sig1) + p[4]*dnorm(x,mean=p[3],sd=sig2) + p[6]*dnorm(x,mean=p[5],sd=sig3) + p[8]*dnorm(x,mean=p[7],sd=sig4)
+    sum((d-y)^2)
+  }
+  
+  if (useBounds){
+    lower = c(x[1],0,x[1],0,x[1],0,x[1],0)
+    upper = c(x[length(x)],Inf,x[length(x)],Inf,x[length(x)],Inf,x[length(x)],Inf)
+    
+    optim(c(mu1,scale1,mu2,scale2,mu3,scale3,mu4,scale4),f,control=list(maxit=10000),method="L-BFGS-B",lower=lower,upper=upper)
+    
+  } else {
+    optim(c(mu1,scale1,mu2,scale2,mu3,scale3,mu4,scale4),f,control=list(maxit=10000))
+  }
+  
+}
diff --git a/DIMS/AddOnFunctions/fit4peaks.R b/DIMS/AddOnFunctions/fit4peaks.R
new file mode 100644
index 0000000..a506541
--- /dev/null
+++ b/DIMS/AddOnFunctions/fit4peaks.R
@@ -0,0 +1,149 @@
+fit4peaks <- function(x2,x,y,index,scale,resol,useBounds=FALSE,plot=FALSE,FQ,int.factor) {
+  
+  peak.mean = NULL
+  peak.area = NULL
+  peak.scale = NULL
+  peak.sigma = NULL
+  
+  range1=c(index[1]-2,index[1]-1,index[1],index[1]+1,index[1]+2)
+  range2=c(index[2]-2,index[2]-1,index[2],index[2]+1,index[2]+2)
+  range3=c(index[3]-2,index[3]-1,index[3],index[3]+1,index[3]+2)
+  range4=c(index[4]-2,index[4]-1,index[4],index[4]+1,index[4]+2)
+  if (range1[1]==0) range1=range1[-1]
+  if (length(x)<range4[length(range4)]) range4=range4[-length(range4)]
+  
+  range1=checkOverlap(range1,range2)[[1]]
+  range2=checkOverlap(range1,range2)[[2]]
+  range2=checkOverlap(range2,range3)[[1]]
+  range3=checkOverlap(range2,range3)[[2]]
+  range3=checkOverlap(range3,range4)[[1]]
+  range4=checkOverlap(range3,range4)[[2]]
+  
+  remove=which(range4>length(x))
+  if (length(remove)>0) {
+    range4=range4[-remove]
+    # message(length(range4))
+  }
+  
+  # check for negative or 0
+  remove=which(range1<1)
+  if (length(remove)>0) range1=range1[-remove]
+  remove=which(range2<1)
+  if (length(remove)>0) range2=range2[-remove]
+  remove=which(range3<1)
+  if (length(remove)>0) range3=range3[-remove]
+  remove=which(range4<1)
+  if (length(remove)>0) range4=range4[-remove]
+  
+  # remove NA
+  if (length(which(is.na(y[range1])))!=0) range1=range1[-which(is.na(y[range1]))]
+  if (length(which(is.na(y[range2])))!=0) range2=range2[-which(is.na(y[range2]))]
+  if (length(which(is.na(y[range3])))!=0) range3=range3[-which(is.na(y[range3]))]
+  if (length(which(is.na(y[range4])))!=0) range4=range4[-which(is.na(y[range4]))]
+  
+  mu1 = weighted.mean(x[range1],y[range1])
+  sigma1 = getSD(x[range1],y[range1])
+  fitP = fitG_2(x[range1],y[range1],sigma1,mu1,scale,useBounds)
+  p = fitP$par
+  
+  mu2 = weighted.mean(x[range2],y[range2])
+  sigma2 = getSD(x[range2],y[range2])
+  fitP = fitG_2(x[range2],y[range2],sigma2,mu2,scale,useBounds)
+  p2 = fitP$par
+  
+  mu3 = weighted.mean(x[range3],y[range3])
+  sigma3 = getSD(x[range3],y[range3])
+  fitP = fitG_2(x[range3],y[range3],sigma3,mu3,scale,useBounds)
+  p3 = fitP$par
+  
+  mu4 = weighted.mean(x[range4],y[range4])
+  sigma4 = getSD(x[range4],y[range4])
+  fitP = fitG_2(x[range4],y[range4],sigma4,mu4,scale,useBounds)
+  p4 = fitP$par
+  
+  fit4P = fit4G_2(x, y, sigma1, sigma2, sigma3, sigma3, p[1], p[2], p2[1], p2[2], p3[1], p3[2],  p4[1], p4[2], useBounds)
+  p5 = fit4P$par
+  
+  # plot #####################################
+  sumFit2 = (p5[2]*dnorm(x2,p5[1],sigma1))+(p5[4]*dnorm(x2,p5[3],sigma2))+(p5[6]*dnorm(x2,p5[5],sigma3))+(p5[8]*dnorm(x2,p5[7],sigma3))
+  sumFit = (p5[2]*dnorm(x,p5[1],sigma1))+(p5[4]*dnorm(x,p5[3],sigma2))+(p5[6]*dnorm(x,p5[5],sigma3))+(p5[8]*dnorm(x,p5[7],sigma3))
+  fq=getFitQuality(x,y,sort(c(p5[1],p5[3],p5[5],p5[7]))[1],sort(c(p5[1],p5[3],p5[5],p5[7]))[4],resol,sumFit=sumFit)$fq_new
+  
+  if (plot & (fq < FQ)) lines(x2,p5[2]*dnorm(x2,p5[1],sigma1),col="purple")
+  if (plot & (fq < FQ)) abline(v = p5[1], col="purple")
+  fwhm = getFwhm(p5[1],resol)
+  half_max = max(p5[2]*dnorm(x2,p5[1],sigma1))*0.5
+  if (plot & (fq < FQ)) lines(c(p5[1] - 0.5*fwhm, p5[1] + 0.5*fwhm),c(half_max,half_max),col="orange")
+  
+  if (plot & (fq < FQ)) lines(x2,p5[4]*dnorm(x2,p5[3],sigma2),col="purple")
+  if (plot & (fq < FQ)) abline(v = p5[3], col="purple")
+  fwhm = getFwhm(p5[3],resol)
+  half_max = max(p5[4]*dnorm(x2,p5[3],sigma2))*0.5
+  if (plot & (fq < FQ)) lines(c(p5[3] - 0.5*fwhm, p5[3] + 0.5*fwhm),c(half_max,half_max),col="orange")
+  
+  if (plot & (fq < FQ)) lines(x2,p5[6]*dnorm(x2,p5[5],sigma3),col="purple")
+  if (plot & (fq < FQ)) abline(v = p5[5], col="purple")
+  fwhm = getFwhm(p5[5],resol)
+  half_max = max(p5[6]*dnorm(x2,p5[5],sigma3))*0.5
+  if (plot & (fq < FQ)) lines(c(p5[5] - 0.5*fwhm, p5[5] + 0.5*fwhm),c(half_max,half_max),col="orange")
+  
+  if (plot & (fq < FQ)) lines(x2,p5[8]*dnorm(x2,p5[7],sigma3),col="purple")
+  if (plot & (fq < FQ)) abline(v = p5[7], col="purple")
+  fwhm = getFwhm(p5[7],resol)
+  half_max = max(p5[8]*dnorm(x2,p5[7],sigma3))*0.5
+  if (plot & (fq < FQ)) lines(c(p5[7] - 0.5*fwhm, p5[7] + 0.5*fwhm),c(half_max,half_max),col="orange")
+  
+  if (plot & (fq < FQ)) lines(x2,sumFit2,col="blue")
+  
+  #fq = abs(sum(y) - sum(sumFit))/sum(y)
+  #fq=abs(sum(y) - sum(sumFit))/sum(y)
+  #fq=mean(abs(sumFit - y)/sumFit)
+  
+  
+  h2=c(paste("mean =", p5[1], sep=" "),
+       paste("mean =", p5[3], sep=" "),
+       paste("mean =", p5[5], sep=" "),
+       paste("mean =", p5[7], sep=" "),
+       paste("fq =", fq, sep=" "))
+  
+  if (plot & (fq < FQ)) legend("topright", legend=h2)
+  #############################################
+  
+  #   area1 = sum(p5[2]*dnorm(x2,p5[1],sigma1))
+  #   area2 = sum(p5[4]*dnorm(x2,p5[3],sigma2))
+  #   area3 = sum(p5[6]*dnorm(x2,p5[5],sigma3))
+  #   area4 = sum(p5[8]*dnorm(x2,p5[7],sigma4))
+  
+  #   area1 = max(p5[2]*dnorm(x2,p5[1],sigma1))
+  #   area2 = max(p5[4]*dnorm(x2,p5[3],sigma2))
+  #   area3 = max(p5[6]*dnorm(x2,p5[5],sigma3))
+  #   area4 = max(p5[8]*dnorm(x2,p5[7],sigma4))
+  
+  area1 = getArea(p5[1],resol,p5[2],sigma1,int.factor)
+  area2 = getArea(p5[3],resol,p5[4],sigma2,int.factor)
+  area3 = getArea(p5[5],resol,p5[6],sigma3,int.factor)
+  area4 = getArea(p5[7],resol,p5[8],sigma4,int.factor)
+  
+  peak.area = c(peak.area, area1)
+  peak.area = c(peak.area, area2)
+  peak.area = c(peak.area, area3)
+  peak.area = c(peak.area, area4)
+  
+  peak.mean = c(peak.mean, p5[1])
+  peak.mean = c(peak.mean, p5[3])
+  peak.mean = c(peak.mean, p5[5])
+  peak.mean = c(peak.mean, p5[7])
+  
+  peak.scale = c(peak.scale, p5[2])
+  peak.scale = c(peak.scale, p5[4])
+  peak.scale = c(peak.scale, p5[6])
+  peak.scale = c(peak.scale, p5[8])
+  
+  peak.sigma = c(peak.sigma, sigma1)
+  peak.sigma = c(peak.sigma, sigma2)
+  peak.sigma = c(peak.sigma, sigma3)
+  peak.sigma = c(peak.sigma, sigma4)
+  
+  return(list("mean"=peak.mean, "scale"=peak.scale, "sigma"=peak.sigma, "area"=peak.area, "qual"=fq))
+  
+}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/fitG.R b/DIMS/AddOnFunctions/fitG.R
new file mode 100644
index 0000000..b1b595d
--- /dev/null
+++ b/DIMS/AddOnFunctions/fitG.R
@@ -0,0 +1,18 @@
+fitG_2 <- function(x,y,sig,mu,scale,useBounds) {
+  
+  f = function(p) {
+    d = p[2]*dnorm(x,mean=p[1],sd=sig)
+    sum((d-y)^2)
+  }
+  
+  if (useBounds){
+    lower = c(x[1],0,x[1],0)
+    upper = c(x[length(x)],Inf,x[length(x)],Inf)
+    
+    optim(c(as.numeric(mu), as.numeric(scale)),
+          f,control=list(maxit=10000),method="L-BFGS-B",lower=lower,upper=upper)
+  } else {  
+    #optim(c(mu,scale),f)
+    optim(c(as.numeric(mu),as.numeric(scale)),f,control=list(maxit=10000))
+  }  
+}
diff --git a/DIMS/AddOnFunctions/fitGaussian.R b/DIMS/AddOnFunctions/fitGaussian.R
new file mode 100644
index 0000000..3c5027d
--- /dev/null
+++ b/DIMS/AddOnFunctions/fitGaussian.R
@@ -0,0 +1,327 @@
+fitGaussian <- function(x2,x,y,index,scale,resol,outdir,force,useBounds,plot,scanmode,int.factor,width,height) {
+  # force=length(index)
+  # useBounds=FALSE
+  
+  peak.mean = NULL
+  peak.area = NULL
+  peak.qual = NULL
+  peak.min = NULL
+  peak.max = NULL
+  
+  FQ1 = 0.15
+  FQ = 0.2
+  
+  # One local max
+  if (force==1){
+    
+    retVal = fit1Peak(x2,x,y,index,scale,resol,plot,FQ1,useBounds)
+    
+    scale = 2
+    
+    if (retVal$mean[1]<x[1] | retVal$mean[1]>x[length(x)]) {   # <=== mean outside range
+      
+      # do it again
+      return(fitGaussian(x2,x,y,index,scale,resol,outdir,force=1,useBounds=TRUE,plot,scanmode,int.factor,width,height))
+      
+    } else { # <=== mean within range
+      
+      if (retVal$qual > FQ1){ # <=== bad fit
+        
+        # Try to fit two curves
+        
+        # diff(diff(x)) essentially computes the discrete analogue of the second derivative, so should be negative at local maxima.
+        # The +1 below takes care of the fact that the result of diff is shorter than the input vector.
+        new_index=which(diff(sign(diff(y)))==-2)+1
+        
+        if (length(new_index)!=2) {
+          new_index = round(length(x)/3)
+          new_index = c(new_index, 2*new_index)
+        }
+        
+        #length(new_index)  
+        return(fitGaussian(x2,x,y,new_index,scale,resol,outdir,force=2,useBounds=FALSE,plot,scanmode,int.factor,width,height))
+        
+      } else { # <=== good fit
+        
+        peak.mean = c(peak.mean, retVal$mean)
+        #peak.area = c(peak.area, sum(retVal$scale*dnorm(x2,retVal$mean,retVal$sigma)))
+        # "Centroid mode"
+        #         peak.area = c(peak.area, max(retVal$scale*dnorm(x2,retVal$mean,retVal$sigma)))
+        peak.area = c(peak.area, getArea(retVal$mean,resol,retVal$scale,retVal$sigma,int.factor))
+        peak.qual = retVal$qual
+        peak.min = x[1]
+        peak.max = x[length(x)]
+      }
+    }
+    
+    # Two local max  
+  } else if (force==2 & (length(x)>6)) {
+    
+    # fit two curves
+    retVal = fit2peaks(x2,x,y,index,scale,resol,useBounds,plot,FQ,int.factor) # <=== fit 2 curves
+    
+    if (retVal$mean[1]<x[1] | retVal$mean[1]>x[length(x)] |   # <=== one of means outside range
+        retVal$mean[2]<x[1] | retVal$mean[2]>x[length(x)]) {
+      
+      # check quality
+      if (retVal$qual > FQ) { # <=== bad fit 
+        
+        # do it again
+        return(fitGaussian(x2,x,y,index,scale,resol,outdir,force=2,useBounds=TRUE,plot,scanmode,int.factor,width,height))
+        
+        # good fit
+      } else {
+        
+        # check which mean is outside range
+        # Todo ========> check this!!!!!!!!!!!!!!!
+        for (i in 1:length(retVal$mean)){
+          if (retVal$mean[i]<x[1] | retVal$mean[i]>x[length(x)] ) {
+            peak.mean = c(peak.mean, -i)
+            peak.area = c(peak.area, -i)
+          } else {
+            peak.mean = c(peak.mean, retVal$mean[i])
+            peak.area = c(peak.area, retVal$area[i])
+          }
+        }
+        peak.qual = retVal$qual
+        peak.min = x[1]
+        peak.max = x[length(x)]
+      }
+    } else { # <=== all means within range
+      
+      if (retVal$qual > FQ) { # <=== bad fit 
+        
+        # Try to fit three curves
+        new_index=which(diff(sign(diff(y)))==-2)+1
+        
+        if (length(new_index)!=3) {
+          new_index = round(length(x)/4)
+          new_index = c(new_index, 2*new_index, 3*new_index)
+        }
+        
+        #length(new_index)      
+        return(fitGaussian(x2,x,y,new_index,scale,resol,outdir,force=3,useBounds=FALSE,plot,scanmode,int.factor,width,height))
+        
+      } else { # <======== good fit
+        
+        # check if means are within 3 ppm and sum if so  
+        tmp = retVal$qual 
+        
+        nMeanNew = -1
+        nMean = length(retVal$mean)
+        while (nMean!=nMeanNew){
+          nMean = length(retVal$mean)
+          retVal = isWithinXppm(retVal$mean, retVal$scale, retVal$sigma, retVal$area, x2, x, ppm=4, resol, plot)
+          nMeanNew = length(retVal$mean)
+        }
+        
+        retVal$qual = tmp
+        
+        h2=NULL
+        
+        for (i in 1:length(retVal$mean)){
+          h2 = c(h2, paste("mean =", retVal$mean[i], sep=" "))
+          
+          peak.mean = c(peak.mean, retVal$mean[i])
+          peak.area = c(peak.area, retVal$area[i])
+        }
+        peak.qual = retVal$qual
+        peak.min = x[1]
+        peak.max = x[length(x)]
+        
+        h2 = c(h2, paste("fq =", retVal$qual, sep=" "))
+        if (plot) legend("topright", legend=h2)
+      }  
+    }
+    
+    # Three local max  
+  } else if (force==3 & (length(x)>6)){
+    
+    retVal = fit3peaks(x2,x,y,index,scale,resol,useBounds,plot,FQ,int.factor)
+    
+    # outside range
+    if (retVal$mean[1]<x[1] | retVal$mean[1]>x[length(x)] | # <=== one of means outside range
+        retVal$mean[2]<x[1] | retVal$mean[2]>x[length(x)] |
+        retVal$mean[3]<x[1] | retVal$mean[3]>x[length(x)]) {
+      
+      # check quality
+      if (retVal$qual > FQ) { # <=== bad fit 
+        
+        # do it again
+        return(fitGaussian(x2,x,y,index,scale,resol,outdir,force,useBounds=TRUE,plot,scanmode,int.factor,width,height))
+        
+        # good fit
+      } else {
+        
+        # check which mean is outside range
+        # Todo ========> check this!!!!!!!!!!!!!!!
+        for (i in 1:length(retVal$mean)){
+          if (retVal$mean[i]<x[1] | retVal$mean[i]>x[length(x)] ) {
+            peak.mean = c(peak.mean, -i)
+            peak.area = c(peak.area, -i)
+          } else {
+            peak.mean = c(peak.mean, retVal$mean[i])
+            peak.area = c(peak.area, retVal$area[i])
+          }
+        }
+        peak.qual = retVal$qual
+        peak.min = x[1]
+        peak.max = x[length(x)]
+      }
+      
+    } else { # <=== all means within range
+      
+      if (retVal$qual > FQ) { # <=== bad fit 
+        
+        # Try to fit four curves
+        new_index=which(diff(sign(diff(y)))==-2)+1
+        
+        if (length(new_index)!=4) {
+          new_index = round(length(x)/5)
+          new_index = c(new_index, 2*new_index, 3*new_index, 4*new_index)
+        }
+        
+        #length(new_index)
+        return(fitGaussian(x2,x,y,new_index,scale,resol,outdir,force=4,useBounds=FALSE,plot,scanmode,int.factor,width,height))
+        
+      } else { # <======== good fit
+        
+        # check if means are within 3 ppm and sum if so  
+        tmp = retVal$qual 
+        
+        nMeanNew = -1
+        nMean = length(retVal$mean)
+        while (nMean!=nMeanNew){
+          nMean = length(retVal$mean)
+          retVal = isWithinXppm(retVal$mean, retVal$scale, retVal$sigma, retVal$area, x2, x, ppm=4, resol, plot)
+          nMeanNew = length(retVal$mean)
+        }
+        
+        retVal$qual = tmp
+        
+        h2=NULL
+        #         peak.mean=NULL
+        #         peak.area=NULL
+        
+        for (i in 1:length(retVal$mean)){
+          h2 = c(h2, paste("mean =", retVal$mean[i], sep=" "))
+          
+          peak.mean = c(peak.mean, retVal$mean[i])
+          peak.area = c(peak.area, retVal$area[i])
+        }
+        peak.qual = retVal$qual
+        peak.min = x[1]
+        peak.max = x[length(x)]
+        
+        h2 = c(h2, paste("fq =", retVal$qual, sep=" "))
+        if (plot) legend("topright", legend=h2)
+        
+      }  
+    }
+    
+    # Four local max  
+  } else if (force==4 & (length(x)>6)){
+    
+    retVal = fit4peaks(x2,x,y,index,scale,resol,useBounds,plot,FQ,int.factor)
+    
+    if (retVal$mean[1]<x[1] | retVal$mean[1]>x[length(x)] |
+        retVal$mean[2]<x[1] | retVal$mean[2]>x[length(x)] |
+        retVal$mean[3]<x[1] | retVal$mean[3]>x[length(x)] |
+        retVal$mean[4]<x[1] | retVal$mean[4]>x[length(x)]) {
+      
+      # check quality
+      if (retVal$qual > FQ) { # <=== bad fit 
+        
+        # do it again
+        return(fitGaussian(x2,x,y,index,scale,resol,outdir,force,useBounds=TRUE,plot,scanmode,int.factor,width,height))
+        
+        # good fit
+      } else {
+        
+        # check which mean is outside range
+        # Todo ========> check this!!!!!!!!!!!!!!!
+        for (i in 1:length(retVal$mean)){
+          if (retVal$mean[i]<x[1] | retVal$mean[i]>x[length(x)] ) {
+            peak.mean = c(peak.mean, -i)
+            peak.area = c(peak.area, -i)
+          } else {
+            peak.mean = c(peak.mean, retVal$mean[i])
+            peak.area = c(peak.area, retVal$area[i])
+          }
+        }
+        peak.qual = retVal$qual
+        peak.min = x[1]
+        peak.max = x[length(x)]
+      }
+      
+    } else {
+      
+      if (retVal$qual > FQ) { # <=== bad fit 
+        
+        # Generate 1 curve
+        return(fitGaussian(x2,x,y,index,scale,resol,outdir,force=5,useBounds=FALSE,plot,scanmode,int.factor,width,height))
+        
+      } else { # <======== good fit
+        
+        # check if means are within 3 ppm and sum if so  
+        tmp = retVal$qual 
+        
+        nMeanNew = -1
+        nMean = length(retVal$mean)
+        while (nMean!=nMeanNew){
+          nMean = length(retVal$mean)
+          retVal = isWithinXppm(retVal$mean, retVal$scale, retVal$sigma, retVal$area, x2, x, ppm=4, resol, plot)
+          nMeanNew = length(retVal$mean)
+        }
+        
+        retVal$qual = tmp
+        
+        h2=NULL
+        
+        for (i in 1:length(retVal$mean)){
+          h2 = c(h2, paste("mean =", retVal$mean[i], sep=" "))
+          
+          peak.mean = c(peak.mean, retVal$mean[i])
+          peak.area = c(peak.area, retVal$area[i])
+        }
+        peak.qual = retVal$qual
+        peak.min = x[1]
+        peak.max = x[length(x)]
+        
+        h2 = c(h2, paste("fq =", retVal$qual, sep=" "))
+        if (plot) legend("topright", legend=h2)
+      }  
+    }
+    
+    # More then four local max  
+  } else {
+    
+    scale=2
+    FQ1=0.40
+    useBounds=TRUE
+    index=which(y==max(y))
+    retVal = fit1Peak(x2,x,y,index,scale,resol,plot,FQ1,useBounds)
+    
+    if (retVal$qual > FQ1){ # <=== bad fit
+      
+      if (plot) dev.off()    
+      
+      rval = generateGaussian(x,y,resol,plot,scanmode,int.factor, width, height)
+      peak.mean = c(peak.mean, rval$mean)
+      peak.area = c(peak.area, rval$area)
+      peak.min = rval$min
+      peak.max = rval$max
+      peak.qual = 0
+      
+    } else { # <=== good fit
+      
+      peak.mean = c(peak.mean, retVal$mean)
+      peak.area = c(peak.area, getArea(retVal$mean,resol,retVal$scale,retVal$sigma,int.factor))
+      peak.qual = retVal$qual
+      peak.min = x[1]
+      peak.max = x[length(x)]
+    }
+  }
+  
+  return(list("mean"=peak.mean, "area"=peak.area, "qual" = peak.qual, "min"=peak.min, "max"=peak.max))
+}
diff --git a/DIMS/AddOnFunctions/fitGaussianInit.R b/DIMS/AddOnFunctions/fitGaussianInit.R
new file mode 100644
index 0000000..1c92bce
--- /dev/null
+++ b/DIMS/AddOnFunctions/fitGaussianInit.R
@@ -0,0 +1,60 @@
+fitGaussianInit <- function(x,y,int.factor,scale,resol,outdir,sampname,scanmode,plot,width,height,i,start,end) {
+  # scanmode="negative"
+  
+  mz.range = x[length(x)] - x[1]
+  x2 = seq(x[1],x[length(x)],length=mz.range*int.factor)
+  
+  #   # diff(diff(x)) essentially computes the discrete analogue of the second derivative, so should be negative at local maxima.
+  #   # The +1 below takes care of the fact that the result of diff is shorter than the input vector.
+  #   index=which(diff(sign(diff(y)))==-2)+1
+  
+  # Alway try to fit one curve first
+  index = which(y==max(y))
+  
+  if (scanmode=="positive"){
+    plot_label="pos_fit.png"
+  } else {
+    plot_label="neg_fit.png"
+  }
+  
+  if (plot) {
+    CairoPNG(filename=paste(outdir,"Gaussian_fit",paste(sampname, x[1], plot_label, sep="_"), sep="/"), width, height)
+    plot(x,y,xlab="m/z",ylab="I", ylim=c(0,1.5*max(y)),main=paste(i,start,end, sep=" ")) #, ylim=c(0,1e+05)
+  }
+  
+  retVal = fitGaussian(x2,x,y,index,scale,resol,outdir,force=length(index),useBounds=FALSE,plot,scanmode,int.factor,width,height)
+  
+  if (plot) {
+    if (length(retVal$mean)==1) {
+      
+      result = tryCatch(dev.off(), warning = function(w){},
+                        error=function(e){},
+                        finally = {})
+      
+      # file.rename(paste(outdir,"Gaussian_fit", paste(sampname, x[1], plot_label, sep="_"), sep="/"),
+      #             paste(outdir,"Gaussian_fit", paste(sampname, round(retVal$mean,digits = 6), plot_label, sep="_"), sep="/"))
+    } else {
+      
+      
+      h2=NULL
+      for (i in 1:length(retVal$mean)){
+        h2=c(h2, paste("mean =", retVal$mean[i], sep=" "))
+      }
+      h2=c(h2, paste("fq =", retVal$qual, sep=" "))
+      legend("topright", legend=h2)
+      
+      dev.off()
+      
+      for (i in 1:length(retVal$mean)){
+        if (retVal$mean[i]!=-1){
+          file.copy(paste(outdir,"Gaussian_fit", paste(sampname, x[1], plot_label, sep="_"), sep="/"),
+                    paste(outdir,"Gaussian_fit", paste(sampname, round(retVal$mean[i],digits = 6), plot_label, sep="_"), sep="/"))
+        }
+      }
+      file.remove(paste(outdir,"Gaussian_fit", paste(sampname, x[1], plot_label, sep="_"), sep="/"))
+    }
+  }
+  
+  return(list("mean"=retVal$mean, "area"=retVal$area, "qual"=retVal$qual, "min"=retVal$min, "max"=retVal$max))
+  
+}
diff --git a/DIMS/AddOnFunctions/generateBreaksFwhm.R b/DIMS/AddOnFunctions/generateBreaksFwhm.R
new file mode 100644
index 0000000..1a42631
--- /dev/null
+++ b/DIMS/AddOnFunctions/generateBreaksFwhm.R
@@ -0,0 +1,26 @@
+# options(digits=16)
+# resol = 140000
+# breaks.fwhm = 1
+# i = 1
+# breaks.fwhm.avg = NULL
+#
+# Sys.time()
+#
+# while (breaks.fwhm[length(breaks.fwhm)]<1000){
+#
+#   resol.mz = resol*(1/sqrt(2)^(log2(breaks.fwhm[i]/200)))
+#   fwhm.0.1 = (breaks.fwhm[i]/resol.mz)/10
+#   breaks.fwhm = c(breaks.fwhm, breaks.fwhm[i] + fwhm.0.1)
+#   breaks.fwhm.avg = c(breaks.fwhm.avg,(breaks.fwhm[i] +breaks.fwhm[i+1])/2)
+#
+#   if (i %% 10000 == 0){
+#     cat(paste("i =", i))
+#     cat(paste("breaks.fwhm =", breaks.fwhm[length(breaks.fwhm)]))
+#   }
+#
+#   i = i + 1
+# }
+#
+# Sys.time()
+# save(list=c("breaks.fwhm", "breaks.fwhm.avg"), file="breaks.RData")
+# cat("Breaks saved!")
diff --git a/DIMS/AddOnFunctions/generateExcelFile.R b/DIMS/AddOnFunctions/generateExcelFile.R
new file mode 100644
index 0000000..31474d9
--- /dev/null
+++ b/DIMS/AddOnFunctions/generateExcelFile.R
@@ -0,0 +1,24 @@
+generateExcelFile <- function(peaklist,  fileName, sub, plot = TRUE) {
+  # plotdir=file.path(plotdir)
+  # imageNum=2
+  # fileName="./results/xls/Pos_allpgrps_identified"
+  # subName=c("","_box")
+  
+  end <- 0
+  i <- 0
+  
+  if (dim(peaklist)[1]>=sub & (sub>0)){
+    for (i in 1:floor(dim(peaklist)[1]/sub)){
+      start=-(sub-1)+i*sub
+      end=i*sub
+      message(paste0(start, ":", end))
+      
+      genExcelFileV3(peaklist[c(start:end),], paste(fileName, sep="_"), plot)
+    }
+  }  
+  start = end + 1
+  end = dim(peaklist)[1]
+  message(start)
+  message(end)
+  genExcelFileV3(peaklist[c(start:end),], paste(fileName, i+1, sep="_"), plot)
+}
diff --git a/DIMS/AddOnFunctions/generateGaussian.R b/DIMS/AddOnFunctions/generateGaussian.R
new file mode 100644
index 0000000..da6dec2
--- /dev/null
+++ b/DIMS/AddOnFunctions/generateGaussian.R
@@ -0,0 +1,48 @@
+generateGaussian <- function(x,y,resol,plot,scanmode,int.factor,width,height) {
+  
+  factor=1.5
+  index = which(y==max(y))[1]
+  x=x[index]
+  y=y[index]
+  mu = x
+  fwhm = getFwhm(mu,resol)
+  x.p = c(mu-factor*fwhm, x, mu+factor*fwhm)
+  y.p = c(0, y, 0)
+  
+  #   if (plot) dir.create("./results/plots",showWarnings = FALSE)
+  #   if (plot) dir.create("./results/plots/Gaussian_fit",showWarnings = FALSE)
+  
+  if (plot) {
+    if (scanmode=="positive"){
+      plot_label="pos_fit.png"
+    } else {
+      plot_label="neg_fit.png"
+    }
+  }
+  
+  mz.range = x.p[length(x.p)] - x.p[1]
+  x2 = seq(x.p[1],x.p[length(x.p)],length=mz.range*int.factor)
+  sigma = getSD(x.p,y.p)
+  scale = optimizeGauss(x.p,y.p,sigma,mu)
+  
+  if (plot) {
+    CairoPNG(filename=paste("./results/Gaussian_fit",paste(sampname, mu, plot_label, sep="_"), sep="/"), width, height)
+    
+    plot(x.p,y.p,xlab="m/z",ylab="I", ylim=c(0,1.5*max(y)))
+    lines(x2,scale*dnorm(x2,mu,sigma), col="green")
+    half_max = max(scale*dnorm(x2,mu,sigma))*0.5
+    lines(c(mu - 0.5*fwhm, mu + 0.5*fwhm),c(half_max,half_max),col="orange")
+    abline(v = mu, col="green")
+    h=c(paste("mean =", mu, sep=" "))
+    legend("topright", legend=h)
+    
+    dev.off()
+  }
+  
+  #  area = sum(scale*dnorm(x2,mu,sigma))
+  #  area = max(scale*dnorm(x2,mu,sigma))
+  area = getArea(mu,resol,scale,sigma,int.factor)
+  
+  return(list("mean"=mu, "area"=area, "min"=x2[1] , "max"=x2[length(x2)]))
+  
+}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/getArea.R b/DIMS/AddOnFunctions/getArea.R
new file mode 100644
index 0000000..415b050
--- /dev/null
+++ b/DIMS/AddOnFunctions/getArea.R
@@ -0,0 +1,23 @@
+getArea <- function(mu,resol,scale,sigma,int.factor){
+  
+  #   mu=p3[1]
+  #   scale=p3[2]
+  #   sigma=sigma1
+  
+  #   mu=p3[1]
+  #   scale=p3[2]
+  #   sigma=sigma1
+  
+  # avoid too big vector (cannot allocate vector of size ...)
+  if (mu>1200) return(0)
+  
+  fwhm = getFwhm(mu,resol)
+  mzMin = mu - 2*fwhm
+  mzMax = mu + 2*fwhm
+  mz.range = mzMax - mzMin 
+  x_int = seq(mzMin,mzMax,length=mz.range*int.factor)
+  
+  #plot(x_int,scale*dnorm(x_int,mu,sigma),col="red",type="l")
+  
+  return(sum(scale*dnorm(x_int,mu,sigma))/100)  
+}
diff --git a/DIMS/AddOnFunctions/getDeltaMZ.R b/DIMS/AddOnFunctions/getDeltaMZ.R
new file mode 100644
index 0000000..f09a0a4
--- /dev/null
+++ b/DIMS/AddOnFunctions/getDeltaMZ.R
@@ -0,0 +1,5 @@
+getDeltaMZ <- function(mass, breaks.fwhm){
+  index = which(breaks.fwhm<mass)
+  tmp = index[length(index)]
+  return(breaks.fwhm[tmp+1] - breaks.fwhm[tmp]) 
+}
diff --git a/DIMS/AddOnFunctions/getFitQuality.R b/DIMS/AddOnFunctions/getFitQuality.R
new file mode 100644
index 0000000..f9197c8
--- /dev/null
+++ b/DIMS/AddOnFunctions/getFitQuality.R
@@ -0,0 +1,65 @@
+getFitQuality <- function(x,y,muFirst,muLast,resol,scale=NULL,sigma=NULL,sumFit=NULL){
+  # muFirst=sort(c(p5[1],p5[3],p5[5],p5[7]))[1]
+  # muLast=sort(c(p5[1],p5[3],p5[5],p5[7]))[4]
+  
+  # muFirst=p2[1]
+  # muLast=p2[1]
+  # scale=p2[2]
+  
+  # x,y,sort(c(p5[1],p5[3],p5[5],p5[7]))[1],sort(c(p5[1],p5[3],p5[5],p5[7]))[4],resol,sumFit=sumFit
+  
+  if (is.null(sumFit)){
+    # fwhm = getFwhm(muFirst,resol)
+    # intRangeMin = muFirst - 1.2*fwhm
+    # intRangeMax = muFirst + 1.2*fwhm
+    # tmp = which((x>intRangeMin) & (x<intRangeMax))
+    
+    # # mu outside range!!!
+    # if (length(tmp)==0) return(list("fq_new"=1, "x_int"=x, "y_int"=y))
+    # 
+    # x_int = x[tmp]
+    # y_int = y[tmp] 
+    
+    x_int = x
+    y_int = y
+    
+    # fq_new = mean(abs((scale*dnorm(x_int,muFirst,sigma)) - y_int)/(scale*dnorm(x_int,muFirst,sigma)))
+    fq_new = mean(abs((scale*dnorm(x_int,muFirst,sigma)) - y_int)/rep((max(scale*dnorm(x_int,muFirst,sigma))/2),length(x_int)))
+    
+    # lines(x_int,scale*dnorm(x_int,muFirst,sigma), col="blue")
+    # fq_new = abs(sum(y) - sum(p2[2]*dnorm(x,p2[1],sigma)))/sum(y)
+    
+  } else {
+    # fwhm = getFwhm(muFirst,resol)
+    # intRangeMin = muFirst - 1.2*fwhm
+    # fwhm = getFwhm(muLast,resol)
+    # intRangeMax = muLast + 1.2*fwhm
+    # 
+    # tmp = which((x>intRangeMin) & (x<intRangeMax))
+    # 
+    # if (length(tmp)==0){
+    #   # outside range
+    #   fq_new = 1
+    #   x_int = x
+    #   y_int = y
+    #   
+    # } else {
+    
+    # x_int = x[tmp]
+    # y_int = y[tmp]
+    # sumFit_int = sumFit[tmp] 
+    
+    sumFit_int = sumFit
+    y_int = y
+    x_int = x
+    
+    # fq_new = mean(abs(sumFit_int - y_int)/sumFit_int)
+    fq_new = mean(abs(sumFit_int - y_int)/rep(max(sumFit_int)/2,length(sumFit_int)))
+    # }    
+  }
+  
+  # Caused by dividing by 0
+  if (is.nan(fq_new)) fq_new=1 
+  
+  return(list("fq_new"=fq_new, "x_int"=x_int, "y_int"=y_int))  
+}
diff --git a/DIMS/AddOnFunctions/getFwhm.R b/DIMS/AddOnFunctions/getFwhm.R
new file mode 100644
index 0000000..890a865
--- /dev/null
+++ b/DIMS/AddOnFunctions/getFwhm.R
@@ -0,0 +1,24 @@
+getFwhm <- function(mu,resol) {   
+  
+  if (is.nan(mu)) mu=0
+  if (is.na(mu)) mu=0
+  if (is.null(mu)) mu=0
+  if (mu<0) mu=0
+  
+  # tryCatch({
+  resol.mz = resol*(1/sqrt(2)^(log2(mu/200)))
+  fwhm = mu/resol.mz
+  #   }, warning = function(resol.mz, fwhm) {
+  #     message(warnings())
+  #     message(paste("====================> mu: ",mu))
+  #     message(paste("====================> resol.mz: ",resol.mz))
+  #   })
+  
+  #   , error = function(e) {
+  #     error-handler-code
+  #   }, finally = {
+  #     cleanup-code
+  #   })
+  
+  return(fwhm)
+}
diff --git a/DIMS/AddOnFunctions/getPatients.R b/DIMS/AddOnFunctions/getPatients.R
new file mode 100644
index 0000000..3b03f0a
--- /dev/null
+++ b/DIMS/AddOnFunctions/getPatients.R
@@ -0,0 +1,9 @@
+getPatients <- function(peaklist){
+  patients=colnames(peaklist)[grep("P", colnames(peaklist), fixed = TRUE)]
+  patients=unique(as.vector(unlist(lapply(strsplit(patients, ".", fixed = TRUE), function(x) x[1]))))
+  # ToDo: If 2 P's in sample names!!!!!!!!!!!!!
+  # patients=sort(as.numeric(unique(as.vector(unlist(lapply(strsplit(patients, "_P", fixed = TRUE), function(x) x[2]))))))
+  patients=sort(as.numeric(unique(as.vector(unlist(lapply(strsplit(patients, "P", fixed = TRUE), function(x) x[2]))))))
+  
+  return(patients)
+}
diff --git a/DIMS/AddOnFunctions/getSD.R b/DIMS/AddOnFunctions/getSD.R
new file mode 100644
index 0000000..197a002
--- /dev/null
+++ b/DIMS/AddOnFunctions/getSD.R
@@ -0,0 +1,9 @@
+getSD <- function(x,y,resol=140000) {   
+  
+  index = which(y==max(y))
+  mean = x[index]
+  resol.mz = resol*(1/sqrt(2)^(log2(mean/200)))
+  fwhm = mean/resol.mz
+  sd = (fwhm/2)*0.85 
+  return(sd)
+}
diff --git a/DIMS/AddOnFunctions/get_patient_data_to_helix.R b/DIMS/AddOnFunctions/get_patient_data_to_helix.R
new file mode 100644
index 0000000..c582177
--- /dev/null
+++ b/DIMS/AddOnFunctions/get_patient_data_to_helix.R
@@ -0,0 +1,31 @@
+get_patient_data_to_helix <- function(metab_interest_sorted, metab_list_all){
+  # Combine Z-scores of metab groups together
+  df_all_metabs_zscores <- bind_rows(metab_interest_sorted)
+  # Change columnnames 
+  colnames(df_all_metabs_zscores) <- c("HMDB_name", "Patient", "Z_score")
+  # Change Patient column to character instead of factor
+  df_all_metabs_zscores$Patient <- as.character(df_all_metabs_zscores$Patient)
+  
+  # Delete whitespaces HMDB_name 
+  df_all_metabs_zscores$HMDB_name <- str_trim(df_all_metabs_zscores$HMDB_name, "right")
+  
+  # Split HMDB_name column on "nitine;" for match dims_helix_table
+  df_all_metabs_zscores$HMDB_name_split <- str_split_fixed(df_all_metabs_zscores$HMDB_name, "nitine;", 2)[,1]
+  
+  # Combine stofgroepen
+  dims_helix_table <- bind_rows(metab_list_all)
+  # Filter table for metabolites for Helix
+  dims_helix_table <- dims_helix_table %>% filter(Helix == "ja")
+  # Split HMDB_name column on "nitine;" for match df_all_metabs_zscores
+  dims_helix_table$HMDB_name_split <- str_split_fixed(dims_helix_table$HMDB_name, "nitine;", 2)[,1]
+  dims_helix_table <- dims_helix_table %>% select(HMDB_name_split, Helix_naam, high_zscore, low_zscore)
+  
+  # Filter DIMS results for metabolites for Helix
+  df_metabs_helix <- df_all_metabs_zscores %>% filter(HMDB_name_split %in% dims_helix_table$HMDB_name_split)
+  # Combine dims_helix_table and df_metabs_helix, adding Helix codes etc.
+  df_metabs_helix <- df_metabs_helix %>% left_join(dims_helix_table, by = join_by(HMDB_name_split))
+  
+  df_metabs_helix <- df_metabs_helix %>% select(HMDB_name, Patient, Z_score, Helix_naam, high_zscore, low_zscore)
+  
+  return(df_metabs_helix)
+}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/globalAssignments.HPC.R b/DIMS/AddOnFunctions/globalAssignments.HPC.R
new file mode 100644
index 0000000..9207661
--- /dev/null
+++ b/DIMS/AddOnFunctions/globalAssignments.HPC.R
@@ -0,0 +1,114 @@
+#.libPaths(new="/hpc/local/CentOS7/dbg_mz/R_libs/3.2.2")
+## calculate relative abundancies from theoretical mass and composition
+
+#library(Rdisop)
+
+#theor.table <- read.table(file="C:/Users/mraves/Metabolomics/TheoreticalMZ_Negative_composition.txt", sep="\t", header=TRUE)
+options(digits=16)
+
+#library(OrgMassSpecR)
+
+suppressPackageStartupMessages(library(lattice))
+# The following list was copied from Rdisop elements.R and corrected for C, H, O, Cl, S according to NIST
+Ba <- list(name= 'Ba', mass=130, isotope=list(mass=c(-0.093718, 0, -0.094958, 0, -0.095514, -0.094335, -0.095447, -0.094188, -0.094768),                                      abundance=c(0.00106, 0, 0.00101, 0, 0.02417, 0.06592, 0.07854, 0.1123, 0.717)))
+Br <- list(name= 'Br', mass=79,  isotope=list(mass=c(-0.0816639, 0, -0.083711),                                                                                               abundance=c(0.5069, 0, 0.4931)))
+C <-  list(name= 'C',  mass=12,  isotope=list(mass=c(0, 0.003354838, 0.003241989),                                                                                            abundance=c(0.9893, 0.0107, 0)))
+Ca <- list(name= 'Ca', mass=40,  isotope=list(mass=c(-0.0374094, 0, -0.0413824, -0.0412338, -0.0445194, 0, -0.046311, 0, -0.047467),                                          abundance=c(0.96941, 0, 0.00647, 0.00135, 0.02086, 0, 4e-05, 0, 0.00187)))
+Cl <- list(name= 'Cl', mass=35,  isotope=list(mass=c(-0.03114732, 0, -0.03409741),                                                                                            abundance=c(0.7576, 0, 0.2424)))
+Cr <- list(name= 'Cr', mass=50,  isotope=list(mass=c(-0.0539536, 0, -0.0594902, -0.0593487, -0.0611175),                                                                      abundance=c(0.04345, 0, 0.83789, 0.09501, 0.02365)))
+Cu <- list(name= 'Cu', mass=63,  isotope=list(mass=c(-0.0704011, 0, -0.0722071),                                                                                              abundance=c(0.6917, 0, 0.3083)))
+F <-  list(name= 'F',  mass=19,  isotope=list(mass=c(-0.00159678),                                                                                                            abundance=c(1)))
+Fe <- list(name= 'Fe', mass=54,  isotope=list(mass=c(-0.0603873, 0, -0.0650607, -0.0646042, -0.0667227),                                                                      abundance=c(0.058, 0, 0.9172, 0.022, 0.0028)))
+H <-  list(name= 'H',  mass=1,   isotope=list(mass=c(0.00782503207, 0.014101778, 0.01604928),                                                                                 abundance=c(0.999885, 0.000115, 0)))
+Hg <- list(name= 'Hg', mass=196, isotope=list(mass=c(-0.034193, 0, -0.033257, -0.031746, -0.0317, -0.029723, -0.029383, 0, -0.026533),                                        abundance=c(0.0015, 0, 0.0997, 0.1687, 0.231, 0.1318, 0.2986, 0, 0.0687)))
+I <-  list(name= 'I',  mass=127, isotope=list(mass=c(-0.095527),                                                                                                              abundance=c(1)))
+K <-  list(name= 'K',  mass=39,  isotope=list(mass=c(-0.0362926, -0.0360008, -0.0381746),                                                                                     abundance=c(0.932581, 0.000117, 0.067302)))
+Li <- list(name= 'Li', mass=6,   isotope=list(mass=c(0.0151214, 0.016003),                                                                                                    abundance=c(0.075, 0.925)))
+Mg <- list(name= 'Mg', mass=24,  isotope=list(mass=c(-0.0149577, -0.0141626, -0.0174063),                                                                                     abundance=c(0.7899, 0.1, 0.1101)))
+Mn <- list(name= 'Mn', mass=55,  isotope=list(mass=c(-0.0619529),                                                                                                             abundance=c(1)))
+N <-  list(name= 'N',  mass=14,  isotope=list(mass=c(0.003074002, 0.00010897),                                                                                                abundance=c(0.99634, 0.00366)))
+Na <- list(name= 'Na', mass=23,  isotope=list(mass=c(-0.0102323),                                                                                                             abundance=c(1)))
+Ni <- list(name= 'Ni', mass=58,  isotope=list(mass=c(-0.0646538, 0, -0.0692116, -0.0689421, -0.0716539, 0, -0.0720321),                                                       abundance=c(0.68077, 0, 0.26223, 0.0114, 0.03634, 0, 0.00926)))
+O <-  list(name= 'O',  mass=16,  isotope=list(mass=c(-0.00508538044, -0.0008683, -0.0008397),                                                                                 abundance=c(0.99757, 0.000381, 0.00205)))
+P <-  list(name= 'P',  mass=31,  isotope=list(mass=c(-0.026238),                                                                                                              abundance=c(1)))
+S <-  list(name= 'S',  mass=32,  isotope=list(mass=c(-0.027929, -0.02854124, -0.0321331, 0, -0.03291924),                                                                     abundance=c(0.9499, 0.0075, 0.0425, 0, 1e-04)))
+Se <- list(name= 'Se', mass=74,  isotope=list(mass=c(-0.0775254, 0, -0.080788, -0.0800875, -0.0826924, 0, -0.0834804, 0, -0.0833022),                                         abundance=c(0.0089, 0, 0.0936, 0.0763, 0.2378, 0, 0.4961, 0, 0.0873)))
+Si <- list(name= 'Si', mass=28,  isotope=list(mass=c(-0.0230729, -0.0235051, -0.0262293),                                                                                     abundance=c(0.9223, 0.0467, 0.031)))
+Sn <- list(name= 'Sn', mass=112, isotope=list(mass=c(-0.095174, 0, -0.097216, -0.096652, -0.098253, -0.097044, -0.098391, -0.09669, -0.0978009, 0, -0.0965596, 0, -0.0947257),abundance=c(0.0097, 0, 0.0065, 0.0034, 0.1453, 0.0768, 0.2423, 0.0859, 0.3259, 0, 0.0463, 0, 0.0579)))
+Zn <- list(name= 'Zn', mass=64,  isotope=list(mass=c(-0.0708552, 0, -0.0739653, -0.0728709, -0.0751541, 0, -0.074675),                                                        abundance=c(0.486, 0, 0.279, 0.041, 0.188, 0, 0.006)))
+
+NH4 <- list(name= "NH4", mass=18, isotope=list(mass=c(0.03437, 0.03141, -0.95935)), abundance=c(0.995, 0.004, 0.001)) # SISweb: 18.03437 100 19.03141 0.4 19.04065 0.1
+Ac <- list(name= "Ac", mass=60, isotope=list(mass=c(0.02114, 0.02450, 0.02538)), abundance=c(0.975, 0.021, 0.004))   # SISweb: 60.02114 100 61.02450 2.2 62.02538 0.4
+NaCl <- list(name= "NaCl", mass=58, isotope=list(mass=c(-0.04137, 0, -0.04433)), abundance=c(0.755, 0, 0.245))   # SISweb: 57.95862 100 59.95567 32.4
+NaCl2 <- list(name= "NaCl2", mass=116, isotope=list(mass=c(-0.08274, 0, -0.08866)), abundance=c(0.755, 0, 0.245))   # SISweb: 57.95862 100 59.95567 32.4
+NaCl3 <- list(name= "NaCl3", mass=174, isotope=list(mass=c(-0.12411, 0, -0.13299)), abundance=c(0.755, 0, 0.245))   # SISweb: 57.95862 100 59.95567 32.4
+NaCl4 <- list(name= "NaCl4", mass=232, isotope=list(mass=c(-0.16548, 0, -0.17732)), abundance=c(0.755, 0, 0.245))   # SISweb: 57.95862 100 59.95567 32.4
+NaCl5 <- list(name= "NaCl5", mass=290, isotope=list(mass=c(-0.20685, 0, -0.22165)), abundance=c(0.755, 0, 0.245))   # SISweb: 57.95862 100 59.95567 32.4
+For <- list(name= "For", mass=45, isotope=list(mass=c(-0.00233, 0.00103)), abundance=c(0.989, 0.011))   # SISweb: 46.00549 100 47.00885 1.1 (47.0097 0.1) 48.00973 0.4
+Na2 <- list(name= "2Na-H", mass=46, isotope=list(mass=c(-1.0282896)), abundance=c(1))  # SISweb for Na2: 45.97954 100 # minus 1 H !
+Met <- list(name= "CH3OH", mass=32, isotope=list(mass=c(1.034045,1.037405)), abundance=c(0.989,0.011)) # SISweb: 32.02622 100 33.02958 1.1 33.0325 0.1 34.03046 0.2
+CH3OH <- list(name= "CH3OH", mass=32, isotope=list(mass=c(1.034045,1.037405)), abundance=c(0.989,0.011)) # SISweb: 32.02622 100 33.02958 1.1 33.0325 0.1 34.03046 0.2
+Na3 <- list(name= "3Na-2H", mass=69, isotope=list(mass=c(-2.0463469)), abundance=c(1))  # SISweb for Na2: 45.97954 100 # minus 1 H !
+KCl <- list(name= "KCl", mass=74, isotope=list(mass=c(-0.06744,0.92961,-0.06744,0.92772)), abundance=c(0.7047,0.2283,0.0507,0.0162))   # SISweb: 73.93256 100 75.92961 32.4 75.93067 7.2 77.92772 2.3
+H2PO4 <- list(name= "H2PO4", mass=97, isotope=list(mass=c(-0.03091)), abundance=c(1))
+HSO4 <- list(name= "HSO4", mass=97, isotope=list(mass=c(-0.04042,0,-0.04462)), abundance=c(0.96,0,0.04))
+Met2 <- list(name= "Met2", mass=64, isotope=list(mass=c(1.060265,1.013405)), abundance=c(0.978,0.022))
+Met3 <- list(name= "Met3", mass=96, isotope=list(mass=c(1.086485,1.089845)), abundance=c(0.969,0.031))
+Met4 <- list(name= "Met4", mass=128, isotope=list(mass=c(1.112705,1.116065)), abundance=c(0.959,0.041))
+Met5 <- list(name= "Met5", mass=160, isotope=list(mass=c(1.20935,1.142285)), abundance=c(0.949,0.051))
+NaminH <- list(name= "Na-H", mass=21, isotope=list(mass=c(-0.02571416)), abundance=c(1))
+KminH <- list(name= "K-H", mass=37, isotope=list(mass=c(-0.05194,0.94617)), abundance=c(0.9328,0.0672))
+H2O <- list(name= "H2O", mass=-19, isotope=list(mass=c(-0.01894358)), abundance=c(1))
+NaK <- list(name= "NaK-H", mass=61, isotope=list(mass=c(-0.054345,0.943765)), abundance=c(0.9328,0.0672))
+min2H <- list(name= "min2H", mass=-2, isotope=list(mass=c(-0.0151014)), abundance=c(1))
+plus2H <- list(name= "plus2H", mass=2, isotope=list(mass=c(0.0151014)), abundance=c(1))
+plus2Na <- list(name= "plus2Na", mass=46,  isotope=list(mass=c(-0.02046), abundance=c(1)))
+plusNaH <- list(name= "plusNaH", mass=24,  isotope=list(mass=c(-0.00295588), abundance=c(1)))
+plusKH <- list(name= "plusKH", mass=40, isotope=list(mass=c(-0.029008,0.969101)), abundance=c(0.9328,0.0672))
+plusHNH <- list(name= "plusHNH", mass=19, isotope=list(mass=c(0.04164642)), abundance=c(1))
+min3H <- list(name= "min3H", mass=-3, isotope=list(mass=c(-0.02182926)), abundance=c(1))
+plus3H <- list(name= "plus3H", mass=3, isotope=list(mass=c(0.02182926)), abundance=c(1))
+plus3Na <- list(name= "plus3Na", mass=68, isotope=list(mass=c(0.96931)), abundance=c(1))
+plus2NaH <- list(name= "plus2NaH", mass=47, isotope=list(mass=c(0.2985712)), abundance=c(1))
+plusNa2H <- list(name= "plusNa2H", mass=25, isotope=list(mass=c(0.00432284)), abundance=c(1))
+
+Cl37 <- list(name= "Cl37", mass=37,  isotope=list(mass=c(-0.03409741)),  abundance=c(1))
+
+allelements <- list(Ba, Br, C, Ca, Cl, Cr, Cu, F, Fe, H, Hg, I, K, Li, Mg, Mn, N, Na, Ni, O, P, S, Se, Si, Sn, Zn)
+allAdducts <- list(Ba, Br, Ca, Cl, Cl37, Cr, Cu, Fe, Hg, I, K, Li, Mg, Mn, Na, Ni, Se, Si, Sn, Zn, NH4, Ac, NaCl, For,Na2,CH3OH,NaCl2,NaCl3,NaCl4,NaCl5,Na3,KCl,H2PO4,HSO4,Met2,Met3,Met4,Met5,NaminH,KminH,H2O,NaK,min2H,plus2H,plus2Na,plusNaH,plusKH,min3H,plus3H,plusHNH,plus3Na,plus2NaH,plusNa2H)
+
+#atomsinuse <- c("P",     "O",         "N",           "C",     "H",          "S",        "Cl")
+#atomicWts <- c(30.97376163, 15.99491463, 14.0030740052, 12.0000, 1.0078250321, 31.9720707, 34.968852721)
+#electron <- 0.00054858
+#Mol.comp <- c(0,4,0,0,1,1,0)
+#Mol.exact <- sum(Mol.comp * atomicWts) + electron
+#Mol.corr <- Mol.exact - 0.0022 + 0.000007*Mol.exact  # mass as found in peak group list
+#getMass(Mol) + electron - 0.0022 + 0.000007*getMass(Mol)
+
+#atomsinuse <- c("P",     "O",         "N",           "C",     "H",          "S",        "Cl",          "D",           "34S",          "18O")
+#atomicWts <- c(30.97376163, 15.99491463, 14.0030740052, 12.0000, 1.0078250321, 31.9720707, 34.968852721, 2.0141017778, 33.96786690, 17.9991610)
+#electron <- 0.00054858
+#Mol.comp <- c(0,4,0,0,1,1,0,0,0,0)  # main peak HSO4
+#Mol.comp <- c(0,4,0,0,0,1,0,1,0,0)  # deuterated
+#Mol.comp <- c(0,4,0,0,1,0,0,0,1,0)  # 34S
+#Mol.comp <- c(0,3,0,0,1,1,0,0,0,1)  # 18O
+#Mol.exact <- sum(Mol.comp * atomicWts) + electron
+#Mol.corr <- Mol.exact - 0.0022 + 0.000007*Mol.exact  # mass as found in peak group list
+
+atomsinuse <- c("P",     "O",         "N",           "C",     "H",          "S",        "Cl",          "D",           "13C",       "34S",          "18O",     "37Cl")
+atomicWts <- c(30.97376163, 15.99491463, 14.0030740052, 12.0000, 1.0078250321, 31.9720707, 34.968852721, 2.0141017778, 13.0033548378, 33.96786690, 17.9991610, 36.96590259)
+electron <- 0.00054858
+############# P O N C H  S Cl D 13C 34S 18O 37Cl
+#Mol.comp <- c(0,6,0,6,12,0,1, 0, 0, 0,  0,  0)  # main peak Galactose HCl negative ion
+#Mol.comp <- c(0,6,0,5,12,0,1, 0, 1, 0,  0,  0)  # 13C Galactose HCl negative ion
+#Mol.comp <- c(0,6,0,6,11,0,1, 1, 0, 0,  0,  0)  # D Galactose HCl negative ion
+#Mol.comp <- c(0,5,0,6,12,0,1, 0, 0, 0,  1,  0)  # 18O Galactose HCl negative ion
+#Mol.comp <- c(0,6,0,6,12,0,0, 0, 0, 0,  0,  1)  # 18O Galactose HCl negative ion
+#Mol.exact <- sum(Mol.comp * atomicWts) + electron
+#Mol.corr <- Mol.exact - 0.0022 + 0.000007*Mol.exact  # mass as found in peak group list
+
+Hmass <- H$mass + H$isotope$mass[1]
+Dmass <- H$mass + 1 + H$isotope$mass[2]
+Tmass <- H$mass + 2 + H$isotope$mass[3]
+C13mass <- C$mass + 1 + C$isotope$mass[2]
+N15mass <- N$mass + 1 + N$isotope$mass[2]
diff --git a/DIMS/AddOnFunctions/iden.code.R b/DIMS/AddOnFunctions/iden.code.R
new file mode 100644
index 0000000..4853647
--- /dev/null
+++ b/DIMS/AddOnFunctions/iden.code.R
@@ -0,0 +1,45 @@
+iden.code <- function(peaklist, db, ppm, theor_mass_label) {
+  # theor_mass_label = {"MNeg", "Mpos"}
+  
+  mcol <- peaklist[ , "mzmed.pgrp"]
+  theor.mcol <- db[,theor_mass_label]
+  assi_HMDB <- iso_HMDB <- HMDB_code <- c(rep("",length(mcol)))
+  theormz_HMDB <- c(rep(0,length(mcol)))
+  
+  for(t in 1:length(mcol)){
+    mz<-mcol[t]
+    mtol <- mz*ppm/1000000
+    selp <- which(theor.mcol > (mz - mtol) & theor.mcol < (mz + mtol))
+    assinames <- isonames <- HMDBcodes <- ""
+    
+    if(length(selp)!=0){
+      for(i in 1:length(selp)){
+        
+        if(grepl(" iso ", db[selp[i], "CompoundName"])){
+          mainpeak <- strsplit(db[selp[i],"CompoundName"]," iso ")[[1]][1]
+          
+          # Check if peak without isotope occurs, this assumes that peaklist is ordered on mass!!! 
+          if(length(grep(mainpeak, assi_HMDB, fixed=TRUE)) > 0){
+            isonames <- as.character(paste(isonames,as.character(db[selp[i],"CompoundName"]), sep=";"))
+          }
+          
+        } else {
+          assinames <- as.character(paste(assinames,as.character(db[selp[i], "CompoundName"]), sep=";"))
+          HMDBcodes <- as.character(paste(HMDBcodes, as.character(rownames(db)[selp[i]]), sep=";"))
+        }  
+      }
+    }  
+    
+    assi_HMDB[t] <- as.character(assinames)
+    iso_HMDB[t] <-  as.character(isonames)
+    if ((assinames=="") & (isonames=="")){ 
+      theormz_HMDB[t] <- ""
+    } else {
+      theormz_HMDB[t] <- theor.mcol[selp[1]]
+    }
+    HMDB_code[t] <- as.character(HMDBcodes)
+  }
+  
+  return(cbind(peaklist, assi_HMDB, iso_HMDB, theormz_HMDB, HMDB_code))
+}
+
diff --git a/DIMS/AddOnFunctions/ident.hires.noise.HPC.R b/DIMS/AddOnFunctions/ident.hires.noise.HPC.R
new file mode 100644
index 0000000..0be43d1
--- /dev/null
+++ b/DIMS/AddOnFunctions/ident.hires.noise.HPC.R
@@ -0,0 +1,242 @@
+# modified identify function to also look for adducts and their isotopes
+ident.hires.noise.HPC <- function(peaklist, allAdducts, scanmode="Negative", look4=c("Cl", "Ac"), identlist=NULL, resol=280000, slope=0, incpt=0, ppm.fixed, ppm.iso.fixed) {
+  
+  #     peaklist=outlist_Neg
+  #     scanmode="Negative"
+  #     identlist=noise.Neg.MZ
+  #     look4=look4.addN
+  #     resol=resol
+  #     slope=0 
+  #     incpt=0
+  #     ppm.fixed=3
+  #     ppm.iso.fixed=2  
+  
+  metlin <- assi <- iso <- rep("", nrow(peaklist))
+  theormz <- nisos <- expint <- conf <- rep(0, nrow(peaklist))
+  # nrH <- nrD <- nrC <- nr13C <- nrN <- nr15N <- nrO <- nrP  <- nrS <- nrCl <- rep(0, nrow(peaklist))
+  
+  # add adducts to identification list
+  if (scanmode == "Positive") { add.mode <-  "+" } else { add.mode <- "-" }
+  # identlist <- theorMZ
+  identlist.orig <- identlist
+  
+  for (p in 1:length(look4)) { # loop over type of adduct
+    #print(p)
+    identlist.Adduct <- identlist.orig
+    identlist.Adduct[ , "CompoundName"] <- as.character(identlist.orig[ , "CompoundName"])
+    
+    #add2label <- paste("[M+", look4[p], "]", add.mode, sep="")
+    if (look4[p]=="H2O") {
+      add2label <- paste("[M-", look4[p], "]", add.mode, sep="")
+    } else {
+      add2label <- paste("[M+", look4[p], "]", add.mode, sep="")
+    }
+    
+    identlist.Adduct[ , "CompoundName"] <- paste(identlist.Adduct[ , "CompoundName"], add2label, sep=" ")
+    adductInfo <- elementInfo(look4[p], allAdducts)
+    if (scanmode == "Positive") {
+      adductMass <- adductInfo$mass[1] + adductInfo$isotope$mass[1] - Hmass } else {
+        adductMass <- adductInfo$mass[1] + adductInfo$isotope$mass[1] + Hmass
+      }
+    for (q in 1:nrow(identlist.Adduct)) { # loop over compounds in database
+      # construct information for compound + adduct:
+      if (scanmode == "Positive") {
+        identlist.Adduct[q, "Mpos"] <-  as.numeric(identlist.Adduct[q, "Mpos"]) + adductMass # + Na - H
+        identlist.Adduct[q, "MNeg"] <-  0
+      } else {
+        identlist.Adduct[q, "Mpos"] <-  0
+        identlist.Adduct[q, "MNeg"] <-  as.numeric(identlist.Adduct[q, "MNeg"]) + adductMass # + Cl + H
+      }
+    }
+    
+    #     # modify columns with info for mol. formula:
+    #     if (look4[p] == "2Na-H") {
+    #       identlist.Adduct[, "nrH"] <- as.numeric(identlist.Adduct[, "nrH"]) - 1
+    #     } else if (look4[p] == "NH4") {
+    #       identlist.Adduct[, "nrH"] <- as.numeric(identlist.Adduct[, "nrH"]) + 3
+    #       identlist.Adduct[, "nrN"] <- as.numeric(identlist.Adduct[, "nrN"]) + 1
+    #     } else if (look4[p] == "Cl") {
+    #       identlist.Adduct[, "nrCl"] <- as.numeric(identlist.Adduct[, "nrCl"]) + 1
+    #       identlist.Adduct[, "nrH"] <- as.numeric(identlist.Adduct[, "nrH"]) + 1
+    #     } else if (look4[p] == "Ac") {
+    #       identlist.Adduct[ , "nrC"] <- as.numeric(identlist.Adduct[ , "nrC"]) + 2
+    #       identlist.Adduct[ , "nrH"] <- as.numeric(identlist.Adduct[ , "nrH"]) + 3
+    #       identlist.Adduct[ , "nrO"] <- as.numeric(identlist.Adduct[ , "nrO"]) + 2
+    #     } else if (look4[p] == "CH3OH+H") {
+    #       identlist.Adduct[ , "nrC"] <- as.numeric(identlist.Adduct[ , "nrC"]) + 1
+    #       identlist.Adduct[ , "nrH"] <- as.numeric(identlist.Adduct[ , "nrH"]) + 4
+    #       identlist.Adduct[ , "nrO"] <- as.numeric(identlist.Adduct[ , "nrO"]) + 1
+    #     } else {
+    #       identlist.Adduct[, "nrH"] <- as.numeric(identlist.Adduct[, "nrH"]) - 1
+    #     }
+    identlist <- rbind(identlist, identlist.Adduct) 
+  } # end for p adducts in look4
+  
+  
+  if (scanmode == "Positive") { theor.mcol <- as.numeric(identlist[ , "Mpos"]) } else {
+    theor.mcol <- as.numeric(identlist[ , "MNeg"]) }
+  # apply correction using regression line obtained with ISses
+  theor.mcol <- (1+slope)*theor.mcol + incpt
+  
+  # get mz information from peaklist
+  mcol <- peaklist[ , "mzmed.pgrp"]
+  # if column with average intensities is missing, calculate it:
+  if (!("avg.int" %in% colnames(peaklist))){
+    mzmaxcol <- which(colnames(peaklist) == "mzmax.pgrp")
+    endcol <- ncol(peaklist)
+    peaklist[ , "avg.int"] <- apply(peaklist[ ,(mzmaxcol+1):(endcol)], 1, mean)
+  }
+  
+  #   # generate URL for Metlin:
+  #   for (p in 1:nrow(peaklist)) { # for each peak  # p <- 1
+  #     mzpeak <- as.numeric(mcol[p])
+  #     # resolution as function of mz:
+  #     resol.mz <- resol*(1/sqrt(2)^(log2(mzpeak/200)))
+  #     fwhm <- mzpeak/resol.mz
+  #     #    massmin <- mzpeak - (0.00003 + 0.000003*mzpeak) - (1.0078250321 - 0.00054858) - fwhm  # for Metlin db
+  #     #    massmax <- mzpeak - (0.00003 + 0.000003*mzpeak) - (1.0078250321 - 0.00054858) + fwhm  # for Metlin db
+  #     if (scanmode == "Positive") {
+  #       massmin <- mzpeak - (1.0078250321 - 0.00054858) - fwhm  # for Metlin db
+  #       massmax <- mzpeak - (1.0078250321 - 0.00054858) + fwhm  # for Metlin db
+  #     } else {
+  #       massmin <- mzpeak + (1.0078250321 - 0.00054858) - fwhm  # for Metlin db
+  #       massmax <- mzpeak + (1.0078250321 - 0.00054858) + fwhm  # for Metlin db
+  #     }
+  #     metlin[p] <- paste("http://metlin.scripps.edu/metabo_list.php?mass_min=", massmin, "&mass_max=", massmax, sep="")
+  #   }
+  
+  
+  # do indentification using own database:
+  for (t in 1:nrow(identlist)) { # theoretical mass  # t <- 45
+    # for (t in 1:169) {
+    #print(as.character(identlist[t,"CompoundName"]))
+    theor.mz <- theor.mcol[t]
+    
+    theor.comp <- as.character(identlist[t, "Composition"])
+    #theor.comp <- mol.formula(identlist[t, ])
+    
+    #     # if there's Deuterium, Tritium, 13C or 15N in the composition:
+    #     mass.incr <- 0 + (as.numeric(identlist[t,"nrD"])*Dmass) +
+    #       (as.numeric(identlist[t,"nrT"])*Tmass) +
+    #       (as.numeric(identlist[t,"nrC13"])*C13mass) +
+    #       (as.numeric(identlist[t,"nrN15"])*N15mass)
+    #     theor.comp <- strsplit(theor.comp, "iso")[[1]][1]
+    mass.incr <- 0
+    
+    # resolution as function of mz:
+    resol.mz <- resol*(1/sqrt(2)^(log2(theor.mz/200)))
+    # calculate fine-grained isotopic distribution using MIDAs
+    fwhm <- round(theor.mz/resol.mz,6)
+    
+    #system(paste("C:/Users/awillem7/tools/MIDAs_New/MIDAs_Example ", theor.comp, " 2  C00 \"\"  \"\"  ", fwhm, " 1 0 1e-50 2 tmp", sep=""),ignore.stderr=TRUE)
+    #system(paste("C:/Users/mraves/Metabolomics/MIDAs_New/MIDAs_Example ", theor.comp, " 2  C00 \"\"  \"\"  ", fwhm, " 1 0 1e-50 2 tmp", sep=""),ignore.stderr=TRUE)
+    #system(paste(path2MIDAS, theor.comp, " 2  C00 \"\"  \"\"  ", fwhm, " 1 0 1e-50 2 tmp", sep=""),ignore.stderr=TRUE)
+    
+    #system(paste("/data/home/luyf/Metabolomics/MIDAs/MIDAs_Example ", theor.comp, " 2  C00 \"\"  \"\"  ", fwhm, " 1 0 1e-50 2 tmp", sep=""),ignore.stderr=TRUE)
+    options(stringsAsFactors = FALSE)
+    #fgid <- read.table(file="tmp_Fine_Grained_Isotopic_Distribution", header=FALSE)
+    
+    #     res <- try(fgid <- read.table(file="tmp_Fine_Grained_Isotopic_Distribution", header=FALSE))
+    #     if(inherits(res, "try-error"))
+    #     {
+    #       #error handling code, maybe just skip this iteration using
+    #       message("Skipped")
+    #       next
+    #     }
+    #     
+    #     # correct mass for D, T, 13C and 15N
+    #     fgid[ ,1] <- as.numeric(fgid[ ,1]) + mass.incr
+    #     # calculate percentage intensities from relative intensities
+    #     firstone <- as.numeric(fgid[1,2])
+    #     fgid[ ,3] <- as.numeric(fgid[ , 2]) / firstone
+    #     #fgid <- as.matrix(fgid, ncol=3)
+    #     # the mz in the MIDAs file are of the neutral molecule
+    #     if (scanmode == "Positive") { mz.iso <- as.numeric(fgid[ , 1]) + Hmass - electron }
+    #     if (scanmode == "Negative") { mz.iso <- as.numeric(fgid[ , 1]) - Hmass + electron }
+    #     fgid <- cbind(fgid, mz.iso)
+    #     colnames(fgid) <- c("mz","rel.int", "perc.int", "mz.iso")
+    
+    # compensate mz for presence of adduct
+    #     Adduct.mass <- theor.mz - mz.iso[1]
+    #     fgid[ , "mz.iso"] <- as.numeric(fgid[ , "mz.iso"]) + Adduct.mass
+    
+    # set tolerance for mz accuracy of main peak
+    mtol <- theor.mz*ppm.fixed/1000000
+    # find main peak
+    selp <- which(mcol > (theor.mz - mtol) & mcol < (theor.mz + mtol))
+    # selp <- which(mcol > (theor.mz - 0.01) & mcol < (theor.mz + 0.01))
+    # peaklist[selp, c(1:4,(endcol+1))]
+    
+    # set tolerance for mz accuracy of isotope peaks
+    itol <- theor.mz*ppm.iso.fixed/1000000
+    
+    #     if (length(selp) > 1) { # more than one candidate peak for main; select best one based on isotope pattern
+    #       #cat(as.character(identlist[t, "CompoundName"])); print(" has >1 candidate peaks")
+    #       conf.local <- rep(0, length(selp))
+    #       for (p in 1:length(selp)) { # p <- 2
+    #         # determine isotope pattern for each candidate peak
+    #         # obs.mz <- peaklist[selp[p],"mzmed.pgrp"]
+    #         conf.local[p] <- match.isotope.pattern(peaklist, scanmode, selp[p], fgid, ppm.iso.fixed)
+    #       }
+    #       # selp <- selp[abs(mcol[selp] - theor.mz) == min(abs(mcol[selp] - theor.mz))] }
+    #       selp <- na.exclude(selp[conf.local == max(conf.local)])
+    #     }
+    if (length(selp) > 1) { # more than one candidate peak for main; select best one based on mz.diff
+      selp <- selp[abs(mcol[selp] - theor.mz) == min(abs(mcol[selp] - theor.mz))] 
+    }
+    if (length(selp) == 1) { # match for main
+      assi[selp] <- paste(assi[selp], as.character(identlist[t,"CompoundName"]), sep=";")
+      theormz[selp] <- theor.mz
+      # conf[selp] <- match.isotope.pattern(peaklist, scanmode, selp, fgid, ppm.iso.fixed)
+      
+      #       nrH[selp] <- identlist[t,"nrH"]
+      #       nrD[selp] <- identlist[t,"nrD"]
+      #       nrC[selp] <- identlist[t,"nrC"]
+      #       nr13C[selp] <- identlist[t,"nrC13"]
+      #       nrN[selp] <- identlist[t,"nrN"]
+      #       nr15N[selp] <- identlist[t,"nrN15"]
+      #       nrO[selp] <- identlist[t,"nrO"]
+      #       nrP[selp] <- identlist[t,"nrP"]
+      #       nrS[selp] <- identlist[t,"nrS"]
+      #       nrCl[selp] <- identlist[t,"nrCl"]   
+      
+      # assign isotope peaks
+      #       mz.main <- peaklist[selp, "mzmed.pgrp"]  # mz of main peak
+      #       int.main <- peaklist[selp, "avg.int"]  # intensity of main peak (= 100%)
+      #       # deviation from theoretical mass:
+      #       diff <- theor.mz - mz.main
+      #       # calculate expected intensities and select isotopes with exp.int > threshold
+      #       fgid[ , "exp.int"] <- fgid[ , "perc.int"] * int.main      
+      #       fgid.subset <- fgid[(fgid[ , "exp.int"] > thresh), ]
+      #       nisos[selp] <- nrow(fgid.subset) - 1
+      #       if (nrow(fgid.subset) > 1) { # avoid error message if fgid.subset has only 1 line
+      #         for (f in 2:nrow(fgid.subset)) { # f <- 2
+      #           mz.target <- fgid.subset[f, "mz.iso"] - diff
+      #           int.target <- fgid.subset[f, "exp.int"]
+      #           # print(itarget)
+      #           sel.iso <- peaklist[ , "mzmed.pgrp"] > (mz.target - itol) & peaklist[ , "mzmed.pgrp"] < (mz.target + itol)
+      #           # sum(sel.iso)
+      #           # sel.iso <- peaklist[ , "mzmed.pgrp"] > (mz.target - 0.01) & peaklist[ , "mzmed.pgrp"] < (mz.target + 0.01)
+      #           # peaklist[sel.iso, c(1:4,23)]
+      #           if (sum(sel.iso) == 1) { # 2 separate if-statements because of error if sum(sel.iso) = 0
+      #             if (peaklist[sel.iso, "avg.int"] >  (int.target/2)) { # match
+      #               iso[sel.iso] <- paste(paste(iso[sel.iso], as.character(identlist[t,"CompoundName"]), "iso", f, sep=" "),";", sep="")
+      #               # peaklist[sel.iso, ]
+      #               expint[sel.iso] <- fgid.subset[f, "exp.int"]
+      #             }
+      #           } else if (sum(sel.iso) > 1) {
+      #             nrs.iso <- which(sel.iso)
+      #             nr.iso <- nrs.iso[which(abs(peaklist[sel.iso, "avg.int"] - int.target) == min(abs(peaklist[sel.iso, "avg.int"] - int.target)))]
+      #             if (peaklist[nr.iso, "avg.int"] >  (int.target/2)) {  # match
+      #               # print(peaklist[nr.iso, "avg.int"])
+      #               iso[nr.iso] <- paste(paste(iso[nr.iso], as.character(identlist[t,"CompoundName"]), "iso", f, sep=" "),";", sep="")
+      #               expint[nr.iso] <- fgid.subset[f, "exp.int"]
+      #             } # end if
+      #           } # end else if
+      #         } # end for f 
+      #       } # end if
+    } # end if 
+  } # end for t
+  #   cbind(peaklist, nrH, nrD, nrC, nr13C, nrN, nr15N, nrO, nrP, nrS, nrCl, assi, theormz, conf, nisos, iso, expint, metlin)
+  cbind(peaklist, assi, theormz, conf, nisos, iso, expint, metlin)
+}
diff --git a/DIMS/AddOnFunctions/isWithinXppm.R b/DIMS/AddOnFunctions/isWithinXppm.R
new file mode 100644
index 0000000..5da43c6
--- /dev/null
+++ b/DIMS/AddOnFunctions/isWithinXppm.R
@@ -0,0 +1,51 @@
+isWithinXppm <- function(mean, scale, sigma, area, x2, x, ppm=4, resol, plot) {
+  #   mean=retVal$mean
+  #   scale=retVal$scale
+  #   sigma=retVal$sigma
+  #   area=retVal$area
+  #   ppm=3
+  
+  # sort!!!!!!!!!!!!!!!!
+  index = order(mean)
+  mean = mean[index]
+  scale = scale[index]
+  sigma = sigma[index]
+  area = area[index]
+  
+  summed = NULL
+  remove = NULL
+  
+  if (length(mean)>1){
+    for (i in 2:length(mean)){
+      if ((abs(mean[i-1]-mean[i])/mean[i-1])*10^6 < ppm) {
+        
+        # avoid double occurance in sum
+        if ((i-1) %in% summed) next
+        
+        retVal = sumCurves(mean[i-1], mean[i], scale[i-1], scale[i], sigma[i-1], sigma[i], x2, x, resol, plot)
+        summed = c(summed, i-1, i)
+        if (is.nan(retVal$mean)) retVal$mean=0 
+        mean[i-1] = retVal$mean
+        mean[i] = retVal$mean
+        area[i-1] = retVal$area
+        area[i] = retVal$area
+        scale[i-1] = retVal$scale
+        scale[i] = retVal$scale
+        sigma[i-1] = retVal$sigma
+        sigma[i] = retVal$sigma
+        
+        remove = c(remove, i)
+      }
+    }
+  }
+  
+  if (length(remove)!=0){
+    mean=mean[-c(remove)]
+    area=area[-c(remove)]
+    scale=scale[-c(remove)]
+    sigma=sigma[-c(remove)]
+  }
+  
+  return(list("mean"=mean, "area"=area, "scale"=scale, "sigma"=sigma, "qual"=NULL))
+  
+}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/is_diagnostic_patient.R b/DIMS/AddOnFunctions/is_diagnostic_patient.R
new file mode 100644
index 0000000..839f41d
--- /dev/null
+++ b/DIMS/AddOnFunctions/is_diagnostic_patient.R
@@ -0,0 +1,6 @@
+is_diagnostic_patient <- function(patient_column){
+  # Check for Diagnostics patients with correct patientnumber (e.g. starting with "P2024M")
+  diagnostic_patients <- grepl("^P[0-9]{4}M",patient_column)
+  
+  return(diagnostic_patients)
+}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/mergeDuplicatedRows.R b/DIMS/AddOnFunctions/mergeDuplicatedRows.R
new file mode 100644
index 0000000..39cb8b4
--- /dev/null
+++ b/DIMS/AddOnFunctions/mergeDuplicatedRows.R
@@ -0,0 +1,49 @@
+mergeDuplicatedRows <- function(peaklist) {
+  # peaklist = outlist.tot
+  # resultDir = "./results"
+  # scanmode = "positive"
+  
+  # peaklist_index=which(peaklist[,"mzmed.pgrp"]=="94.9984524624111")
+  # peaklist[peaklist_index,]
+  
+  collapse <- function(label,pklst,index){
+    # label = "iso_HMDB"
+    # pklst = peaklist
+    # index = peaklist_index
+    tmp2=as.vector(pklst[index,label])
+    if (length(which(is.na(tmp2)))>0) tmp2=tmp2[-which(is.na(tmp2))]
+    return(paste(tmp2,collapse = ";"))
+  } 
+  
+  options(digits=16)
+  
+  collect=NULL
+  remove=NULL
+  
+  index = which(duplicated(peaklist[, "mzmed.pgrp"]))
+  
+  while (length(index) > 0){
+    
+    peaklist_index = which(peaklist[, "mzmed.pgrp"] == peaklist[index[1], "mzmed.pgrp"])
+    # peaklist[peaklist_index,"iso_HMDB",drop=FALSE]
+    tmp=peaklist[peaklist_index[1],,drop=FALSE]
+    
+    tmp[,"assi_HMDB"]=collapse("assi_HMDB",peaklist,peaklist_index)
+    tmp[,"iso_HMDB"]=collapse("iso_HMDB",peaklist,peaklist_index)
+    tmp[,"HMDB_code"]=collapse("HMDB_code",peaklist,peaklist_index)
+    tmp[,"assi_noise"]=collapse("assi_noise",peaklist,peaklist_index)
+    if (tmp[,"assi_noise"]==";") tmp[,"assi_noise"]=NA
+    tmp[,"theormz_noise"]=collapse("theormz_noise",peaklist,peaklist_index)
+    if (tmp[,"theormz_noise"]=="0;0") tmp[,"theormz_noise"]=NA
+    
+    collect = rbind(collect, tmp)
+    remove = c(remove, peaklist_index)
+    
+    index=index[-which(peaklist[index, "mzmed.pgrp"] == peaklist[index[1], "mzmed.pgrp"])]
+  } 
+  
+  if (!is.null(remove)) peaklist = peaklist[-remove,]
+  peaklist = rbind(peaklist,collect)
+  
+  return(peaklist)
+}
diff --git a/DIMS/AddOnFunctions/normalization_2.1.R b/DIMS/AddOnFunctions/normalization_2.1.R
new file mode 100644
index 0000000..ed99964
--- /dev/null
+++ b/DIMS/AddOnFunctions/normalization_2.1.R
@@ -0,0 +1,75 @@
+# normalization_2.1(outlist.pos.id, "Intensity_all_peaks_positive_norm", groupNames.pos, on="total", assi_label="assi_HMDB")
+
+normalization_2.1 <- function(data, filename, groupNames, on="total", assi_label="assi"){
+  # data=outlist.pos.id
+  # filename = "Intensity_all_peaks_positive_norm"
+  # groupNames = groupNames.pos
+  # on="total"
+  # assi_label="assi_HMDB"
+  
+  lastcol = length(groupNames) + 6
+  before = data[ ,c(7:lastcol)]
+  
+  if (on=="total_IS") {
+    
+    assi = which(colnames(data)==assi_label)
+    data.int <- data[ ,c(assi,7:lastcol)]  # assi and samples columns
+    data.assi = data.int[grep("(IS", data.int[,1], ignore.case=FALSE, fixed = TRUE),]
+    
+  } else if (on=="total_ident"){
+    
+    assi = which(colnames(data)==assi_label)
+    assi.hmdb = which(colnames(data)=="assi.hmdb")
+    index = sort(union(which(data[,assi]!=""), which(data[,assi.hmdb]!="")))
+    
+    data.int = data[ ,c(assi,7:lastcol)]  # assi and samples columns
+    data.assi = data.int[index,]
+    
+  } else if (on=="total") {
+    
+    assi = which(colnames(data)==assi_label)
+    data.int = data[ ,c(assi,7:lastcol)]  # assi and samples columns
+    data.assi = data.int
+    
+  }
+  
+  sum <- 0
+  for (c in 2:ncol(data.assi)) {
+    sum <- sum + sum(as.numeric(data.assi[,c]))
+  }
+  average <- sum/(ncol(data.assi)-1)
+  for (c in 2:ncol(data.int)) {
+    factor <- sum(as.numeric(data.assi[,c]))/average
+    if (factor==0) {
+      data.int[ ,c]=0
+      cat(colnames(data.int)[c])
+      cat("factor==0 !!!")
+    } else {
+      data.int[ ,c] <- as.numeric(data.int[ ,c])/factor
+    }
+  }
+  
+  # colnames(data.int[,2:ncol(data.int)])
+  
+  if (dim(data)[2]==lastcol){
+    final.outlist.Pos.idpat.norm <- cbind(data[,1:6],data.int[,2:ncol(data.int)])
+  } else {
+    final.outlist.Pos.idpat.norm <- cbind(data[,1:6],data.int[,2:ncol(data.int)],data[,(lastcol + 1):ncol(data)])
+  }
+  
+  #outdir="./results/normalization"
+  #dir.create(outdir, showWarnings = FALSE)
+  
+  #CairoPNG(filename=paste(outdir, paste(filename, "_before.png", sep=""), sep="/"), width, height)
+  #sub=apply(before,2, function(x) sum(as.numeric(x)))
+  #barplot(as.vector(unlist(sub)), main="Not normalized",names.arg = colnames(before),las=2)
+  #dev.off()
+  
+  #CairoPNG(filename=paste(outdir, paste(filename, "_", on, ".png", sep=""), sep="/"), width, height)
+  #sub=apply(final.outlist.Pos.idpat.norm[,c(7:lastcol)],2, function(x) sum(as.numeric(x)))
+  #barplot(as.vector(unlist(sub)), main=filename,names.arg = colnames(final.outlist.Pos.idpat.norm[,c(7:lastcol)]),las=2)
+  #dev.off()
+  
+  return(final.outlist.Pos.idpat.norm)
+  
+}
diff --git a/DIMS/AddOnFunctions/optimizeGauss.R b/DIMS/AddOnFunctions/optimizeGauss.R
new file mode 100644
index 0000000..0fbce75
--- /dev/null
+++ b/DIMS/AddOnFunctions/optimizeGauss.R
@@ -0,0 +1,11 @@
+optimizeGauss <- function(x,y,sigma,mu) {
+  
+  f = function(p,x,y,sigma,mu) {
+    curve = p*dnorm(x,mu,sigma)
+    return((max(curve)-max(y))^2)
+  }
+  
+  rval = optimize(f, c(0, 100000), tol = 0.0001,x,y,sigma,mu)
+  
+  return(rval$minimum)
+}
diff --git a/DIMS/AddOnFunctions/output_helix.R b/DIMS/AddOnFunctions/output_helix.R
new file mode 100644
index 0000000..c098675
--- /dev/null
+++ b/DIMS/AddOnFunctions/output_helix.R
@@ -0,0 +1,31 @@
+output_for_helix <- function(protocol_name, df_metabs_helix){
+  
+  # Remove positive controls
+  df_metabs_helix <- df_metabs_helix %>% filter(is_diagnostic_patient(Patient))
+  
+  # Add 'Vial' column, each patient has unique ID
+  df_metabs_helix <- df_metabs_helix %>% 
+    group_by(Patient) %>% 
+    mutate(Vial = cur_group_id()) %>% 
+    ungroup()
+  
+  # Split patient number into labnummer and Onderzoeksnummer
+  df_metabs_helix <- add_lab_id_and_onderzoeksnummer(df_metabs_helix)
+  
+  # Add column with protocol name
+  df_metabs_helix$Protocol <- protocol_name
+  
+  # Change name Z_score and Helix_naam columns to Amount and Name
+  change_columns <- c(Amount = "Z_score", Name = "Helix_naam")
+  df_metabs_helix <- df_metabs_helix %>% rename(all_of(change_columns))
+  
+  # Select only necessary columns and set them in correct order
+  df_metabs_helix <- df_metabs_helix %>% 
+    select(c(Vial, labnummer, Onderzoeksnummer, Protocol, Name, Amount))
+  
+  # Remove duplicate patient-metabolite combinations ("leucine + isoleucine + allo-isoleucin_Z-score" is added 3 times)
+  df_metabs_helix <- df_metabs_helix %>% 
+    group_by(Onderzoeksnummer, Name) %>% distinct() %>% ungroup()
+  
+  return(df_metabs_helix)
+}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/peak.grouping.Gauss.HPC.R b/DIMS/AddOnFunctions/peak.grouping.Gauss.HPC.R
new file mode 100644
index 0000000..c36259a
--- /dev/null
+++ b/DIMS/AddOnFunctions/peak.grouping.Gauss.HPC.R
@@ -0,0 +1,88 @@
+peak.grouping.Gauss.HPC <- function(outdir, fileIn, scanmode, resol, groupNames) {
+  # fileIn="./results/specpks_all/positive_outlist_i_min_1.197.RData"
+  
+  # ppm/2
+  #range = 1.5e-06
+  range = 2e-06  
+  startcol=7
+  
+  # outlist.copy <- read.table(file=fileIn, sep="\t", header=TRUE)
+  load(fileIn)
+  outlist.copy = outlist_i_min_1
+  batch = strsplit(fileIn, ".",fixed = TRUE)[[1]][3]
+  
+  outpgrlist = NULL
+  
+  while (max(as.numeric(outlist.copy[ , "height.pkt"])) > 0 ) {
+    
+    sel = which(as.numeric(outlist.copy[ , "height.pkt"]) == max(as.numeric(outlist.copy[ , "height.pkt"])))[1]
+    
+    # 3ppm range around max
+    mzref = as.numeric(outlist.copy[sel, "mzmed.pkt"])
+    pkmin = -(range*mzref - mzref)
+    pkmax = 2*mzref-pkmin
+    
+    selp = as.numeric(outlist.copy[ , "mzmed.pkt"]) > pkmin & as.numeric(outlist.copy[ , "mzmed.pkt"]) < pkmax
+    tmplist = outlist.copy[selp, ]
+    
+    nrsamples = sum(selp)
+    if (nrsamples > 1) {
+      # 3ppm range around mean
+      mzmed.pgrp = mean(as.numeric(outlist.copy[selp, "mzmed.pkt"]))
+      mzmin.pgrp = -(range*mzmed.pgrp - mzmed.pgrp)
+      mzmax.pgrp = 2*mzmed.pgrp - mzmin.pgrp
+      
+      selp = as.numeric(outlist.copy[ , "mzmed.pkt"]) > mzmin.pgrp & as.numeric(outlist.copy[ , "mzmed.pkt"]) < mzmax.pgrp
+      tmplist = outlist.copy[selp, ]
+      
+      fq.worst.pgrp = as.numeric(max(outlist.copy[selp, "fq"]))
+      fq.best.pgrp = as.numeric(min(outlist.copy[selp, "fq"]))
+      ints.allsamps = rep(0, length(groupNames))
+      names(ints.allsamps) = groupNames # same order as sample list!!!  
+      
+      #       if (length(unique(as.vector(tmplist[,"samplenr"]))) != length(as.vector(tmplist[,"samplenr"]))) {
+      #         message(paste("bingo", sel))
+      #         break
+      #       }
+      
+      # Check for each sample if multiple peaks exists, if so take the sum! 
+      labels=unique(tmplist[,"samplenr"])
+      ints.allsamps[labels] = as.vector(unlist(lapply(labels, function(x) {sum(as.numeric(tmplist[which(tmplist[ , "samplenr"]==x), "height.pkt"]))})))
+      
+      outpgrlist = rbind(outpgrlist, c(mzmed.pgrp, fq.best.pgrp, fq.worst.pgrp, nrsamples, mzmin.pgrp, mzmax.pgrp, ints.allsamps))
+    }
+    outlist.copy[selp, "height.pkt"] = -1
+  }
+  
+  outpgrlist = as.data.frame(outpgrlist)  # ignore warnings of duplicate row names
+  colnames(outpgrlist)[1:6] = c("mzmed.pgrp", "fq.best", "fq.worst", "nrsamples", "mzmin.pgrp", "mzmax.pgrp")
+  
+  #   # filtering ##################################################################################################################
+  #   final.outlist=outpgrlist[,c("mzmed.pgrp", "fq.best", "fq.worst","nrsamples","mzmin.pgrp","mzmax.pgrp", sampleNames)]
+  #   # NB: in centroided mode, data files contains many "-1.000" values, from peak finding. Set these to zero.
+  #   final.outlist[final.outlist == -1] = 0
+  #   
+  #   # keep only peaks which occur in 3 out of 3 technical replicates in at least one sample in peak group list
+  # #   peakFiltering(repl.pattern, final.outlist, nsampgrps, outdir, scanmode, startcol=7)
+  # #   peakFiltering <- function(repl.pattern, final.outlist, nsampgrps, resultDir, scanmode, startcol){
+  #   nsamp = length(repl.pattern)
+  #   nsampgrps = length(repl.pattern[[1]])
+  #   
+  #   keep <- rep(0, nrow(final.outlist))
+  #   for (p in 1:nrow(final.outlist)) {  
+  #     for (g in 1:nsampgrps) {  # g <- 31
+  #       if (keep[p] == 0 & sum(final.outlist[p, repl.pattern[[g]]] > 0) == length(repl.pattern[[g]]) ) { keep[p] <- 1 }
+  #     }
+  #   }
+  #   
+  #   tmp <- cbind(final.outlist, keep)
+  #   final.outlist.filt <- tmp[keep == 1, ]
+  # 
+  #   # omit keep column
+  #   final.outlist.filt <- final.outlist.filt[ , -ncol(final.outlist.filt)]
+  
+  #save(outpgrlist_part, file=paste(outdir, paste(scanmode, "_", mzstart, "_", mzend, ".RData", sep=""), sep="/"))
+  # save(final.outlist.filt, file=paste(outdir, "peak_grouping", paste(scanmode, "_",batch,".RData", sep=""), sep="/"))
+  save(outpgrlist, file=paste(outdir, "peak_grouping", paste(scanmode, "_",batch,".RData", sep=""), sep="/"))
+  
+}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/prepare_alarmvalues.R b/DIMS/AddOnFunctions/prepare_alarmvalues.R
new file mode 100644
index 0000000..c09c536
--- /dev/null
+++ b/DIMS/AddOnFunctions/prepare_alarmvalues.R
@@ -0,0 +1,52 @@
+prepare_alarmvalues <- function(pt_name, dims_helix_table) {
+  # extract data for patient of interest (pt_name)
+  pt_metabs_helix <- dims_helix_table %>% filter(Patient == pt_name)
+  pt_metabs_helix$Z_score <- round(pt_metabs_helix$Z_score, 2)
+  
+  # Make empty dataframes for metabolites above or below alarmvalues
+  pt_list_high <- data.frame(HMDB_name = character(), Z_score = numeric())
+  pt_list_low <- data.frame(HMDB_name = character(), Z_score = numeric())
+  
+  # Loop over individual metabolites
+  for (metab in unique(pt_metabs_helix$HMDB_name)){
+    # Get data for individual metabolite
+    pt_metab <- pt_metabs_helix %>% filter(HMDB_name == metab)
+    # print(pt_metab)
+    
+    # Check if zscore is positive of negative
+    if(pt_metab$Z_score > 0) {
+      # Get specific alarmvalue for metabolite
+      high_zscore_cutoff_metab <- pt_metabs_helix %>% filter(HMDB_name == metab) %>% pull(high_zscore)
+      
+      # If zscore is above the alarmvalue, add to pt_list_high table
+      if(pt_metab$Z_score > high_zscore_cutoff_metab) {
+        pt_metab_high <- pt_metab %>% select(HMDB_name, Z_score)
+        pt_list_high <- rbind(pt_list_high, pt_metab_high)
+      }
+    } else {
+      # Get specific alarmvalue for metabolite
+      low_zscore_cutoff_metab <- pt_metabs_helix %>% filter(HMDB_name == metab) %>% pull(low_zscore)
+      
+      # If zscore is below the alarmvalue, add to pt_list_low table
+      if(pt_metab$Z_score < low_zscore_cutoff_metab) {
+        pt_metab_low <- pt_metab %>% select(HMDB_name, Z_score)
+        pt_list_low <- rbind(pt_list_low, pt_metab_low)
+      }
+    }
+  }
+  
+  # sort tables on zscore
+  pt_list_high <- pt_list_high %>% arrange(desc(Z_score))
+  pt_list_low <- pt_list_low %>% arrange(Z_score)
+  # add lines for increased, decreased
+  extra_line1 <- c("Increased", "") 
+  extra_line2 <- c("Decreased", "")
+  # combine the two lists
+  top_metab_pt <- rbind(extra_line1, pt_list_high, extra_line2, pt_list_low)
+  # remove row names
+  rownames(top_metab_pt) <- NULL
+  # change column names for display
+  colnames(top_metab_pt) <- c("Metabolite", "Z-score")
+  
+  return(top_metab_pt)
+}
diff --git a/DIMS/AddOnFunctions/prepare_data.R b/DIMS/AddOnFunctions/prepare_data.R
new file mode 100644
index 0000000..07a8805
--- /dev/null
+++ b/DIMS/AddOnFunctions/prepare_data.R
@@ -0,0 +1,42 @@
+prepare_data <- function(metab_list_all, zscore_patients_local) {
+  # remove "_Zscore" from column (patient) names
+  colnames(zscore_patients_local) <- gsub("_Zscore", "", colnames(zscore_patients_local))
+  # put data into pages, max 20 violin plots per page in PDF
+  metab_interest_sorted <- list()
+  metab_category <- c()
+  for (metab_class_index in 1:length(metab_list_all)) { # "acyl_carnitines" "amino_acids" "crea_gua"
+    metab_class <- names(metab_list_all)[metab_class_index]
+    metab_list <- metab_list_all[[metab_class_index]]
+    if (ncol(metab_list) > 2) {
+      # third column are the alarm values, so reduce the data frame to 2 columns and save list
+      metab_list_alarm <- metab_list
+      metab_list <- metab_list[ , c(1,2)]
+    }
+    # make sure that all HMDB_names have 45 characters 
+    for (metab_index in 1:length(metab_list$HMDB_name)) {
+      if (is.character(metab_list$HMDB_name[metab_index])) { 
+        HMDB_name_separated <- strsplit(metab_list$HMDB_name[metab_index], "")[[1]]
+      } else { HMDB_name_separated <- "strspliterror" }
+      if (length(HMDB_name_separated) <= 45) {
+        HMDB_name_separated <- c(HMDB_name_separated, rep(" ", 45-length(HMDB_name_separated)))
+      } else {
+        HMDB_name_separated <- c(HMDB_name_separated[1:42], "...")
+      }
+      metab_list$HMDB_name[metab_index] <- paste0(HMDB_name_separated, collapse = "")
+    }
+    # find metabolites and ratios in data frame zscore_patients_local
+    metab_interest <- inner_join(metab_list, zscore_patients_local[-2], by = "HMDB_code")
+    # remove column "HMDB_code"
+    metab_interest <- metab_interest[ , -which(colnames(metab_interest) == "HMDB_code")]
+    # put the data frame in long format
+    metab_interest_melt <- reshape2::melt(metab_interest, id.vars = "HMDB_name")
+    # sort on metabolite names (HMDB_name)
+    sort_order <- order(metab_interest_melt$HMDB_name)
+    metab_interest_sorted[[metab_class_index]] <- metab_interest_melt[sort_order, ]
+    metab_category <- c(metab_category, metab_class)
+  }
+  names(metab_interest_sorted) <- metab_category
+ 
+  return(metab_interest_sorted)
+   
+} 
diff --git a/DIMS/AddOnFunctions/prepare_data_perpage.R b/DIMS/AddOnFunctions/prepare_data_perpage.R
new file mode 100644
index 0000000..9f112f3
--- /dev/null
+++ b/DIMS/AddOnFunctions/prepare_data_perpage.R
@@ -0,0 +1,47 @@
+prepare_data_perpage <- function(metab_interest_sorted, metab_interest_contr, nr_plots_perpage, nr_pat=20, nr_contr=30) {
+  total_nr_pages <- 0
+  metab_perpage <- list()
+  metab_category <- c()
+  for (metab_class_index in 1:length(metab_interest_sorted)) { # "acyl_carnitines" "amino_acids" "crea_gua"
+    # split list into pages, each page containing max nr_plots_perpage (20) compounds
+    metab_interest_perclass <- metab_interest_sorted[[metab_class_index]]
+    metab_class <- names(metab_interest_sorted)[metab_class_index]
+    # add controls
+    metab_interest_contr_perclass <- metab_interest_contr[[metab_class_index]]
+    # number of pages for this class
+    nr_pages <- ceiling(length(unique(metab_interest_perclass$HMDB_name)) / nr_plots_perpage)
+    for (page_nr in 1:nr_pages) {
+      total_nr_pages <- total_nr_pages + 1
+      select_rows_start <- (nr_pat * nr_plots_perpage * (page_nr-1)) + 1
+      select_rows_end   <-  nr_pat * nr_plots_perpage * page_nr
+      metab_onepage_pat <- metab_interest_perclass[select_rows_start:select_rows_end, ]
+      # same for controls
+      select_rows_start_contr <- (nr_contr * nr_plots_perpage * (page_nr-1)) + 1
+      select_rows_end_contr   <-  nr_contr * nr_plots_perpage * page_nr
+      metab_onepage_pcontr <- metab_interest_contr_perclass[select_rows_start_contr:select_rows_end_contr, ]
+      # add controls
+      metab_onepage <- rbind(metab_onepage_pat, metab_onepage_pcontr)
+      # if a page has fewer than nr_plots_perpage plots, fill page with empty plots
+      NA_rows <- which(is.na(metab_onepage$HMDB_name))
+      if (length(NA_rows) > 0) {
+        # repeat the patient and control variables
+        metab_onepage$variable[NA_rows] <- metab_onepage$variable[1:(nr_pat + nr_contr)]
+        # for HMDB name, substitute a number of spaces
+        for (row_nr in NA_rows) {
+          metab_onepage$HMDB_name[row_nr] <- paste0(rep("_", ceiling(row_nr/(nr_pat + nr_contr))), collapse = "")
+        }
+        metab_onepage$HMDB_name <- gsub("_", " ", metab_onepage$HMDB_name)
+        # leave the values at NA
+      }
+      # put data for one page into object with data for all pages
+      metab_perpage[[total_nr_pages]] <- metab_onepage
+      # create list of page headers
+      metab_category <- c(metab_category, paste(metab_class, page_nr, sep="_"))
+    }
+  }
+  # add page headers to list
+  names(metab_perpage) <- metab_category
+  
+  return(metab_perpage)
+  
+}
diff --git a/DIMS/AddOnFunctions/prepare_toplist.R b/DIMS/AddOnFunctions/prepare_toplist.R
new file mode 100644
index 0000000..369f887
--- /dev/null
+++ b/DIMS/AddOnFunctions/prepare_toplist.R
@@ -0,0 +1,29 @@
+prepare_toplist <- function(pt_name, zscore_patients_copy) {
+  # set parameters for table
+  top_highest <- 20
+  top_lowest <- 10
+  
+  # extract data for patient of interest (pt_name)
+  pt_list <- zscore_patients_copy[ , c(1,2, which(colnames(zscore_patients_copy) == pt_name))]
+  # sort metabolites on Z-scores for this patient
+  pt_list_sort <- sort(pt_list[ , 3], index.return=TRUE)
+  pt_list_sorted <- pt_list[pt_list_sort$ix, ]
+  # determine top highest and lowest Z-scores for this patient
+  pt_list_sort <- sort(pt_list[ , 3], index.return=TRUE)
+  pt_list_low  <- pt_list[pt_list_sort$ix[1:top_lowest], ]
+  pt_list_high <- pt_list[pt_list_sort$ix[length(pt_list_sort$ix):(length(pt_list_sort$ix)-top_highest+1)], ]
+  # round off Z-scores
+  pt_list_low[ , 3]  <- round(as.numeric(pt_list_low[ , 3]), 2)
+  pt_list_high[ , 3] <- round(as.numeric(pt_list_high[ , 3]), 2)
+  # add lines for increased, decreased
+  extra_line1 <- c("Increased", "", "") 
+  extra_line2 <- c("Decreased", "", "")
+  top_metab_pt <- rbind(extra_line1, pt_list_high, extra_line2, pt_list_low)
+  # remove row names
+  rownames(top_metab_pt) <- NULL
+  
+  # change column names for display
+  colnames(top_metab_pt) <- c("HMDB_ID", "Metabolite", "Z-score")
+  
+  return(top_metab_pt)
+}
diff --git a/DIMS/AddOnFunctions/remove.dupl.2.1.R b/DIMS/AddOnFunctions/remove.dupl.2.1.R
new file mode 100644
index 0000000..89e307d
--- /dev/null
+++ b/DIMS/AddOnFunctions/remove.dupl.2.1.R
@@ -0,0 +1,32 @@
+remove.dupl.2.1 <- function(peaklist) {
+  # peaklist = outlist.tot
+  # resultDir = "./results"  
+  # scanmode = "positive"  
+  
+  # peaklist=peaklist[1:100000,]
+  
+  collect=NULL
+  remove=NULL
+  
+  index = which(duplicated(peaklist[, "mzmed.pgrp"]))
+  
+  while (length(index) > 0){
+    
+    peaklist_index = which(peaklist[, "mzmed.pgrp"] == peaklist[index[1], "mzmed.pgrp"])
+    tmp=peaklist[peaklist_index[1],,drop=FALSE]
+    if (!is.na(peaklist[peaklist_index[1],"assi_HMDB"])) tmp[,"assi_HMDB"]=paste(peaklist[peaklist_index,"assi_HMDB"],collapse = ";") else tmp[,"assi_HMDB"]=NA
+    if (!is.na(peaklist[peaklist_index[1],"iso_HMDB"])) tmp[,"iso_HMDB"]=paste(peaklist[peaklist_index,"iso_HMDB"],collapse = ";") else tmp[,"iso_HMDB"]=NA
+    if (!is.na(peaklist[peaklist_index[1],"HMDB_code"])) tmp[,"HMDB_code"]=paste(peaklist[peaklist_index,"HMDB_code"],collapse = ";") else tmp[,"HMDB_code"]=NA
+    if (peaklist[peaklist_index[1],"assi_noise"]!="") tmp[,"assi_noise"]=paste(peaklist[peaklist_index,"assi_noise"],collapse = ";") else tmp[,"assi_noise"]=""
+    
+    collect = rbind(collect, tmp)
+    remove = c(remove, peaklist_index)
+    
+    index=index[-which(index==index[1])]
+  } 
+  
+  peaklist = peaklist[-remove,]
+  peaklist = rbind(peaklist,collect)
+  
+  return(peaklist)
+}
diff --git a/DIMS/AddOnFunctions/remove.dupl.R b/DIMS/AddOnFunctions/remove.dupl.R
new file mode 100644
index 0000000..227f207
--- /dev/null
+++ b/DIMS/AddOnFunctions/remove.dupl.R
@@ -0,0 +1,39 @@
+# remove duplicates, peak groups with mz within a few ppm
+# ppmdef should be 2 times bigger than during identification!!!
+remove.dupl <- function(peaklist, ppmdef=4, tolint=0.01) {
+  
+  #   peaklist = outpgrlist
+  #   ppmdef = 2
+  #   tolint = 0.01
+  
+  int.cols = 7:ncol(peaklist)
+  
+  p <- 1
+  while (p < nrow(peaklist)) {
+    mzref <- peaklist[p, "mzmed.pgrp"]
+    # print(mzref)
+    dist.ppm <- ppmdef * mzref / 1000000 
+    sel <- peaklist[ , "mzmed.pgrp"] > (mzref - dist.ppm) & peaklist[ , "mzmed.pgrp"] < (mzref + dist.ppm)
+    subset <- peaklist[sel, ]
+    if (nrow(subset) > 1) {
+      avi <- rep(1, max(int.cols))
+      for (c in int.cols) { avi[c] <- max(subset[ ,c])/mean(subset[ ,c]) }
+      
+      # remove NaN
+      avi[which(is.nan(avi))] = 1
+      
+      if (mean(avi) > (1-tolint) & mean(avi < (1+tolint))) { 
+        peaklist <- peaklist[-which(sel), ]
+        newline <- subset[1, ]
+        newline[ , "mzmed.pgrp"] <- mean(subset[ , "mzmed.pgrp"])
+        newline[ , "mzmin.pgrp"] <- min(subset[ , "mzmin.pgrp"])
+        newline[ , "mzmax.pgrp"] <- max(subset[ , "mzmax.pgrp"])
+        newline[ , int.cols] <- apply(subset[ , int.cols], 2, max)
+        # newline[ , "avg.int"] <- mean(as.numeric(newline[ , int.cols]))
+        peaklist <- rbind(peaklist, newline)
+      }
+    }
+    p <- p + 1
+  }
+  peaklist
+}
diff --git a/DIMS/AddOnFunctions/removeFromRepl.pat.R b/DIMS/AddOnFunctions/removeFromRepl.pat.R
new file mode 100644
index 0000000..ad5b1c8
--- /dev/null
+++ b/DIMS/AddOnFunctions/removeFromRepl.pat.R
@@ -0,0 +1,31 @@
+removeFromRepl.pat <- function(bad_samples, repl_pattern, nr_replicates) {
+
+  tmp = repl_pattern
+
+  removeFromGroup=NULL
+
+  for (i in 1:length(tmp)){
+    tmp2 = repl_pattern[[i]]
+
+    remove=NULL
+
+    for (j in 1:length(tmp2)){
+      if (tmp2[j] %in% bad_samples){
+        #cat(tmp2[j])
+        #cat(paste("remove",tmp2[j]))
+        #cat(paste("remove i",i))
+        #cat(paste("remove j",j))
+        remove = c(remove, j)
+      }
+    }
+
+    if (length(remove)==nr_replicates) removeFromGroup=c(removeFromGroup,i)
+    if (!is.null(remove)) repl_pattern[[i]]=repl_pattern[[i]][-remove]
+  }
+
+  if (length(removeFromGroup)!=0) {
+    repl_pattern=repl_pattern[-removeFromGroup]
+  }
+
+  return(list("pattern"=repl_pattern))
+}
diff --git a/DIMS/AddOnFunctions/replaceZeros.R b/DIMS/AddOnFunctions/replaceZeros.R
new file mode 100644
index 0000000..234d961
--- /dev/null
+++ b/DIMS/AddOnFunctions/replaceZeros.R
@@ -0,0 +1,90 @@
+replaceZeros <- function(outpgrlist, repl_pattern, scanmode, resol, outdir, thresh, ppm) {
+  # file="./results/grouping_rest/negative_1.RData"
+  # scanmode= "negative"
+  # scriptDir="./scripts"
+  # resol=140000
+  # thresh=2000
+  # outdir="./results"
+  
+  # control_label="C"
+  
+  # source(paste(scriptDir, "AddOnFunctions/sourceDir.R", sep="/"))
+  # sourceDir(paste(scriptDir, "AddOnFunctions", sep="/"))
+  
+  # dir.create(paste(outdir, "9-samplePeaksFilled", sep="/"), showWarnings = FALSE)
+  
+  # int.factor=1*10^5 # Number of x used to calc area under Gaussian (is not analytic)
+  # scale=2 # Initial value used to estimate scaling parameter
+  # width=1024
+  # height=768
+  
+  # load(paste0(outdir, "/repl.pattern.",scanmode, ".RData"))
+ 
+  # batch_number = strsplit(basename(HMDB_part_file), ".",fixed = TRUE)[[1]][2]
+  # name = as.vector(unlist(strsplit(file, "/", fixed=TRUE)))
+  # name = name[length(name)]
+  # message(paste("File name: ", name))
+  
+  # load samplePeaks
+  # load  outpgrlist
+  # load(file)
+  
+  # # filter on at least signal in two control samples
+  # int.cols = grep(control_label, colnames(outpgrlist),fixed = TRUE)
+  # # barplot(as.numeric(outpgrlist[753, int.cols]))
+  # keep = NULL
+  # keep = apply(outpgrlist, 1, function(x) if (length(which(as.numeric(x[int.cols]) > 0)) > 1) keep=c(keep,TRUE) else keep=c(keep,FALSE))
+  # outpgrlist = outpgrlist[keep,]
+  
+  # replace zeros
+  if (!is.null(outpgrlist)) {
+	  print(dim(outpgrlist))
+	  print(colnames(outpgrlist))
+    for (i in 1:length(names(repl_pattern))){
+	    print(names(repl_pattern)[i])
+      samplePeaks=outpgrlist[,names(repl_pattern)[i]]
+      index=which(samplePeaks<=0)
+      if (!length(index)){
+        next
+      }
+      for (j in 1:length(index)){
+        area = generateGaussian(outpgrlist[index[j],"mzmed.pgrp"],thresh,resol,FALSE,scanmode,int.factor=1*10^5,1,1)$area
+        # for testing purposes, add a fixed random seed
+        # set.seed(123)
+        outpgrlist[index[j], names(repl_pattern)[i]] = rnorm(n=1, mean=area, sd=0.25*area)
+      }
+    }
+  
+  
+    # Identification 
+    
+    # Add average column
+    outpgrlist = cbind(outpgrlist, "avg.int"=apply(outpgrlist[, 7:(ncol(outpgrlist)-4)], 1, mean))
+    
+    if (scanmode=="negative"){
+      label = "MNeg"
+      label2 = "Negative"
+      # take out multiple NaCl adducts
+      look4.add2 <- c("Cl", "Cl37", "For", "NaCl","KCl","H2PO4","HSO4","Na-H","K-H","H2O","I") # ,"min2H","min3H"
+      # HMDB_add_iso=HMDB_add_iso.Neg
+    } else {
+      label = "Mpos"
+      label2 = "Positive"
+      # take out NaCl adducts
+      look4.add2 <- c("Na", "K", "NaCl", "NH4","2Na-H","CH3OH","KCl","NaK-H") # ,"NaCl2","NaCl3","NaCl4","NaCl5")
+      # HMDB_add_iso=HMDB_add_iso.Pos
+    }
+    
+    # Identify noise peaks
+    noise.MZ <- read.table(file="/hpc/dbg_mz/tools/db/TheoreticalMZ_NegPos_incNaCl.txt", sep="\t", header=TRUE, quote = "")
+    noise.MZ <- noise.MZ[(noise.MZ[ , label] != 0), 1:4]
+    final.outlist.idpat2 = ident.hires.noise.HPC(outpgrlist, allAdducts, scanmode=label2, noise.MZ, look4=look4.add2, resol=resol, slope=0, incpt=0, ppm.fixed=ppm, ppm.iso.fixed=ppm)
+    tmp <- final.outlist.idpat2[ , c("assi", "theormz")]
+    colnames(tmp) <- c("assi_noise",  "theormz_noise")
+    
+    final.outlist.idpat3 <- cbind(outpgrlist, tmp)
+   
+    return(final.outlist.idpat3) 
+    # save(final.outlist.idpat3, file=paste("./", name, sep=""))
+  }
+}
diff --git a/DIMS/AddOnFunctions/replaceZeros_setseed.R b/DIMS/AddOnFunctions/replaceZeros_setseed.R
new file mode 100644
index 0000000..4fafc9a
--- /dev/null
+++ b/DIMS/AddOnFunctions/replaceZeros_setseed.R
@@ -0,0 +1,108 @@
+replaceZeros <- function(file,scanmode,resol,outdir,thresh,scriptDir,ppm){
+  # file="./results/grouping_rest/negative_1.RData"
+  # scanmode= "negative"
+  # scriptDir="./scripts"
+  # resol=140000
+  # thresh=2000
+  # outdir="./results"
+  
+  control_label="C"
+  
+  source(paste(scriptDir, "AddOnFunctions/sourceDir.R", sep="/"))
+  sourceDir(paste(scriptDir, "AddOnFunctions", sep="/"))
+  
+  dir.create(paste(outdir, "9-samplePeaksFilled", sep="/"), showWarnings = FALSE)
+  
+  # int.factor=1*10^5 # Number of x used to calc area under Gaussian (is not analytic)
+  # scale=2 # Initial value used to estimate scaling parameter
+  # width=1024
+  # height=768
+  
+  # message(paste("file", file))
+  # message(paste("scanmode", scanmode))
+  # message(paste("resol", resol))
+  # message(paste("outdir", outdir))
+  # message(paste("thresh", thresh))
+  # message(paste("scriptDir", scriptDir))
+  
+  load(paste0(outdir, "/repl.pattern.",scanmode, ".RData"))
+  
+  name = as.vector(unlist(strsplit(file, "/", fixed=TRUE)))
+  name = name[length(name)]
+  # message(paste("File name: ", name))
+  
+  # load samplePeaks
+  # load  outpgrlist
+  load(file)
+  
+  # #################################################################################
+  # # filter on at least signal in two control samples
+  # int.cols = grep(control_label, colnames(outpgrlist),fixed = TRUE)
+  # # barplot(as.numeric(outpgrlist[753, int.cols]))
+  # keep = NULL
+  # keep = apply(outpgrlist, 1, function(x) if (length(which(as.numeric(x[int.cols]) > 0)) > 1) keep=c(keep,TRUE) else keep=c(keep,FALSE))
+  # outpgrlist = outpgrlist[keep,]
+  # #################################################################################
+  
+  ################################################################################
+  # For now only replace zeros
+  if (!is.null(outpgrlist)) {
+    for (i in 1:length(names(repl.pattern.filtered))){
+      samplePeaks=outpgrlist[,names(repl.pattern.filtered)[i]]
+      index=which(samplePeaks<=0)
+      if (!length(index)){
+        next
+      }
+      for (j in 1:length(index)){
+        area = generateGaussian(outpgrlist[index[j],"mzmed.pgrp"],thresh,resol,FALSE,scanmode,int.factor=1*10^5,1,1)$area
+        # for testing purposes, add a fixed random seed
+        set.seed(123)
+        outpgrlist[index[j], names(repl.pattern.filtered)[i]] = rnorm(n=1, mean=area, sd=0.25*area)
+      }
+    }
+  
+  ################################################################################
+  
+  
+  #################### identification #########################################################
+    # load(paste(scriptDir, "../db/HMDB_add_iso_corrNaCl.RData", sep="/")) # E:\Metabolomics\LargeDataBase\Apr25_2016
+    
+    # Add average column
+    outpgrlist = cbind(outpgrlist, "avg.int"=apply(outpgrlist[, 7:(ncol(outpgrlist)-4)], 1, mean))
+    
+    if (scanmode=="negative"){
+      label = "MNeg"
+      label2 = "Negative"
+      # take out multiple NaCl adducts
+      look4.add2 <- c("Cl", "Cl37", "For", "NaCl","KCl","H2PO4","HSO4","Na-H","K-H","H2O","I") # ,"min2H","min3H"
+      # HMDB_add_iso=HMDB_add_iso.Neg
+    } else {
+      label = "Mpos"
+      label2 = "Positive"
+      # take out NaCl adducts
+      look4.add2 <- c("Na", "K", "NaCl", "NH4","2Na-H","CH3OH","KCl","NaK-H") # ,"NaCl2","NaCl3","NaCl4","NaCl5")
+      # HMDB_add_iso=HMDB_add_iso.Pos
+    }
+    
+    # # Identification using large database
+    # final.outlist.idpat = iden.code(outpgrlist, HMDB_add_iso, ppm=2, label)
+    # message(paste(sum(final.outlist.idpat[ , "assi_HMDB"] != ""), "assigned peakgroups"))
+    # message(paste(sum(final.outlist.idpat[ , "iso_HMDB"] != ""), "assigned isomeres"))
+    
+    # Identify noise peaks
+    noise.MZ <- read.table(file="/hpc/dbg_mz/tools/db/TheoreticalMZ_NegPos_incNaCl.txt", sep="\t", header=TRUE, quote = "")
+    noise.MZ <- noise.MZ[(noise.MZ[ , label] != 0), 1:4]
+    
+    # Replace "Negative" by "negative" in ident.hires.noise
+    final.outlist.idpat2 = ident.hires.noise.HPC(outpgrlist, allAdducts, scanmode=label2, noise.MZ, look4=look4.add2, resol=resol, slope=0, incpt=0, ppm.fixed=ppm, ppm.iso.fixed=ppm)
+    # message(paste(sum(final.outlist.idpat2[ , "assi"] != ""), "assigned noise peaks"))
+    tmp <- final.outlist.idpat2[ , c("assi", "theormz")]
+    colnames(tmp) <- c("assi_noise",  "theormz_noise")
+    
+    final.outlist.idpat3 <- cbind(outpgrlist, tmp)
+    #############################################################################################
+    
+    # message(paste("File saved: ", paste(outdir, "/samplePeaksFilled/", name, sep="")))
+    save(final.outlist.idpat3, file=paste(outdir, "/9-samplePeaksFilled/", name, sep=""))
+  }
+}
diff --git a/DIMS/AddOnFunctions/run.vbs b/DIMS/AddOnFunctions/run.vbs
new file mode 100644
index 0000000..640116a
--- /dev/null
+++ b/DIMS/AddOnFunctions/run.vbs
@@ -0,0 +1,22 @@
+Option Explicit
+On Error Resume Next
+RunExcelMacro
+
+Sub RunExcelMacro() 
+
+Dim xlApp 
+Dim xlBook 
+Dim xlBook_persenal
+
+Set xlApp = CreateObject("Excel.Application")
+xlApp.DisplayAlerts = FALSE
+Set xlBook_persenal = xlApp.Workbooks.Open("C:\Users\awillem8\AppData\Roaming\Microsoft\Excel\XLSTART\PERSONAL.XLSB", 0, True)
+Set xlBook = xlApp.Workbooks.Open("C:\Users\mkerkho7\testmap\xls\P181_Aberant_HMDB.xlsx", 0, True)
+xlApp.Run "PERSONAL.XLSB!FixAndResize"
+xlBook.SaveAs "C:\Users\mkerkho7\testmap\results\P181_Aberant_HMDB.xlsx"
+xlBook.Close False 
+xlApp.Quit
+Set xlBook = Nothing
+Set xlBook_persenal = Nothing 
+Set xlApp = Nothing 
+End Sub
diff --git a/DIMS/AddOnFunctions/runVBAMacro.R b/DIMS/AddOnFunctions/runVBAMacro.R
new file mode 100644
index 0000000..3836b2f
--- /dev/null
+++ b/DIMS/AddOnFunctions/runVBAMacro.R
@@ -0,0 +1,48 @@
+runVBAMacro <- function(dir, dir2, vb_script) {
+  
+#   dir="E:\\Metabolomics\\Lynne_BSP-2015-08-05\\results\\xls\\"
+#   dir2="E:\\Metabolomics\\Lynne_BSP-2015-08-05\\results\\xls_fixed\\"  
+  
+dir.create(dir2)
+files=list.files(dir)
+
+script_1 = paste("Option Explicit
+On Error Resume Next
+RunExcelMacro
+
+Sub RunExcelMacro() 
+
+Dim xlApp 
+Dim xlBook 
+Dim xlBook_persenal
+
+Set xlApp = CreateObject(\"Excel.Application\")
+xlApp.DisplayAlerts = FALSE
+Set xlBook_persenal = xlApp.Workbooks.Open(\"C:\\Users\\awillem8\\AppData\\Roaming\\Microsoft\\Excel\\XLSTART\\PERSONAL.XLSB\", 0, True)
+Set xlBook = xlApp.Workbooks.Open(\"", dir, sep="") 
+
+script_2 = "\", 0, True)
+xlApp.Run \"PERSONAL.XLSB!FixAndResize\"
+xlBook.SaveAs \""
+
+script_3 = "\"
+xlBook.Close False 
+xlApp.Quit
+Set xlBook = Nothing
+Set xlBook_persenal = Nothing 
+Set xlApp = Nothing 
+End Sub"
+
+for (i in 1:length(files)){
+  script = paste(script_1, files[i], script_2, dir2, files[i], script_3, sep="")  # dir2, files[i], 
+  
+  message(script)
+  
+  fileConn = file("./src/run.vbs")
+  writeLines(script, fileConn)
+  close(fileConn)
+  
+  system(vb_script)
+}
+}
+
diff --git a/DIMS/AddOnFunctions/searchMZRange.R b/DIMS/AddOnFunctions/searchMZRange.R
new file mode 100644
index 0000000..43a7171
--- /dev/null
+++ b/DIMS/AddOnFunctions/searchMZRange.R
@@ -0,0 +1,187 @@
+searchMZRange <- function(range,values,int.factor,scale,resol,outdir,sampname,scanmode,plot,width,height,thresh){
+  # range=sub_range
+  
+  end=NULL
+  index=as.vector(which(range!=0))
+  
+  # bad infusion
+  if (length(index)==0) return(values)
+  
+  start=index[1]
+  subRangeLength = 15
+  
+  for (i in 1:length(index)){
+    # for (i in 1:129000){
+    
+    if (i<length(index) & (index[i+1] - index[i]) > 1){
+      
+      end=index[i]
+      
+      # start=395626
+      # end=395640
+      # i=25824
+      
+      # gaan met de banaan
+      # 128836
+      # start 1272853
+      # mz start 316.83302
+      # end 1272870
+      # mz end 316.84269
+      
+      x = as.numeric(names(range)[c(start:end)])
+      y = as.vector(range[c(start:end)])
+      #       # Trim zeros
+      #       x = as.vector(trimZeros(x,y)[[1]])
+      #       y = as.vector(trimZeros(x,y)[[2]])
+      
+      if (length(y)!=0) {
+        
+        # check if intensity above thresh
+        if (max(y) < thresh | is.nan(max(y))) {
+          start=index[i+1]
+          next
+        }
+        
+        # cat("gaan met de banaan")
+        # cat(i)
+        # cat(paste("start", start, sep=" "))
+        # cat(paste("mz start", x[1], sep=" "))
+        # cat(paste("end", end, sep=" "))
+        # cat(paste("mz end", x[length(x)], sep=" "))
+        
+        if (length(y)>subRangeLength) {
+          
+          y[which(y<min(y)*1.1)] = 0
+          sub_range = y
+          names(sub_range) = x
+          
+          # Range to long!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+          values = searchMZRange(sub_range,values,int.factor,scale,resol,outdir,sampname,scanmode,plot,width,height,thresh)
+          
+        } else if (length(y)>3) {
+          # Check only zeros
+          if (sum(y)==0) next
+          
+          rval = fitGaussianInit(x,y,int.factor,scale,resol,outdir,sampname, scanmode, plot,width,height,i,start,end)
+          
+          if (rval$qual[1]==1) {
+            
+            rval = generateGaussian(x,y,resol,plot,scanmode,int.factor, width, height)
+            
+            values$mean = c(values$mean, rval$mean)
+            values$area = c(values$area, rval$area)
+            values$nr = c(values$nr, sampname)
+            values$min = c(values$min, rval$min)
+            values$max = c(values$max, rval$max)
+            values$qual = c(values$qual, 0)
+            
+            values$spikes = values$spikes + 1
+            
+          } else {
+            for (j in 1:length(rval$mean)){
+              values$mean = c(values$mean, rval$mean[j])
+              values$area = c(values$area, rval$area[j])
+              values$nr = c(values$nr, sampname)
+              values$min = c(values$min, rval$min[1])
+              values$max = c(values$max, rval$max[1])
+              values$qual = c(values$qual, rval$qual[1])
+            }
+          }
+          
+        } else {
+          
+          rval = generateGaussian(x,y,resol,plot,scanmode,int.factor, width, height)
+          
+          values$mean = c(values$mean, rval$mean)
+          values$area = c(values$area, rval$area)
+          values$nr = c(values$nr, sampname)
+          values$min = c(values$min, rval$min)
+          values$max = c(values$max, rval$max)
+          values$qual = c(values$qual, 0)
+          
+          values$spikes = values$spikes + 1
+        }
+      }
+      start=index[i+1]
+    }
+  }
+  
+  # last little range
+  end = index[length(index)]
+  x = as.numeric(names(range)[c(start:end)])
+  y = as.vector(range[c(start:end)])
+  
+  #   x = as.vector(trimZeros(x,y)[[1]])
+  #   y = as.vector(trimZeros(x,y)[[2]])
+  
+  if (length(y)!=0) {
+    
+    # check if intensity above thresh
+    if (max(y) < thresh | is.nan(max(y))) {
+      #start=index[i+1]
+      # do nothing!!
+    } else {
+      
+      #       cat("gaan met de banaan")
+      #       cat(paste("start", start, sep=" "))
+      #       cat(paste("mz start", x[1], sep=" "))
+      #       cat(paste("end", end, sep=" "))
+      #       cat(paste("mz end", x[length(x)], sep=" "))
+      
+      if (length(y)>subRangeLength) {
+        
+        y[which(y<min(y)*1.1)] = 0
+        sub_range = y
+        names(sub_range) = x
+        
+        # Range to long!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+        values = searchMZRange(sub_range,values,int.factor,scale,resol,outdir,sampname,scanmode,plot,width,height,thresh)
+        
+      } else if (length(y)>3) {
+        
+        # Check only zeros
+        if (sum(y)==0) next
+        
+        rval = fitGaussianInit(x,y,int.factor,scale,resol,outdir,sampname,scanmode,plot,width,height,i,start,end)
+        
+        if (rval$qual[1]==1) {
+          #cat("Quality = 1!!!")
+          rval = generateGaussian(x,y,resol,plot,scanmode,int.factor, width, height)
+          
+          values$mean = c(values$mean, rval$mean)
+          values$area = c(values$area, rval$area)
+          values$nr = c(values$nr, sampname)
+          values$min = c(values$min, rval$min)
+          values$max = c(values$max, rval$max)
+          values$qual = c(values$qual, 0)
+          
+          values$spikes = values$spikes + 1
+          
+        } else {
+          for (j in 1:length(rval$mean)){
+            values$mean = c(values$mean, rval$mean[j])
+            values$area = c(values$area, rval$area[j])
+            values$nr = c(values$nr, sampname)
+            values$min = c(values$min, rval$min[1])
+            values$max = c(values$max, rval$max[1])
+            values$qual = c(values$qual, rval$qual[1])
+          }
+        }
+      } else {
+        rval = generateGaussian(x,y,resol,plot,scanmode,int.factor, width, height)
+        
+        values$mean = c(values$mean, rval$mean)
+        values$area = c(values$area, rval$area)
+        values$nr = c(values$nr, sampname)
+        values$min = c(values$min, rval$min)
+        values$max = c(values$max, rval$max)
+        values$qual = c(values$qual, 0)
+        
+        values$spikes = values$spikes + 1
+      }
+    }
+  }
+  
+  return(values)
+  #return(list("mean"=retVal$mean, "area"=retVal$area, "qual"=retVal$qual, "min"=retVal$min, "max"=retVal$max, "nr"=sample.nr, "spikes"=spikes))
+}
diff --git a/DIMS/AddOnFunctions/sourceDir.R b/DIMS/AddOnFunctions/sourceDir.R
new file mode 100644
index 0000000..abf4c9e
--- /dev/null
+++ b/DIMS/AddOnFunctions/sourceDir.R
@@ -0,0 +1,8 @@
+# source add on functions
+sourceDir <- function(path, trace = TRUE, ...) {
+  for (nm in list.files(path, pattern = "[.][RrSsQq]$")) {
+    #if(trace) cat(nm,":")
+    source(file.path(path, nm), ...)
+    #if(trace) cat("\n")
+  }
+}
diff --git a/DIMS/AddOnFunctions/statistics_z.R b/DIMS/AddOnFunctions/statistics_z.R
new file mode 100644
index 0000000..0a92d3d
--- /dev/null
+++ b/DIMS/AddOnFunctions/statistics_z.R
@@ -0,0 +1,69 @@
+statistics_z <- function(peaklist, sortCol, adducts){
+  # peaklist=as.data.frame(outlist.adducts.HMDB)
+  # plotdir="./results/plots/adducts"
+  # filename="./results/allpgrps_stats.txt"
+  # adducts=TRUE
+  
+  # peaklist=outlist.tot
+  # sortCol=NULL
+  # adducts=FALSE
+  
+  case_label = "P"
+  control_label = "C"
+  startcol = dim(peaklist)[2]+3 
+  
+  # calculate mean and sd for Control group
+  ctrl.cols <- grep(control_label, colnames(peaklist),fixed = TRUE) # 5:41
+  int.cols <- c(grep(control_label, colnames(peaklist),fixed = TRUE), grep(case_label, colnames(peaklist),fixed = TRUE))
+  peaklist[,int.cols][peaklist[,int.cols]==0] = NA
+  
+  # tmp = data.matrix(peaklist[ , ctrl.cols], rownames.force = TRUE)
+  tmp = peaklist[ , ctrl.cols, drop=FALSE]
+  
+  peaklist$avg.ctrls = apply(tmp, 1, function(x) mean(as.numeric(x),na.rm = TRUE))
+  peaklist$sd.ctrls = apply(tmp, 1, function(x) sd(as.numeric(x),na.rm = TRUE))
+  
+  cnames.z = NULL
+  
+  for (i in int.cols) {
+    # message(i)
+    cname = colnames(peaklist)[i]
+    cnames.z = c(cnames.z, paste(cname, "Zscore", sep="_"))
+    zscores.1col = (as.numeric(as.vector(unlist(peaklist[ , i]))) - peaklist$avg.ctrls) / peaklist$sd.ctrls
+    peaklist = cbind(peaklist, zscores.1col)
+  }
+  
+  colnames(peaklist)[startcol:ncol(peaklist)] = cnames.z
+  
+  z.cols = grep("Zscore", colnames(peaklist),fixed = TRUE)
+  
+  if (!adducts){
+    if ((dim(peaklist[, z.cols])[2]+6)!=(startcol-1)){
+      ppmdev=array(1:dim(peaklist)[1], dim=c(dim(peaklist)[1]))
+      
+      # calculate ppm deviation
+      for (i in 1:dim(peaklist)[1]){
+        if (!is.na(peaklist$theormz_HMDB[i]) & !is.null(peaklist$theormz_HMDB[i]) & (peaklist$theormz_HMDB[i]!="")){
+          ppmdev[i] = 10^6*(as.numeric(as.vector(peaklist$mzmed.pgrp[i]))-as.numeric(as.vector(peaklist$theormz_HMDB[i])))/as.numeric(as.vector(peaklist$theormz_HMDB[i]))
+        } else {
+          ppmdev[i]=NA
+        }
+      }
+      
+      peaklist = cbind(peaklist[, 1:6], ppmdev=ppmdev, peaklist[ , 7:ncol(peaklist)])
+    }  
+  }
+  
+  #peaklist = peaklist[order(peaklist[,sortCol]),]
+  
+  #   # Order on average Z-score
+  #   tmp = peaklist[,grep("Zscore", colnames(peaklist))]
+  #   tmp.p = tmp[,grep("P", colnames(tmp)),drop=FALSE]
+  #   tmp.p.avg = apply(tmp.p, 1, mean)
+  #   
+  #   peaklist = cbind(peaklist, "avg.z.score"=tmp.p.avg)
+  #   peaklist = peaklist[order(abs(tmp.p.avg), decreasing = TRUE),]
+  
+  return(peaklist)
+  
+}  
diff --git a/DIMS/AddOnFunctions/sumCurves.R b/DIMS/AddOnFunctions/sumCurves.R
new file mode 100644
index 0000000..bcc1485
--- /dev/null
+++ b/DIMS/AddOnFunctions/sumCurves.R
@@ -0,0 +1,39 @@
+sumCurves <- function(mean1, mean2, scale1, scale2, sigma1, sigma2, x2, x, resol, plot) {
+  #   mean1=mean[i-1]
+  #   mean2=mean[i]
+  #   scale1=scale[i-1]
+  #   scale2=scale[i]
+  #   sigma1=sigma[i-1]
+  #   sigma2=sigma[i]
+  
+  # message("=============================================================> sum 2 curves!")
+  
+  sumFit=(scale1*dnorm(x2,mean1,sigma1))+(scale2*dnorm(x2,mean2,sigma2))
+  if (plot) lines(x2,sumFit,col="brown")
+  
+  #mean1plus2 = mean(c(mean1,mean2))
+  mean1plus2 = weighted.mean(c(mean1,mean2),c(max(scale1*dnorm(x2,mean1,sigma1)),max(scale2*dnorm(x2,mean2,sigma2))))
+  
+  if (plot) abline(v = mean1plus2, col="brown")
+  fwhm = getFwhm(mean1plus2, resol)
+  half_max = max(sumFit)*0.5
+  if (plot) lines(c(mean1plus2 - 0.5*fwhm, mean1plus2 + 0.5*fwhm),c(half_max,half_max),col="orange")
+  
+  # sumFit=(scale1*dnorm(x,mean1,sigma1))+(scale2*dnorm(x,mean2,sigma2))
+  #  fq=abs(sum(y) - sum(sumFit))/sum(y)
+  
+  #   h2=c(paste("mean =", mean1plus2, sep=" "),
+  #        paste("fq =", fq, sep=" "))
+  #   
+  #   legend("topright", legend=h2)
+  
+  # I assume that the sum of the distributions if also normal, which is not
+  #area = sum(scale1*dnorm(x2,mean1,sigma1))+sum(scale2*dnorm(x2,mean2,sigma2))
+  #area = max(scale1*dnorm(x2,mean1,sigma1))+max(scale2*dnorm(x2,mean2,sigma2))
+  area = max(sumFit) 
+  scale = scale1 + scale2
+  sigma = (fwhm/2)*0.85
+  
+  return(list("mean"= mean1plus2,"area"=area, "scale"=scale, "sigma"=sigma)) # "qual"=fq
+  
+} 
diff --git a/DIMS/AddOnFunctions/trimZeros.R b/DIMS/AddOnFunctions/trimZeros.R
new file mode 100644
index 0000000..0fc5ea3
--- /dev/null
+++ b/DIMS/AddOnFunctions/trimZeros.R
@@ -0,0 +1,8 @@
+trimZeros <- function(x, y) {
+  tmp = which(y==0)
+  if (length(tmp)!=0){
+    y = y[-tmp]
+    x = x[-tmp]
+  }
+  return(list(x,y))
+}
diff --git a/DIMS/AssignToBins.R b/DIMS/AssignToBins.R
new file mode 100644
index 0000000..4ca7fce
--- /dev/null
+++ b/DIMS/AssignToBins.R
@@ -0,0 +1,128 @@
+## adapted from 2-DIMS.R
+
+# load required packages
+suppressPackageStartupMessages(library("xcms"))
+
+# define parameters
+cmd_args <- commandArgs(trailingOnly = TRUE)
+
+mzml_filepath <- cmd_args[1]
+breaks_filepath <- cmd_args[2]
+resol <- as.numeric(cmd_args[3])
+trim <- 0.1
+dims_thresh <- 100
+
+# load breaks_fwhm
+load(breaks_filepath)
+
+# get sample name
+sample_name <- sub("\\..*$", "", basename(mzml_filepath))
+
+options(digits = 16)
+
+# Initialize
+pos_results <- NULL
+neg_results <- NULL
+
+# read in the data for 1 sample
+raw_data <- suppressMessages(xcms::xcmsRaw(mzml_filepath))
+
+# for TIC plots: prepare txt files with data for plots
+tic_intensity_persample <- cbind(round(raw_data@scantime, 2), raw_data@tic)
+colnames(tic_intensity_persample) <- c("retention_time", "tic_intensity")
+write.table(tic_intensity_persample, file = paste0(sample_name, "_TIC.txt"))
+
+# Create empty placeholders for later use
+bins <- rep(0, length(breaks_fwhm) - 1)
+pos_bins <- bins
+neg_bins <- bins
+
+# Generate a matrix
+raw_data_matrix <- xcms::rawMat(raw_data)
+
+# Get time values for positive and negative scans
+pos_times <- raw_data@scantime[raw_data@polarity == "positive"]
+neg_times <- raw_data@scantime[raw_data@polarity == "negative"]
+# Select scans between trim_left and trim_right
+pos_times <- pos_times[pos_times > trim_left & pos_times < trim_right]
+neg_times <- neg_times[neg_times > trim_left & neg_times < trim_right]
+
+# Generate an index with which to select values for each mode
+pos_index <- which(raw_data_matrix[, "time"] %in% pos_times)
+neg_index <- which(raw_data_matrix[, "time"] %in% neg_times)
+# Separate each mode into its own matrix
+pos_raw_data_matrix <- raw_data_matrix[pos_index, ]
+neg_raw_data_matrix <- raw_data_matrix[neg_index, ]
+
+# Get index for binning intensity values
+bin_indices_pos <- cut(
+  pos_raw_data_matrix[, "mz"], 
+  breaks_fwhm,
+  include.lowest = TRUE, 
+  right = TRUE, 
+  labels = FALSE
+)
+bin_indices_neg <- cut(
+  neg_raw_data_matrix[, "mz"], 
+  breaks_fwhm, 
+  include.lowest = TRUE, 
+  right = TRUE, 
+  labels = FALSE
+)
+
+# Get the list of intensity values for each bin, and add the
+# intensity values which are in the same bin
+if (nrow(pos_raw_data_matrix) > 0) {
+  # set NA in intensities to zero
+  pos_raw_data_matrix[is.na(pos_raw_data_matrix[, "intensity"]), "intensity"] <- 0
+  # aggregate intensities, calculate mean, use only values above dims_thresh
+  aggr_int_pos <- stats::aggregate(pos_raw_data_matrix[, "intensity"],
+				   list(bin_indices_pos),
+				   FUN = function(x) { mean(x[which(x > dims_thresh)]) })
+  # set NA to zero in second column
+  aggr_int_pos[is.na(aggr_int_pos[, 2]), 2] <- 0
+  pos_bins[aggr_int_pos[, 1]] <- aggr_int_pos[, 2]
+}
+if (nrow(neg_raw_data_matrix) > 0) {
+  # set NA in intensities to zero
+  neg_raw_data_matrix[is.na(neg_raw_data_matrix[, "intensity"]), "intensity"] <- 0
+  # aggregate intensities, calculate mean, use only values above dims_thresh
+  aggr_int_neg <- stats::aggregate(neg_raw_data_matrix[, "intensity"],
+				   list(bin_indices_neg),
+				   FUN = function(x) { mean(x[which(x > dims_thresh)]) })
+  # set NA to zero in second column
+  aggr_int_neg[is.na(aggr_int_neg[, 2]), 2] <- 0
+  neg_bins[aggr_int_neg[, 1]] <- aggr_int_neg[, 2]
+}
+
+# Zero any values that are below the threshold
+pos_bins[pos_bins < dims_thresh] <- 0
+neg_bins[neg_bins < dims_thresh] <- 0
+
+pos_results <- cbind(pos_results, pos_bins)
+neg_results <- cbind(neg_results, neg_bins)
+
+# transpose
+pos_results_transpose <- t(pos_results)
+neg_results_transpose <- t(neg_results)
+
+# Add file names as row names
+rownames(pos_results_transpose) <- sample_name
+rownames(neg_results_transpose) <- sample_name
+
+# delete the last value of breaks_fwhm_avg to match dimensions of pos_results and neg_results
+breaks_fwhm_avg_minuslast <- breaks_fwhm_avg[-length(breaks_fwhm_avg)]
+# Format as string and show precision of float to 5 digits
+breaks_fwhm_avg_minuslast <- sprintf("%.5f", breaks_fwhm_avg_minuslast)
+
+# Use this as the column names
+colnames(pos_results_transpose) <- breaks_fwhm_avg_minuslast
+colnames(neg_results_transpose) <- breaks_fwhm_avg_minuslast
+
+# transpose back
+pos_results_final <- t(pos_results_transpose)
+neg_results_final <- t(neg_results_transpose)
+
+peak_list <- list("pos" = pos_results_final, "neg" = neg_results_final, "breaksFwhm" = breaks_fwhm)
+
+save(peak_list, file = paste0(sample_name, ".RData"))
diff --git a/DIMS/AssignToBins.nf b/DIMS/AssignToBins.nf
new file mode 100644
index 0000000..d0a7098
--- /dev/null
+++ b/DIMS/AssignToBins.nf
@@ -0,0 +1,20 @@
+process AssignToBins {
+    tag "DIMS AssignToBins ${file_id}"
+    label 'AssignToBins'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       tuple(val(file_id), path(mzML_file), path(breaks_file))
+
+    output:
+       path("${file_id}.RData"), emit: rdata_file
+       path("${file_id}_TIC.txt"), emit: tic_txt_file
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/AssignToBins.R $mzML_file $breaks_file $params.resolution
+        """
+}
+
+
diff --git a/DIMS/AverageTechReplicates.R b/DIMS/AverageTechReplicates.R
new file mode 100644
index 0000000..6a773f5
--- /dev/null
+++ b/DIMS/AverageTechReplicates.R
@@ -0,0 +1,190 @@
+# adapted from 3-AverageTechReplicates.R
+
+# load packages
+library("ggplot2")
+library("gridExtra")
+
+# define parameters
+cmd_args <- commandArgs(trailingOnly = TRUE)
+
+init_file <- cmd_args[1]
+nr_replicates <- as.numeric(cmd_args[2])
+run_name <- cmd_args[3]
+dims_matrix <- cmd_args[4]
+highest_mz_file <- cmd_args[5]
+highest_mz <- get(load(highest_mz_file))
+thresh2remove <- 1000000000
+dims_thresh <- 100
+
+remove_from_repl_pattern <- function(bad_samples, repl_pattern, nr_replicates) {
+  # collect list of samples to remove from replication pattern
+  remove_from_group <- NULL
+  for (sample_nr in 1:length(repl_pattern)){
+    repl_pattern_1sample <- repl_pattern[[sample_nr]]
+    remove <- NULL
+    for (file_nr in 1:length(repl_pattern_1sample)) {
+      if (repl_pattern_1sample[file_nr] %in% bad_samples) {
+        remove <- c(remove, file_nr)
+      }
+    }
+    if (length(remove) == nr_replicates) {
+      remove_from_group <- c(remove_from_group, sample_nr)
+    }
+    if (!is.null(remove)) {
+      repl_pattern[[sample_nr]] <- repl_pattern[[sample_nr]][-remove]
+    }
+  }
+  if (length(remove_from_group) != 0) {
+    repl_pattern <- repl_pattern[-remove_from_group]
+  }
+  return(list("pattern" = repl_pattern))
+}
+
+# get repl_pattern
+load(init_file)
+
+# lower the threshold below which a sample will be removed for DBS and for high m/z
+if (dims_matrix == "DBS") {
+  thresh2remove <- 500000000
+}
+if (highest_mz > 700) {
+  thresh2remove <- 1000000
+}
+
+# remove technical replicates which are below the threshold
+remove_neg <- NULL
+remove_pos <- NULL
+cat("Pklist sum threshold to remove technical replicate:", thresh2remove, "\n")
+for (sample_nr in 1:length(repl_pattern)) {
+  tech_reps <- as.vector(unlist(repl_pattern[sample_nr]))
+  tech_reps_array_pos <- NULL
+  tech_reps_array_neg <- NULL
+  sum_neg <- 0
+  sum_pos <- 0
+  nr_pos <- 0
+  nr_neg <- 0
+  for (file_nr in 1:length(tech_reps)) {
+    load(paste(tech_reps[file_nr], ".RData", sep = ""))
+    cat("\n\nParsing", tech_reps[file_nr])
+    # negative scanmode
+    cat("\n\tNegative peak_list sum", sum(peak_list$neg[, 1]))
+    if (sum(peak_list$neg[, 1]) < thresh2remove) {
+      cat(" ... Removed")
+      remove_neg <- c(remove_neg, tech_reps[file_nr])
+    } else {
+      nr_neg <- nr_neg + 1
+      sum_neg <- sum_neg + peak_list$neg
+    }
+    tech_reps_array_neg <- cbind(tech_reps_array_neg, peak_list$neg)
+    # positive scanmode
+    cat("\n\tPositive peak_list sum", sum(peak_list$pos[, 1]))
+    if (sum(peak_list$pos[, 1]) < thresh2remove) {
+      cat(" ... Removed")
+      remove_pos <- c(remove_pos, tech_reps[file_nr])
+    } else {
+      nr_pos <- nr_pos + 1
+      sum_pos <- sum_pos + peak_list$pos
+    }
+    tech_reps_array_pos <- cbind(tech_reps_array_pos, peak_list$pos)
+  }
+  # save to file  
+  if (nr_neg != 0) {
+    sum_neg[, 1] <- sum_neg[, 1] / nr_neg
+    colnames(sum_neg) <- names(repl_pattern)[sample_nr]
+    save(sum_neg, file = paste0(names(repl_pattern)[sample_nr], "_neg_avg.RData"))
+  }
+  if (nr_pos != 0) {
+    sum_pos[, 1] <- sum_pos[, 1] / nr_pos
+    colnames(sum_pos) <- names(repl_pattern)[sample_nr]
+    save(sum_pos, file = paste0(names(repl_pattern)[sample_nr], "_pos_avg.RData"))
+  }
+}
+
+pattern_list <- remove_from_repl_pattern(remove_neg, repl_pattern, nr_replicates)
+repl_pattern_filtered <- pattern_list$pattern
+save(repl_pattern_filtered, file = "negative_repl_pattern.RData")
+write.table(
+  remove_neg, 
+  file = "miss_infusions_negative.txt", 
+  row.names = FALSE, 
+  col.names = FALSE, 
+  sep = "\t"
+)
+
+pattern_list <- remove_from_repl_pattern(remove_pos, repl_pattern, nr_replicates)
+repl_pattern_filtered <- pattern_list$pattern
+save(repl_pattern_filtered, file = "positive_repl_pattern.RData")
+write.table(
+  remove_pos, 
+  file = "miss_infusions_positive.txt", 
+  row.names = FALSE, 
+  col.names = FALSE, 
+  sep = "\t"
+)
+
+## generate TIC plots
+# get all txt files
+tic_files <- list.files("./", full.names = TRUE, pattern = "*TIC.txt")
+all_samps <- sub("_TIC\\..*$", "", basename(tic_files))
+
+# determine maximum intensity
+highest_tic_max <- 0
+for (file in tic_files) {
+  tic <- read.table(file)
+  this_tic_max <- max(tic$tic_intensity)
+  if (this_tic_max > highest_tic_max) {
+    highest_tic_max <- this_tic_max
+    max_sample <- sub("_TIC\\..*$", "", basename(file))
+  }
+}
+
+# create a list with information for all TIC plots
+tic_plot_list <- list()
+plot_nr <-  0
+for (sample_nr in c(1:length(repl_pattern))) {
+  tech_reps <- as.vector(unlist(repl_pattern[sample_nr]))
+  sample_name <- names(repl_pattern)[sample_nr]
+  for (file_nr in 1:length(tech_reps)) {
+    plot_nr <- plot_nr + 1
+    # repl1_nr <- read.table(paste(paste(outdir, "2-pklist/", sep = "/"), tech_reps[file_nr], "_TIC.txt", sep = ""))
+    repl1_nr <- read.table(paste0(tech_reps[file_nr], "_TIC.txt"))
+    bad_color_pos <- tech_reps[file_nr] %in% remove_pos[[1]]
+    bad_color_neg <- tech_reps[file_nr] %in% remove_neg[[1]]
+    if (bad_color_neg & bad_color_pos) {
+      plot_color <- "#F8766D"
+    } else if (bad_color_pos) {
+      plot_color <- "#ED8141"
+    } else if (bad_color_neg) {
+      plot_color <- "#BF80FF"
+    } else {
+      plot_color <- "white"
+    }
+    tic_plot <- ggplot(repl1_nr, aes(retention_time, tic_intensity)) +
+      geom_line(linewidth = 0.3) +
+      geom_hline(yintercept = highest_tic_max, col = "grey", linetype = 2, linewidth = 0.3) +
+      labs(x = "t (s)", y = "tic_intensity", title = paste0(tech_reps[file_nr], "  ||  ", sample_name)) +
+      theme(plot.background = element_rect(fill = plot_color),
+            axis.text = element_text(size = 4),
+            axis.title = element_text(size = 4),
+            plot.title = element_text(size = 6))
+    tic_plot_list[[plot_nr]] <- tic_plot
+  }
+}
+
+# create a layout matrix dependent on number of replicates
+layout <- matrix(1:(10 * nr_replicates), 10, nr_replicates, TRUE)
+# put TIC plots in matrix
+tic_plot_pdf <- marrangeGrob(
+  grobs = tic_plot_list,
+  nrow = 10, ncol = nr_replicates,
+  layout_matrix = layout,
+  top = quote(paste(
+    "TICs of run", run_name,
+    " \n colors: red = both modes misinjection, orange = pos mode misinjection, purple = neg mode misinjection \n ",
+    g, "/", npages
+  ))
+)
+
+# save to file
+ggsave(filename = paste0(run_name, "_TICplots.pdf"),
+       tic_plot_pdf, width = 21, height = 29.7, units = "cm")
diff --git a/DIMS/AverageTechReplicates.nf b/DIMS/AverageTechReplicates.nf
new file mode 100644
index 0000000..5387e61
--- /dev/null
+++ b/DIMS/AverageTechReplicates.nf
@@ -0,0 +1,33 @@
+process AverageTechReplicates {
+    tag "DIMS AverageTechReplicates"
+    label 'AverageTechReplicates'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       path(rdata_file)
+       path(tic_txt_files)
+       path(init_file)
+       val(nr_replicates)
+       val(analysis_id)
+       val(matrix)
+       path(highest_mz_file)
+
+    output:
+       path('*_repl_pattern.RData'), emit: pattern_files
+       path('*_avg.RData'), emit: binned_files
+       path('miss_infusions_negative.txt')
+       path('miss_infusions_positive.txt')
+       path('*_TICplots.pdf')
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/AverageTechReplicates.R $init_file \
+                                                                      $params.nr_replicates \
+                                                                      $analysis_id \
+                                                                      $matrix \
+                                                                      $highest_mz_file
+        """
+}
+
+
diff --git a/DIMS/CollectFilled.R b/DIMS/CollectFilled.R
new file mode 100755
index 0000000..a0ad358
--- /dev/null
+++ b/DIMS/CollectFilled.R
@@ -0,0 +1,71 @@
+## adapted from 10-collectSamplesFilled.R
+
+# define parameters
+cmd_args <- commandArgs(trailingOnly = TRUE)
+
+scripts_dir <- cmd_args[1]
+ppm <- as.numeric(cmd_args[2])
+z_score <- as.numeric(cmd_args[3])
+
+source(paste0(scripts_dir, "merge_duplicate_rows.R"))
+source(paste0(scripts_dir, "calculate_zscores.R"))
+
+# for each scan mode, collect all filled peak group lists
+scanmodes <- c("positive", "negative")
+for (scanmode in scanmodes) {
+  # get list of files
+  filled_files <- list.files("./", full.names = TRUE, pattern = paste0(scanmode, "_identified_filled"))
+  # load files and combine into one object
+  outlist_total <- NULL
+  for (file_nr in 1:length(filled_files)) {
+    peakgrouplist_filled <- get(load(filled_files[file_nr]))
+    outlist_total <- rbind(outlist_total, peakgrouplist_filled)
+  }
+  # remove duplicates; peak groups with exactly the same m/z
+  outlist_total <- merge_duplicate_rows(outlist_total)
+  # sort on mass
+  outlist_total <- outlist_total[order(outlist_total[, "mzmed.pgrp"]), ]
+  # load replication pattern
+  pattern_file <- paste0(scanmode, "_repl_pattern.RData")
+  repl_pattern <- get(load(pattern_file))
+  # calculate Z-scores
+  if (z_score == 1) {
+    outlist_stats <- calculate_zscores(outlist_total, adducts = FALSE)
+    nr_removed_samples <- length(which(repl_pattern[] == "character(0)"))
+    order_index_int <- order(colnames(outlist_stats)[8:(length(repl_pattern) - nr_removed_samples + 7)])
+    outlist_stats_more <- cbind(
+      outlist_stats[, 1:7],
+      outlist_stats[, (length(repl_pattern) - nr_removed_samples + 8):(length(repl_pattern) - nr_removed_samples + 8 + 6)],
+      outlist_stats[, 8:(length(repl_pattern) - nr_removed_samples + 7)][order_index_int],
+      outlist_stats[, (length(repl_pattern) - nr_removed_samples + 5 + 10):ncol(outlist_stats)]
+    )
+    # sort Z-score columns and append to peak group list
+    tmp_index <- grep("_Zscore", colnames(outlist_stats_more), fixed = TRUE)
+    tmp_index_order <- order(colnames(outlist_stats_more[, tmp_index]))
+    tmp <- outlist_stats_more[, tmp_index[tmp_index_order]]
+    outlist_stats_more <- outlist_stats_more[, -tmp_index]
+    outlist_stats_more <- cbind(outlist_stats_more, tmp)
+    outlist_total <- outlist_stats_more
+  }
+  
+  # make a copy of the outlist
+  outlist_ident <- outlist_total
+  # select identified peak groups if ppm deviation is within limits
+  if (z_score == 1) {
+    outlist_ident$ppmdev <- as.numeric(outlist_ident$ppmdev)
+    outlist_ident <- outlist_ident[which(outlist_ident["ppmdev"] >= -ppm & outlist_ident["ppmdev"] <= ppm), ]
+  }
+  # take care of NAs in theormz_noise
+  outlist_ident$theormz_noise[which(is.na(outlist_ident$theormz_noise))] <- 0
+  outlist_ident$theormz_noise <- as.numeric(outlist_ident$theormz_noise)
+  outlist_ident$theormz_noise[which(is.na(outlist_ident$theormz_noise))] <- 0
+  outlist_ident$theormz_noise <- as.numeric(outlist_ident$theormz_noise)
+
+  # Extra output in Excel-readable format:
+  remove_columns <- c("fq.best", "fq.worst", "mzmin.pgrp", "mzmax.pgrp")
+  remove_colindex <- which(colnames(outlist_ident) %in% remove_columns)
+  outlist_ident <- outlist_ident[, -remove_colindex]
+  write.table(outlist_ident, file = paste0("outlist_identified_", scanmode, ".txt"), sep = "\t", row.names = FALSE)
+  # output in RData format
+  save(outlist_ident, file = paste0("outlist_identified_", scanmode, ".RData"))
+}
diff --git a/DIMS/CollectFilled.nf b/DIMS/CollectFilled.nf
new file mode 100644
index 0000000..53b1967
--- /dev/null
+++ b/DIMS/CollectFilled.nf
@@ -0,0 +1,19 @@
+process CollectFilled {
+    tag "DIMS CollectFilled"
+    label 'CollectFilled'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       path(filled_files)
+       each path(replication_pattern)
+
+    output:
+       path('outlist*.txt')
+       path('outlist*.RData'), emit: filled_pgrlist
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/CollectFilled.R $params.scripts_dir $params.ppm $params.zscore
+        """
+}
diff --git a/DIMS/CollectSumAdducts.R b/DIMS/CollectSumAdducts.R
new file mode 100755
index 0000000..f4f4723
--- /dev/null
+++ b/DIMS/CollectSumAdducts.R
@@ -0,0 +1,21 @@
+## adapted from 12-collectSamplesAdded.R
+
+# define parameters 
+cmd_args <- commandArgs(trailingOnly = TRUE)
+
+# collect all AdductSums part files for each scanmode
+scanmodes <- c("positive", "negative")
+
+for (scanmode in scanmodes) {
+  adductsum_part_files <- list.files("./", pattern = scanmode)
+
+  outlist.tot <- NULL
+  for (i in 1:length(adductsum_part_files)) {
+    load(adductsum_part_files[i])
+    outlist.tot <- rbind(outlist.tot, adductsum)
+  }
+
+  # save output file
+  save(outlist.tot, file = paste0("AdductSums_", scanmode, ".RData"))
+}
+
diff --git a/DIMS/CollectSumAdducts.nf b/DIMS/CollectSumAdducts.nf
new file mode 100644
index 0000000..2bd43ff
--- /dev/null
+++ b/DIMS/CollectSumAdducts.nf
@@ -0,0 +1,17 @@
+process CollectSumAdducts {
+    tag "DIMS CollectSumAdducts"
+    label 'CollectSumAdducts'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       path(collect_files)
+
+    output:
+       path('AdductSums_*.RData')
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/CollectSumAdducts.R 
+        """
+}
diff --git a/DIMS/FillMissing.R b/DIMS/FillMissing.R
new file mode 100755
index 0000000..a76c163
--- /dev/null
+++ b/DIMS/FillMissing.R
@@ -0,0 +1,44 @@
+## adapted from 9-runFillMissing.R
+
+# define parameters
+cmd_args <- commandArgs(trailingOnly = TRUE)
+
+peakgrouplist_file <- cmd_args[1]
+scripts_dir <- cmd_args[2]
+thresh <- as.numeric(cmd_args[3])
+resol <- as.numeric(cmd_args[4])
+ppm <- as.numeric(cmd_args[5])
+outdir <- "./"
+
+# load in function scripts
+source(paste0(scripts_dir, "replace_zeros.R"))
+source(paste0(scripts_dir, "fit_optim.R"))
+source(paste0(scripts_dir, "get_fwhm.R"))
+source(paste0(scripts_dir, "get_stdev.R"))
+source(paste0(scripts_dir, "estimate_area.R"))
+source(paste0(scripts_dir, "optimize_gaussfit.R"))
+source(paste0(scripts_dir, "identify_noisepeaks.R"))
+source(paste0(scripts_dir, "get_element_info.R"))
+source(paste0(scripts_dir, "atomic_info.R"))
+
+# determine scan mode
+if (grepl("_pos", peakgrouplist_file)) {
+  scanmode <- "positive"
+} else if (grepl("_neg", peakgrouplist_file)) {
+  scanmode <- "negative"
+}
+
+# get replication pattern for sample names
+pattern_file <- paste0(scanmode, "_repl_pattern.RData")
+repl_pattern <- get(load(pattern_file))
+
+# load peak group list and determine output file name
+outpgrlist_identified <- get(load(peakgrouplist_file))
+
+outputfile_name <- gsub(".RData", "_filled.RData", peakgrouplist_file)
+
+# replace missing values (zeros) with random noise
+peakgrouplist_filled <- replace_zeros(outpgrlist_identified, repl_pattern, scanmode, resol, outdir, thresh, ppm)
+
+# save output
+save(peakgrouplist_filled, file = outputfile_name)
diff --git a/DIMS/FillMissing.nf b/DIMS/FillMissing.nf
new file mode 100644
index 0000000..b0d1850
--- /dev/null
+++ b/DIMS/FillMissing.nf
@@ -0,0 +1,18 @@
+process FillMissing {
+    tag "DIMS FillMissing ${peakgrouplist_file}"
+    label 'FillMissing'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       path(peakgrouplist_file)
+       each path(replication_pattern)
+
+    output:
+       path('*_filled.RData')
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/FillMissing.R $peakgrouplist_file $params.scripts_dir $params.thresh $params.resolution $params.ppm
+        """
+}
diff --git a/DIMS/GenerateBreaks.R b/DIMS/GenerateBreaks.R
new file mode 100644
index 0000000..6aacfe8
--- /dev/null
+++ b/DIMS/GenerateBreaks.R
@@ -0,0 +1,51 @@
+## adapted from 1-generateBreaksFwhm.HPC.R ##
+
+# load required package
+suppressPackageStartupMessages(library("xcms"))
+
+# define parameters
+cmd_args <- commandArgs(trailingOnly = TRUE)
+
+filepath <- cmd_args[1]
+outdir <- cmd_args[2]
+trim <- as.numeric(cmd_args[3])
+resol <- as.numeric(cmd_args[4])
+
+# initialize
+trim_left <- NULL
+trim_right <- NULL
+breaks_fwhm <- NULL
+breaks_fwhm_avg <- NULL
+bins <- NULL
+
+# read in mzML file
+raw_data <- suppressMessages(xcms::xcmsRaw(filepath))
+
+# trim (remove) scans at the start and end
+trim_left  <- round(raw_data@scantime[length(raw_data@scantime) * trim])
+trim_right <- round(raw_data@scantime[length(raw_data@scantime) * (1 - trim)])
+
+# Mass range m/z
+low_mz  <- raw_data@mzrange[1]
+high_mz <- raw_data@mzrange[2]
+
+# determine number of segments (bins)
+nr_segments <- 2 * (high_mz - low_mz)
+segment <- seq(from = low_mz, to = high_mz, length.out = nr_segments + 1)
+
+# determine start and end of each bin.
+for (i in 1:nr_segments) {
+  start_segment <- segment[i]
+  end_segment <- segment[i+1]
+  resol_mz <- resol * (1 / sqrt(2) ^ (log2(start_segment / 200)))
+  fwhm_segment <- start_segment / resol_mz
+  breaks_fwhm <- c(breaks_fwhm, seq(from = (start_segment + fwhm_segment), to = end_segment, by = 0.2 * fwhm_segment))
+  # average the m/z instead of start value
+  range <- seq(from = (start_segment + fwhm_segment), to = end_segment, by = 0.2 * fwhm_segment)
+  delta_mz <- range[2] - range[1]
+  breaks_fwhm_avg <- c(breaks_fwhm_avg, range + 0.5 * delta_mz)
+}
+
+# generate output file
+save(breaks_fwhm, breaks_fwhm_avg, trim_left, trim_right, file = "breaks.fwhm.RData")
+save(high_mz, file = "highest_mz.RData")
diff --git a/DIMS/GenerateBreaks.nf b/DIMS/GenerateBreaks.nf
new file mode 100644
index 0000000..af617a8
--- /dev/null
+++ b/DIMS/GenerateBreaks.nf
@@ -0,0 +1,19 @@
+process GenerateBreaks {
+    tag "DIMS GenerateBreaks"
+    label 'GenerateBreaks'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       tuple(val(file_id), path(mzML_file))
+
+
+    output:
+       path('breaks.fwhm.RData'), emit: breaks
+       path('highest_mz.RData'), emit: highest_mz
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/GenerateBreaks.R $mzML_file ./ $params.trim $params.resolution 
+        """
+}
diff --git a/DIMS/GenerateExcel.R b/DIMS/GenerateExcel.R
new file mode 100644
index 0000000..d36ac29
--- /dev/null
+++ b/DIMS/GenerateExcel.R
@@ -0,0 +1,643 @@
+## adapted from 13-excelExport.R
+
+# load required packages
+library("ggplot2")
+library("reshape2")
+library("openxlsx")
+library("loder")
+suppressMessages(library("dplyr"))
+suppressMessages(library("stringr"))
+
+# define parameters
+cmd_args <- commandArgs(trailingOnly = TRUE)
+
+init_file <- cmd_args[1]
+project <- cmd_args[2]
+dims_matrix <- cmd_args[3]
+hmdb_file <- cmd_args[4]
+z_score <- as.numeric(cmd_args[5])
+
+round_df <- function(df, digits) {
+  #' Round numbers to a set number of digits for numeric values
+  #'
+  #' @param df: Dataframe containing numeric values
+  #' @param digits: Number of digits to round off to (integer)
+  #'
+  #' @return df: Dataframe with rounded numbers
+  numeric_columns <- sapply(df, mode) == "numeric"
+  df[numeric_columns] <- round(df[numeric_columns], digits)
+  return(df)
+}
+
+robust_scaler <- function(control_intensities, control_col_ids, perc = 5) {
+  #' Calculate robust scaler: Z-score based on controls without outliers
+  #'
+  #' @param control_intensities: Matrix with intensities for control samples
+  #' @param control_col_ids: Vector with column names for control samples
+  #' @param perc: Percentage of outliers which will be removed from controls (float)
+  #'
+  #' @return trimmed_control_intensities: Intensities trimmed for outliers
+  nr_to_remove <- ceiling(length(control_col_ids) * perc / 100)
+  sorted_control_intensities <- sort(as.numeric(control_intensities))
+  trimmed_control_intensities <- sorted_control_intensities[(nr_to_remove + 1) : 
+							    (length(sorted_control_intensities) - nr_to_remove)]
+  return(trimmed_control_intensities)
+}
+
+# Initialise
+plot <- TRUE
+export <- TRUE
+control_label <- "C"
+case_label <- "P"
+imagesize_multiplier <- 2
+# setting outdir to export files to the working directory
+outdir <- "./"
+# percentage of outliers to remove from calculation of robust scaler
+perc <- 5
+
+# load information on samples
+load(init_file)
+# load the HMDB file with info on biological relevance of metabolites
+load(hmdb_file)
+
+# get current date
+rundate <- Sys.Date()
+
+# create a directory for plots in project directory
+dir.create(paste0(outdir, "/plots"), showWarnings = FALSE)
+plot_dir <- paste0(outdir, "/plots/adducts")
+dir.create(plot_dir, showWarnings = FALSE)
+
+# set the number of digits for floats
+options(digits = 16)
+
+# load positive and negative adduct sums
+load("AdductSums_negative.RData")
+outlist_neg_adducts_hmdb <- outlist.tot
+load("AdductSums_positive.RData")
+outlist_pos_adducts_hmdb <- outlist.tot
+rm(outlist.tot)
+
+# Only continue with patients (columns) that are in both pos and neg, so patients that are in both
+tmp <- intersect(colnames(outlist_neg_adducts_hmdb), colnames(outlist_pos_adducts_hmdb))
+outlist_neg_adducts_hmdb <- outlist_neg_adducts_hmdb[, tmp]
+outlist_pos_adducts_hmdb <- outlist_pos_adducts_hmdb[, tmp]
+
+# Find indexes of neg hmdb code that are also found in pos and vice versa
+index_neg <- which(rownames(outlist_neg_adducts_hmdb) %in% rownames(outlist_pos_adducts_hmdb))
+index_pos <- which(rownames(outlist_pos_adducts_hmdb) %in% rownames(outlist_neg_adducts_hmdb))
+
+# Only continue with HMDB codes (rows) that were found in both positive mode and remove last column (hmdb_name)
+tmp_pos <- outlist_pos_adducts_hmdb[rownames(outlist_pos_adducts_hmdb)[index_pos], 1:(dim(outlist_pos_adducts_hmdb)[2] - 1)]
+tmp_hmdb_name_pos <- outlist_pos_adducts_hmdb[rownames(outlist_pos_adducts_hmdb)[index_pos], dim(outlist_pos_adducts_hmdb)[2]]
+tmp_pos_left <- outlist_pos_adducts_hmdb[-index_pos, ]
+# same for negative mode
+tmp_neg <- outlist_neg_adducts_hmdb[rownames(outlist_pos_adducts_hmdb)[index_pos], 1:(dim(outlist_neg_adducts_hmdb)[2] - 1)]
+tmp_neg_left <- outlist_neg_adducts_hmdb[-index_neg, ]
+
+# Combine positive and negative numbers and paste back HMDB column
+tmp <- apply(tmp_pos, 2, as.numeric) + apply(tmp_neg, 2, as.numeric)
+rownames(tmp) <- rownames(tmp_pos)
+tmp <- cbind(tmp, "HMDB_name" = tmp_hmdb_name_pos)
+outlist <- rbind(tmp, tmp_pos_left, tmp_neg_left)
+
+# Filter for biological relevance
+peaks_in_list <- which(rownames(outlist) %in% rownames(rlvnc))
+outlist <- cbind(outlist[peaks_in_list, ], as.data.frame(rlvnc[rownames(outlist)[peaks_in_list], ]))
+# filter out all irrelevant HMDBs
+outlist <- outlist %>%
+  tibble::rownames_to_column("rowname") %>%
+  filter(!grepl("Exogenous|Drug|exogenous", relevance)) %>%
+  tibble::column_to_rownames("rowname")
+
+# Add HMDB_code column with all the HMDB ID and sort on it
+outlist <- cbind(outlist, "HMDB_code" = rownames(outlist))
+outlist <- outlist[order(outlist[, "HMDB_code"]), ]
+
+# Create excel
+filelist <- "AllPeakGroups"
+wb <- createWorkbook("SinglePatient")
+addWorksheet(wb, filelist)
+
+# Add Z-scores and create plots
+if (z_score == 1) {
+  # add a column for plots
+  outlist <- cbind(plots = NA, outlist)
+  # two columns will be added for mean and stdev of controls; Z-scores start at ncol + 3
+  startcol <- ncol(outlist) + 3
+
+  # Get columns with control intensities
+  control_col_ids <- grep(control_label, colnames(outlist), fixed = TRUE)
+  control_columns <- as.data.frame(outlist[, control_col_ids])
+  colnames(control_columns) <- colnames(outlist)[control_col_ids]
+
+  # Get columns with patient intensities
+  patient_col_ids <- grep(case_label, colnames(outlist), fixed = TRUE)
+  patient_columns <- as.data.frame(outlist[, patient_col_ids])
+  colnames(patient_columns) <- colnames(outlist)[patient_col_ids]
+
+  intensity_col_ids <- c(control_col_ids, patient_col_ids)
+
+  # if there are any intensities of 0 left, set them to NA for stats
+  outlist[, intensity_col_ids][outlist[, intensity_col_ids] == 0] <- NA
+
+  # save outlist as it is and use it to calculate robust scaler
+  outlist_noZ <- outlist
+
+  # calculate mean and sd for Control group
+  outlist$avg.ctrls <- apply(control_columns, 1, function(x) mean(as.numeric(x), na.rm = TRUE))
+  outlist$sd.ctrls  <- apply(control_columns, 1, function(x) sd(as.numeric(x), na.rm = TRUE))
+
+  # Make and add columns with zscores
+  colnames_z <- NULL
+  for (i in intensity_col_ids) {
+    cname <- colnames(outlist)[i]
+    colnames_z <- c(colnames_z, paste(cname, "Zscore", sep = "_"))
+    zscores_1col <- (as.numeric(as.vector(unlist(outlist[, i]))) - outlist$avg.ctrls) / outlist$sd.ctrls
+    outlist <- cbind(outlist, zscores_1col)
+  }
+  colnames(outlist)[startcol:ncol(outlist)] <- colnames_z
+
+  # calculate robust scaler (Zscores minus outliers in Controls)
+  outlist_noZ$avg.ctrls <- 0
+  outlist_noZ$sd.ctrls  <- 0
+
+  # only calculate robust Z-scores if there are enough Controls
+  if (length(control_col_ids) > 10) {
+    for (metabolite_index in 1:nrow(outlist)) {
+      outlist_noZ$avg.ctrls[metabolite_index] <- mean(robust_scaler(outlist_noZ[metabolite_index, control_col_ids],
+                                                                    control_col_ids, perc))
+      outlist_noZ$sd.ctrls[metabolite_index]  <-   sd(robust_scaler(outlist_noZ[metabolite_index, control_col_ids],
+                                                                    control_col_ids, perc))
+    }
+  }
+
+  # Make and add columns with robust zscores
+  cnames_robust <- gsub("_Zscore", "_RobustZscore", colnames_z)
+  for (i in intensity_col_ids) {
+    zscores_1col <- (as.numeric(as.vector(unlist(outlist_noZ[, i]))) - outlist_noZ$avg.ctrls) / outlist_noZ$sd.ctrls
+    outlist_noZ <- cbind(outlist_noZ, zscores_1col)
+  }
+  colnames(outlist_noZ)[startcol:ncol(outlist_noZ)] <- cnames_robust
+
+  # output metabolites filtered on relevance
+  save(outlist, file = paste0("AdductSums_filtered_Zscores.RData"))
+  write.table(outlist, file = paste0("AdductSums_filtered_Zscores.txt"), sep = "\t", row.names = FALSE)
+  # output filtered metabolites with robust scaled Zscores
+  save(outlist_noZ, file = paste0("AdductSums_filtered_robustZ.RData"))
+  write.table(outlist_noZ, file = paste0("AdductSums_filtered_robustZ.txt"), sep = "\t", row.names = FALSE)
+
+  # get the IDs of the patients and sort
+  patient_ids <- unique(as.vector(unlist(lapply(strsplit(colnames(patient_columns), ".", fixed = TRUE), function(x) x[1]))))
+  patient_ids <- patient_ids[order(nchar(patient_ids), patient_ids)]
+
+  # for every row, make boxplot, insert into excel, and calculate Zscore for every patient
+  temp_png <- NULL
+  for (p in 1:nrow(outlist)) {
+    # get HMDB ID
+    hmdb_name <- rownames(outlist[p, ])
+    # get intensities per metabolite for box plot for control samples
+    intensities <- list(as.numeric(as.vector(unlist(control_columns[p, ]))))
+    labels <- c("C", patient_ids)
+    # get intensities per metabolite for box plot for patient samples
+    for (i in 1:length(patient_ids)) {
+      id <- patient_ids[i]
+      # combine all intensities that start with the same string for patients
+      # exception: if there is only one patient id, skip this step; nothing to combine
+      if (ncol(patient_columns) > 1) {
+        patient_int <- as.numeric(as.vector(unlist(outlist[p, names(patient_columns[1, ])
+  						 [startsWith(names(patient_columns[1, ]), paste0(id, "."))]])))
+      } else {
+	patient_int <- as.numeric(unlist(as.vector(patient_columns)))
+      }
+      intensities[[i + 1]] <- patient_int
+    }
+    intensities <- setNames(intensities, labels)
+
+    plot_width <- length(labels) * 16 + 90
+
+    plot.new()
+    if (export) {
+      png(filename = paste0(plot_dir, "/", hmdb_name, "_box.png"),
+          width = plot_width,
+          height = 280)
+    }
+    # set margins
+    par(oma = c(2, 0, 0, 0))
+    boxplot(intensities,
+            col = c("green", rep("red", length(intensities) - 1)),
+            names.arg = labels,
+            las = 2,
+            main = hmdb_name)
+    dev.off()
+
+    file_png <- paste0(plot_dir, "/", hmdb_name, "_box.png")
+    if (is.null(temp_png)) {
+      temp_png <- loder::readPng(file_png)
+      img_dim <- dim(temp_png)[c(1, 2)]
+      cell_dim <- img_dim * imagesize_multiplier
+      setColWidths(wb, filelist, cols = 1, widths = cell_dim[2] / 20)
+    }
+
+    openxlsx::insertImage(wb,
+                          filelist,
+                          file_png,
+                          startRow = p + 1,
+                          startCol = 1,
+                          height = cell_dim[1],
+                          width = cell_dim[2],
+                          units = "px")
+
+    if (p %% 100 == 0) {
+      cat("at row: ", p, "\n")
+    }
+  }
+
+  openxlsx::setRowHeights(wb, filelist, rows = c(1:nrow(outlist) + 1), heights = cell_dim[1] / 4)
+  openxlsx::setColWidths(wb, filelist, cols = c(2:ncol(outlist)), widths = 20)
+} else {
+  openxlsx::setRowHeights(wb, filelist, rows = c(1:nrow(outlist)), heights = 18)
+  openxlsx::setColWidths(wb, filelist, cols = c(1:ncol(outlist)), widths = 20)
+}
+
+# write Excel file
+openxlsx::writeData(wb, sheet = 1, outlist, startCol = 1)
+xlsx_name <- paste0(outdir, "/", project, ".xlsx")
+openxlsx::saveWorkbook(wb, xlsx_name, overwrite = TRUE)
+rm(wb)
+
+#### INTERNAL STANDARDS ####
+is_list <- outlist[grep("Internal standard", outlist[, "relevance"], fixed = TRUE), ]
+is_codes <- rownames(is_list)
+
+# if all data from one samplename (for example P195.1) is filtered out in 3-averageTechReplicates 
+# because of too little data (threshold parameter)i, the init.RData (repl_pattern) will contain more sample_names 
+# than the peak data (IS), so this data needs to be removed first, before the retrieval of the summed adducts. 
+# Write sample_names to a log file, to let user know that this sample_name contained no data.
+sample_names_nodata <- setdiff(names(repl_pattern), names(is_list))
+if (!is.null(sample_names_nodata)) {
+  write.table(sample_names_nodata, file = paste(outdir, "sample_names_nodata.txt", sep = "/"), 
+	      row.names = FALSE, col.names = FALSE, quote = FALSE)
+  cat(sample_names_nodata, "\n")
+  for (sample_name in sample_names_nodata) {
+    repl_pattern[[sample_name]] <- NULL
+  }
+}
+
+# Retrieve IS summed adducts
+is_summed <- is_list[c(names(repl_pattern), "HMDB_code")]
+is_summed$HMDB.name <- is_list$name
+is_summed <- reshape2::melt(is_summed, id.vars = c("HMDB_code", "HMDB.name"))
+colnames(is_summed) <- c("HMDB.code", "HMDB.name", "Sample", "Intensity")
+is_summed$Matrix <- dims_matrix
+is_summed$Rundate <- rundate
+is_summed$Project <- project
+is_summed$Intensity <- as.numeric(as.character(is_summed$Intensity))
+
+# Retrieve IS positive mode
+is_pos <- as.data.frame(subset(outlist_pos_adducts_hmdb, rownames(outlist_pos_adducts_hmdb) %in% is_codes))
+is_pos$HMDB_name <- is_list[match(row.names(is_pos), is_list$HMDB_code, nomatch = NA), "name"]
+is_pos$HMDB.code <- row.names(is_pos)
+is_pos <- reshape2::melt(is_pos, id.vars = c("HMDB.code", "HMDB_name"))
+colnames(is_pos) <- c("HMDB.code", "HMDB.name", "Sample", "Intensity")
+is_pos$Matrix <- dims_matrix
+is_pos$Rundate <- rundate
+is_pos$Project <- project
+is_pos$Intensity <- as.numeric(as.character(is_pos$Intensity))
+
+# Retrieve IS negative mode
+is_neg <- as.data.frame(subset(outlist_neg_adducts_hmdb, rownames(outlist_neg_adducts_hmdb) %in% is_codes))
+is_neg$HMDB_name <- is_list[match(row.names(is_neg), is_list$HMDB_code, nomatch = NA), "name"]
+is_neg$HMDB.code <- row.names(is_neg)
+is_neg <- reshape2::melt(is_neg, id.vars = c("HMDB.code", "HMDB_name"))
+colnames(is_neg) <- c("HMDB.code", "HMDB.name", "Sample", "Intensity")
+is_neg$Matrix <- dims_matrix
+is_neg$Rundate <- rundate
+is_neg$Project <- project
+is_neg$Intensity <- as.numeric(as.character(is_neg$Intensity))
+
+# Save results
+save(is_pos, is_neg, is_summed, file = paste0(outdir, "/", project, "_IS_results.RData"))
+
+# number of samples, for plotting length and width
+sample_count <- length(repl_pattern)
+
+# change the order of the x-axis summed plots to a natural sorted one
+sample_naturalorder <- unique(as.character(is_summed$Sample))
+sample_naturalorder <- stringr::str_sort(sample_naturalorder, numeric = TRUE)
+is_summed$Sample_level <- factor(is_summed$Sample, levels = c(sample_naturalorder))
+is_pos$Sample_level <- factor(is_pos$Sample, levels = c(sample_naturalorder))
+is_neg$Sample_level <- factor(is_neg$Sample, levels = c(sample_naturalorder))
+
+## bar plots with all IS
+# theme for all IS bar plots
+theme_is_bar <- function(my_plot) {
+  my_plot +
+    ggplot2::scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
+    ggplot2::theme(legend.position = "none", 
+	           axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 6), 
+               	   axis.text.y = element_text(size = 6)
+    )
+}
+
+# ggplot functions
+is_neg_bar_plot <- ggplot(is_neg, aes(Sample_level, Intensity)) +
+  ggtitle("Interne Standaard (Neg)") +
+  geom_bar(aes(fill = HMDB.name), stat = "identity") +
+  labs(x = "", y = "Intensity") +
+  facet_wrap(~HMDB.name, scales = "free_y")
+
+is_pos_bar_plot <- ggplot(is_pos, aes(Sample_level, Intensity)) +
+  ggtitle("Interne Standaard (Pos)") +
+  geom_bar(aes(fill = HMDB.name), stat = "identity") +
+  labs(x = "", y = "Intensity") +
+  facet_wrap(~HMDB.name, scales = "free_y")
+
+is_sum_bar_plot <- ggplot(is_summed, aes(Sample_level, Intensity)) +
+  ggtitle("Interne Standaard (Summed)") +
+  geom_bar(aes(fill = HMDB.name), stat = "identity") +
+  labs(x = "", y = "Intensity") +
+  facet_wrap(~HMDB.name, scales = "free_y")
+
+# add theme to ggplot functions
+is_neg_bar_plot <- theme_is_bar(is_neg_bar_plot)
+is_pos_bar_plot <- theme_is_bar(is_pos_bar_plot)
+is_sum_bar_plot <- theme_is_bar(is_sum_bar_plot)
+
+# save plots to disk
+plot_width <- 9 + 0.35 * sample_count
+ggsave(paste0(outdir, "/plots/IS_bar_all_neg.png"), 
+       plot = is_neg_bar_plot, height = plot_width / 2.5, width = plot_width, units = "in")
+ggsave(paste0(outdir, "/plots/IS_bar_all_pos.png"), 
+       plot = is_pos_bar_plot, height = plot_width / 2.5, width = plot_width, units = "in")
+ggsave(paste0(outdir, "/plots/IS_bar_all_sum.png"), 
+       plot = is_sum_bar_plot, height = plot_width / 2.5, width = plot_width, units = "in")
+
+## Line plots with all IS
+# function for ggplot theme
+# add smaller legend in the "all IS line plots", otherwise out-of-range when more than 13 IS lines
+theme_is_line <- function(my_plot) {
+  my_plot +
+    guides(shape = guide_legend(override.aes = list(size = 0.5)), 
+	   color = guide_legend(override.aes = list(size = 0.5))
+    ) +
+    theme(legend.title = element_text(size = 8), 
+	  legend.text = element_text(size = 6), 
+	  legend.key.size = unit(0.7, "line"), 
+	  axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 8)
+    )
+}
+
+# ggplot functions
+is_neg_line_plot <- ggplot(is_neg, aes(Sample_level, Intensity)) +
+  ggtitle("Interne Standaard (Neg)") +
+  geom_point(aes(col = HMDB.name)) +
+  geom_line(aes(col = HMDB.name, group = HMDB.name)) +
+  labs(x = "", y = "Intensity")
+
+is_pos_line_plot <- ggplot(is_pos, aes(Sample_level, Intensity)) +
+  ggtitle("Interne Standaard (Pos)") +
+  geom_point(aes(col = HMDB.name)) +
+  geom_line(aes(col = HMDB.name, group = HMDB.name)) +
+  labs(x = "", y = "Intensity")
+
+is_sum_line_plot <- ggplot(is_summed, aes(Sample_level, Intensity)) +
+  ggtitle("Interne Standaard (Sum)") +
+  geom_point(aes(col = HMDB.name)) +
+  geom_line(aes(col = HMDB.name, group = HMDB.name)) +
+  labs(x = "", y = "Intensity")
+
+# add theme to ggplot functions
+is_sum_line_plot <- theme_is_line(is_sum_line_plot)
+is_neg_line_plot <- theme_is_line(is_neg_line_plot)
+is_pos_line_plot <- theme_is_line(is_pos_line_plot)
+
+# save plots to disk
+plot_width <- 8 + 0.2 * sample_count
+ggsave(paste0(outdir, "/plots/IS_line_all_neg.png"), 
+       plot = is_neg_line_plot, height = plot_width / 2.5, width = plot_width, units = "in")
+ggsave(paste0(outdir, "/plots/IS_line_all_pos.png"), 
+       plot = is_pos_line_plot, height = plot_width / 2.5, width = plot_width, units = "in")
+ggsave(paste0(outdir, "/plots/IS_line_all_sum.png"), 
+       plot = is_sum_line_plot, height = plot_width / 2.5, width = plot_width, units = "in")
+
+## bar plots with a selection of IS
+is_neg_selection <- c("2H2-Ornithine (IS)", "2H3-Glutamate (IS)", "2H2-Citrulline (IS)", "2H4_13C5-Arginine (IS)", 
+		      "13C6-Tyrosine (IS)")
+is_pos_selection <- c("2H4-Alanine (IS)", "13C6-Phenylalanine (IS)", "2H4_13C5-Arginine (IS)", "2H3-Propionylcarnitine (IS)", 
+		      "2H9-Isovalerylcarnitine (IS)")
+is_sum_selection <- c("2H8-Valine (IS)", "2H3-Leucine (IS)", "2H3-Glutamate (IS)", "2H4_13C5-Arginine (IS)", 
+		      "13C6-Tyrosine (IS)")
+
+# add minimal intensity lines based on matrix (DBS or Plasma) and machine mode (neg, pos, sum)
+if (dims_matrix == "DBS") {
+  hline_data_neg <-
+    data.frame(
+      z = c(15000, 200000, 130000, 18000, 50000),
+      HMDB.name = is_neg_selection
+    )
+  hline_data_pos <-
+    data.frame(
+      z = c(150000, 3300000, 1750000, 150000, 270000),
+      HMDB.name = is_pos_selection
+    )
+  hline_data_sum <-
+    data.frame(
+      z = c(1300000, 2500000, 500000, 1800000, 1400000),
+      HMDB.name = is_sum_selection
+    )
+} else if (dims_matrix == "Plasma") {
+  hline_data_neg <-
+    data.frame(
+      z = c(6500, 100000, 75000, 7500, 25000),
+      HMDB.name = is_neg_selection
+    )
+  hline_data_pos <-
+    data.frame(
+      z = c(85000, 1000000, 425000, 70000, 180000),
+      HMDB.name = is_pos_selection
+    )
+  hline_data_sum <-
+    data.frame(
+      z = c(700000, 1250000, 150000, 425000, 300000),
+      HMDB.name = is_sum_selection
+    )
+}
+
+# ggplot functions
+is_neg_selection_barplot <- ggplot(subset(is_neg, HMDB.name %in% is_neg_selection), aes(Sample_level, Intensity)) +
+  ggtitle("Interne Standaard (Neg)") +
+  geom_bar(aes(fill = HMDB.name), stat = "identity") +
+  labs(x = "", y = "Intensity") +
+  facet_wrap(~HMDB.name, scales = "free", ncol = 2) +
+  if (exists("hline_data_neg")) {
+    geom_hline(aes(yintercept = z), subset(hline_data_neg, HMDB.name %in% is_neg$HMDB.name))
+  }
+
+is_pos_selection_barplot <- ggplot(subset(is_pos, HMDB.name %in% is_pos_selection), aes(Sample_level, Intensity)) +
+  ggtitle("Interne Standaard (Pos)") +
+  geom_bar(aes(fill = HMDB.name), stat = "identity") +
+  labs(x = "", y = "Intensity") +
+  facet_wrap(~HMDB.name, scales = "free", ncol = 2) +
+  if (exists("hline_data_pos")) {
+    geom_hline(aes(yintercept = z), subset(hline_data_pos, HMDB.name %in% is_pos$HMDB.name))
+  }
+
+is_sum_selection_barplot <- ggplot(subset(is_summed, HMDB.name %in% is_sum_selection), aes(Sample_level, Intensity)) +
+  ggtitle("Interne Standaard (Sum)") +
+  geom_bar(aes(fill = HMDB.name), stat = "identity") +
+  labs(x = "", y = "Intensity") +
+  facet_wrap(~HMDB.name, scales = "free", ncol = 2) +
+  if (exists("hline_data_sum")) {
+    geom_hline(aes(yintercept = z), subset(hline_data_sum, HMDB.name %in% is_summed$HMDB.name))
+  }
+
+# add theme to ggplot functions
+is_neg_selection_barplot <- theme_is_bar(is_neg_selection_barplot)
+is_pos_selection_barplot <- theme_is_bar(is_pos_selection_barplot)
+is_sum_selection_barplot <- theme_is_bar(is_sum_selection_barplot)
+
+# save plots to disk
+plot_width <- 9 + 0.35 * sample_count
+ggsave(paste0(outdir, "/plots/IS_bar_select_neg.png"), 
+       plot = is_neg_selection_barplot, height = plot_width / 2.0, width = plot_width, units = "in")
+ggsave(paste0(outdir, "/plots/IS_bar_select_pos.png"), 
+       plot = is_pos_selection_barplot, height = plot_width / 2.0, width = plot_width, units = "in")
+ggsave(paste0(outdir, "/plots/IS_bar_select_sum.png"), 
+       plot = is_sum_selection_barplot, height = plot_width / 2.0, width = plot_width, units = "in")
+
+## line plots with a selection of IS
+# ggplot functions
+is_neg_selection_lineplot <- ggplot(subset(is_neg, HMDB.name %in% is_neg_selection), aes(Sample_level, Intensity)) +
+  ggtitle("Interne Standaard (Neg)") +
+  geom_point(aes(col = HMDB.name)) +
+  geom_line(aes(col = HMDB.name, group = HMDB.name)) +
+  labs(x = "", y = "Intensity")
+
+is_pos_selection_lineplot <- ggplot(subset(is_pos, HMDB.name %in% is_pos_selection), aes(Sample_level, Intensity)) +
+  ggtitle("Interne Standaard (Pos)") +
+  geom_point(aes(col = HMDB.name)) +
+  geom_line(aes(col = HMDB.name, group = HMDB.name)) +
+  labs(x = "", y = "Intensity")
+
+is_sum_selection_lineplot <- ggplot(subset(is_summed, HMDB.name %in% is_sum_selection), aes(Sample_level, Intensity)) +
+  ggtitle("Interne Standaard (Sum)") +
+  geom_point(aes(col = HMDB.name)) +
+  geom_line(aes(col = HMDB.name, group = HMDB.name)) +
+  labs(x = "", y = "Intensity")
+
+# add theme to ggplot functions
+is_neg_selection_lineplot <- theme_is_line(is_neg_selection_lineplot)
+is_pos_selection_lineplot <- theme_is_line(is_pos_selection_lineplot)
+is_sum_selection_lineplot <- theme_is_line(is_sum_selection_lineplot)
+
+# save plots to disk
+plot_width <- 8 + 0.2 * sample_count
+ggsave(paste0(outdir, "/plots/IS_line_select_neg.png"), 
+       plot = is_neg_selection_lineplot, height = plot_width / 2.5, width = plot_width, units = "in")
+ggsave(paste0(outdir, "/plots/IS_line_select_pos.png"), 
+       plot = is_pos_selection_lineplot, height = plot_width / 2.5, width = plot_width, units = "in")
+ggsave(paste0(outdir, "/plots/IS_line_select_sum.png"), 
+       plot = is_sum_selection_lineplot, height = plot_width / 2.5, width = plot_width, units = "in")
+
+
+### POSITIVE CONTROLS CHECK
+# these positive controls need to be in the samplesheet, in order to make the positive_control.RData file
+# Positive control samples all have the format P1002.x, P1003.x and P1005.x (where x is a number)
+
+column_list <- colnames(outlist)
+patterns <- c("^(P1002\\.)[[:digit:]]+_", "^(P1003\\.)[[:digit:]]+_", "^(P1005\\.)[[:digit:]]+_")
+positive_controls_index <- grepl(pattern = paste(patterns, collapse = "|"), column_list)
+positive_control_list <- column_list[positive_controls_index]
+
+if (z_score == 1) {
+  # find if one or more positive control samples are missing
+  pos_contr_warning <- c()
+  if (any(grep("^(P1002\\.)[[:digit:]]+_", positive_control_list)) &&
+    any(grep("^(P1003\\.)[[:digit:]]+_", positive_control_list)) &&
+    any(grep("^(P1005\\.)[[:digit:]]+_", positive_control_list))) {
+    cat("All three positive controls are present")
+  } else {
+    pos_contr_warning <- paste0(c("positive controls list is not complete. Only ", 
+				  positive_control_list, " is/are present"), collapse = " ")
+  }
+  # you need all positive control samples, thus starting the script only if all are available
+  if (length(pos_contr_warning) == 0) {
+    # make positive control excel with specific HMDB_codes in combination with specific control samples
+    pa_sample_name <- positive_control_list[grepl("P1002", positive_control_list)]
+    pku_sample_name <- positive_control_list[grepl("P1003", positive_control_list)]
+    lpi_sample_name <- positive_control_list[grepl("P1005", positive_control_list)]
+
+    pa_codes <- c("HMDB00824", "HMDB00783", "HMDB00123")
+    pku_codes <- c("HMDB00159")
+    lpi_codes <- c("HMDB00904", "HMDB00641", "HMDB00182")
+
+    pa_data <- outlist[pa_codes, c("HMDB_code", "name", pa_sample_name)]
+    pa_data <- reshape2::melt(pa_data, id.vars = c("HMDB_code", "name"))
+    colnames(pa_data) <- c("HMDB.code", "HMDB.name", "Sample", "Zscore")
+
+    pku_data <- outlist[pku_codes, c("HMDB_code", "name", pku_sample_name)]
+    pku_data <- reshape2::melt(pku_data, id.vars = c("HMDB_code", "name"))
+    colnames(pku_data) <- c("HMDB.code", "HMDB.name", "Sample", "Zscore")
+
+    lpi_data <- outlist[lpi_codes, c("HMDB_code", "name", lpi_sample_name)]
+    lpi_data <- reshape2::melt(lpi_data, id.vars = c("HMDB_code", "name"))
+    colnames(lpi_data) <- c("HMDB.code", "HMDB.name", "Sample", "Zscore")
+
+    positive_control <- rbind(pa_data, pku_data, lpi_data)
+    positive_control$Zscore <- as.numeric(positive_control$Zscore)
+    # extra information added to excel for future reference. made in beginning of this script
+    positive_control$Matrix <- dims_matrix
+    positive_control$Rundate <- rundate
+    positive_control$Project <- project
+
+    # Save results
+    save(positive_control, file = paste0(outdir, "/", project, "_positive_control.RData"))
+    # round the Z-scores to 2 digits
+    positive_control$Zscore <- round_df(positive_control$Zscore, 2)
+    write.xlsx(positive_control, file = paste0(outdir, "/", project, "_positive_control.xlsx"), 
+	       sheetName = "Sheet1", col.names = TRUE, row.names = TRUE, append = FALSE)
+  } else {
+    write.table(pos_contr_warning, file = paste(outdir, "positive_controls_warning.txt", sep = "/"), 
+		row.names = FALSE, col.names = FALSE, quote = FALSE)
+  }
+}
+
+### MISSING M/Z CHECK
+# check the outlist_identified_(negative/positive).RData files for missing m/z values and mention in the results mail
+# Load the outlist_identified files + remove the loaded files
+load(paste0(outdir, "/outlist_identified_negative.RData"))
+outlist_ident_neg <- outlist_ident
+load(paste0(outdir, "/outlist_identified_positive.RData"))
+outlist_ident_pos <- outlist_ident
+rm(outlist_ident)
+# check for missing m/z in negative and positive mode
+scanmode <- c("Negative", "Positive")
+index <- 1
+results_ident <- c() 
+outlist_ident_list <- list(outlist_ident_neg, outlist_ident_pos)
+for (outlist_ident in outlist_ident_list) {
+  current_mode <- scanmode[index]
+  # retrieve all unique m/z values in whole numbers and check if all are available
+  mz_values <- as.numeric(unique(format(outlist_ident$mzmed.pgrp, digits = 0)))
+  # m/z range for a standard run = 70-600
+  mz_range <- seq(70, 599, by = 1)
+  mz_missing <- c()
+  for (mz in mz_range) {
+    if (!mz %in% mz_values) {
+      mz_missing <- c(mz_missing, mz)
+    }
+  }
+  y <- mz_missing
+  # check if m/z are missing and make an .txt file with information
+  group_ident <- cumsum(c(1, abs(y[-length(y)] - y[-1]) > 1))
+  if (length(group_ident) > 1) {
+    results_ident <- c(results_ident, paste0("Missing m/z values ", current_mode, " mode"))
+    results_ident <- c(results_ident, by(y, group_ident, identity))
+  } else {
+    results_ident <- c(results_ident, paste0(current_mode, " mode did not have missing mz values"))
+  }
+  # change to other scanmode
+  index <- index + 1
+}
+lapply(results_ident, write, file = paste0(outdir, "/missing_mz_warning.txt"), append = TRUE, ncolumns = 1000)
diff --git a/DIMS/GenerateExcel.nf b/DIMS/GenerateExcel.nf
new file mode 100644
index 0000000..6134766
--- /dev/null
+++ b/DIMS/GenerateExcel.nf
@@ -0,0 +1,24 @@
+process GenerateExcel {
+    tag "DIMS GenerateExcel"
+    label 'GenerateExcel'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       path(collect_files)
+       path(identified_files)
+       path(init_file) 
+       val(analysis_id) 
+       path(relevance_file)
+
+    output:
+       path('*.RData')
+       path('*.txt')
+       path('*.xlsx'), emit: excel_files 
+       path('plots/*.png'), emit: plot_files 
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/GenerateExcel.R $init_file $analysis_id $params.matrix $relevance_file $params.zscore 
+        """
+}
diff --git a/DIMS/GenerateViolinPlots.R b/DIMS/GenerateViolinPlots.R
new file mode 100644
index 0000000..29f41cf
--- /dev/null
+++ b/DIMS/GenerateViolinPlots.R
@@ -0,0 +1,487 @@
+# For untargeted metabolomics, this tool calculates probability scores for
+# metabolic disorders. In addition, it provides visual support with violin plots
+# of the DIMS measurements for the lab specialists.
+# Input needed:
+# 1. Excel file in which metabolites are listed with their intensities for
+#    controls (with C in samplename) and patients (with P in samplename) and their
+#    corresponding Z-scores.
+# 2. All files from github: https://github.com/UMCUGenetics/DIMS
+
+## adapted from 15-dIEM_violin.R
+
+# load packages
+suppressPackageStartupMessages(library("dplyr"))
+library(reshape2)
+library(openxlsx)
+library(ggplot2)
+suppressPackageStartupMessages(library("gridExtra"))
+library(stringr)
+
+# define parameters
+cmd_args <- commandArgs(trailingOnly = TRUE)
+
+run_name <- cmd_args[1]
+scripts_dir <- cmd_args[2]
+z_score <- as.numeric(cmd_args[3])
+path_metabolite_groups <- cmd_args[4]
+file_ratios_metabolites <- cmd_args[5]
+file_expected_biomarkers_iem <- cmd_args[6]
+file_explanation <- cmd_args[7]
+file_isomers <- cmd_args[8]
+
+if (z_score == 1){
+  # path: output folder for dIEM and violin plots
+  output_dir <- "./"
+
+  file.copy(file_isomers, output_dir)
+
+  # load functions
+  source(paste0(scripts_dir, "check_same_samplename.R"))
+  source(paste0(scripts_dir, "prepare_data.R"))
+  source(paste0(scripts_dir, "prepare_data_perpage.R"))
+  source(paste0(scripts_dir, "prepare_toplist.R"))
+  source(paste0(scripts_dir, "create_violin_plots.R"))
+  source(paste0(scripts_dir, "prepare_alarmvalues.R"))
+  source(paste0(scripts_dir, "output_helix.R"))
+  source(paste0(scripts_dir, "get_patient_data_to_helix.R"))
+  source(paste0(scripts_dir, "add_lab_id_and_onderzoeksnummer.R"))
+  source(paste0(scripts_dir, "is_diagnostic_patient.R"))
+
+  # number of diseases that score highest in algorithm to plot
+  top_nr_iem <- 5
+  # probability score cut-off for plotting the top diseases
+  threshold_iem <- 5
+  # z-score cutoff of axis on the left for top diseases
+  ratios_cutoff <- -5
+  # number of violin plots per page in PDF
+  nr_plots_perpage <- 20
+
+  # binary variable: run function, yes(1) or no(0)
+  if (z_score == 1) { 
+    algorithm <- ratios <- violin <- 1 
+  } else {
+    algorithm <- ratios <- violin <- 0 
+  }	
+  # are the sample names headers on row 1 or row 2 in the DIMS excel? (default 1)
+  header_row <- 1
+  # column name where the data starts (default B)
+  col_start <- "B"
+  zscore_cutoff <- 5
+  xaxis_cutoff <- 20
+  protocol_name <- "DIMS_PL_DIAG"
+
+  #### STEP 1: Preparation ####
+  # in: run_name, path_dims_file, header_row ||| out: output_dir, DIMS
+
+  # path to DIMS excel file
+  path_dims_file <- paste0(run_name, ".xlsx")
+
+  # Load the excel file.
+  dims_xls <- readWorkbook(xlsxFile = path_dims_file, sheet = 1, startRow = header_row)
+  if (exists("dims_xls")) {
+    cat(paste0("\nThe excel file is succesfully loaded:\n -> ", path_dims_file))
+  } else {
+    cat(paste0("\n\n**** Error: Could not find an Excel file. Please check location of file:\n -> ", path_dims_file, "\n"))
+  }
+
+  #### STEP 2: Edit DIMS data #####
+  # in: dims_xls ||| out: Data, nr_contr, nr_pat
+  # Input: the xlsx file that comes out of the pipeline with format:
+  # [plots] [C] [P] [summary columns] [C_Zscore] [P_Zscore]
+  # Output: "_CSV.csv" file that is suited for the algorithm in shiny.
+
+  # Determine the number of Contols and Patients in column names:
+  nr_contr <- length(grep("C", names(dims_xls))) / 2
+  nr_pat   <- length(grep("P", names(dims_xls))) / 2
+  # total number of samples
+  nrsamples <- nr_contr + nr_pat
+  # check whether the number of intensity columns equals the number of Zscore columns
+  if (nr_contr + nr_pat != length(grep("_Zscore", names(dims_xls)))) {
+    cat("\n**** Error: there aren't as many intensities listed as Zscores")
+  }
+  cat(paste0("\n\n------------\n", nr_contr, " controls \n", nr_pat, " patients\n------------\n\n"))
+
+  # Move the columns HMDB_code and HMDB_name to the beginning.
+  hmdb_info_cols <- c(which(colnames(dims_xls) == "HMDB_code"), which(colnames(dims_xls) == "HMDB_name"))
+  other_cols <- seq_along(1:ncol(dims_xls))[-hmdb_info_cols]
+  dims_xls_copy <- dims_xls[, c(hmdb_info_cols, other_cols)]
+  # Remove the columns from 'name' to 'pathway'
+  from_col <- which(colnames(dims_xls_copy) == "name")
+  to_col <- which(colnames(dims_xls_copy) == "pathway")
+  dims_xls_copy <- dims_xls_copy[, -c(from_col:to_col)]
+  # in case the excel had an empty "plots" column, remove it
+  if ("plots" %in% colnames(dims_xls_copy)) {
+    dims_xls_copy <- dims_xls_copy[, -grep("plots", colnames(dims_xls_copy))]
+  }
+  # Rename columns
+  names(dims_xls_copy) <- gsub("avg.ctrls", "Mean_controls", names(dims_xls_copy))
+  names(dims_xls_copy) <- gsub("sd.ctrls",  "SD_controls", names(dims_xls_copy))
+  names(dims_xls_copy) <- gsub("HMDB_code", "HMDB.code", names(dims_xls_copy))
+  names(dims_xls_copy) <- gsub("HMDB_name", "HMDB.name", names(dims_xls_copy))
+
+  # intensity columns and mean and standard deviation of controls
+  numeric_cols <- c(3:ncol(dims_xls_copy))
+  # make sure all values are numeric
+  dims_xls_copy[, numeric_cols] <- sapply(dims_xls_copy[, numeric_cols], as.numeric)
+
+  if (exists("dims_xls_copy") & (length(dims_xls_copy) < length(dims_xls))) {
+    cat("\n### Step 2 # Edit dims data is done.\n")
+  } else {
+    cat("\n**** Error: Could not execute step 2 \n")
+  }
+
+  #### STEP 3: Calculate ratios of intensities for metabolites ####
+  # in: ratios, file_ratios_metabolites, dims_xls_copy, nr_contr, nr_pat ||| out: Zscore (+file)
+  # This script loads the file with Ratios (file_ratios_metabolites) and calculates
+  # the ratios of the intensities of the given metabolites. It also calculates
+  # Zs-cores based on the avg and sd of the ratios of the controls.
+
+  # Input: dataframe with intenstities and Zscores of controls and patients:
+  # [HMDB.code] [HMDB.name] [C] [P] [Mean_controls] [SD_controls] [C_Zscore] [P_Zscore]
+
+  # Output: "_CSV.csv" file that is suited for the algorithm, with format:
+  # "_Ratios_CSV.csv" file, same file as above, but with ratio rows added.
+
+  if (ratios == 1) {
+    cat(paste0("\nloading ratios file:\n ->  ", file_ratios_metabolites, "\n"))
+    ratio_input <- read.csv(file_ratios_metabolites, sep = ";", stringsAsFactors = FALSE)
+
+    # Prepare empty data frame to fill with ratios
+    ratio_list <- setNames(data.frame(matrix(
+      ncol = ncol(dims_xls_copy),
+      nrow = nrow(ratio_input)
+    )), colnames(dims_xls_copy))
+    ratio_list <- as.data.frame(ratio_list)
+
+    # put HMDB info into first two columns of ratio_list
+    ratio_list[, 1:2] <- ratio_input[, 1:2]
+
+    # look for intensity columns (exclude Zscore columns)
+    control_cols <- grep("C", colnames(ratio_list)[1:which(colnames(ratio_list) == "Mean_controls")])
+    patient_cols <- grep("P", colnames(ratio_list)[1:which(colnames(ratio_list) == "Mean_controls")])
+    intensity_cols <- c(control_cols, patient_cols)
+    # calculate each of the ratios of intensities
+    for (ratio_index in 1:nrow(ratio_input)) {
+      ratio_numerator <- ratio_input[ratio_index, "HMDB_numerator"]
+      ratio_numerator <- strsplit(ratio_numerator, "plus")[[1]]
+      ratio_denominator <- ratio_input[ratio_index, "HMDB_denominator"]
+      ratio_denominator <- strsplit(ratio_denominator, "plus")[[1]]
+      # find these HMDB IDs in dataset. Could be a sum of multiple metabolites
+      sel_denominator <- sel_numerator <- c()
+      for (numerator_index in 1:length(ratio_numerator)) {
+        sel_numerator <- c(sel_numerator, which(dims_xls_copy[, "HMDB.code"] == ratio_numerator[numerator_index]))
+      }
+      for (denominator_index in 1:length(ratio_denominator)) {
+        # special case for sum of metabolites (dividing by one)
+        if (ratio_denominator[denominator_index] != "one") {
+          sel_denominator <- c(sel_denominator, which(dims_xls_copy[, "HMDB.code"] == ratio_denominator[denominator_index]))
+        }
+      }
+      # calculate ratio
+      if (ratio_denominator[denominator_index] != "one") {
+        ratio_list[ratio_index, intensity_cols] <- apply(dims_xls_copy[sel_numerator, intensity_cols], 2, sum) /
+          apply(dims_xls_copy[sel_denominator, intensity_cols], 2, sum)
+      } else {
+        # special case for sum of metabolites (dividing by one)
+        ratio_list[ratio_index, intensity_cols] <- apply(dims_xls_copy[sel_numerator, intensity_cols], 2, sum)
+      }
+      # calculate log of ratio
+      ratio_list[ratio_index, intensity_cols] <- log2(ratio_list[ratio_index, intensity_cols])
+    }
+
+    # Calculate means and SD's of the calculated ratios for Controls
+    ratio_list[, "Mean_controls"] <- apply(ratio_list[, control_cols], 1, mean)
+    ratio_list[, "SD_controls"]   <- apply(ratio_list[, control_cols], 1, sd)
+
+    # Calc z-scores with the means and SD's of Controls
+    zscore_cols <- grep("Zscore", colnames(ratio_list))
+    for (sample_index in 1:length(zscore_cols)) {
+      zscore_col <- zscore_cols[sample_index]
+      # matching intensity column
+      int_col <- intensity_cols[sample_index]
+      # test on column names
+      if (check_same_samplename(colnames(ratio_list)[int_col], colnames(ratio_list)[zscore_col])) {
+        # calculate Z-scores
+        ratio_list[, zscore_col] <- (ratio_list[, int_col] - ratio_list[, "Mean_controls"]) / ratio_list[, "SD_controls"]
+      }
+    }
+
+    # Add rows of the ratio hmdb codes to the data of zscores from the pipeline.
+    dims_xls_ratios <- rbind(ratio_list, dims_xls_copy)
+
+    # Edit the DIMS output Zscores of all patients in format:
+    # HMDB_code patientname1  patientname2
+    names(dims_xls_ratios) <- gsub("HMDB.code", "HMDB_code", names(dims_xls_ratios))
+    names(dims_xls_ratios) <- gsub("HMDB.name", "HMDB_name", names(dims_xls_ratios))
+
+    # for debugging:
+    write.table(dims_xls_ratios, file = paste0(output_dir, "/ratios.txt"), sep = "\t")
+
+    # Select only the cols with zscores of the patients
+    zscore_patients <- dims_xls_ratios[, c(1, 2, zscore_cols[grep("P", colnames(dims_xls_ratios)[zscore_cols])])]
+    # Select only the cols with zscores of the controls
+    zscore_controls <- dims_xls_ratios[, c(1, 2, zscore_cols[grep("C", colnames(dims_xls_ratios)[zscore_cols])])]
+
+  }
+
+  #### STEP 4: Run the IEM algorithm #########
+  # in: algorithm, file_expected_biomarkers_iem, zscore_patients ||| out: prob_score (+file)
+  # algorithm taken from DOI: 10.3390/ijms21030979
+
+  if (algorithm == 1) {
+    # Load data
+    cat(paste0("\nloading expected file:\n ->  ", file_expected_biomarkers_iem, "\n"))
+    expected_biomarkers <- read.csv(file_expected_biomarkers_iem, sep = ";", stringsAsFactors = FALSE)
+    # modify column names
+    names(expected_biomarkers) <- gsub("HMDB.code", "HMDB_code",  names(expected_biomarkers))
+    names(expected_biomarkers) <- gsub("Metabolite", "HMDB_name", names(expected_biomarkers))
+
+    # prepare dataframe scaffold rank_patients
+    rank_patients <- zscore_patients
+    # Fill df rank_patients with the ranks for each patient
+    for (patient_index in 3:ncol(zscore_patients)) {
+      # number of positive zscores in patient
+      pos <- sum(zscore_patients[, patient_index] > 0)
+      # sort the column on zscore; NB: this sorts the entire object, not just one column
+      rank_patients <- rank_patients[order(-rank_patients[patient_index]), ]
+      # Rank all positive zscores highest to lowest
+      rank_patients[1:pos, patient_index] <- as.numeric(ordered(-rank_patients[1:pos, patient_index]))
+      # Rank all negative zscores lowest to highest
+      rank_patients[(pos + 1):nrow(rank_patients), patient_index] <- as.numeric(ordered(rank_patients[(pos + 1):
+                            nrow(rank_patients), patient_index]))
+    }
+
+    # Calculate metabolite score, using the dataframes with only values, and later add the cols without values (1&2).
+    expected_zscores <- merge(x = expected_biomarkers, y = zscore_patients, by.x = c("HMDB_code"), by.y = c("HMDB_code"))
+    expected_zscores_original <- expected_zscores
+
+    # determine which columns contain Z-scores and which contain disease info
+    select_zscore_cols <- grep("_Zscore", colnames(expected_zscores))
+    select_info_cols <- 1:(min(select_zscore_cols) - 1)
+    # set some zscores to zero
+    select_incr_indisp <- which(expected_zscores$Change == "Increase" & expected_zscores$Dispensability == "Indispensable")
+    expected_zscores[select_incr_indisp, select_zscore_cols] <- lapply(expected_zscores[select_incr_indisp, 
+                      select_zscore_cols], function(x) ifelse (x <= 1.6, 0, x))
+    select_decr_indisp <- which(expected_zscores$Change == "Decrease" & expected_zscores$Dispensability == "Indispensable")
+    expected_zscores[select_decr_indisp, select_zscore_cols] <- lapply(expected_zscores[select_decr_indisp, 
+                      select_zscore_cols], function(x) ifelse (x >= -1.2, 0, x))
+
+    # calculate rank score:
+    expected_ranks <- merge(x = expected_biomarkers, y = rank_patients, by.x = c("HMDB_code"), by.y = c("HMDB_code"))
+    rank_scores <- expected_zscores[order(expected_zscores$HMDB_code), select_zscore_cols] / 
+            (expected_ranks[order(expected_ranks$HMDB_code), select_zscore_cols] * 0.9)
+    # combine disease info with rank scores
+    expected_metabscore <- cbind(expected_ranks[order(expected_zscores$HMDB_code), select_info_cols], rank_scores)
+
+    # multiply weight score and rank score
+    weight_score <- expected_zscores
+    weight_score[, select_zscore_cols] <- expected_metabscore$Total_Weight * expected_metabscore[, select_zscore_cols]
+
+    # sort table on Disease and Absolute_Weight
+    weight_score <- weight_score[order(weight_score$Disease, weight_score$Absolute_Weight, decreasing = TRUE), ]
+
+    # select columns to check duplicates
+    dup <- weight_score[, c("Disease", "M.z")]
+    uni <- weight_score[!duplicated(dup) | !duplicated(dup, fromLast = FALSE), ]
+
+    # calculate probability score
+    prob_score <-  aggregate(uni[, select_zscore_cols], uni["Disease"], sum)
+
+    # list of all diseases that have at least one metabolite Zscore at 0
+    for (patient_index in 2:ncol(prob_score)) {
+      patient_zscore_colname <- colnames(prob_score)[patient_index]
+      matching_colname_expected <- which(colnames(expected_zscores) == patient_zscore_colname)
+      # determine which Zscores are 0 for this patient
+      zscores_zero <- which(expected_zscores[, matching_colname_expected] == 0)
+      # get Disease for these
+      disease_zero <- unique(expected_zscores[zscores_zero, "Disease"])
+      # set the probability score of these diseases to 0
+      prob_score[which(prob_score$Disease %in% disease_zero), patient_index] <- 0
+    }
+
+    # determine disease rank per patient
+    disease_rank <- prob_score
+    # rank diseases in decreasing order
+    disease_rank[2:ncol(disease_rank)] <- lapply(2:ncol(disease_rank), function(x) 
+                  as.numeric(ordered(-disease_rank[1:nrow(disease_rank), x])))
+    # modify column names, Zscores have now been converted to probability scores
+    colnames(prob_score) <- gsub("_Zscore", "_prob_score", colnames(prob_score))
+    colnames(disease_rank) <- gsub("_Zscore", "", colnames(disease_rank))
+
+    # Create conditional formatting for output Excel sheet. Colors according to values.
+    wb <- createWorkbook()
+    addWorksheet(wb, "Probability Scores")
+    writeData(wb, "Probability Scores", prob_score)
+    conditionalFormatting(wb, "Probability Scores", cols = 2:ncol(prob_score), rows = 1:nrow(prob_score), 
+        type = "colourScale", style = c("white", "#FFFDA2", "red"), rule = c(1, 10, 100))
+    saveWorkbook(wb, file = paste0(output_dir, "/dIEM_algoritme_output_", run_name, ".xlsx"), overwrite = TRUE)
+    # check whether prob_score df exists and has expected dimensions.
+    if (exists("expected_biomarkers") & (length(disease_rank) == length(prob_score))) {
+      cat("\n### Step 4 # Running the IEM algorithm is done.\n\n")
+    } else {
+      cat("\n**** Error: Could not run IEM algorithm. Check if path to expected_biomarkers csv-file is correct. \n")
+    }
+
+    rm(wb)
+  }
+
+  #### STEP 5: Make violin plots #####
+  # in: algorithm / zscore_patients, violin, nr_contr, nr_pat, Data, path_textfiles, zscore_cutoff, xaxis_cutoff, 
+  # top_diseases, top_metab, output_dir ||| out: pdf file, Helix csv file
+
+  if (violin == 1) {
+
+    # preparation
+    zscore_patients_copy <- zscore_patients
+    # for robust scaler, rename Z-score columns
+    colnames(zscore_patients) <- gsub("_RobustZscore", "_Zscore", colnames(zscore_patients))
+    colnames(zscore_controls) <- gsub("_RobustZscore", "_Zscore", colnames(zscore_controls))
+    colnames(zscore_patients) <- gsub("_Zscore", "", colnames(zscore_patients))
+    colnames(zscore_controls) <- gsub("_Zscore", "", colnames(zscore_controls))
+
+    # Make patient list for violin plots
+    patient_list <- names(zscore_patients)[-c(1, 2)]
+
+    # from table expected_biomarkers, choose selected columns
+    select_columns <- c("Disease", "HMDB_code", "HMDB_name")
+    select_col_nrs <- which(colnames(expected_biomarkers) %in% select_columns)
+    expected_biomarkers_select <- expected_biomarkers[, select_col_nrs]
+    # remove duplicates
+    expected_biomarkers_select <- expected_biomarkers_select[!duplicated(expected_biomarkers_select[, c(1, 2)]), ]
+
+    # load file with explanatory information to be included in PDF.
+    explanation <- readLines(file_explanation)
+
+    # first step: normal violin plots
+    # Find all text files in the given folder, which contain metabolite lists of which
+    # each file will be a page in the pdf with violin plots.
+    # Make a PDF file for each of the categories in metabolite_dirs
+    metabolite_dirs <- list.files(path = path_metabolite_groups, full.names = FALSE, recursive = FALSE)
+    for (metabolite_dir in metabolite_dirs) {
+      # create a directory for the output PDFs
+      pdf_dir <- paste(output_dir, metabolite_dir, sep = "/")
+      dir.create(pdf_dir, showWarnings = FALSE)
+      cat("making plots in category:", metabolite_dir, "\n")
+
+      # get a list of all metabolite files
+      metabolite_files <- list.files(path = paste(path_metabolite_groups, metabolite_dir, sep = "/"), 
+            pattern = "*.txt", full.names = FALSE, recursive = FALSE)
+      # put all metabolites into one list
+      metab_list_all <- list()
+      metab_list_names <- c()
+      cat("making plots from the input files:")
+      # open the text files and add each to a list of dataframes (metab_list_all)
+      for (file_index in seq_along(metabolite_files)) {
+        infile <- metabolite_files[file_index]
+        metab_list <- read.table(paste(path_metabolite_groups, metabolite_dir, infile, sep = "/"), 
+              sep = "\t", header = TRUE, quote = "")
+        # put into list of all lists
+        metab_list_all[[file_index]] <- metab_list
+        metab_list_names <- c(metab_list_names, strsplit(infile, ".txt")[[1]][1])
+        cat(paste0("\n", infile))
+      }
+      # include list of classes in metabolite list
+      names(metab_list_all) <- metab_list_names
+
+      # prepare list of metabolites; max nr_plots_perpage on one page
+      metab_interest_sorted <- prepare_data(metab_list_all, zscore_patients)
+      metab_interest_controls <- prepare_data(metab_list_all, zscore_controls)
+      metab_perpage <- prepare_data_perpage(metab_interest_sorted, metab_interest_controls, nr_plots_perpage, nr_pat, nr_contr)
+
+      # for Diagnostics metabolites to be saved in Helix
+      if(grepl("Diagnost", pdf_dir)) {
+        # get table that combines DIMS results with stofgroepen/Helix table
+        dims_helix_table <- get_patient_data_to_helix(metab_interest_sorted, metab_list_all)
+        
+        # check if run contains Diagnostics patients (e.g. "P2024M"), not for research runs
+        if(any(is_diagnostic_patient(dims_helix_table$Patient))){
+          # get output file for Helix
+          output_helix <- output_for_helix(protocol_name, dims_helix_table)
+          # write output to file
+          path_helixfile <- paste0(output_dir, "/output_Helix_", run_name,".csv")
+          write.csv(output_helix, path_helixfile, quote = F, row.names = F)
+        }
+      }
+      
+      # make violin plots per patient
+      for (pt_nr in 1:length(patient_list)) {
+        pt_name <- patient_list[pt_nr]
+        # for category Diagnostics, make list of metabolites that exceed alarm values for this patient
+        # for category Other, make list of top highest and lowest Z-scores for this patient
+        if (grepl("Diagnost", pdf_dir)) {
+          top_metab_pt <- prepare_alarmvalues(pt_name, dims_helix_table)
+        } else {
+          top_metab_pt <- prepare_toplist(pt_name, zscore_patients)
+        }
+
+        # generate normal violin plots
+        create_violin_plots(pdf_dir, pt_name, metab_perpage, top_metab_pt)
+
+      }
+
+    }
+
+    # Second step: dIEM plots in separate directory
+    diem_plot_dir <- paste(output_dir, "dIEM_plots", sep = "/")
+    dir.create(diem_plot_dir)
+
+    # Select the metabolites that are associated with the top highest scoring IEM, for each patient
+    # disease_rank is from step 4: the dIEM algorithm. The lower the value, the more likely.
+    for (pt_nr in 1:length(patient_list)) {
+      pt_name <- patient_list[pt_nr]
+      # get top diseases for this patient
+      pt_colnr <- which(colnames(disease_rank) == pt_name)
+      pt_top_indices <- which(disease_rank[, pt_colnr] <= top_nr_iem)
+      pt_iems <- disease_rank[pt_top_indices, "Disease"]
+      pt_top_iems <- pt_prob_score_top_iems <- c()
+      for (single_iem in pt_iems) {
+        # get the probability score
+        prob_score_iem <- prob_score[which(prob_score$Disease == single_iem), pt_colnr]
+        # use only diseases for which probability score is above threshold
+        if (prob_score_iem >= threshold_iem) {
+          pt_top_iems <- c(pt_top_iems, single_iem)
+          pt_prob_score_top_iems <- c(pt_prob_score_top_iems, prob_score_iem)
+        }
+      }
+
+      # prepare data for plotting dIEM violin plots
+      # If prob_score_top_iem is an empty list, don't make a plot
+      if (length(pt_top_iems) > 0) {
+        # Sorting from high to low, both prob_score_top_iems and pt_top_iems.
+        pt_prob_score_order <- order(-pt_prob_score_top_iems)
+        pt_prob_score_top_iems <- round(pt_prob_score_top_iems, 1)
+        pt_prob_score_top_iem_sorted <- pt_prob_score_top_iems[pt_prob_score_order]
+        pt_top_iem_sorted <- pt_top_iems[pt_prob_score_order]
+        # getting metabolites for each top_iem disease exactly like in metab_list_all
+        metab_iem_all <- list()
+        metab_iem_names <- c()
+        for (single_iem_index in 1:length(pt_top_iem_sorted)) {
+          single_iem <- pt_top_iem_sorted[single_iem_index]
+          single_prob_score <- pt_prob_score_top_iem_sorted[single_iem_index]
+          select_rows <- which(expected_biomarkers_select$Disease == single_iem)
+          metab_list <- expected_biomarkers_select[select_rows, ]
+          metab_iem_names <- c(metab_iem_names, paste0(single_iem, ", probability score ", single_prob_score))
+          metab_list <- metab_list[, -1]
+          metab_iem_all[[single_iem_index]] <- metab_list
+        }
+        # put all metabolites into one list
+        names(metab_iem_all) <- metab_iem_names
+
+        # get Zscore information from zscore_patients_copy, similar to normal violin plots
+        metab_iem_sorted <- prepare_data(metab_iem_all, zscore_patients_copy)
+        metab_iem_controls <- prepare_data(metab_iem_all, zscore_controls)
+        # make sure every page has 20 metabolites
+        diem_metab_perpage <- prepare_data_perpage(metab_iem_sorted, metab_iem_controls, nr_plots_perpage, nr_pat)
+
+        # generate dIEM violin plots
+        create_violin_plots(diem_plot_dir, pt_name, diem_metab_perpage, top_metab_pt)
+
+      } else {
+        cat(paste0("\n\n**** This patient had no prob_scores higher than ", threshold_iem, ".
+                    Therefore, this pdf was not made:\t ", pt_name, "_iem \n"))
+      }
+
+    }
+
+  }
+}
\ No newline at end of file
diff --git a/DIMS/GenerateViolinPlots.nf b/DIMS/GenerateViolinPlots.nf
new file mode 100644
index 0000000..44323ac
--- /dev/null
+++ b/DIMS/GenerateViolinPlots.nf
@@ -0,0 +1,27 @@
+process GenerateViolinPlots {
+    tag "DIMS GenerateViolinPlots"
+    label 'GenerateViolinPlots'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       path(excel_file)
+       val(analysis_id)
+
+    output:
+       path('Diagnostics/*.pdf'), emit: diag_plot_files, optional: true
+       path('Other/*.pdf'), emit: other_plot_files, optional: true
+       path('dIEM_plots/*.pdf'), emit: diem_plot_files, optional: true
+       path('*.xlsx'), emit: excel_file, optional: true
+       path('*.csv'), emit: helix_file, optional: true
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/GenerateViolinPlots.R $analysis_id $params.scripts_dir $params.zscore \
+                                                                    $params.path_metabolite_groups \
+                                                                    $params.file_ratios_metabolites \
+                                                                    $params.file_expected_biomarkers_IEM \
+                                                                    $params.file_explanation \
+                                                                    $params.file_isomers
+        """
+}
diff --git a/DIMS/HMDBparts.R b/DIMS/HMDBparts.R
new file mode 100644
index 0000000..3e7356d
--- /dev/null
+++ b/DIMS/HMDBparts.R
@@ -0,0 +1,110 @@
+# adapted from hmdb_parts.R
+
+# define parameters 
+cmd_args <- commandArgs(trailingOnly = TRUE)
+
+db_file <- cmd_args[1] 
+breaks_file <- cmd_args[2] 
+standard_run <- cmd_args[4] 
+
+# load file with binning breaks 
+load(breaks_file)
+min_mz <- round(breaks_fwhm[1])
+max_mz <- round(breaks_fwhm[length(breaks_fwhm)])
+
+# In case of a standard run use external HMDB parts
+# m/z is approximately 70 to 600: set limits between 68-71 for min and 599-610 for max
+if (standard_run == "yes" & min_mz > 68 & min_mz < 71 & max_mz > 599 & max_mz < 610) {
+  # skip generating HMDB parts
+  hmdb_parts_path <- cmd_args[3] 
+  # find all files containing hmdb in file name
+  hmdb_parts <- list.files(hmdb_parts_path, pattern = "hmdb") 
+  for (hmdb_file in hmdb_parts) {
+    file.copy(paste(hmdb_parts_path, hmdb_file, sep = "/"), "./", recursive = TRUE)
+  }
+} else { 
+  # generate HMDB parts in case of non-standard mz range
+  load(db_file)
+  ppm <- as.numeric(cmd_args[5])
+
+  scanmodes <- c("positive", "negative")
+  for (scanmode in scanmodes) {
+    if (scanmode == "negative") {
+      column_label <- "MNeg"
+      hmdb_add_iso <- hmdb_add_iso.Neg
+    } else if (scanmode == "positive") {
+      column_label <- "Mpos"
+      hmdb_add_iso <- hmdb_add_iso.Pos
+    }
+
+    # filter mass range meassured
+    hmdb_add_iso = hmdb_add_iso[which(hmdb_add_iso[ , column_label] >= breaks_fwhm[1] &
+                                      hmdb_add_iso[ , column_label] <= breaks_fwhm[length(breaks_fwhm)]), ]
+
+    # sort on mass
+    outlist <- hmdb_add_iso[order(as.numeric(hmdb_add_iso[ , column_label])),]
+    nr_rows <- dim(outlist)[1]
+
+    # maximum number of rows per file
+    sub <- 20000 
+    end <- 0
+    last_line <- sub
+    check <- 0
+    outlist_part <- NULL
+
+    # create parts and save to file
+    if (nr_rows < sub) {
+      outlist_part <- outlist
+      save(outlist_part, file = paste0(scanmode, "_hmdb.1.RData"))
+    } else if (nr_rows >= sub & (floor(nr_rows / sub) - 1) >= 2) {
+      for (i in 2:floor(nr_rows / sub) - 1) {
+        start <- -(sub - 1) + i * sub
+        end <- i * sub
+
+        if (i > 1){
+          outlist_i = outlist[c(start:end),]
+          nr_moved = 0
+          # Use ppm to replace border to avoid cut within peak group
+          while ((as.numeric(outlist_i[1, column_label]) - as.numeric(outlist_part[last_line, column_label])) * 1e+06 / 
+                  as.numeric(outlist_i[1, column_label]) < ppm) {
+            outlist_part <- rbind(outlist_part, outlist_i[1, ])
+            outlist_i <- outlist_i[-1, ]
+            nr_moved <- nr_moved + 1
+          }
+
+          save(outlist_part, file = paste(scanmode, "_", paste("hmdb", i-1, "RData", sep = "."), sep = ""))
+          check <- check + dim(outlist_part)[1]
+
+          outlist_part <- outlist_i
+          last_line <- dim(outlist_part)[1]
+
+        } else {
+          outlist_part <- outlist[c(start:end),]
+        }
+      }
+
+      start <- end + 1
+      end <- nr_rows
+      outlist_i <- outlist[c(start:end), ]
+      nr_moved <- 0
+
+      if (!is.null(outlist_part)) {
+        # Calculate ppm and replace border, avoid cut within peak group
+        while ((as.numeric(outlist_i[1, column_label]) - as.numeric(outlist_part[last_line, column_label])) * 1e+06 / 
+                as.numeric(outlist_i[1, column_label]) < ppm) {
+          outlist_part <- rbind(outlist_part, outlist_i[1, ])
+          outlist_i <- outlist_i[-1, ]
+          nr_moved <- nr_moved + 1
+        }
+
+        save(outlist_part, file = paste0(scanmode, "_hmdb_", i, ".RData"))
+        check <- check + dim(outlist_part)[1]
+      }
+
+      outlist_part <- outlist_i
+      save(outlist_part, file = paste0(scanmode, "_hmdb_", i + 1, ".RData"))
+      check <- check + dim(outlist_part)[1]
+    }
+  }
+}
+
diff --git a/DIMS/HMDBparts.nf b/DIMS/HMDBparts.nf
new file mode 100644
index 0000000..5f19f75
--- /dev/null
+++ b/DIMS/HMDBparts.nf
@@ -0,0 +1,18 @@
+process HMDBparts {
+    tag "DIMS HMDBparts"
+    label 'HMDBparts'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        path(hmdb_db_file)
+        path(breaks_file)
+
+    output:
+        path('*.RData')
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/HMDBparts.R $hmdb_db_file $breaks_file $params.hmdb_parts_files $params.standard_run $params.ppm
+        """
+}
diff --git a/DIMS/HMDBparts_main.R b/DIMS/HMDBparts_main.R
new file mode 100644
index 0000000..14335bf
--- /dev/null
+++ b/DIMS/HMDBparts_main.R
@@ -0,0 +1,56 @@
+## adapted from hmdb_part_adductSums.R
+
+# define parameters 
+cmd_args <- commandArgs(trailingOnly = TRUE)
+
+db_file <- cmd_args[1]
+breaks_file <- cmd_args[2]
+
+load(db_file)
+load(breaks_file)
+
+# Cut up HMDB minus adducts minus isotopes into small parts 
+scanmodes <- c("positive", "negative")
+for (scanmode in scanmodes) {
+  if (scanmode == "negative") {
+    column_label <- "MNeg"
+    HMDB_add_iso <- HMDB_add_iso.Neg
+  } else if (scanmode == "positive") {
+    column_label <- "Mpos"
+    HMDB_add_iso <- HMDB_add_iso.Pos
+  }
+
+  # filter mass range measured
+  outlist <- HMDB_add_iso[which(HMDB_add_iso[ , column_label] >= breaks_fwhm[1] & 
+             HMDB_add_iso[ ,column_label] <= breaks_fwhm[length(breaks_fwhm)]), ]
+
+  # remove adducts and isotopes, put internal standard at the beginning
+  outlist <- outlist[grep("HMDB", rownames(outlist), fixed = TRUE), ]
+  outlist <- outlist[-grep("_", rownames(outlist), fixed = TRUE), ]
+  # sort on m/z value
+  outlist <- outlist[order(outlist[ , column_label]), ]
+  nr_rows <- dim(outlist)[1]
+
+  # size of hmdb parts in lines:
+  sub <- 1000
+  end <- 0
+  check <- 0
+
+  # generate hmdb parts
+  if (nr_rows >= sub & (floor(nr_rows / sub)) >= 2) {
+    for (i in 1:floor(nr_rows / sub)) {
+      start <- -(sub - 1) + i * sub
+      end <- i * sub
+      outlist_part <- outlist[c(start:end), ]
+      save(outlist_part, file=paste0(scanmode, "_hmdb_main.", i, ".RData"))
+    }
+  }
+
+  # finish last hmdb part
+  start <- end + 1
+  end <- nr_rows
+
+  outlist_part <- outlist[c(start:end), ]
+  save(outlist_part, file = paste0(scanmode, "_hmdb_main.", i + 1, ".RData"))
+
+}
\ No newline at end of file
diff --git a/DIMS/HMDBparts_main.nf b/DIMS/HMDBparts_main.nf
new file mode 100644
index 0000000..b38bac0
--- /dev/null
+++ b/DIMS/HMDBparts_main.nf
@@ -0,0 +1,19 @@
+process HMDBparts_main {
+    tag "DIMS HMDBparts_main"
+    label 'HMDBparts_main'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        path(hmdb_db_file)
+        path(breaks_file)
+
+    output:
+        path('*.RData')
+
+    script:
+
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/HMDBparts_main.R $hmdb_db_file $breaks_file 
+        """
+}
diff --git a/DIMS/MakeInit.R b/DIMS/MakeInit.R
new file mode 100644
index 0000000..44d4996
--- /dev/null
+++ b/DIMS/MakeInit.R
@@ -0,0 +1,31 @@
+## adapted from makeInit in old pipeline
+
+# define parameters
+args <- commandArgs(trailingOnly = TRUE)
+
+sample_sheet <- read.csv(args[1], sep = "\t")
+nr_replicates <- as.numeric(args[2])
+
+sample_names <- trimws(as.vector(unlist(sample_sheet[1])))
+nr_sample_groups <- length(sample_names) / nr_replicates
+group_names <- trimws(as.vector(unlist(sample_sheet[2])))
+group_names <- gsub("[^-.[:alnum:]]", "_", group_names)
+group_names_unique <- unique(group_names)
+
+# generate the replication pattern
+repl_pattern <- c()
+for (sample_group in 1:nr_sample_groups) {
+  tmp <- c()
+  for (repl in nr_replicates:1) {
+    index <- ((sample_group * nr_replicates) - repl) + 1
+    tmp <- c(tmp, sample_names[index])
+  }
+  repl_pattern <- c(repl_pattern, list(tmp))
+}
+
+names(repl_pattern) <- group_names_unique
+
+# preview the replication pattern
+print(tail(repl_pattern))
+
+save(repl_pattern, file = "init.RData")
diff --git a/DIMS/MakeInit.nf b/DIMS/MakeInit.nf
new file mode 100644
index 0000000..7aae0e4
--- /dev/null
+++ b/DIMS/MakeInit.nf
@@ -0,0 +1,18 @@
+process MakeInit {
+    tag "DIMS MakeInit"
+    label 'MakeInit'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       path(samplesheet) 
+       val(nr_replicates)
+
+    output:
+       path('init.RData')
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/MakeInit.R $samplesheet $nr_replicates 
+        """
+}
diff --git a/DIMS/PeakFinding.R b/DIMS/PeakFinding.R
new file mode 100644
index 0000000..697f8f7
--- /dev/null
+++ b/DIMS/PeakFinding.R
@@ -0,0 +1,53 @@
+## adapted from 4-peakFinding.R
+
+# define parameters
+cmd_args <- commandArgs(trailingOnly = TRUE)
+
+sample_file <- cmd_args[1]
+breaks_file <- cmd_args[2]
+resol <- as.numeric(cmd_args[3])
+scripts_dir <- cmd_args[4]
+thresh <- 2000
+outdir <- "./"
+
+# load in function scripts
+source(paste0(scripts_dir, "do_peakfinding.R"))
+source(paste0(scripts_dir, "check_overlap.R"))
+source(paste0(scripts_dir, "search_mzrange.R"))
+source(paste0(scripts_dir, "fit_optim.R"))
+source(paste0(scripts_dir, "fit_gaussian.R"))
+source(paste0(scripts_dir, "fit_init.R"))
+source(paste0(scripts_dir, "get_fwhm.R"))
+source(paste0(scripts_dir, "get_stdev.R"))
+source(paste0(scripts_dir, "optimize_gaussfit.R"))
+source(paste0(scripts_dir, "fit_peaks.R"))
+source(paste0(scripts_dir, "fit_gaussians.R"))
+source(paste0(scripts_dir, "estimate_area.R"))
+source(paste0(scripts_dir, "get_fit_quality.R"))
+source(paste0(scripts_dir, "check_overlap.R"))
+source(paste0(scripts_dir, "sum_curves.R"))
+source(paste0(scripts_dir, "within_ppm.R"))
+
+load(breaks_file)
+
+# Load output of AverageTechReplicates for a sample
+sample_avgtechrepl <- get(load(sample_file))
+if (grepl("_pos", sample_file)) {
+  scanmode <- "positive"
+} else if (grepl("_neg", sample_file)) {
+  scanmode <- "negative"
+}
+
+# Initialize
+options(digits = 16)
+# Number used to calculate area under Gaussian curve
+int_factor <- 1 * 10^5 
+# Initial value used to estimate scaling parameter
+scale <- 2
+width <- 1024
+height <- 768
+
+# run the findPeaks function
+
+# do_peakfinding(sample_avgtechrepl, breaks_fwhm, int_factor, scale, resol, outdir, scanmode, FALSE, thresh, width, height)
+do_peakfinding(sample_avgtechrepl, int_factor, scale, resol, outdir, scanmode, FALSE, thresh, width, height)
diff --git a/DIMS/PeakFinding.nf b/DIMS/PeakFinding.nf
new file mode 100644
index 0000000..0097d12
--- /dev/null
+++ b/DIMS/PeakFinding.nf
@@ -0,0 +1,17 @@
+process PeakFinding {
+    tag "DIMS PeakFinding ${rdata_file}"
+    label 'PeakFinding'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       tuple(path(rdata_file), path(breaks_file))
+
+    output:
+       path '*tive.RData'
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/PeakFinding.R $rdata_file $breaks_file $params.resolution $params.scripts_dir
+        """
+}
diff --git a/DIMS/PeakGrouping.R b/DIMS/PeakGrouping.R
new file mode 100644
index 0000000..dc29e76
--- /dev/null
+++ b/DIMS/PeakGrouping.R
@@ -0,0 +1,198 @@
+# adapted from 6-peakGrouping.R
+
+# define parameters
+cmd_args <- commandArgs(trailingOnly = TRUE)
+
+hmdb_part_file <- cmd_args[1]
+ppm <- as.numeric(cmd_args[2])
+
+options(digits = 16)
+
+# load part of the HMDB
+hmdb_add_iso <- get(load(hmdb_part_file))
+
+# determine appropriate scanmode based on hmdb_part_file
+if (grepl("negative", basename(hmdb_part_file))) {
+  scanmode <- "negative"
+} else if (grepl("positive", basename(hmdb_part_file))) {
+  scanmode <- "positive"
+}
+
+# determine batch number of HMDB part file
+batch_number <- strsplit(basename(hmdb_part_file), ".", fixed = TRUE)[[1]][2]
+
+# load file with spectrum peaks
+spec_peaks_file <- paste0("SpectrumPeaks_", scanmode, ".RData")
+load(spec_peaks_file)
+outlist_copy <- outlist_total
+rm(outlist_total)
+
+# load replication pattern
+pattern_file <- paste0(scanmode, "_repl_pattern.RData")
+load(pattern_file)
+
+# determine appropriate column name in HMDB part
+if (scanmode == "negative") {
+  column_label <- "MNeg"
+} else {
+  column_label <- "Mpos"
+}
+
+# Initialize
+peakgrouplist_identified <- NULL
+list_of_peaks_used_in_peak_groups_identified <- NULL
+
+# First find peak groups identified based on HMDB masses
+while (dim(hmdb_add_iso)[1] > 0) {
+  index <- 1
+
+  # take one m/z value from the HMDB part and calculate mass tolerance
+  reference_mass <- as.numeric(hmdb_add_iso[index, column_label])
+  mass_tolerance <- (reference_mass * ppm) / 10^6
+
+  # find the peaks in the dataset with corresponding m/z
+  mzmed <- as.numeric(outlist_copy[, "mzmed.pkt"])
+  selp <- which((mzmed > (reference_mass - mass_tolerance)) & (mzmed < (reference_mass + mass_tolerance)))
+  tmplist <- outlist_copy[selp, , drop = FALSE]
+  list_of_peaks_used_in_peak_groups_identified <- rbind(list_of_peaks_used_in_peak_groups_identified, tmplist)
+  nrsamples <- length(selp)
+  # if peaks have been found, create a peak group
+  if (nrsamples > 0) {
+    mzmed_pgrp <- mean(as.numeric(outlist_copy[selp, "mzmed.pkt"]))
+    mzmin_pgrp <- reference_mass - mass_tolerance
+    mzmax_pgrp <- reference_mass + mass_tolerance
+
+    # determine fit quality fq
+    fq_worst_pgrp <- as.numeric(max(outlist_copy[selp, "fq"]))
+    fq_best_pgrp <- as.numeric(min(outlist_copy[selp, "fq"]))
+
+    # set up object for intensities for all samples
+    ints_allsamps <- rep(0, length(names(repl_pattern_filtered)))
+    names(ints_allsamps) <- names(repl_pattern_filtered)
+
+    # Check for each sample if multiple peaks exist, if so take the sum of the intensities
+    labels <- unique(tmplist[, "samplenr"])
+    ints_allsamps[labels] <- as.vector(unlist(lapply(labels, function(x) {
+      sum(as.numeric(tmplist[which(tmplist[, "samplenr"] == x), "height.pkt"]))
+    })))
+
+    # Initialize
+    assi_hmdb <- iso_hmdb <- hmdb_code <- NA
+    tmplist_mass_iso <- tmplist_mass_adduct <- NULL
+
+    # find all entries in HMDB part with mass within ppm range
+    mass_all <- as.numeric(hmdb_add_iso[, column_label])
+    index <- which((mass_all > (reference_mass - mass_tolerance)) & (mass_all < (reference_mass + mass_tolerance)))
+    tmplist_mass <- hmdb_add_iso[index, , drop = FALSE]
+
+    if (dim(tmplist_mass)[1] > 0) {
+      # find isotope entries
+      index_iso <- grep(" iso ", tmplist_mass[, "CompoundName"], fixed = TRUE)
+      if (length(index_iso) > 0) {
+        tmplist_mass_iso <- tmplist_mass[index_iso, , drop = FALSE]
+        tmplist_mass <- tmplist_mass[-index_iso, , drop = FALSE]
+      }
+
+      if (dim(tmplist_mass)[1] > 0) {
+        # find adduct entries
+        index_adduct <- grep(" [M", tmplist_mass[, "CompoundName"], fixed = TRUE)
+        if (length(index_adduct) > 0) {
+          tmplist_mass_adduct <- tmplist_mass[index_adduct, , drop = FALSE]
+          tmplist_mass <- tmplist_mass[-index_adduct, , drop = FALSE]
+        }
+      }
+
+      # Compose a list of compounds, adducts or isotopes with corresponding m/z
+      if (dim(tmplist_mass)[1] > 0) {
+        # metabolites
+        assi_hmdb <- as.character(paste(as.character(tmplist_mass[, "CompoundName"]), 
+					collapse = ";"))
+        hmdb_code <- as.character(paste(as.character(rownames(tmplist_mass)), 
+					collapse = ";"))
+        theormz_hmdb <- as.numeric(tmplist_mass[1, column_label])
+
+        # adducts of metabolites
+        if (!is.null(tmplist_mass_adduct)) {
+          if (dim(tmplist_mass_adduct)[1] > 0) {
+            if (is.na(assi_hmdb)) {
+              assi_hmdb <- as.character(paste(as.character(tmplist_mass_adduct[, "CompoundName"]), 
+					      collapse = ";"))
+              hmdb_code <- as.character(paste(as.character(rownames(tmplist_mass_adduct)), 
+					      collapse = ";"))
+            } else {
+              assi_hmdb <- paste(assi_hmdb,
+                                 as.character(paste(as.character(tmplist_mass_adduct[, "CompoundName"]), 
+						    collapse = ";")), sep = ";")
+              hmdb_code <- paste(hmdb_code,
+                                 as.character(paste(as.character(rownames(tmplist_mass_adduct)), 
+						    collapse = ";")), sep = ";")
+            }
+          }
+        }
+
+        # isotopes of metabolites
+        if (!is.null(tmplist_mass_iso)) {
+          if (dim(tmplist_mass_iso)[1] > 0) {
+            iso_hmdb <- as.character(paste(as.character(tmplist_mass_iso[, "CompoundName"]), 
+					   collapse = ";"))
+          }
+        }
+
+        # if no metabolites have the correct m/z, look for adducts and isotopes only
+      } else if (!is.null(tmplist_mass_adduct)) {
+        theormz_hmdb <- as.numeric(tmplist_mass_adduct[1, column_label])
+
+        # adducts of metabolites
+        if (!is.null(tmplist_mass_adduct)) {
+          if (dim(tmplist_mass_adduct)[1] > 0) {
+            if (is.na(assi_hmdb)) {
+              assi_hmdb <- as.character(paste(as.character(tmplist_mass_adduct[, "CompoundName"]), 
+					      collapse = ";"))
+              hmdb_code <- as.character(paste(as.character(rownames(tmplist_mass_adduct)), 
+					      collapse = ";"))
+            } else {
+              assi_hmdb <- paste(assi_hmdb,
+                                 as.character(paste(as.character(tmplist_mass_adduct[, "CompoundName"]), 
+						    collapse = ";")), sep = ";")
+              hmdb_code <- paste(hmdb_code,
+                                 as.character(paste(as.character(rownames(tmplist_mass_adduct)), 
+						    collapse = ";")), sep = ";")
+            }
+          }
+        }
+
+        # isotopes of metabolites
+        if (!is.null(tmplist_mass_iso)) {
+          if (dim(tmplist_mass_iso)[1] > 0) {
+            iso_hmdb <- as.character(paste(as.character(tmplist_mass_iso[, "CompoundName"]), 
+					   collapse = ";"))
+          }
+        }
+
+        # if no metabolites or adducts can be found, only look for isotopes
+      } else if (!is.null(tmplist_mass_iso)) {
+        if (dim(tmplist_mass_iso)[1] > 0) {
+          theormz_hmdb <- as.numeric(tmplist_mass_iso[1, column_label])
+          iso_hmdb <- as.character(paste(as.character(tmplist_mass_iso[, "CompoundName"]), 
+					 collapse = ";"))
+        }
+      }
+    }
+
+    # combine all information
+    peakgrouplist_identified <- rbind(peakgrouplist_identified, cbind(
+      data.frame("mzmed.pgrp" = mzmed_pgrp, "fq.best" = fq_best_pgrp, "fq.worst" = fq_worst_pgrp, nrsamples, 
+		 "mzmin.pgrp" = mzmin_pgrp, "mzmax.pgrp" = mzmax_pgrp),
+      t(as.matrix(ints_allsamps)),
+      data.frame("assi_HMDB" = assi_hmdb, "iso_HMDB" = iso_hmdb, "HMDB_code" = hmdb_code, "theormz_HMDB" = theormz_hmdb)
+    ))
+  }
+
+  # remove index metabolite from HMDB part and continue while loop
+  hmdb_add_iso <- hmdb_add_iso[-index, ]
+}
+
+# save peak list corresponding to masses in HMDB part
+save(list_of_peaks_used_in_peak_groups_identified, file = paste0(batch_number, "_", scanmode, "_peaks_used.RData"))
+# save peak group list, identified part
+save(peakgrouplist_identified, file = paste0(batch_number, "_", scanmode, "_identified.RData"))
diff --git a/DIMS/PeakGrouping.nf b/DIMS/PeakGrouping.nf
new file mode 100644
index 0000000..ae4382c
--- /dev/null
+++ b/DIMS/PeakGrouping.nf
@@ -0,0 +1,20 @@
+process PeakGrouping {
+    tag "DIMS PeakGrouping ${hmdbpart_file}"
+    label 'PeakGrouping'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       path(hmdbpart_file)
+       each path(spectrumpeak_file)
+       each path(pattern_file)
+
+    output:
+       path '*_peaks_used.RData', emit: peaks_used
+       path '*_identified.RData', emit: grouped_identified
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/PeakGrouping.R $hmdbpart_file $params.ppm
+        """
+}
diff --git a/DIMS/SpectrumPeakFinding.R b/DIMS/SpectrumPeakFinding.R
new file mode 100644
index 0000000..8b249e2
--- /dev/null
+++ b/DIMS/SpectrumPeakFinding.R
@@ -0,0 +1,61 @@
+## adapted from 5-collectSamples.R
+
+# define parameters
+scanmodes <- c("positive", "negative")
+
+# Check whether all jobs terminated correctly
+not_run <- NULL
+
+# collect spectrum peaks for each scanmode
+for (scanmode in scanmodes) {
+  # load peak lists of all biological samples
+  peaklist_files <- list.files(pattern = paste0("_", scanmode, ".RData"))
+
+  # get sample names
+  load(paste0(scanmode, "_repl_pattern.RData"))
+  group_names <- names(repl_pattern_filtered)
+  for (sample_nr in 1:length(group_names)) {
+    group <- paste0(group_names[sample_nr], "_", scanmode, ".RData")
+    if (!(group %in% peaklist_files)) {
+      not_run <- c(not_run, group)
+    }
+  }
+
+  # Collecting samples
+  outlist_total <- NULL
+  for (file_nr in 1:length(peaklist_files)) {
+    cat("\n", peaklist_files[file_nr])
+    load(peaklist_files[file_nr])
+    if (is.null(outlist_persample) || (dim(outlist_persample)[1] == 0)) {
+      tmp <- strsplit(peaklist_files[file_nr], "/")[[1]]
+      fname <- tmp[length(tmp)]
+      fname <- strsplit(fname, ".RData")[[1]][1]
+      fname <- substr(fname, 13, nchar(fname))
+      if (file_nr == 1) {
+        outlist_total <- c(fname, rep("-1", 5))
+      } else {
+        outlist_total <- rbind(outlist_total, c(fname, rep("-1", 5)))
+      }
+    } else {
+      if (file_nr == 1) {
+        outlist_total <- outlist_persample
+      } else {
+        outlist_total <- rbind(outlist_total, outlist_persample)
+      }
+    }
+  }
+
+  # remove negative values
+  index <- which(outlist_total[, "height.pkt"] <= 0)
+  if (length(index) > 0) outlist_total <- outlist_total[-index, ]
+  index <- which(outlist_total[, "mzmed.pkt"] <= 0)
+  if (length(index) > 0) outlist_total <- outlist_total[-index, ]
+
+  save(outlist_total, file = paste0("./SpectrumPeaks_", scanmode, ".RData"))
+
+  if (!is.null(not_run)) {
+    for (i in 1:length(not_run)) {
+      message(paste(not_run[i], "was not generated"))
+    }
+  }
+}
diff --git a/DIMS/SpectrumPeakFinding.nf b/DIMS/SpectrumPeakFinding.nf
new file mode 100644
index 0000000..3590265
--- /dev/null
+++ b/DIMS/SpectrumPeakFinding.nf
@@ -0,0 +1,18 @@
+process SpectrumPeakFinding {
+    tag "DIMS SpectrumPeakFinding"
+    label 'SpectrumPeakFinding'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       path(rdata_files)
+       path(replication_pattern)
+
+    output:
+       path 'SpectrumPeaks_*.RData'
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/SpectrumPeakFinding.R 
+        """
+}
diff --git a/DIMS/SumAdducts.R b/DIMS/SumAdducts.R
new file mode 100755
index 0000000..699da4a
--- /dev/null
+++ b/DIMS/SumAdducts.R
@@ -0,0 +1,87 @@
+## adapted from 11-runSumAdducts.R
+
+# define parameters
+cmd_args <- commandArgs(trailingOnly = TRUE)
+
+hmdbpart_main_file <- cmd_args[1]
+scripts_dir <- cmd_args[2]
+z_score <- as.numeric(cmd_args[3])
+
+sum_adducts <- function(peaklist, theor_mz, grpnames_long, adducts, batch_number, scanmode, outdir, z_score) {
+  hmdb_codes <- rownames(theor_mz)
+  hmdb_names <- theor_mz[, 1, drop = FALSE]
+  hmdb_names[] <- lapply(hmdb_names, as.character)
+
+  # remove isotopes
+  index <- grep("HMDB", hmdb_codes, fixed = TRUE)
+  hmdb_codes <- hmdb_codes[index]
+  hmdb_names <- hmdb_names[index, ]
+  index <- grep("_", rownames(hmdb_codes), fixed = TRUE)
+  if (length(index) > 0) hmdb_codes <- hmdb_codes[-index]
+  if (length(index) > 0) hmdb_names <- hmdb_names[-index]
+
+  # negative
+  names <- NULL
+  adductsum <- NULL
+  names_long <- NULL
+
+  if (length(hmdb_codes) != 0) {
+    for (i in 1:length(hmdb_codes)) {
+      compound <- hmdb_codes[i]
+      compound_plus <- c(compound, paste(compound, adducts, sep = "_"))
+
+      metab <- unlist(lapply(peaklist$HMDB_code, function(x) {
+        (length(intersect(unlist(strsplit(as.vector(x), ";")), compound_plus)) > 0)
+      }))
+
+      total <- c()
+
+      # get the intensities for selected metabolite.
+      if (z_score == 1) {
+        int_cols_C <- grep("C", colnames(peaklist)[1:which(colnames(peaklist) == "avg.ctrls")])
+        int_cols_P <- grep("P", colnames(peaklist)[1:which(colnames(peaklist) == "avg.ctrls")])
+        int_cols <- c(int_cols_C, int_cols_P)
+        ints <- peaklist[metab, int_cols]
+      } else {
+        ints <- peaklist[metab, c(3:(length(grpnames_long) + 2))]
+      }
+      total <- apply(ints, 2, sum)
+
+      if (sum(total) != 0) {
+        names <- c(names, compound)
+        adductsum <- rbind(adductsum, total)
+        names_long <- c(names_long, hmdb_names[i])
+      }
+    }
+
+    if (!is.null(adductsum)) {
+      rownames(adductsum) <- names
+      adductsum <- cbind(adductsum, "HMDB_name" = names_long)
+      save(adductsum, file = paste(scanmode, "_", batch_number, "_SummedAdducts.RData", sep = ""))
+    }
+  }
+}
+
+if (grepl("positive_hmdb", hmdbpart_main_file)) {
+  scanmode <- "positive"
+  # for the adduct sum: include adducts M+Na (1) and M+K (2)
+  adducts <- c(1, 2)
+} else if (grepl("negative_hmdb", hmdbpart_main_file)) {
+  scanmode <- "negative"
+  # for the adduct sum: include adduct M+Cl (1)
+  adducts <- c(1)
+}
+
+# load input files
+collect_file <- paste0("outlist_identified_", scanmode, ".RData")
+load(collect_file)
+repl_file <- paste0(scanmode, "_repl_pattern.RData")
+load(repl_file)
+outlist_part <- get(load(hmdbpart_main_file))
+
+# get the number from the file name
+batch_number <- strsplit(basename(hmdbpart_main_file), ".", fixed = TRUE)[[1]][2]
+
+outlist_total <- unique(outlist_ident)
+
+sum_adducts(outlist_total, outlist_part, names(repl_pattern_filtered), adducts, batch_number, scanmode, outdir, z_score)
diff --git a/DIMS/SumAdducts.nf b/DIMS/SumAdducts.nf
new file mode 100644
index 0000000..bbd6420
--- /dev/null
+++ b/DIMS/SumAdducts.nf
@@ -0,0 +1,19 @@
+process SumAdducts {
+    tag "DIMS SumAdducts"
+    label 'SumAdducts'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       each path(collect_files)
+       each path(replication_pattern)
+       path(HMDBpart_main_file)
+
+    output:
+       path('*_SummedAdducts.RData')
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/SumAdducts.R $HMDBpart_main_file $params.scripts_dir $params.zscore
+        """
+}
diff --git a/DIMS/ThermoRawFileParser.nf b/DIMS/ThermoRawFileParser.nf
new file mode 100644
index 0000000..022a2c9
--- /dev/null
+++ b/DIMS/ThermoRawFileParser.nf
@@ -0,0 +1,18 @@
+process ConvertRawFile {
+    tag "DIMS ConvertRawFile ${file_id}"
+    label 'ThermoRawFileParser_1_1_11'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        tuple(val(file_id), path(raw_file))
+
+    output:
+        tuple(val(file_id), path("${file_id}.mzML"))
+
+    script:
+
+        """
+        source /hpc/dbg_mz/tools/mono/etc/profile
+        mono /hpc/dbg_mz/tools/ThermoRawFileParser_1.1.11/ThermoRawFileParser.exe -i=${raw_file} --output=./ -p
+        """
+}
diff --git a/DIMS/UnidentifiedCalcZscores.R b/DIMS/UnidentifiedCalcZscores.R
new file mode 100755
index 0000000..ec31a3f
--- /dev/null
+++ b/DIMS/UnidentifiedCalcZscores.R
@@ -0,0 +1,61 @@
+## adapted from 10-collectSamplesFilled.R
+
+# define parameters
+cmd_args <- commandArgs(trailingOnly = TRUE)
+
+scripts_dir <- cmd_args[1]
+ppm <- as.numeric(cmd_args[2])
+z_score <- as.numeric(cmd_args[3])
+
+source(paste0(scripts_dir, "merge_duplicate_rows.R"))
+source(paste0(scripts_dir, "calculate_zscores.R"))
+
+# for each scan mode, collect all filled peak group lists
+scanmodes <- c("positive", "negative")
+
+for (scanmode in scanmodes) {
+  filled_files <- list.files("./", full.names = TRUE, pattern = paste0(scanmode, ".{1,}_Unidentified_filled"))
+  # load files and combine into one object
+  outlist_total <- NULL
+  for (file_nr in 1:length(filled_files)) {
+    peakgrouplist_filled <- get(load(filled_files[file_nr]))
+    outlist_total <- rbind(outlist_total, peakgrouplist_filled)
+  }
+
+  # remove duplicates; peak groups with exactly the same m/z
+  outlist_total <- merge_duplicate_rows(outlist_total)
+
+  # sort on mass
+  outlist_total <- outlist_total[order(outlist_total[, "mzmed.pgrp"]), ]
+
+  # load replication pattern
+  pattern_file <- paste0(scanmode, "_repl_pattern.RData")
+  repl_pattern <- get(load(pattern_file))
+
+  if (z_score == 1) {
+    # calculate Z-scores
+    outlist_stats <- calculate_zscores(outlist_total, adducts = FALSE)
+    nr_removed_samples <- length(which(repl_pattern[] == "character(0)"))
+    order_index_int <- order(colnames(outlist_stats)[8:(length(repl_pattern) - nr_removed_samples + 7)])
+    outlist_stats_more <- cbind(outlist_stats[, 1:7],
+                                outlist_stats[, (length(repl_pattern) - nr_removed_samples + 8):
+					      (length(repl_pattern) - nr_removed_samples + 8 + 6)],
+                                outlist_stats[, 8:(length(repl_pattern) - nr_removed_samples + 7)][order_index_int],
+                                outlist_stats[, (length(repl_pattern) - nr_removed_samples + 5 + 10):ncol(outlist_stats)])
+
+    tmp_index <- grep("_Zscore", colnames(outlist_stats_more), fixed = TRUE)
+    tmp_index_order <- order(colnames(outlist_stats_more[, tmp_index]))
+    tmp <- outlist_stats_more[, tmp_index[tmp_index_order]]
+    outlist_stats_more <- outlist_stats_more[, -tmp_index]
+    outlist_stats_more <- cbind(outlist_stats_more, tmp)
+    outlist_total <- outlist_stats_more
+  }
+
+  outlist_not_ident <- outlist_total
+
+  # Save output
+  remove_columns <- c("fq.best", "fq.worst", "mzmin.pgrp", "mzmax.pgrp")
+  remove_colindex <- which(colnames(outlist_not_ident) %in% remove_columns)
+  outlist_not_ident <- outlist_not_ident[, -remove_colindex]
+  save(outlist_not_ident, file = paste0("unidentified_outlist_", scanmode, ".RData"))
+}
diff --git a/DIMS/UnidentifiedCalcZscores.nf b/DIMS/UnidentifiedCalcZscores.nf
new file mode 100644
index 0000000..675d9cd
--- /dev/null
+++ b/DIMS/UnidentifiedCalcZscores.nf
@@ -0,0 +1,18 @@
+process UnidentifiedCalcZscores {
+    tag "DIMS UnidentifiedCalcZscores"
+    label 'UnidentifiedCalcZscores'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       path(unidentified_filled_files)
+       each path(replication_pattern)
+
+    output:
+       path('unidentified_outlist*.RData')
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/UnidentifiedCalcZscores.R $params.scripts_dir $params.ppm $params.zscore
+        """
+}
diff --git a/DIMS/UnidentifiedCollectPeaks.R b/DIMS/UnidentifiedCollectPeaks.R
new file mode 100755
index 0000000..1536213
--- /dev/null
+++ b/DIMS/UnidentifiedCollectPeaks.R
@@ -0,0 +1,68 @@
+## adapted from 7-collectSamplesGroupedHMDB.R
+
+# load required packages
+suppressMessages(library("dplyr"))
+
+options(digits = 16)
+
+# define parameters
+cmd_args <- commandArgs(trailingOnly = TRUE)
+
+ppm <- as.numeric(cmd_args[1])
+
+scanmodes <- c("positive", "negative")
+
+for (scanmode in scanmodes) {
+  # get list of all files that contain lists of peaks that were used in identified peak grouping
+  files <- list.files("./", pattern = paste(scanmode, "_peaks_used.RData", sep = ""))
+  # load the list of all peaks
+  load(paste0("SpectrumPeaks_", scanmode, ".RData"))
+
+  # Make a list of indexes of peaks that have been identified, then remove these from the peaklist.
+  remove <- NULL
+  for (file_index in 1:length(files)) {
+    # load list_of_peaks_used_in_peak_groups_identified
+    load(files[file_index])
+    remove <- c(remove, 
+		which(outlist_total[, "mzmed.pkt"] %in% list_of_peaks_used_in_peak_groups_identified[, "mzmed.pkt"])) # nolint
+  }
+  outlist_rest <- outlist_total[-remove, ]
+
+  # sort on mass
+  outlist <- outlist_rest[order(as.numeric(outlist_rest[, "mzmed.pkt"])), ]
+  # save output
+  # save(outlist, file = paste0("SpectrumPeaks_", scanmode, "_Unidentified.RData"))
+
+  size_parts <- 10000
+  # start <- 1
+
+  num_parts <- ceiling(nrow(outlist) / size_parts)
+
+  for (part in 1:num_parts){
+    if (part == 1) {
+      start_part <- 1
+      end_part <- size_parts
+    } else {
+      start_part <- (part - 1) * size_parts + 1
+    }
+    if (part == num_parts) {
+      end_part <- nrow(outlist)
+    } else if (part != 1) {
+      end_part <- part * size_parts
+    }
+
+    outlist_part <- outlist[c(start_part:end_part), ]
+
+    # add ppm extra after end
+    if (part != num_parts) {
+      mz_end <- outlist_part[nrow(outlist_part), "mzmed.pkt"]
+      mz_ppm_range <- ppm * as.numeric(mz_end) / 1e+06
+      mz_end_plus_ppm <- as.numeric(mz_end) + mz_ppm_range
+      outlist_after_part <- as.data.frame(outlist) %>% filter(mzmed.pkt > mz_end & mzmed.pkt <= mz_end_plus_ppm)
+
+      outlist_part <- rbind(outlist_part, outlist_after_part)
+    }
+
+    save(outlist_part, file = paste0(scanmode, "_", paste0("unidentified_part_", part, ".RData")))
+  }
+}
diff --git a/DIMS/UnidentifiedCollectPeaks.nf b/DIMS/UnidentifiedCollectPeaks.nf
new file mode 100644
index 0000000..0983e9e
--- /dev/null
+++ b/DIMS/UnidentifiedCollectPeaks.nf
@@ -0,0 +1,18 @@
+process UnidentifiedCollectPeaks {
+    tag "DIMS UnidentifiedCollectPeaks"
+    label 'UnidentifiedCollectPeaks'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       path(spectrumpeaks_file)
+       path(peaklist_identified)
+
+    output:
+       path('*.RData')
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/UnidentifiedCollectPeaks.R $params.ppm
+        """
+}
diff --git a/DIMS/UnidentifiedFillMissing.R b/DIMS/UnidentifiedFillMissing.R
new file mode 100755
index 0000000..78b7951
--- /dev/null
+++ b/DIMS/UnidentifiedFillMissing.R
@@ -0,0 +1,48 @@
+# adapted from 9-runFillMissing.R
+
+# define parameters
+cmd_args <- commandArgs(trailingOnly = TRUE)
+
+peakgrouplist_file <- cmd_args[1]
+# peakgrouplist_file2 <- cmd_args[2]
+scripts_dir <- cmd_args[2]
+thresh <- as.numeric(cmd_args[3])
+resol <- as.numeric(cmd_args[4])
+ppm <- as.numeric(cmd_args[5])
+outdir  <- "./"
+
+# load in function scripts
+source(paste0(scripts_dir, "replace_zeros.R"))
+source(paste0(scripts_dir, "fit_optim.R"))
+source(paste0(scripts_dir, "get_fwhm.R"))
+source(paste0(scripts_dir, "get_stdev.R"))
+source(paste0(scripts_dir, "estimate_area.R"))
+source(paste0(scripts_dir, "optimize_gaussfit.R"))
+source(paste0(scripts_dir, "identify_noisepeaks.R"))
+source(paste0(scripts_dir, "get_element_info.R"))
+source(paste0(scripts_dir, "atomic_info.R"))
+
+# peakgrouplist_files <- c(peakgrouplist_file1, peakgrouplist_file2)
+# for (peakgrouplist_file in peakgrouplist_files) {
+
+if (grepl("_pos", basename(peakgrouplist_file))) {
+  scanmode <- "positive"
+} else if (grepl("_neg", basename(peakgrouplist_file))) {
+  scanmode <- "negative"
+}
+
+# get replication pattern for sample names
+pattern_file <- paste0(scanmode, "_repl_pattern.RData")
+repl_pattern <- get(load(pattern_file))
+
+# load peak group list and determine output file name
+outpgrlist_identified <- get(load(peakgrouplist_file))
+
+outputfile_name <- gsub(".RData", "_filled.RData", peakgrouplist_file)
+
+# replace missing values (zeros) with random noise
+peakgrouplist_filled <- replace_zeros(outpgrlist_identified, repl_pattern, scanmode, resol, outdir, thresh, ppm)
+
+# save output
+save(peakgrouplist_filled, file = outputfile_name)
+# }
diff --git a/DIMS/UnidentifiedFillMissing.nf b/DIMS/UnidentifiedFillMissing.nf
new file mode 100644
index 0000000..3f9098e
--- /dev/null
+++ b/DIMS/UnidentifiedFillMissing.nf
@@ -0,0 +1,18 @@
+process UnidentifiedFillMissing {
+    tag "DIMS UnidentifiedFillMissing ${groupedlist_file}"
+    label 'UnidentifiedFillMissing'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       path(groupedlist_file)
+       each path(replication_pattern)
+
+    output:
+       path('*_filled.RData')
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/UnidentifiedFillMissing.R $groupedlist_file $params.scripts_dir $params.thresh $params.resolution $params.ppm
+        """
+}
diff --git a/DIMS/UnidentifiedPeakGrouping.R b/DIMS/UnidentifiedPeakGrouping.R
new file mode 100755
index 0000000..edeb2f7
--- /dev/null
+++ b/DIMS/UnidentifiedPeakGrouping.R
@@ -0,0 +1,108 @@
+## adapted from 8-peakGrouping.rest.R
+
+# define parameters
+cmd_args <- commandArgs(trailingOnly = TRUE)
+
+unidentified_peaklist <- cmd_args[1]
+resol <- as.numeric(cmd_args[2])
+ppm <- as.numeric(cmd_args[3])
+outdir <- "./"
+
+options(digits = 16)
+
+# function for grouping unidentified peaks
+grouping_rest <- function(outdir, unidentified_peaklist, scanmode, ppm) {
+  outlist_copy <- get(load(unidentified_peaklist))
+  load(paste0(scanmode, "_repl_pattern.RData"))
+  outpgrlist <- NULL
+
+  # group on highest peaks
+  range <- ppm * 1e-06
+
+  # temporary: speed up this step by limiting the number of rows used in while loop
+  # this script needs to be parallellized
+  nrow_div <- nrow(outlist_copy) / 1.1
+
+  # while (dim(outlist_copy)[1] > 0) {
+  while (dim(outlist_copy)[1] > nrow_div) {
+    select_max_height <- which(as.numeric(outlist_copy[, "height.pkt"]) == max(as.numeric(outlist_copy[, "height.pkt"])))[1]
+
+    # ppm range around max
+    mzref <- as.numeric(outlist_copy[select_max_height, "mzmed.pkt"])
+    pkmin <- -(range * mzref - mzref)
+    pkmax <- 2 * mzref - pkmin
+
+    select_peaks_within_range <- as.numeric(outlist_copy[, "mzmed.pkt"]) > pkmin & as.numeric(outlist_copy[, "mzmed.pkt"]) < pkmax
+    tmplist <- outlist_copy[select_peaks_within_range, , drop = FALSE]
+
+    nrsamples <- length(unique(tmplist[, "samplenr"]))
+    if (nrsamples > 0) {
+      mzmed_pgrp <- mean(as.numeric(outlist_copy[select_peaks_within_range, "mzmed.pkt"]))
+      mzmin_pgrp <- -(range * mzmed_pgrp - mzmed_pgrp)
+      mzmax_pgrp <- 2 * mzmed_pgrp - mzmin_pgrp
+      # select peaks within mz range
+      select_peaks_within_range <- as.numeric(outlist_copy[, "mzmed.pkt"]) > mzmin_pgrp & as.numeric(outlist_copy[, "mzmed.pkt"]) < mzmax_pgrp
+      tmplist <- outlist_copy[select_peaks_within_range, , drop = FALSE]
+
+      # remove used peaks
+      tmp <- as.vector(which(tmplist[, "height.pkt"] == -1))
+      if (length(tmp) > 0) tmplist <- tmplist[-tmp, , drop = FALSE]
+
+      nrsamples <- length(unique(tmplist[, "samplenr"]))
+      fq_worst_pgrp <- as.numeric(max(outlist_copy[select_peaks_within_range, "fq"]))
+      fq_best_pgrp  <- as.numeric(min(outlist_copy[select_peaks_within_range, "fq"]))
+      ints_allsamps <- rep(0, length(names(repl_pattern_filtered)))
+      names(ints_allsamps) <- names(repl_pattern_filtered)
+
+      # Check for each sample if multiple peaks exists, if so take the sum
+      labels <- unique(tmplist[, "samplenr"])
+      ints_allsamps[labels] <- as.vector(unlist(lapply(labels, function(x) {
+        sum(as.numeric(tmplist[which(tmplist[, "samplenr"] == x), "height.pkt"]))
+      })))
+
+      # combine all information
+      outpgrlist <- rbind(outpgrlist, c(mzmed_pgrp, fq_best_pgrp, fq_worst_pgrp, nrsamples, 
+					mzmin_pgrp, mzmax_pgrp, ints_allsamps, NA, NA, NA, NA))
+    }
+
+    outlist_copy <- outlist_copy[-which(select_peaks_within_range == TRUE), , drop = FALSE]
+  }
+
+  outpgrlist <- as.data.frame(outpgrlist)
+  colnames(outpgrlist)[1:6] <- c("mzmed.pgrp", "fq.best", "fq.worst", "nrsamples", "mzmin.pgrp", "mzmax.pgrp")
+  colnames(outpgrlist)[(length(repl_pattern_filtered) + 7):ncol(outpgrlist)] <- c("assi_HMDB", "iso_HMDB", 
+										  "HMDB_code", "theormz_HMDB")
+
+  return(outpgrlist)
+}
+
+# scanmodes <- c("positive", "negative")
+
+# for (scanmode in scanmodes) {
+#   # generate peak group lists of the unidentified peaks
+#   unidentified_peaklist <- paste0("SpectrumPeaks_", scanmode, "_Unidentified.RData")
+#   outpgrlist <- grouping_rest(outdir, unidentified_peaklist, scanmode, ppm = ppm)
+#   write.table(outpgrlist, file = paste0("PeakGroupList_", scanmode, "_Unidentified.txt"))
+
+#   # save output in RData format for further processing
+#   save(outpgrlist, file=paste0("PeakGroupList_", scanmode, "_Unidentified.RData"))
+# }
+
+# determine appropriate scanmode based on unidentified_peaklist file
+if (grepl("negative", basename(unidentified_peaklist))) {
+  scanmode <- "negative"
+} else if (grepl("positive", basename(unidentified_peaklist))) {
+  scanmode <- "positive"
+}
+
+# generate peak group lists of the unidentified peaks
+outpgrlist <- grouping_rest(outdir, unidentified_peaklist, scanmode, ppm = ppm)
+
+# determine part number of unidentified_peaklist file
+part_number <- gsub("\\D", "", basename(unidentified_peaklist))
+
+# save output in txt format
+write.table(outpgrlist, file = paste0("PeakGroupList_", scanmode, "_part_", part_number, "_Unidentified.txt"))
+
+# save output in RData format for further processing
+save(outpgrlist, file=paste0("PeakGroupList_", scanmode, "_part_", part_number, "_Unidentified.RData"))
diff --git a/DIMS/UnidentifiedPeakGrouping.nf b/DIMS/UnidentifiedPeakGrouping.nf
new file mode 100644
index 0000000..ce140ec
--- /dev/null
+++ b/DIMS/UnidentifiedPeakGrouping.nf
@@ -0,0 +1,19 @@
+process UnidentifiedPeakGrouping {
+    tag "DIMS UnidentifiedPeakGrouping ${unidentified_spectrumpeaks_files}"
+    label 'UnidentifiedPeakGrouping'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+       path(unidentified_spectrumpeaks_files)
+       each path(replication_pattern)
+
+    output:
+       path('*_Unidentified.txt')
+       path('*_Unidentified.RData'), emit: grouped_unidentified
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/UnidentifiedPeakGrouping.R $unidentified_spectrumpeaks_files $params.resolution $params.ppm
+        """
+}
diff --git a/DIMS/Utils/RawFiles.nf b/DIMS/Utils/RawFiles.nf
new file mode 100644
index 0000000..8cc0efe
--- /dev/null
+++ b/DIMS/Utils/RawFiles.nf
@@ -0,0 +1,12 @@
+def extractRawfilesFromDir(dir) {
+    // Original code from: https://github.com/SciLifeLab/Sarek - MIT License - Copyright (c) 2016 SciLifeLab
+    dir = dir.tokenize().collect{"$it/*.raw"}
+    Channel
+    .fromPath(dir, type:'file')
+    .ifEmpty { error "No raw files found in ${dir}." }
+    .map { rawfiles_path ->
+        def file_id = rawfiles_path.getSimpleName()
+        [file_id, rawfiles_path]
+    }
+}
+
diff --git a/DIMS/Utils/add_lab_id_and_onderzoeksnummer.R b/DIMS/Utils/add_lab_id_and_onderzoeksnummer.R
new file mode 100644
index 0000000..f2ff13e
--- /dev/null
+++ b/DIMS/Utils/add_lab_id_and_onderzoeksnummer.R
@@ -0,0 +1,16 @@
+add_lab_id_and_onderzoeksnummer <- function(df_metabs_helix) {
+  #' Adding labnummer and Onderzoeksnummer to a dataframe
+  #'
+  #' @param df_metabs_helix: dataframe with patient data to be uploaded to Helix
+  #'
+  #' @return: dataframe with added labnummer and Onderzoeksnummer columns
+
+  # Split patient number into labnummer and Onderzoeksnummer
+  for (row in 1:nrow(df_metabs_helix)) {
+    df_metabs_helix[row, "labnummer"] <- gsub("^P|\\.[0-9]*", "", df_metabs_helix[row, "Patient"])
+    labnummer_split <- strsplit(as.character(df_metabs_helix[row, "labnummer"]), "M")[[1]]
+    df_metabs_helix[row, "Onderzoeksnummer"] <- paste0("MB", labnummer_split[1], "/", labnummer_split[2])
+  }
+  
+  return(df_metabs_helix)
+}
diff --git a/DIMS/Utils/atomic_info.R b/DIMS/Utils/atomic_info.R
new file mode 100644
index 0000000..94d0e83
--- /dev/null
+++ b/DIMS/Utils/atomic_info.R
@@ -0,0 +1,92 @@
+## adapted from globalAssignments.HPC.R
+# relative abundancies from theoretical mass and composition
+# not a function, but a large amount of objects that will become available in memory
+# refactor: include in get_element_info
+# check if all these elements are necessary. Only use snake_make for Hmass -> hydrogen_mass.
+# move source library to higher level
+options(digits = 16)
+
+suppressPackageStartupMessages(library(lattice))
+
+# The following list was copied from Rdisop elements.R and corrected for C, H, O, Cl, S according to NIST
+Ba <- list(name = "Ba", mass = 130, isotope = list(mass = c(-0.093718, 0, -0.094958, 0, -0.095514, -0.094335, -0.095447, -0.094188, -0.094768), abundance = c(0.00106, 0, 0.00101, 0, 0.02417, 0.06592, 0.07854, 0.1123, 0.717)))
+Br <- list(name = "Br", mass = 79, isotope = list(mass = c(-0.0816639, 0, -0.083711), abundance = c(0.5069, 0, 0.4931)))
+C <- list(name = "C", mass = 12, isotope = list(mass = c(0, 0.003354838, 0.003241989), abundance = c(0.9893, 0.0107, 0)))
+Ca <- list(name = "Ca", mass = 40, isotope = list(mass = c(-0.0374094, 0, -0.0413824, -0.0412338, -0.0445194, 0, -0.046311, 0, -0.047467), abundance = c(0.96941, 0, 0.00647, 0.00135, 0.02086, 0, 4e-05, 0, 0.00187)))
+Cl <- list(name = "Cl", mass = 35, isotope = list(mass = c(-0.03114732, 0, -0.03409741), abundance = c(0.7576, 0, 0.2424)))
+Cr <- list(name = "Cr", mass = 50, isotope = list(mass = c(-0.0539536, 0, -0.0594902, -0.0593487, -0.0611175), abundance = c(0.04345, 0, 0.83789, 0.09501, 0.02365)))
+Cu <- list(name = "Cu", mass = 63, isotope = list(mass = c(-0.0704011, 0, -0.0722071), abundance = c(0.6917, 0, 0.3083)))
+F <- list(name = "F", mass = 19, isotope = list(mass = c(-0.00159678), abundance = c(1)))
+Fe <- list(name = "Fe", mass = 54, isotope = list(mass = c(-0.0603873, 0, -0.0650607, -0.0646042, -0.0667227), abundance = c(0.058, 0, 0.9172, 0.022, 0.0028)))
+H <- list(name = "H", mass = 1, isotope = list(mass = c(0.00782503207, 0.014101778, 0.01604928), abundance = c(0.999885, 0.000115, 0)))
+Hg <- list(name = "Hg", mass = 196, isotope = list(mass = c(-0.034193, 0, -0.033257, -0.031746, -0.0317, -0.029723, -0.029383, 0, -0.026533), abundance = c(0.0015, 0, 0.0997, 0.1687, 0.231, 0.1318, 0.2986, 0, 0.0687)))
+I <- list(name = "I", mass = 127, isotope = list(mass = c(-0.095527), abundance = c(1)))
+K <- list(name = "K", mass = 39, isotope = list(mass = c(-0.0362926, -0.0360008, -0.0381746), abundance = c(0.932581, 0.000117, 0.067302)))
+Li <- list(name = "Li", mass = 6, isotope = list(mass = c(0.0151214, 0.016003), abundance = c(0.075, 0.925)))
+Mg <- list(name = "Mg", mass = 24, isotope = list(mass = c(-0.0149577, -0.0141626, -0.0174063), abundance = c(0.7899, 0.1, 0.1101)))
+Mn <- list(name = "Mn", mass = 55, isotope = list(mass = c(-0.0619529), abundance = c(1)))
+N <- list(name = "N", mass = 14, isotope = list(mass = c(0.003074002, 0.00010897), abundance = c(0.99634, 0.00366)))
+Na <- list(name = "Na", mass = 23, isotope = list(mass = c(-0.0102323), abundance = c(1)))
+Ni <- list(name = "Ni", mass = 58, isotope = list(mass = c(-0.0646538, 0, -0.0692116, -0.0689421, -0.0716539, 0, -0.0720321), abundance = c(0.68077, 0, 0.26223, 0.0114, 0.03634, 0, 0.00926)))
+O <- list(name = "O", mass = 16, isotope = list(mass = c(-0.00508538044, -0.0008683, -0.0008397), abundance = c(0.99757, 0.000381, 0.00205)))
+P <- list(name = "P", mass = 31, isotope = list(mass = c(-0.026238), abundance = c(1)))
+S <- list(name = "S", mass = 32, isotope = list(mass = c(-0.027929, -0.02854124, -0.0321331, 0, -0.03291924), abundance = c(0.9499, 0.0075, 0.0425, 0, 1e-04)))
+Se <- list(name = "Se", mass = 74, isotope = list(mass = c(-0.0775254, 0, -0.080788, -0.0800875, -0.0826924, 0, -0.0834804, 0, -0.0833022), abundance = c(0.0089, 0, 0.0936, 0.0763, 0.2378, 0, 0.4961, 0, 0.0873)))
+Si <- list(name = "Si", mass = 28, isotope = list(mass = c(-0.0230729, -0.0235051, -0.0262293), abundance = c(0.9223, 0.0467, 0.031)))
+Sn <- list(name = "Sn", mass = 112, isotope = list(mass = c(-0.095174, 0, -0.097216, -0.096652, -0.098253, -0.097044, -0.098391, -0.09669, -0.0978009, 0, -0.0965596, 0, -0.0947257), abundance = c(0.0097, 0, 0.0065, 0.0034, 0.1453, 0.0768, 0.2423, 0.0859, 0.3259, 0, 0.0463, 0, 0.0579)))
+Zn <- list(name = "Zn", mass = 64, isotope = list(mass = c(-0.0708552, 0, -0.0739653, -0.0728709, -0.0751541, 0, -0.074675), abundance = c(0.486, 0, 0.279, 0.041, 0.188, 0, 0.006)))
+
+# The following list is for our own use
+NH4 <- list(name = "NH4", mass = 18, isotope = list(mass = c(0.03437, 0.03141, -0.95935)), abundance = c(0.995, 0.004, 0.001)) # SISweb: 18.03437 100 19.03141 0.4 19.04065 0.1
+Ac <- list(name = "Ac", mass = 60, isotope = list(mass = c(0.02114, 0.02450, 0.02538)), abundance = c(0.975, 0.021, 0.004)) # SISweb: 60.02114 100 61.02450 2.2 62.02538 0.4
+NaCl <- list(name = "NaCl", mass = 58, isotope = list(mass = c(-0.04137, 0, -0.04433)), abundance = c(0.755, 0, 0.245)) # SISweb: 57.95862 100 59.95567 32.4
+NaCl2 <- list(name = "NaCl2", mass = 116, isotope = list(mass = c(-0.08274, 0, -0.08866)), abundance = c(0.755, 0, 0.245)) # SISweb: 57.95862 100 59.95567 32.4
+NaCl3 <- list(name = "NaCl3", mass = 174, isotope = list(mass = c(-0.12411, 0, -0.13299)), abundance = c(0.755, 0, 0.245)) # SISweb: 57.95862 100 59.95567 32.4
+NaCl4 <- list(name = "NaCl4", mass = 232, isotope = list(mass = c(-0.16548, 0, -0.17732)), abundance = c(0.755, 0, 0.245)) # SISweb: 57.95862 100 59.95567 32.4
+NaCl5 <- list(name = "NaCl5", mass = 290, isotope = list(mass = c(-0.20685, 0, -0.22165)), abundance = c(0.755, 0, 0.245)) # SISweb: 57.95862 100 59.95567 32.4
+For <- list(name = "For", mass = 45, isotope = list(mass = c(-0.00233, 0.00103)), abundance = c(0.989, 0.011)) # SISweb: 46.00549 100 47.00885 1.1 (47.0097 0.1) 48.00973 0.4
+Na2 <- list(name = "2Na-H", mass = 46, isotope = list(mass = c(-1.0282896)), abundance = c(1)) # SISweb for Na2: 45.97954 100 # minus 1 H !
+Met <- list(name = "CH3OH", mass = 32, isotope = list(mass = c(1.034045, 1.037405)), abundance = c(0.989, 0.011)) # SISweb: 32.02622 100 33.02958 1.1 33.0325 0.1 34.03046 0.2
+CH3OH <- list(name = "CH3OH", mass = 32, isotope = list(mass = c(1.034045, 1.037405)), abundance = c(0.989, 0.011)) # SISweb: 32.02622 100 33.02958 1.1 33.0325 0.1 34.03046 0.2
+Na3 <- list(name = "3Na-2H", mass = 69, isotope = list(mass = c(-2.0463469)), abundance = c(1)) # SISweb for Na2: 45.97954 100 # minus 1 H !
+KCl <- list(name = "KCl", mass = 74, isotope = list(mass = c(-0.06744, 0.92961, -0.06744, 0.92772)), abundance = c(0.7047, 0.2283, 0.0507, 0.0162)) # SISweb: 73.93256 100 75.92961 32.4 75.93067 7.2 77.92772 2.3
+H2PO4 <- list(name = "H2PO4", mass = 97, isotope = list(mass = c(-0.03091)), abundance = c(1))
+HSO4 <- list(name = "HSO4", mass = 97, isotope = list(mass = c(-0.04042, 0, -0.04462)), abundance = c(0.96, 0, 0.04))
+Met2 <- list(name = "Met2", mass = 64, isotope = list(mass = c(1.060265, 1.013405)), abundance = c(0.978, 0.022))
+Met3 <- list(name = "Met3", mass = 96, isotope = list(mass = c(1.086485, 1.089845)), abundance = c(0.969, 0.031))
+Met4 <- list(name = "Met4", mass = 128, isotope = list(mass = c(1.112705, 1.116065)), abundance = c(0.959, 0.041))
+Met5 <- list(name = "Met5", mass = 160, isotope = list(mass = c(1.20935, 1.142285)), abundance = c(0.949, 0.051))
+NaminH <- list(name = "Na-H", mass = 21, isotope = list(mass = c(-0.02571416)), abundance = c(1))
+KminH <- list(name = "K-H", mass = 37, isotope = list(mass = c(-0.05194, 0.94617)), abundance = c(0.9328, 0.0672))
+H2O <- list(name = "H2O", mass = -19, isotope = list(mass = c(-0.01894358)), abundance = c(1))
+NaK <- list(name = "NaK-H", mass = 61, isotope = list(mass = c(-0.054345, 0.943765)), abundance = c(0.9328, 0.0672))
+min2H <- list(name = "min2H", mass = -2, isotope = list(mass = c(-0.0151014)), abundance = c(1))
+plus2H <- list(name = "plus2H", mass = 2, isotope = list(mass = c(0.0151014)), abundance = c(1))
+plus2Na <- list(name = "plus2Na", mass = 46, isotope = list(mass = c(-0.02046), abundance = c(1)))
+plusNaH <- list(name = "plusNaH", mass = 24, isotope = list(mass = c(-0.00295588), abundance = c(1)))
+plusKH <- list(name = "plusKH", mass = 40, isotope = list(mass = c(-0.029008, 0.969101)), abundance = c(0.9328, 0.0672))
+plusHNH <- list(name = "plusHNH", mass = 19, isotope = list(mass = c(0.04164642)), abundance = c(1))
+min3H <- list(name = "min3H", mass = -3, isotope = list(mass = c(-0.02182926)), abundance = c(1))
+plus3H <- list(name = "plus3H", mass = 3, isotope = list(mass = c(0.02182926)), abundance = c(1))
+plus3Na <- list(name = "plus3Na", mass = 68, isotope = list(mass = c(0.96931)), abundance = c(1))
+plus2NaH <- list(name = "plus2NaH", mass = 47, isotope = list(mass = c(0.2985712)), abundance = c(1))
+plusNa2H <- list(name = "plusNa2H", mass = 25, isotope = list(mass = c(0.00432284)), abundance = c(1))
+
+# add first isotope of Cl because of high abundance
+Cl37 <- list(name = "Cl37", mass = 37, isotope = list(mass = c(-0.03409741)), abundance = c(1))
+
+all_elements <- list(Ba, Br, C, Ca, Cl, Cr, Cu, F, Fe, H, Hg, I, K, Li, Mg, Mn, N, Na, Ni, O, P, S, Se, Si, Sn, Zn)
+all_adducts <- list(Ba, Br, Ca, Cl, Cl37, Cr, Cu, Fe, Hg, I, K, Li, Mg, Mn, Na, Ni, Se, Si, Sn, Zn, 
+                    NH4, Ac, NaCl, For, Na2, CH3OH, NaCl2, NaCl3, NaCl4, NaCl5, Na3, KCl, H2PO4, HSO4,
+                    Met2, Met3, Met4, Met5, NaminH, KminH, H2O, NaK, min2H, plus2H, plus2Na, plusNaH,
+                    plusKH, min3H, plus3H, plusHNH, plus3Na, plus2NaH, plusNa2H)
+
+atoms_inuse <- c("P", "O", "N", "C", "H", "S", "Cl", "D", "13C", "34S", "18O", "37Cl")
+atomic_weights <- c(30.97376163, 15.99491463, 14.0030740052, 12.0000, 1.0078250321, 31.9720707, 34.968852721, 2.0141017778, 13.0033548378, 33.96786690, 17.9991610, 36.96590259)
+electron <- 0.00054858
+
+hydrogen_mass <- Hmass <- H$mass + H$isotope$mass[1]
+Dmass <- H$mass + 1 + H$isotope$mass[2]
+Tmass <- H$mass + 2 + H$isotope$mass[3]
+C13mass <- C$mass + 1 + C$isotope$mass[2]
+N15mass <- N$mass + 1 + N$isotope$mass[2]
diff --git a/DIMS/Utils/calculate_zscores.R b/DIMS/Utils/calculate_zscores.R
new file mode 100644
index 0000000..d687376
--- /dev/null
+++ b/DIMS/Utils/calculate_zscores.R
@@ -0,0 +1,63 @@
+## adapted from statistics_z.R
+# refactor: change column names from avg.ctrls to avg_ctrls, sd.ctrls to sd_ctrls
+# check logic of parameter adducts
+calculate_zscores <- function(peakgroup_list, adducts) {
+  #' Calculate Z-scores for peak groups based on average and standard deviation of controls
+  #'
+  #' @param peakgroup_list: Peak group list (matrix)
+  #' @param sort_col: Column to sort on (string)
+  #' @param adducts: Parameter indicating whether there are adducts in the list (boolean)
+  #'
+  #' @return peakgroup_list_dedup: de-duplicated peak group list (matrix)
+
+  case_label <- "P"
+  control_label <- "C"
+  # get index for new column names
+  startcol <- ncol(peakgroup_list) + 3
+
+  # calculate mean and standard deviation for Control group
+  ctrl_cols <- grep(control_label, colnames(peakgroup_list), fixed = TRUE)
+  case_cols <- grep(case_label, colnames(peakgroup_list), fixed = TRUE)
+  int_cols <- c(ctrl_cols, case_cols)
+  # set al zeros to NA
+  peakgroup_list[, int_cols][peakgroup_list[, int_cols] == 0] <- NA
+  ctrl_ints <- peakgroup_list[, ctrl_cols, drop = FALSE]
+  peakgroup_list$avg.ctrls <- apply(ctrl_ints, 1, function(x) mean(as.numeric(x), na.rm = TRUE))
+  peakgroup_list$sd.ctrls <- apply(ctrl_ints, 1, function(x) sd(as.numeric(x), na.rm = TRUE))
+
+  # set new column names and calculate Z-scores
+  colnames_zscores <- NULL
+  for (col_index in int_cols) {
+    col_name <- colnames(peakgroup_list)[col_index]
+    colnames_zscores <- c(colnames_zscores, paste0(col_name, "_Zscore"))
+    zscores_1col <- (as.numeric(as.vector(unlist(peakgroup_list[, col_index]))) -
+                     peakgroup_list$avg.ctrls) / peakgroup_list$sd.ctrls
+    peakgroup_list <- cbind(peakgroup_list, zscores_1col)
+  }
+
+  # apply new column names to columns at end plus avg and sd columns
+  colnames(peakgroup_list)[startcol:ncol(peakgroup_list)] <- colnames_zscores
+
+  # add ppm deviation column
+  zscore_cols <- grep("Zscore", colnames(peakgroup_list), fixed = TRUE)
+  if (!adducts) {
+    if ((dim(peakgroup_list[, zscore_cols])[2] + 6) != (startcol - 1)) {
+      ppmdev <- array(1:nrow(peakgroup_list), dim = c(nrow(peakgroup_list)))
+      # calculate ppm deviation
+      for (i in 1:nrow(peakgroup_list)) {
+        if (!is.na(peakgroup_list$theormz_HMDB[i]) && 
+            !is.null(peakgroup_list$theormz_HMDB[i]) &&
+            (peakgroup_list$theormz_HMDB[i] != "")) {
+          ppmdev[i] <- 10^6 * (as.numeric(as.vector(peakgroup_list$mzmed.pgrp[i])) -
+                               as.numeric(as.vector(peakgroup_list$theormz_HMDB[i]))) /
+                               as.numeric(as.vector(peakgroup_list$theormz_HMDB[i]))
+        } else {
+          ppmdev[i] <- NA
+        }
+      }
+      peakgroup_list <- cbind(peakgroup_list[, 1:6], ppmdev = ppmdev, peakgroup_list[, 7:ncol(peakgroup_list)])
+    }
+  }
+
+  return(peakgroup_list)
+}
diff --git a/DIMS/Utils/check_overlap.R b/DIMS/Utils/check_overlap.R
new file mode 100644
index 0000000..66492e8
--- /dev/null
+++ b/DIMS/Utils/check_overlap.R
@@ -0,0 +1,24 @@
+## adapted from checkOverlap.R
+check_overlap <- function(range1, range2) {
+  #' Modify range1 and range2 in case of overlap
+  #'
+  #' @param range1: Vector of m/z values for first peak (float)
+  #' @param range2: Vector of m/z values for second peak (float)
+  #'
+  #' @return new_ranges: list of two ranges (list)
+  
+  # Check for overlap
+  if (length(intersect(range1, range2)) == 2) {
+    if (length(range1) >= length(range2)) {
+      range1 <- range1[-length(range1)]  
+    } else {
+      range2 <- range2[-1]
+    }
+  } else if (length(intersect(range1, range2)) == 3) {
+    range1 <- range1[-length(range1)]  
+    range2 <- range2[-1]
+  }
+  new_ranges <- list("range1" = range1, "range2" = range2)
+  return(new_ranges)
+}
+
diff --git a/DIMS/Utils/check_same_samplename.R b/DIMS/Utils/check_same_samplename.R
new file mode 100644
index 0000000..6e80841
--- /dev/null
+++ b/DIMS/Utils/check_same_samplename.R
@@ -0,0 +1,10 @@
+check_same_samplename <- function(int_col_name, zscore_col_name) {
+  #' A check to see if intensity and Z-score columns match
+  #'
+  #' @param int_col_name: name of an intensity column (string)
+  #' @param zscore_col_name: name of a Z-score column (string)
+  #'
+  #' @return: match or mismatch of the columns (boolean)
+
+  paste0(int_col_name, "_Zscore") == zscore_col_name
+}
diff --git a/DIMS/Utils/create_violin_plots.R b/DIMS/Utils/create_violin_plots.R
new file mode 100644
index 0000000..be4f1f6
--- /dev/null
+++ b/DIMS/Utils/create_violin_plots.R
@@ -0,0 +1,122 @@
+# remove parameter default value
+# add explanation variable to parameters
+create_violin_plots <- function(pdf_dir, pt_name, metab_perpage, top_metab_pt = NULL) {
+  #' Create violin plots for each patient
+  #'
+  #' @param pdf_dir: location where to save the pdf file (string)
+  #' @param pt_name: patient code (string)
+  #' @param metab_perpage: list of dataframe with a dataframe for each page in the violinplot pdf (list)
+  #' @param top_metab_pt: dataframe with increased and decrease metabolites for this patient (dataframe)
+
+  # set parameters for plots
+  plot_height <- 9.6
+  plot_width <- 6
+  fontsize <- 1
+  circlesize <- 0.8
+  colors_4plot <- c("#22E4AC", "#00B0F0", "#504FFF", "#A704FD", "#F36265", "#DA0641")
+  #                   green     blue      blue/purple purple    orange     red
+
+  # patient plots, create the PDF device
+  pt_name_sub <- pt_name
+  suffix <- ""
+  if (grepl("Diagnostics", pdf_dir) & is_diagnostic_patient(pt_name)) {
+    prefix <- "MB"
+    suffix <- "_DIMS_PL_DIAG"
+    # substitute P and M in P2020M00001 into right format for Helix
+    pt_name_sub <- gsub("[PM]", "", pt_name)
+    pt_name_sub <- gsub("\\..*", "", pt_name_sub)
+  } else if (grepl("Diagnostics", pdf_dir)) {
+    prefix <- "Dx_"
+  } else if (grepl("IEM", pdf_dir)) {
+    prefix <- "IEM_"
+  } else {
+    prefix <- "R_"
+  }
+
+  pdf(paste0(pdf_dir, "/", prefix, pt_name_sub, suffix, ".pdf"),
+      onefile = TRUE,
+      width = plot_width,
+      height = plot_height)
+
+  # page headers:
+  page_headers <- names(metab_perpage)
+
+  # put table into PDF file, if not empty
+  if (!is.null(dim(top_metab_pt))) {
+    plot.new()
+    # get the names and numbers in the table aligned
+    table_theme <- ttheme_default(core = list(fg_params = list(hjust = 0, x = 0.05, fontsize = 6)),
+                                  colhead = list(fg_params = list(fontsize = 8, fontface = "bold")))
+    grid.table(top_metab_pt, theme = table_theme, rows = NULL)
+    # g <- tableGrob(top_metab_pt)
+    # grid.draw(g)
+    text(x = 0.45, y = 1.02, paste0("Top deviating metabolites for patient: ", pt_name), font = 1, cex = 1)
+  }
+
+  # violin plots
+  for (page_index in 1:length(metab_perpage)) {
+    # extract list of metabolites to plot on a page
+    metab_list_2plot <- metab_perpage[[page_index]]
+    # extract original data for patient of interest (pt_name) before cut-offs
+    pt_list_2plot_orig <- metab_list_2plot[which(metab_list_2plot$variable == pt_name), ]
+    # cut off Z-scores higher than 20 or lower than -5 (for nicer plots)
+    metab_list_2plot$value[metab_list_2plot$value >  20] <-  20
+    metab_list_2plot$value[metab_list_2plot$value <  -5] <-  -5
+    # extract data for patient of interest (pt_name)
+    pt_list_2plot <- metab_list_2plot[which(metab_list_2plot$variable == pt_name), ]
+    # restore original Z-score before cut-off, for showing Z-scores in PDF
+    pt_list_2plot$value_orig <- pt_list_2plot_orig$value
+    # remove patient of interest (pt_name) from list; violins will be made up of controls and other patients
+    metab_list_2plot <- metab_list_2plot[-which(metab_list_2plot$variable == pt_name), ]
+    # subtitle per page
+    sub_perpage <- gsub("_", " ", page_headers[page_index])
+    # for IEM plots, put subtitle on two lines
+    sub_perpage <- gsub("probability", "\nprobability", sub_perpage)
+    # add size parameter for showing Z-score of patient per metabolite
+    z_size <- rep(3, nrow(pt_list_2plot))
+    # set size to 0 if row is empty
+    z_size[is.na(pt_list_2plot$value)] <- 0
+
+    # draw violin plot. shape=22 gives square for patient of interest
+    ggplot_object <- ggplot(metab_list_2plot, aes(x = value, y = HMDB_name)) +
+      theme(axis.text.y = element_text(size = rel(fontsize)), plot.caption = element_text(size = rel(fontsize))) +
+      xlim(-5, 20) +
+      geom_violin(scale = "width") +
+      geom_point(data = pt_list_2plot, aes(color = value), size = 3.5 * circlesize, shape = 22, fill = "white") +
+      scale_fill_gradientn(
+        colors = colors_4plot, values = NULL, space = "Lab", na.value = "grey50", guide = "colourbar",
+        aesthetics = "colour"
+      ) +
+      # add Z-score value for patient of interest at x=16
+      geom_text(
+        data = pt_list_2plot, aes(16, label = paste0("Z=", round(value_orig, 2))), hjust = "left", vjust = +0.2,
+        size = z_size
+      ) +
+      # add labels. Use font Courier to get all the plots in the same location.
+      labs(x = "Z-scores", y = "Metabolites", subtitle = sub_perpage, color = "z-score") +
+      theme(axis.text.y = element_text(family = "Courier", size = 6)) +
+      # do not show legend
+      theme(legend.position = "none") +
+      # add title
+      ggtitle(label = paste0("Results for patient ", pt_name)) +
+      # add vertical lines
+      geom_vline(xintercept = 2, col = "grey", lwd = 0.5, lty = 2) +
+      geom_vline(xintercept = -2, col = "grey", lwd = 0.5, lty = 2)
+
+    suppressWarnings(print(ggplot_object))
+
+  }
+
+  # add explanation of violin plots, version number etc.
+  plot(NA, xlim = c(0, 5), ylim = c(0, 5), bty = "n", xaxt = "n", yaxt = "n", xlab = "", ylab = "")
+  if (length(explanation) > 0) {
+    text(0.2, 5, explanation[1], pos = 4, cex = 0.8)
+    for (line_index in 2:length(explanation)) {
+      text_y_position <- 5 - (line_index * 0.2)
+      text(-0.2, text_y_position, explanation[line_index], pos = 4, cex = 0.5)
+    }
+  }
+
+  # close the PDF file
+  dev.off()
+}
diff --git a/DIMS/Utils/do_peakfinding.R b/DIMS/Utils/do_peakfinding.R
new file mode 100644
index 0000000..fff6afe
--- /dev/null
+++ b/DIMS/Utils/do_peakfinding.R
@@ -0,0 +1,55 @@
+## adapted from findpeaks.Gauss.HPC.R
+# NB: this function will be taken up into PeakFinding.R
+# variables with fixed values will be removed from function parameters
+# int_factor, scale, outdir, plot, thresh, width, height
+do_peakfinding <- function(sample_avgtechrepl, int_factor, scale, resol, outdir, scanmode, plot, thresh, width, height) {
+  #' start peak finding
+  #'
+  #' @param sample_avgtechrepl: Dataframe with binned intensities averaged over technical replicates for a sample
+  #' @param int_factor: Value used to calculate area under Gaussian curve (integer)
+  #' @param scale: Initial value used to estimate scaling parameter (integer)
+  #' @param resol: Value for resolution (integer)
+  #' @param outdir: Path for output directory (string)
+  #' @param scanmode: Scan mode, positive or negative (string)
+  #' @param plot: Parameter indicating whether plots should be made (boolean)
+  #' @param thresh: Value for noise level threshold (integer)
+  #' @param width: Value for width of plot (integer)
+  #' @param height: Value for height of plot (integer)
+  #'
+  #' @return save output to file
+
+  sample_name <- colnames(sample_avgtechrepl)[1]
+
+  # turn dataframe with intensities into a named list
+  ints_fullrange <- as.vector(sample_avgtechrepl)
+  names(ints_fullrange) <- rownames(sample_avgtechrepl)
+
+  # initialise list to store results for all peaks
+  allpeaks_values <- list("mean" = NULL, "area" = NULL, "nr" = NULL,
+                          "min" = NULL, "max" = NULL, "qual" = NULL, "spikes" = 0)
+
+  # look for m/z range for all peaks
+  allpeaks_values <- search_mzrange(ints_fullrange, allpeaks_values, int_factor, scale, resol,
+                                    outdir, sample_name, scanmode,
+                                    plot, width, height, thresh)
+
+  # turn the list into a dataframe
+  outlist_persample <- NULL
+  outlist_persample <- cbind("samplenr" = allpeaks_values$nr,
+                             "mzmed.pkt" = allpeaks_values$mean,
+                             "fq" = allpeaks_values$qual,
+                             "mzmin.pkt" = allpeaks_values$min,
+                             "mzmax.pkt" = allpeaks_values$max,
+                             "height.pkt" = allpeaks_values$area)
+
+  # remove peaks with height = 0
+  outlist_persample <- outlist_persample[outlist_persample[, "height.pkt"] != 0, ]
+
+  # save output to file
+  save(outlist_persample, file = paste0(sample_name, "_", scanmode, ".RData"))
+
+  # generate text output to log file on number of spikes for this sample
+  # spikes are peaks that are too narrow, e.g. 1 data point
+  cat(paste("There were", allpeaks_values$spikes, "spikes"))
+}
+
diff --git a/DIMS/Utils/estimate_area.R b/DIMS/Utils/estimate_area.R
new file mode 100644
index 0000000..b191f55
--- /dev/null
+++ b/DIMS/Utils/estimate_area.R
@@ -0,0 +1,30 @@
+## adapted from getArea.R
+# variables with fixed values will be removed from function parameters
+# int_factor
+estimate_area <- function(mass_max, resol, scale, sigma, int_factor) {
+  #' Estimate area of Gaussian curve
+  #'
+  #' @param mass_max: Value for m/z at maximum intensity of a peak (float)
+  #' @param resol: Value for resolution (integer)
+  #' @param scale: Value for peak width (float)
+  #' @param sigma: Value for standard deviation (float)
+  #' @param int_factor: Value used to calculate area under Gaussian curve (integer)
+  #'
+  #' @return area_curve: Value for area under the Gaussian curve (float)
+
+  # avoid vectors that are too big (cannot allocate vector of size ...)
+  if (mass_max > 1200) return(0)
+
+  # generate a mass_vector with equally spaced m/z values
+  fwhm <- get_fwhm(mass_max, resol)
+  mz_min <- mass_max - 2 * fwhm
+  mz_max <- mass_max + 2 * fwhm
+  mz_range <- mz_max - mz_min 
+  mass_vector2 <- seq(mz_min, mz_max, length = mz_range * int_factor)
+
+  # estimate area under the curve
+  area_curve <- sum(scale * dnorm(mass_vector2, mass_max, sigma)) / 100
+
+  return(area_curve)  
+}
+
diff --git a/DIMS/Utils/fit_gaussian.R b/DIMS/Utils/fit_gaussian.R
new file mode 100644
index 0000000..6be9660
--- /dev/null
+++ b/DIMS/Utils/fit_gaussian.R
@@ -0,0 +1,319 @@
+## adapted from fitGaussian.R
+# variables with fixed values will be removed from function parameters
+# scale, outdir, plot, width, height
+# max_index doesn't need to be passed to this function, can be determined here.
+# remove plot sections (commented out)
+# several functions need to be loaded before this function can run
+fit_gaussian <- function(mass_vector2, mass_vector, int_vector, max_index, scale, resol,
+                         outdir, force, use_bounds, plot, scanmode,
+                         int_factor, width, height) {
+  #' Fit 1, 2, 3 or 4 Gaussian peaks in small region of m/z
+  #'
+  #' @param mass_vector2: Vector of equally spaced m/z values (float)
+  #' @param mass_vector: Vector of m/z values for a region of interest (float)
+  #' @param int_vector: Value used to calculate area under Gaussian curve (integer)
+  #' @param max_index: Index in int_vector with the highest intensity (integer)
+  #' @param scale: Initial value used to estimate scaling parameter (integer)
+  #' @param resol: Value for resolution (integer)
+  #' @param outdir: Path for output directory (string)
+  #' @param force: Number of local maxima in int_vector (integer)
+  #' @param use_bounds: Boolean to indicate whether boundaries are to be used
+  #' @param plot: Parameter indicating whether plots should be made (boolean)
+  #' @param scanmode: Scan mode, positive or negative (string)
+  #' @param int_factor: Value used to calculate area under Gaussian curve (integer)
+  #' @param width: Value for width of plot (integer)
+  #' @param height: Value for height of plot (integer)
+  #'
+  #' @return roi_value_list: list of fit values for region of interest (list)
+
+  # Initialise
+  peak_mean <- NULL
+  peak_area <- NULL
+  peak_qual <- NULL
+  peak_min <- NULL
+  peak_max <- NULL
+  fit_quality1 <- 0.15
+  fit_quality <- 0.2
+
+  # One local maximum:
+  if (force == 1) {
+    # determine fit values for 1 Gaussian peak (mean, scale, sigma, qual)
+    fit_values <- fit_1peak(mass_vector2, mass_vector, int_vector, max_index, scale, resol,
+                            plot, fit_quality1, use_bounds)
+    # set initial value for scale factor
+    scale <- 2
+    # test if the mean is outside the m/z range
+    if (fit_values$mean[1] < mass_vector[1] || fit_values$mean[1] > mass_vector[length(mass_vector)]) {
+      # run this function again with fixed boundaries
+      return(fit_gaussian(mass_vector2, mass_vector, int_vector, max_index, scale, resol,
+                          outdir, force = 1, use_bounds = TRUE, plot, scanmode, int_factor, width, height))
+    } else {
+      # test if the fit is bad
+      if (fit_values$qual > fit_quality1) {
+        # Try to fit two curves; find two local maxima
+        new_index <- which(diff(sign(diff(int_vector))) == -2) + 1
+        # test if there are two indices in new_index
+        if (length(new_index) != 2) {
+          new_index <- round(length(mass_vector) / 3)
+          new_index <- c(new_index, 2 * new_index)
+        }
+        # run this function again with two local maxima
+        return(fit_gaussian(mass_vector2, mass_vector, int_vector, new_index,
+                            scale, resol, outdir, force = 2, use_bounds = FALSE,
+                            plot, scanmode, int_factor, width, height))
+        # good fit
+      } else {
+        peak_mean <- c(peak_mean, fit_values$mean)
+        peak_area <- c(peak_area, estimate_area(fit_values$mean, resol, fit_values$scale,
+                                                fit_values$sigma, int_factor))
+        peak_qual <- fit_values$qual
+        peak_min <- mass_vector[1]
+        peak_max <- mass_vector[length(mass_vector)]
+      }
+    }
+
+  #### Two local maxima; need at least 6 data points for this ####
+  } else if (force == 2 && (length(mass_vector) > 6)) {
+    # determine fit values for 2 Gaussian peaks (mean, scale, sigma, qual)
+    fit_values <- fit_2peaks(mass_vector2, mass_vector, int_vector, max_index, scale, resol,
+                             use_bounds, plot, fit_quality, int_factor)
+    # test if one of the means is outside the m/z range
+    if (fit_values$mean[1] < mass_vector[1] || fit_values$mean[1] > mass_vector[length(mass_vector)] ||
+        fit_values$mean[2] < mass_vector[1] || fit_values$mean[2] > mass_vector[length(mass_vector)]) {
+      # check if fit quality is bad
+      if (fit_values$qual > fit_quality) {
+        # run this function again with fixed boundaries
+        return(fit_gaussian(mass_vector2, mass_vector, int_vector, max_index, scale, resol,
+                            outdir, force = 2, use_bounds = TRUE,
+                            plot, scanmode, int_factor, width, height))
+      } else {
+        # check which mean is outside range and remove it from the list of means
+        # NB: peak_mean and other variables have not been given values from 2-peak fit yet!
+        for (i in 1:length(fit_values$mean)){
+          if (fit_values$mean[i] < mass_vector[1] || fit_values$mean[i] > mass_vector[length(mass_vector)]) {
+            peak_mean <- c(peak_mean, -i)
+            peak_area <- c(peak_area, -i)
+          } else {
+            peak_mean <- c(peak_mean, fit_values$mean[i])
+            peak_area <- c(peak_area, fit_values$area[i])
+          }
+        }
+        peak_qual <- fit_values$qual
+        peak_min <- mass_vector[1]
+        peak_max <- mass_vector[length(mass_vector)]
+      }
+      # if all means are within range
+    } else {
+      # check for bad fit
+      if (fit_values$qual > fit_quality) {
+        # Try to fit three curves; find three local maxima
+        new_index <- which(diff(sign(diff(int_vector))) == -2) + 1
+        # test if there are three indices in new_index
+        if (length(new_index) != 3) {
+          new_index <- round(length(mass_vector) / 4)
+          new_index <- c(new_index, 2 * new_index, 3 * new_index)
+        }
+        # run this function again with three local maxima
+        return(fit_gaussian(mass_vector2, mass_vector, int_vector, new_index,
+                            scale, resol, outdir, force = 3, use_bounds = FALSE,
+                            plot, scanmode, int_factor, width, height))
+        # good fit, all means are within m/z range
+      } else {
+        # check if means are within 3 ppm and sum if so
+        tmp <- fit_values$qual
+        nr_means_new <- -1
+        nr_means <- length(fit_values$mean)
+        while (nr_means != nr_means_new) {
+          nr_means <- length(fit_values$mean)
+          fit_values <- within_ppm(fit_values$mean, fit_values$scale, fit_values$sigma, fit_values$area, 
+                                     mass_vector2, mass_vector, ppm = 4, resol, plot)
+          nr_means_new <- length(fit_values$mean)
+        }
+        # restore original quality score
+        fit_values$qual <- tmp
+
+        for (i in 1:length(fit_values$mean)){
+          peak_mean <- c(peak_mean, fit_values$mean[i])
+          peak_area <- c(peak_area, fit_values$area[i])
+        }
+        peak_qual <- fit_values$qual
+        peak_min <- mass_vector[1]
+        peak_max <- mass_vector[length(mass_vector)]
+      }
+    }
+
+    # Three local maxima; need at least 6 data points for this 
+  } else if (force == 3 && (length(mass_vector) > 6)) {
+    # determine fit values for 3 Gaussian peaks (mean, scale, sigma, qual)
+    fit_values <- fit_3peaks(mass_vector2, mass_vector, int_vector, max_index, scale, resol,
+                             use_bounds, plot, fit_quality, int_factor)
+    # test if one of the means is outside the m/z range
+    if (fit_values$mean[1] < mass_vector[1] || fit_values$mean[1] > mass_vector[length(mass_vector)] ||
+        fit_values$mean[2] < mass_vector[1] || fit_values$mean[2] > mass_vector[length(mass_vector)] ||
+        fit_values$mean[3] < mass_vector[1] || fit_values$mean[3] > mass_vector[length(mass_vector)]) {
+      # check if fit quality is bad
+      if (fit_values$qual > fit_quality) {
+        # run this function again with fixed boundaries
+        return(fit_gaussian(mass_vector2, mass_vector, int_vector, max_index, scale, resol,
+                            outdir, force, use_bounds = TRUE,
+                            plot, scanmode, int_factor, width, height))
+      } else {
+        # check which mean is outside range and remove it from the list of means
+        # NB: peak_mean and other variables have not been given values from 2-peak fit yet!
+        for (i in 1:length(fit_values$mean)) {
+          if (fit_values$mean[i] < mass_vector[1] || fit_values$mean[i] > mass_vector[length(mass_vector)]) {
+            peak_mean <- c(peak_mean, -i)
+            peak_area <- c(peak_area, -i)
+          } else {
+            peak_mean <- c(peak_mean, fit_values$mean[i])
+            peak_area <- c(peak_area, fit_values$area[i])
+          }
+        }
+        peak_qual <- fit_values$qual
+        peak_min <- mass_vector[1]
+        peak_max <- mass_vector[length(mass_vector)]
+      }
+      # if all means are within range
+    } else {
+      # check for bad fit
+      if (fit_values$qual > fit_quality) {
+        # Try to fit four curves; find four local maxima
+        new_index <- which(diff(sign(diff(int_vector))) == -2) + 1
+        # test if there are four indices in new_index
+        if (length(new_index) != 4) {
+          new_index <- round(length(mass_vector) / 5)
+          new_index <- c(new_index, 2 * new_index, 3 * new_index, 4 * new_index)
+        }
+        # run this function again with four local maxima
+        return(fit_gaussian(mass_vector2, mass_vector, int_vector, new_index, scale, resol,
+                            outdir, force = 4, use_bounds = FALSE, plot, scanmode,
+                            int_factor, width, height))
+        # good fit, all means are within m/z range
+      } else {
+        # check if means are within 4 ppm and sum if so
+        tmp <- fit_values$qual
+        nr_means_new <- -1
+        nr_means <- length(fit_values$mean)
+        while (nr_means != nr_means_new) {
+          nr_means <- length(fit_values$mean)
+          fit_values <- within_ppm(fit_values$mean, fit_values$scale, fit_values$sigma, fit_values$area, 
+                                   mass_vector2, mass_vector, ppm = 4, resol, plot)
+          nr_means_new <- length(fit_values$mean)
+        }
+        # restore original quality score
+        fit_values$qual <- tmp
+
+        for (i in 1:length(fit_values$mean)){
+          peak_mean <- c(peak_mean, fit_values$mean[i])
+          peak_area <- c(peak_area, fit_values$area[i])
+        }
+        peak_qual <- fit_values$qual
+        peak_min <- mass_vector[1]
+        peak_max <- mass_vector[length(mass_vector)]
+
+      }
+    }
+
+    #### Four local maxima; need at least 6 data points for this ####  
+  } else if (force == 4 && (length(mass_vector) > 6)) {
+    # determine fit values for 4 Gaussian peaks (mean, scale, sigma, qual)
+    fit_values <- fit_4peaks(mass_vector2, mass_vector, int_vector, max_index, scale, resol,
+                             use_bounds, plot, fit_quality, int_factor)
+    # test if one of the means is outside the m/z range
+    if (fit_values$mean[1] < mass_vector[1] || fit_values$mean[1] > mass_vector[length(mass_vector)] ||
+        fit_values$mean[2] < mass_vector[1] || fit_values$mean[2] > mass_vector[length(mass_vector)] ||
+        fit_values$mean[3] < mass_vector[1] || fit_values$mean[3] > mass_vector[length(mass_vector)] ||
+        fit_values$mean[4] < mass_vector[1] || fit_values$mean[4] > mass_vector[length(mass_vector)]) {
+      # check if quality of fit is bad
+      if (fit_values$qual > fit_quality) {
+        # run this function again with fixed boundaries
+        return(fit_gaussian(mass_vector2, mass_vector, int_vector, max_index, scale, resol,
+                            outdir, force, use_bounds = TRUE,
+                            plot, scanmode, int_factor, width, height))
+
+      } else {
+        # check which mean is outside range and remove it from the list of means
+        # NB: peak_mean and other variables have not been given values from 2-peak fit yet!
+        for (i in 1:length(fit_values$mean)) {
+          if (fit_values$mean[i] < mass_vector[1] | fit_values$mean[i] > mass_vector[length(mass_vector)]) {
+            peak_mean <- c(peak_mean, -i)
+            peak_area <- c(peak_area, -i)
+          } else {
+            peak_mean <- c(peak_mean, fit_values$mean[i])
+            peak_area <- c(peak_area, fit_values$area[i])
+          }
+        }
+        peak_qual <- fit_values$qual
+        peak_min <- mass_vector[1]
+        peak_max <- mass_vector[length(mass_vector)]
+      }
+      # if all means are within range
+    } else {
+      # check for bad fit
+      if (fit_values$qual > fit_quality) {
+        # Try to fit 1 curve, force = 5
+        return(fit_gaussian(mass_vector2, mass_vector, int_vector, max_index, scale, resol,
+                            outdir, force = 5, use_bounds = FALSE,
+                            plot, scanmode, int_factor, width, height))
+        # good fit, all means are within m/z range
+      } else {
+        # check if means are within 4 ppm and sum if so
+        tmp <- fit_values$qual 
+        nr_means_new <- -1
+        nr_means <- length(fit_values$mean)
+        while (nr_means != nr_means_new) {
+          nr_means <- length(fit_values$mean)
+          fit_values <- within_ppm(fit_values$mean, fit_values$scale, fit_values$sigma, fit_values$area, 
+                                   mass_vector2, mass_vector, ppm = 4, resol, plot)
+          nr_means_new <- length(fit_values$mean)
+        }
+        # restore original quality score
+        fit_values$qual <- tmp
+
+        for (i in 1:length(fit_values$mean)){
+          peak_mean <- c(peak_mean, fit_values$mean[i])
+          peak_area <- c(peak_area, fit_values$area[i])
+        }
+        peak_qual <- fit_values$qual
+        peak_min <- mass_vector[1]
+        peak_max <- mass_vector[length(mass_vector)]
+
+      }
+    }
+
+    # More than four local maxima; fit 1 peak.
+  } else {
+    scale <- 2
+    fit_quality1 <- 0.40
+    use_bounds <- TRUE
+    max_index <- which(int_vector == max(int_vector))
+    fit_values <- fit_1peak(mass_vector2, mass_vector, int_vector, max_index, scale, resol,
+                            plot, fit_quality1, use_bounds)
+    # check for bad fit
+    if (fit_values$qual > fit_quality1) {
+      # remove
+      if (plot) dev.off()
+      # get fit values from fit_optim
+      fit_values <- fit_optim(mass_vector, int_vector, resol, plot, scanmode, int_factor, width, height)
+      peak_mean <- c(peak_mean, fit_values$mean)
+      peak_area <- c(peak_area, fit_values$area)
+      peak_min <- fit_values$min
+      peak_max <- fit_values$max
+      peak_qual <- 0
+    } else {
+      peak_mean <- c(peak_mean, fit_values$mean)
+      peak_area <- c(peak_area, estimate_area(fit_values$mean, resol, fit_values$scale, fit_values$sigma, int_factor))
+      peak_qual <- fit_values$qual
+      peak_min <- mass_vector[1]
+      peak_max <- mass_vector[length(mass_vector)]
+    }
+  }
+
+  # put all values for this region of interest into a list
+  roi_value_list <- list("mean" = peak_mean,
+                         "area" = peak_area,
+                         "qual" = peak_qual,
+                         "min" = peak_min,
+                         "max" = peak_max)
+  return(roi_value_list)
+}
diff --git a/DIMS/Utils/fit_gaussians.R b/DIMS/Utils/fit_gaussians.R
new file mode 100644
index 0000000..05bff86
--- /dev/null
+++ b/DIMS/Utils/fit_gaussians.R
@@ -0,0 +1,196 @@
+# Gaussian fit functions
+## adapted from fitG.R, fit2G.R, fit3G.R and fit4G.R (combined)
+fit_1gaussian <- function(mass_vector, int_vector, sigma, query_mass, scale, use_bounds) {
+  #' Fit a Gaussian curve for a peak with given parameters
+  #'
+  #' @param mass_vector: Vector of masses (float)
+  #' @param int_vector: Vector of intensities (float)
+  #' @param sigma: Value for width of the peak (float)
+  #' @param query_mass: Value for mass at center of peak (float)
+  #' @param scale: Value for scaling intensities (float)
+  #' @param use_bounds: Boolean to indicate whether boundaries are to be used
+  #'
+  #' @return opt_fit: list of parameters and values describing the optimal fit
+
+  # define optimization function for optim based on normal distribution
+  opt_f <- function(params) {
+    d <- params[2] * dnorm(mass_vector, mean = params[1], sd = sigma)
+    sum((d - int_vector) ^ 2)
+  }
+  if (use_bounds) {
+    # determine lower and upper boundaries
+    lower <- c(mass_vector[1], 0, mass_vector[1], 0)
+    upper <- c(mass_vector[length(mass_vector)], Inf, mass_vector[length(mass_vector)], Inf)
+    # get optimal value for fitted Gaussian curve
+    opt_fit <- optim(c(as.numeric(query_mass), as.numeric(scale)),
+                     opt_f, control = list(maxit = 10000), method = "L-BFGS-B",
+                     lower = lower, upper = upper)
+  } else {
+    opt_fit <- optim(c(as.numeric(query_mass), as.numeric(scale)),
+                     opt_f, control = list(maxit = 10000))
+  }
+  return(opt_fit)
+}
+
+
+fit_2gaussians <- function(mass_vector, int_vector, sigma1, sigma2,
+                           query_mass1, scale1,
+                           query_mass2, scale2, use_bounds) {
+  #' Fit two Gaussian curves for a peak with given parameters
+  #'
+  #' @param mass_vector: Vector of masses (float)
+  #' @param int_vector: Vector of intensities (float)
+  #' @param sigma1: Value for width of the first peak (float)
+  #' @param sigma2: Value for width of the second peak (float)
+  #' @param query_mass1: Value for mass at center of first peak (float)
+  #' @param scale1: Value for scaling intensities for first peak (float)
+  #' @param query_mass2: Value for mass at center of second peak (float)
+  #' @param scale2: Value for scaling intensities for second peak (float)
+  #' @param use_bounds: Boolean to indicate whether boundaries are to be used
+  #'
+  #' @return opt_fit: list of parameters and values describing the optimal fit
+
+  # define optimization function for optim based on normal distribution
+  opt_f <- function(params) {
+    d <- params[2] * dnorm(mass_vector, mean = params[1], sd = sigma1) +
+         params[4] * dnorm(mass_vector, mean = params[3], sd = sigma2)
+    sum((d - int_vector) ^ 2)
+  }
+
+  if (use_bounds) {
+    # determine lower and upper boundaries
+    lower <- c(mass_vector[1], 0, mass_vector[1], 0)
+    upper <- c(mass_vector[length(mass_vector)], Inf, mass_vector[length(mass_vector)], Inf)
+    # get optimal value for 2 fitted Gaussian curves
+    if (is.null(query_mass2) && is.null(scale2) && is.null(sigma2)) {
+      sigma2 <- sigma1
+      opt_fit <- optim(c(as.numeric(query_mass1), as.numeric(scale1),
+                                as.numeric(query_mass1), as.numeric(scale1)),
+                                opt_f, control = list(maxit = 10000),
+                                method = "L-BFGS-B", lower = lower, upper = upper)
+    } else {
+      opt_fit <- optim(c(as.numeric(query_mass1), as.numeric(scale1),
+                                as.numeric(query_mass2), as.numeric(scale2)),
+                                opt_f, control = list(maxit = 10000),
+                                method = "L-BFGS-B", lower = lower, upper = upper)
+    }
+  } else {
+    if (is.null(query_mass2) && is.null(scale2) && is.null(sigma2)) {
+      sigma2 <- sigma1
+      opt_fit <- optim(c(as.numeric(query_mass1), as.numeric(scale1),
+                         as.numeric(query_mass1), as.numeric(scale1)),
+                       opt_f, control = list(maxit = 10000))
+    } else {
+      opt_fit <- optim(c(as.numeric(query_mass1), as.numeric(scale1),
+                         as.numeric(query_mass2), as.numeric(scale2)),
+                       opt_f, control = list(maxit = 10000))
+    }
+  }
+  return(opt_fit)
+}
+
+
+fit_3gaussians <- function(mass_vector, int_vector, sigma1, sigma2, sigma3,
+                           query_mass1, scale1,
+                           query_mass2, scale2,
+                           query_mass3, scale3, use_bounds) {
+  #' Fit three Gaussian curves for a peak with given parameters
+  #'
+  #' @param mass_vector: Vector of masses (float)
+  #' @param int_vector: Vector of intensities (float)
+  #' @param sigma1: Value for width of the first peak (float)
+  #' @param sigma2: Value for width of the second peak (float)
+  #' @param sigma3: Value for width of the third peak (float)
+  #' @param query_mass1: Value for mass at center of first peak (float)
+  #' @param scale1: Value for scaling intensities for first peak (float)
+  #' @param query_mass2: Value for mass at center of second peak (float)
+  #' @param scale2: Value for scaling intensities for second peak (float)
+  #' @param query_mass3: Value for mass at center of third peak (float)
+  #' @param scale3: Value for scaling intensities for third peak (float)
+  #' @param use_bounds: Boolean to indicate whether boundaries are to be used
+  #'
+  #' @return opt_fit: list of parameters and values describing the optimal fit
+
+  # define optimization function for optim based on normal distribution
+  opt_f <- function(params) {
+    d <- params[2] * dnorm(mass_vector, mean = params[1], sd = sigma1) +
+         params[4] * dnorm(mass_vector, mean = params[3], sd = sigma2) +
+         params[6] * dnorm(mass_vector, mean = params[5], sd = sigma3)
+    sum((d - int_vector) ^ 2)
+  }
+
+  if (use_bounds) {
+    # determine lower and upper boundaries
+    lower <- c(mass_vector[1], 0, mass_vector[1], 0, mass_vector[1], 0)
+    upper <- c(mass_vector[length(mass_vector)], Inf, mass_vector[length(mass_vector)], Inf,
+               mass_vector[length(mass_vector)], Inf)
+    # get optimal value for 3 fitted Gaussian curves
+    opt_fit <- optim(c(query_mass1, scale1,
+                       query_mass2, scale2,
+                       query_mass3, scale3),
+                     opt_f, control = list(maxit = 10000),
+                     method = "L-BFGS-B", lower = lower, upper = upper)
+  } else {
+    opt_fit <- optim(c(query_mass1, scale1,
+                       query_mass2, scale2,
+                       query_mass3, scale3),
+                     opt_f, control = list(maxit = 10000))
+  }
+  return(opt_fit)
+}
+
+fit_4gaussians <- function(mass_vector, int_vector, sigma1, sigma2, sigma3, sigma4,
+                           query_mass1, scale1,
+                           query_mass2, scale2,
+                           query_mass3, scale3,
+                           query_mass4, scale4, use_bounds) {
+  #' Fit four Gaussian curves for a peak with given parameters
+  #'
+  #' @param mass_vector: Vector of masses (float)
+  #' @param int_vector: Vector of intensities (float)
+  #' @param sigma1: Value for width of the first peak (float)
+  #' @param sigma2: Value for width of the second peak (float)
+  #' @param sigma3: Value for width of the third peak (float)
+  #' @param sigma4: Value for width of the fourth peak (float)
+  #' @param query_mass1: Value for mass at center of first peak (float)
+  #' @param scale1: Value for scaling intensities for first peak (float)
+  #' @param query_mass2: Value for mass at center of second peak (float)
+  #' @param scale2: Value for scaling intensities for second peak (float)
+  #' @param query_mass3: Value for mass at center of third peak (float)
+  #' @param scale3: Value for scaling intensities for third peak (float)
+  #' @param query_mass4: Value for mass at center of fourth peak (float)
+  #' @param scale4: Value for scaling intensities for fourth peak (float)
+  #' @param use_bounds: Boolean to indicate whether boundaries are to be used
+  #'
+  #' @return opt_fit: list of parameters and values describing the optimal fit
+
+  # define optimization function for optim based on normal distribution
+  opt_f <- function(params) {
+    d <- params[2] * dnorm(mass_vector, mean = params[1], sd = sigma1) +
+         params[4] * dnorm(mass_vector, mean = params[3], sd = sigma2) +
+         params[6] * dnorm(mass_vector, mean = params[5], sd = sigma3) +
+         params[8] * dnorm(mass_vector, mean = params[7], sd = sigma4)
+    sum((d - int_vector) ^ 2)
+  }
+
+  if (use_bounds) {
+    # determine lower and upper boundaries
+    lower <- c(mass_vector[1], 0, mass_vector[1], 0, mass_vector[1], 0, mass_vector[1], 0)
+    upper <- c(mass_vector[length(mass_vector)], Inf, mass_vector[length(mass_vector)], Inf,
+               mass_vector[length(mass_vector)], Inf, mass_vector[length(mass_vector)], Inf)
+    # get optimal value for 3 fitted Gaussian curves
+    opt_fit <- optim(c(query_mass1, scale1,
+                       query_mass2, scale2,
+                       query_mass3, scale3,
+                       query_mass4, scale4),
+                     opt_f, control = list(maxit = 10000),
+                     method = "L-BFGS-B", lower = lower, upper = upper)
+  } else {
+    opt_fit <- optim(c(query_mass1, scale1,
+                       query_mass2, scale2,
+                       query_mass3, scale3,
+                       query_mass4, scale4),
+                     opt_f, control = list(maxit = 10000))
+  }
+  return(opt_fit)
+}
diff --git a/DIMS/Utils/fit_init.R b/DIMS/Utils/fit_init.R
new file mode 100644
index 0000000..ecae337
--- /dev/null
+++ b/DIMS/Utils/fit_init.R
@@ -0,0 +1,47 @@
+## adapted from fitGaussianInit.R
+# variables with fixed values will be removed from function parameters
+# scale, outdir, plot, width, height
+# mz_index, start_index, end_index, sample_name not used.
+# fit_gaussian should be defined before this function is called.
+fit_init <- function(mass_vector, int_vector, int_factor, scale, resol,
+                     outdir, sample_name, scanmode, plot, width, height,
+                     mz_index, start_index, end_index) {
+  #' Determine initial fit of Gaussian curve to small region of m/z
+  #'
+  #' @param mass_vector: Vector of m/z values for a region of interest (float)
+  #' @param int_vector: Vector of intensities for a region of interest (float)
+  #' @param int_factor: Value used to calculate area under Gaussian curve (integer)
+  #' @param scale: Initial value used to estimate scaling parameter (integer)
+  #' @param resol: Value for resolution (integer)
+  #' @param outdir: Path for output directory (string)
+  #' @param sample_name: Sample name (string)
+  #' @param scanmode: Scan mode, positive or negative (string)
+  #' @param plot: Parameter indicating whether plots should be made (boolean)
+  #' @param width: Value for width of plot (integer)
+  #' @param height: Value for height of plot (integer)
+  #' @param mz_index: Index of m/z value with non-zero intensity (integer)
+  #' @param start_index: Index of start of m/z range in mass_vector (integer)
+  #' @param end_index: Index of end of m/z range in mass_vector (integer)
+  #'
+  #' @return roi_value_list: list of fit values for region of interest (list)
+
+  # define mass_diff as difference between last and first value of mass_vector
+  mass_diff <- mass_vector[length(mass_vector)] - mass_vector[1]
+  # generate a second mass_vector with equally spaced m/z values
+  mass_vector2 <- seq(mass_vector[1], mass_vector[length(mass_vector)],
+                      length = mass_diff * int_factor)
+
+  # Find the index in int_vector with the highest intensity
+  max_index <- which(int_vector == max(int_vector))
+  roi_values <- fit_gaussian(mass_vector2, mass_vector, int_vector, max_index,
+                             scale, resol, outdir, force = length(max_index),
+                             use_bounds = FALSE, plot, scanmode, int_factor, width, height)
+  # put all values for this region of interest into a list
+  roi_value_list <- list("mean" = roi_values$mean,
+                         "area" = roi_values$area,
+                         "qual" = roi_values$qual,
+                         "min" = roi_values$min,
+                         "max" = roi_values$max)
+  return(roi_value_list)
+}
+
diff --git a/DIMS/Utils/fit_optim.R b/DIMS/Utils/fit_optim.R
new file mode 100644
index 0000000..404cc7a
--- /dev/null
+++ b/DIMS/Utils/fit_optim.R
@@ -0,0 +1,47 @@
+## adapted from generateGaussian.R
+# variables with fixed values will be removed from function parameters
+# plot, width, height
+# fit_gaussian should be defined before this function is called.
+fit_optim <- function(mass_vector, int_vector, resol,
+                      plot, scanmode, int_factor, width, height) {
+  #' Determine optimized fit of Gaussian curve to small region of m/z
+  #'
+  #' @param mass_vector: Vector of m/z values for a region of interest (float)
+  #' @param int_vector: Vector of intensities for a region of interest (float)
+  #' @param resol: Value for resolution (integer)
+  #' @param plot: Parameter indicating whether plots should be made (boolean)
+  #' @param scanmode: Scan mode, positive or negative (string)
+  #' @param int_factor: Value used to calculate area under Gaussian curve (integer)
+  #' @param width: Value for width of plot (integer)
+  #' @param height: Value for height of plot (integer)
+  #'
+  #' @return roi_value_list: list of fit values for region of interest (list)
+  
+  factor <- 1.5
+  # Find the index in int_vector with the highest intensity
+  max_index <- which(int_vector == max(int_vector))[1]
+  mass_max <- mass_vector[max_index]
+  int_max <- int_vector[max_index]
+  # get peak width
+  fwhm <- get_fwhm(mass_max, resol)
+  # simplify the peak shape: represent it by a triangle
+  mass_max_simple <- c(mass_max - factor * fwhm, mass_max, mass_max + factor * fwhm)
+  int_max_simple <- c(0, int_max, 0)
+  
+  # define mass_diff as difference between last and first value of mass_max_simple
+  mass_diff <- mass_max_simple[length(mass_max_simple)] - mass_max_simple[1]
+  # generate a second mass_vector with equally spaced m/z values
+  mass_vector2 <- seq(mass_max_simple[1], mass_max_simple[length(mass_max_simple)], 
+                      length = mass_diff * int_factor)
+  sigma <- get_stdev(mass_vector2, int_max_simple)
+  scale <- optimize_gaussfit(mass_vector2, int_max_simple, sigma, mass_max)
+
+  # get an estimate of the area under the peak
+  area <- estimate_area(mass_max, resol, scale, sigma, int_factor)
+  # put all values for this region of interest into a list
+  roi_value_list <- list("mean" = mass_max,
+                         "area" = area,
+                         "min" = mass_vector2[1],
+                         "max" = mass_vector2[length(mass_vector2)])
+  return(roi_value_list)
+}
diff --git a/DIMS/Utils/fit_peaks.R b/DIMS/Utils/fit_peaks.R
new file mode 100644
index 0000000..02d5408
--- /dev/null
+++ b/DIMS/Utils/fit_peaks.R
@@ -0,0 +1,381 @@
+## adapted from fit1Peak.R, fit2peaks.R, fit3peaks.R and fit4peaks.R (combined)
+# variables with fixed values will be removed from function parameters
+# plot, int_factor
+fit_1peak <- function(mass_vector2, mass_vector, int_vector, max_index, scale, resol, plot, fit_quality, use_bounds) {
+  #' Fit 1 Gaussian peak in small region of m/z
+  #'
+  #' @param mass_vector2: Vector of equally spaced m/z values (float)
+  #' @param mass_vector: Vector of m/z values for a region of interest (float)
+  #' @param int_vector: Value used to calculate area under Gaussian curve (integer)
+  #' @param max_index: Index in int_vector with the highest intensity (integer)
+  #' @param scale: Initial value used to estimate scaling parameter (integer)
+  #' @param resol: Value for resolution (integer)
+  #' @param plot: Parameter indicating whether plots should be made (boolean)
+  #' @param fit_quality: Value indicating quality of fit of Gaussian curve (float)
+  #' @param use_bounds: Boolean to indicate whether boundaries are to be used
+  #'
+  #' @return roi_value_list: list of fit values for region of interest (list)
+
+  if (length(int_vector) < 3) {
+    message("Range too small, no fit possible!")
+  } else {
+    if ((length(int_vector) == 4)) {
+      # fit 1 peak
+      mu <- weighted.mean(mass_vector, int_vector)
+      sigma <- get_stdev(mass_vector, int_vector)
+      fitted_peak <- fit_1gaussian(mass_vector, int_vector, sigma, mu, scale, use_bounds)
+    } else {
+      # set range vector
+      if ((length(mass_vector) - length(max_index)) < 2) {
+        range1 <- c((length(mass_vector) - 4) : length(mass_vector))
+      } else if (length(max_index) < 2) {
+        range1 <- c(1:5)
+      } else {
+        range1 <- c(max_index[1] - 2, max_index[1] - 1, max_index[1], max_index[1] + 1, max_index[1] + 2)
+      }
+      if (range1[1] == 0) range1 <- range1[-1]
+      # remove NA
+      if (length(which(is.na(int_vector[range1]))) != 0) {
+        range1 <- range1[-which(is.na(int_vector[range1]))]
+      }
+      # fit 1 peak
+      mu <- weighted.mean(mass_vector[range1], int_vector[range1])
+      sigma <- get_stdev(mass_vector[range1], int_vector[range1])
+      fitted_peak <- fit_1gaussian(mass_vector, int_vector, sigma, mu, scale, use_bounds)
+    }
+
+    p1 <- fitted_peak$par
+
+    # get new value for fit quality and scale
+    fq_new <- get_fit_quality(mass_vector, int_vector, p1[1], p1[1], resol, p1[2], sigma)$fq_new
+    scale_new <- 1.2 * scale
+
+    # bad fit
+    if (fq_new > fit_quality) {
+      # optimize scaling factor
+      fq <- 0
+      scale <- 0
+      if (sum(int_vector) > sum(p1[2] * dnorm(mass_vector, p1[1], sigma))) {
+        while ((round(fq, digits = 3) != round(fq_new, digits = 3)) && (scale_new < 10000)) {
+          fq <- fq_new
+          scale <- scale_new
+          # fit 1 peak
+          fitted_peak <- fit_1gaussian(mass_vector, int_vector, sigma, mu, scale, use_bounds)
+          p1 <- fitted_peak$par
+          # get new value for fit quality and scale
+          fq_new <- get_fit_quality(mass_vector, int_vector, p1[1], p1[1], resol, p1[2], sigma)$fq_new
+          scale_new <- 1.2 * scale
+        }
+      } else {
+        while ((round(fq, digits = 3) != round(fq_new, digits = 3)) && (scale_new < 10000)) {
+          fq <- fq_new
+          scale <- scale_new
+          # fit 1 peak
+          fitted_peak <- fit_1gaussian(mass_vector, int_vector, sigma, mu, scale, use_bounds)
+          p1 <- fitted_peak$par
+          # get new value for fit quality and scale
+          fq_new <- get_fit_quality(mass_vector, int_vector, p1[1], p1[1], resol, p1[2], sigma)$fq_new
+          scale_new <- 0.8 * scale
+        }
+      }
+      # use optimized scale factor to fit 1 peak
+      if (fq < fq_new) {
+        fitted_peak <- fit_1gaussian(mass_vector, int_vector, sigma, mu, scale, use_bounds)
+        p1 <- fitted_peak$par
+        fq_new <- fq
+      }
+    }
+  }
+
+  roi_value_list <- list("mean" = p1[1], "scale" = p1[2], "sigma" = sigma, "qual" = fq_new)
+  return(roi_value_list)
+}
+
+fit_2peaks <- function(mass_vector2, mass_vector, int_vector, max_index, scale, resol, use_bounds = FALSE,
+                       plot = FALSE, fit_quality, int_factor) {
+  #' Fit 2 Gaussian peaks in small region of m/z
+  #'
+  #' @param mass_vector2: Vector of equally spaced m/z values (float)
+  #' @param mass_vector: Vector of m/z values for a region of interest (float)
+  #' @param int_vector: Value used to calculate area under Gaussian curve (integer)
+  #' @param max_index: Index in int_vector with the highest intensity (integer)
+  #' @param scale: Initial value used to estimate scaling parameter (integer)
+  #' @param resol: Value for resolution (integer)
+  #' @param plot: Parameter indicating whether plots should be made (boolean)
+  #' @param fit_quality: Value indicating quality of fit of Gaussian curve (float)
+  #' @param use_bounds: Boolean to indicate whether boundaries are to be used
+  #' @param int_factor: Value used to calculate area under Gaussian curve (integer)
+  #'
+  #' @return roi_value_list: list of fit values for region of interest (list)
+
+  peak_mean <- NULL
+  peak_area <- NULL
+  peak_scale <- NULL
+  peak_sigma <- NULL
+
+  # set range vectors for 2 peaks
+  range1 <- c(max_index[1] - 2, max_index[1] - 1, max_index[1], max_index[1] + 1, max_index[1] + 2)
+  if (range1[1] == 0) range1 <- range1[-1]
+  range2 <- c(max_index[2] - 2, max_index[2] - 1, max_index[2], max_index[2] + 1, max_index[2] + 2)
+  if (length(mass_vector) < range2[length(range2)]) range2 <- range2[-length(range2)]
+  range1 <- check_overlap(range1, range2)[[1]]
+  range2 <- check_overlap(range1, range2)[[2]]
+  # check for negative or 0
+  remove <- which(range1 < 1)
+  if (length(remove) > 0) range1 <- range1[-remove]
+  remove <- which(range2 < 1)
+  if (length(remove) > 0) range2 <- range2[-remove]
+  # remove NA
+  if (length(which(is.na(int_vector[range1]))) != 0) range1 <- range1[-which(is.na(int_vector[range1]))]
+  if (length(which(is.na(int_vector[range2]))) != 0) range2 <- range2[-which(is.na(int_vector[range2]))]
+
+  # fit 2 peaks, first separately, then together
+  mu1 <- weighted.mean(mass_vector[range1], int_vector[range1])
+  sigma1 <- get_stdev(mass_vector[range1], int_vector[range1])
+  fitted_peak <- fit_1gaussian(mass_vector[range1], int_vector[range1], sigma1, mu1, scale, use_bounds)
+  p1 <- fitted_peak$par
+  # second peak
+  mu2 <- weighted.mean(mass_vector[range2], int_vector[range2])
+  sigma2 <- get_stdev(mass_vector[range2], int_vector[range2])
+  fitted_peak <- fit_1gaussian(mass_vector[range2], int_vector[range2], sigma2, mu2, scale, use_bounds)
+  p2 <- fitted_peak$par
+  # combined
+  fitted_2peaks <- fit_2gaussians(mass_vector, int_vector, sigma1, sigma2, p1[1], p1[2], p2[1], p2[2], use_bounds)
+  pc <- fitted_2peaks$par
+
+  # get fit quality
+  if (is.null(sigma2)) sigma2 <- sigma1
+  sum_fit <- (pc[2] * dnorm(mass_vector, pc[1], sigma1)) +
+             (pc[4] * dnorm(mass_vector, pc[3], sigma2))
+  fq <- get_fit_quality(mass_vector, int_vector, sort(c(pc[1], pc[3]))[1], sort(c(pc[1], pc[3]))[2],
+                        resol, sum_fit = sum_fit)$fq_new
+
+  # get parameter values
+  area1 <- estimate_area(pc[1], resol, pc[2], sigma1, int_factor)
+  area2 <- estimate_area(pc[3], resol, pc[4], sigma2, int_factor)
+  peak_area <- c(peak_area, area1)
+  peak_area <- c(peak_area, area2)
+  peak_mean <- c(peak_mean, pc[1])
+  peak_mean <- c(peak_mean, pc[3])
+  peak_scale <- c(peak_scale, pc[2])
+  peak_scale <- c(peak_scale, pc[4])
+  peak_sigma <- c(peak_sigma, sigma1)
+  peak_sigma <- c(peak_sigma, sigma2)
+
+  roi_value_list <- list("mean" = peak_mean, "scale" = peak_scale, "sigma" = peak_sigma, "area" = peak_area, "qual" = fq)
+  return(roi_value_list)
+}
+
+fit_3peaks <- function(mass_vector2, mass_vector, int_vector, max_index, scale, resol, use_bounds = FALSE, 
+                       plot = FALSE, fit_quality, int_factor) {
+  #' Fit 3 Gaussian peaks in small region of m/z
+  #'
+  #' @param mass_vector2: Vector of equally spaced m/z values (float)
+  #' @param mass_vector: Vector of m/z values for a region of interest (float)
+  #' @param int_vector: Value used to calculate area under Gaussian curve (integer)
+  #' @param max_index: Index in int_vector with the highest intensity (integer)
+  #' @param scale: Initial value used to estimate scaling parameter (integer)
+  #' @param resol: Value for resolution (integer)
+  #' @param plot: Parameter indicating whether plots should be made (boolean)
+  #' @param fit_quality: Value indicating quality of fit of Gaussian curve (float)
+  #' @param use_bounds: Boolean to indicate whether boundaries are to be used
+  #' @param int_factor: Value used to calculate area under Gaussian curve (integer)
+  #'
+  #' @return roi_value_list: list of fit values for region of interest (list)
+
+  peak_mean <- NULL
+  peak_area <- NULL
+  peak_scale <- NULL
+  peak_sigma <- NULL
+
+  # set range vectors for 3 peaks
+  range1 <- c(max_index[1] - 2, max_index[1] - 1, max_index[1], max_index[1] + 1, max_index[1] + 2)
+  range2 <- c(max_index[2] - 2, max_index[2] - 1, max_index[2], max_index[2] + 1, max_index[2] + 2)
+  range3 <- c(max_index[3] - 2, max_index[3] - 1, max_index[3], max_index[3] + 1, max_index[3] + 2)
+  remove <- which(range1 < 1)
+  if (length(remove) > 0) {
+    range1 <- range1[-remove]
+  }
+  remove <- which(range2 < 1)
+  if (length(remove) > 0) {
+    range2 <- range2[-remove]
+  }
+  if (length(mass_vector) < range3[length(range3)]) range3 <- range3[-length(range3)]
+  range1 <- check_overlap(range1, range2)[[1]]
+  range2 <- check_overlap(range1, range2)[[2]]
+  range2 <- check_overlap(range2, range3)[[1]]
+  range3 <- check_overlap(range2, range3)[[2]]
+  # check for negative or 0
+  remove <- which(range1 < 1)
+  if (length(remove) > 0) range1 <- range1[-remove]
+  remove <- which(range2 < 1)
+  if (length(remove) > 0) range2 <- range2[-remove]
+  remove <- which(range3 < 1)
+  if (length(remove) > 0) range3 <- range3[-remove]
+  # remove NA
+  if (length(which(is.na(int_vector[range1]))) != 0) range1 <- range1[-which(is.na(int_vector[range1]))]
+  if (length(which(is.na(int_vector[range2]))) != 0) range2 <- range2[-which(is.na(int_vector[range2]))]
+  if (length(which(is.na(int_vector[range3]))) != 0) range3 <- range3[-which(is.na(int_vector[range3]))]
+
+  # fit 3 peaks, first separately, then together
+  mu1 <- weighted.mean(mass_vector[range1], int_vector[range1])
+  sigma1 <- get_stdev(mass_vector[range1], int_vector[range1])
+  fitted_peak <- fit_1gaussian(mass_vector[range1], int_vector[range1], sigma1, mu1, scale, use_bounds)
+  p1 <- fitted_peak$par
+  # second peak
+  mu2 <- weighted.mean(mass_vector[range2], int_vector[range2])
+  sigma2 <- get_stdev(mass_vector[range2], int_vector[range2])
+  fitted_peak <- fit_1gaussian(mass_vector[range2], int_vector[range2], sigma2, mu2, scale, use_bounds)
+  p2 <- fitted_peak$par
+  # third peak
+  mu3 <- weighted.mean(mass_vector[range3], int_vector[range3])
+  sigma3 <- get_stdev(mass_vector[range3], int_vector[range3])
+  fitted_peak <- fit_1gaussian(mass_vector[range3], int_vector[range3], sigma3, mu3, scale, use_bounds)
+  p3 <- fitted_peak$par
+  # combined
+  fitted_3peaks <- fit_3gaussians(mass_vector, int_vector, sigma1, sigma2, sigma3,
+                                  p1[1], p1[2], p2[1], p2[2], p3[1], p3[2], use_bounds)
+  pc <- fitted_3peaks$par
+
+  # get fit quality
+  sum_fit = (pc[2] * dnorm(mass_vector, pc[1], sigma1)) +
+            (pc[4] * dnorm(mass_vector, pc[3], sigma2)) +
+            (pc[6] * dnorm(mass_vector, pc[5], sigma3))
+  fq <- get_fit_quality(mass_vector, int_vector, sort(c(pc[1], pc[3], pc[5]))[1], sort(c(pc[1], pc[3], pc[5]))[3],
+                        resol, sum_fit = sum_fit)$fq_new
+
+  # get parameter values
+  area1 <- estimate_area(pc[1], resol, pc[2], sigma1, int_factor)
+  area2 <- estimate_area(pc[3], resol, pc[4], sigma2, int_factor)
+  area3 <- estimate_area(pc[5], resol, pc[6], sigma3, int_factor)
+  peak_area <- c(peak_area, area1)
+  peak_area <- c(peak_area, area2)
+  peak_area <- c(peak_area, area3)
+  peak_mean <- c(peak_mean, pc[1])
+  peak_mean <- c(peak_mean, pc[3])
+  peak_mean <- c(peak_mean, pc[5])
+  peak_scale <- c(peak_scale, pc[2])
+  peak_scale <- c(peak_scale, pc[4])
+  peak_scale <- c(peak_scale, pc[6])
+  peak_sigma <- c(peak_sigma, sigma1)
+  peak_sigma <- c(peak_sigma, sigma2)
+  peak_sigma <- c(peak_sigma, sigma3)
+
+  roi_value_list <- list("mean" = peak_mean, "scale" = peak_scale, "sigma" = peak_sigma, "area" = peak_area, "qual" = fq)
+  return(roi_value_list)
+}
+
+fit_4peaks <- function(mass_vector2, mass_vector, int_vector, max_index, scale, resol, use_bounds = FALSE,
+                       plot = FALSE, fit_quality, int_factor) {
+  #' Fit 4 Gaussian peaks in small region of m/z
+  #'
+  #' @param mass_vector2: Vector of equally spaced m/z values (float)
+  #' @param mass_vector: Vector of m/z values for a region of interest (float)
+  #' @param int_vector: Value used to calculate area under Gaussian curve (integer)
+  #' @param max_index: Index in int_vector with the highest intensity (integer)
+  #' @param scale: Initial value used to estimate scaling parameter (integer)
+  #' @param resol: Value for resolution (integer)
+  #' @param plot: Parameter indicating whether plots should be made (boolean)
+  #' @param fit_quality: Value indicating quality of fit of Gaussian curve (float)
+  #' @param use_bounds: Boolean to indicate whether boundaries are to be used
+  #' @param int_factor: Value used to calculate area under Gaussian curve (integer)
+  #'
+  #' @return roi_value_list: list of fit values for region of interest (list)
+
+  peak_mean <- NULL
+  peak_area <- NULL
+  peak_scale <- NULL
+  peak_sigma <- NULL
+
+  # set range vectors for 4 peaks
+  range1 <- c(max_index[1] - 2, max_index[1] - 1, max_index[1], max_index[1] + 1, max_index[1] + 2)
+  range2 <- c(max_index[2] - 2, max_index[2] - 1, max_index[2], max_index[2] + 1, max_index[2] + 2)
+  range3 <- c(max_index[3] - 2, max_index[3] - 1, max_index[3], max_index[3] + 1, max_index[3] + 2)
+  range4 <- c(max_index[4] - 2, max_index[4] - 1, max_index[4], max_index[4] + 1, max_index[4] + 2)
+  if (range1[1] == 0) range1 <- range1[-1]
+  if (length(mass_vector) < range4[length(range4)]) range4 <- range4[-length(range4)]
+  range1 <- check_overlap(range1, range2)[[1]]
+  range2 <- check_overlap(range1, range2)[[2]]
+  range2 <- check_overlap(range2, range3)[[1]]
+  range3 <- check_overlap(range2, range3)[[2]]
+  range3 <- check_overlap(range3, range4)[[1]]
+  range4 <- check_overlap(range3, range4)[[2]]
+  remove <- which(range4 > length(mass_vector))
+  if (length(remove) > 0) {
+    range4 <- range4[-remove]
+  }
+  # check for negative or 0
+  remove <- which(range1 < 1)
+  if (length(remove) > 0) range1 <- range1[-remove]
+  remove <- which(range2 < 1)
+  if (length(remove) > 0) range2 <- range2[-remove]
+  remove <- which(range3 < 1)
+  if (length(remove) > 0) range3 <- range3[-remove]
+  remove <- which(range4 < 1)
+  if (length(remove) > 0) range4 <- range4[-remove]
+  # remove NA
+  if (length(which(is.na(int_vector[range1]))) != 0) range1 <- range1[-which(is.na(int_vector[range1]))]
+  if (length(which(is.na(int_vector[range2]))) != 0) range2 <- range2[-which(is.na(int_vector[range2]))]
+  if (length(which(is.na(int_vector[range3]))) != 0) range3 <- range3[-which(is.na(int_vector[range3]))]
+  if (length(which(is.na(int_vector[range4]))) != 0) range4 <- range4[-which(is.na(int_vector[range4]))]
+
+  # fit 4 peaks, first separately, then together
+  mu1 <- weighted.mean(mass_vector[range1], int_vector[range1])
+  sigma1 <- get_stdev(mass_vector[range1], int_vector[range1])
+  fitted_peak <- fit_1gaussian(mass_vector[range1], int_vector[range1], sigma1, mu1, scale, use_bounds)
+  p1 <- fitted_peak$par
+  # second peak
+  mu2 <- weighted.mean(mass_vector[range2], int_vector[range2])
+  sigma2 <- get_stdev(mass_vector[range2], int_vector[range2])
+  fitted_peak <- fit_1gaussian(mass_vector[range2], int_vector[range2], sigma2, mu2, scale, use_bounds)
+  p2 <- fitted_peak$par
+  # third peak
+  mu3 <- weighted.mean(mass_vector[range3], int_vector[range3])
+  sigma3 <- get_stdev(mass_vector[range3], int_vector[range3])
+  fitted_peak <- fit_1gaussian(mass_vector[range3], int_vector[range3], sigma3, mu3, scale, use_bounds)
+  p3 <- fitted_peak$par
+  # fourth peak
+  mu4 <- weighted.mean(mass_vector[range4], int_vector[range4])
+  sigma4 <- get_stdev(mass_vector[range4], int_vector[range4])
+  fitted_peak <- fit_1gaussian(mass_vector[range4], int_vector[range4], sigma4, mu4, scale, use_bounds)
+  p4 <- fitted_peak$par
+  # combined
+  fitted_4peaks <- fit_4gaussians(mass_vector, int_vector, sigma1, sigma2, sigma3, sigma3, 
+                                  p1[1], p1[2], p2[1], p2[2], p3[1], p3[2],  p4[1], p4[2], use_bounds)
+  pc <- fitted_4peaks$par
+
+  # get fit quality
+  sum_fit <-  (pc[2] * dnorm(mass_vector, pc[1], sigma1)) + 
+              (pc[4] * dnorm(mass_vector, pc[3], sigma2)) + 
+              (pc[6] * dnorm(mass_vector, pc[5], sigma3)) + 
+              (pc[8] * dnorm(mass_vector, pc[7], sigma3))
+  fq <- get_fit_quality(mass_vector, int_vector, 
+                        sort(c(pc[1], pc[3], pc[5], pc[7]))[1], sort(c(pc[1], pc[3], pc[5], pc[7]))[4], 
+                        resol, sum_fit = sum_fit)$fq_new
+
+  # get parameter values
+  area1 <- estimate_area(pc[1], resol, pc[2], sigma1, int_factor)
+  area2 <- estimate_area(pc[3], resol, pc[4], sigma2, int_factor)
+  area3 <- estimate_area(pc[5], resol, pc[6], sigma3, int_factor)
+  area4 <- estimate_area(pc[7], resol, pc[8], sigma4, int_factor)
+  peak_area <- c(peak_area, area1)
+  peak_area <- c(peak_area, area2)
+  peak_area <- c(peak_area, area3)
+  peak_area <- c(peak_area, area4)
+  peak_mean <- c(peak_mean, pc[1])
+  peak_mean <- c(peak_mean, pc[3])
+  peak_mean <- c(peak_mean, pc[5])
+  peak_mean <- c(peak_mean, pc[7])
+  peak_scale <- c(peak_scale, pc[2])
+  peak_scale <- c(peak_scale, pc[4])
+  peak_scale <- c(peak_scale, pc[6])
+  peak_scale <- c(peak_scale, pc[8])
+  peak_sigma <- c(peak_sigma, sigma1)
+  peak_sigma <- c(peak_sigma, sigma2)
+  peak_sigma <- c(peak_sigma, sigma3)
+  peak_sigma <- c(peak_sigma, sigma4)
+
+  roi_value_list <- list("mean" = peak_mean, "scale" = peak_scale, "sigma" = peak_sigma, "area" = peak_area, "qual" = fq)
+  return(roi_value_list)
+}
+
diff --git a/DIMS/Utils/get_element_info.R b/DIMS/Utils/get_element_info.R
new file mode 100644
index 0000000..a619f6a
--- /dev/null
+++ b/DIMS/Utils/get_element_info.R
@@ -0,0 +1,24 @@
+## adapted from elementInfo.R, which is adapted from Rdisop function .getElement
+# refactor: check where library is initialised: library(Rdisop)
+get_element_info <- function(name, elements = NULL) {
+  #' Get info on m/z and isotopes for all chemical elements
+  #'
+  #' @param name: Name of adduct, e.g. Na (string)
+  #' @param elements: List of all adducts to take into account (list of strings)
+  #'
+  #' @return element_info: peak group list with filled-in intensities (matrix)
+
+  # get information on all elements
+  if (!is.list(elements) || length(elements) == 0 ) {
+    elements <- initializePSE()
+  }
+  # extract information for a particular adduct
+  if (name == "CH3OH+H") {
+    # regular_expr should be exact match for name, except for methanol
+    regular_expr <- "^CH3OH\\+H$"
+  } else {
+    regular_expr <- paste0("^", name, "$")
+  }
+  element_info <- elements[[grep(regular_expr, sapply(elements, function(x) { x$name }))]]
+  return(element_info)
+}
\ No newline at end of file
diff --git a/DIMS/Utils/get_fit_quality.R b/DIMS/Utils/get_fit_quality.R
new file mode 100644
index 0000000..23b1947
--- /dev/null
+++ b/DIMS/Utils/get_fit_quality.R
@@ -0,0 +1,34 @@
+## adapted from getFitQuality.R
+# parameter not used: mu_last
+get_fit_quality <- function(mass_vector, int_vector, mu_first, mu_last, resol, scale = NULL, sigma = NULL, sum_fit = NULL) {
+  #' Fit 1 Gaussian peak in small region of m/z
+  #'
+  #' @param mass_vector: Vector of m/z values for a region of interest (float)
+  #' @param int_vector: Value used to calculate area under Gaussian curve (integer)
+  #' @param mu_first: Value for first peak (float)
+  #' @param scale: Initial value used to estimate scaling parameter (integer)
+  #' @param resol: Value for resolution (integer)
+  #' @param sum_fit: Value indicating quality of fit of Gaussian curve (float)
+  #'
+  #' @return list_params: list of parameters indicating quality of fit (list)
+  if (is.null(sum_fit)) {
+    mass_vector_int <- mass_vector
+    int_vector_int <- int_vector
+    # get new fit quality
+    fq_new <- mean(abs((scale * dnorm(mass_vector_int, mu_first, sigma)) - int_vector_int) / 
+               rep((max(scale * dnorm(mass_vector_int, mu_first, sigma)) / 2), length(mass_vector_int)))
+  } else {
+    sum_fit_int <- sum_fit
+    int_vector_int <- int_vector
+    mass_vector_int <- mass_vector
+    # get new fit quality
+    fq_new <- mean(abs(sum_fit_int - int_vector_int) / rep(max(sum_fit_int) /2, length(sum_fit_int)))
+  }
+  
+  # Prevent division by 0
+  if (is.nan(fq_new)) fq_new <- 1 
+  
+  list_params <- list("fq_new" = fq_new, "x_int" = mass_vector_int, "y_int" = int_vector_int)
+  return(list_params)
+}
+
diff --git a/DIMS/Utils/get_fwhm.R b/DIMS/Utils/get_fwhm.R
new file mode 100644
index 0000000..7543921
--- /dev/null
+++ b/DIMS/Utils/get_fwhm.R
@@ -0,0 +1,21 @@
+## adapted from getFwhm.R
+get_fwhm <- function(query_mass, resol) {
+  #' Calculate fwhm (full width at half maximum intensity) for a peak
+  #'
+  #' @param query_mass: Value for mass (float)
+  #' @param resol: Value for resolution (integer)
+  #'
+  #' @return fwhm: Value for full width at half maximum (float)
+
+  # set aberrant values of query_mass to zero
+  if (is.nan(query_mass)) query_mass <- 0
+  if (is.na(query_mass)) query_mass <- 0
+  if (is.null(query_mass)) query_mass <- 0
+  if (query_mass < 0) query_mass <- 0
+  # calculate resolution at given m/z value
+  resol_mz <- resol * (1 / sqrt(2) ^ (log2(query_mass / 200)))
+  # calculate full width at half maximum
+  fwhm <- query_mass / resol_mz
+  return(fwhm)
+}
+
diff --git a/DIMS/Utils/get_patient_data_to_helix.R b/DIMS/Utils/get_patient_data_to_helix.R
new file mode 100644
index 0000000..1eeaf7a
--- /dev/null
+++ b/DIMS/Utils/get_patient_data_to_helix.R
@@ -0,0 +1,39 @@
+get_patient_data_to_helix <- function(metab_interest_sorted, metab_list_all) {
+  #' Get patient data to be uploaded to Helix
+  #'
+  #' @param metab_interest_sorted: list of dataframes with metabolite Z-scores for each sample/patient (list)
+  #' @param metab_list_all: list of tables with metabolites for Helix and violin plots (list)
+  #'
+  #' @return: dataframe with patient data with only metabolites for Helix and violin plots 
+  #'          with Helix name, high/low Z-score cutoffs
+  
+  # Combine Z-scores of metab groups together
+  df_all_metabs_zscores <- bind_rows(metab_interest_sorted)
+  # Change columnnames 
+  colnames(df_all_metabs_zscores) <- c("HMDB_name", "Patient", "Z_score")
+  # Change Patient column to character instead of factor
+  df_all_metabs_zscores$Patient <- as.character(df_all_metabs_zscores$Patient)
+  
+  # Delete whitespaces HMDB_name 
+  df_all_metabs_zscores$HMDB_name <- str_trim(df_all_metabs_zscores$HMDB_name, "right")
+  
+  # Split HMDB_name column on "nitine;" for match dims_helix_table
+  df_all_metabs_zscores$HMDB_name_split <- str_split_fixed(df_all_metabs_zscores$HMDB_name, "nitine;", 2)[, 1]
+  
+  # Combine stofgroepen
+  dims_helix_table <- bind_rows(metab_list_all)
+  # Filter table for metabolites for Helix
+  dims_helix_table <- dims_helix_table %>% filter(Helix == "ja")
+  # Split HMDB_name column on "nitine;" for match df_all_metabs_zscores
+  dims_helix_table$HMDB_name_split <- str_split_fixed(dims_helix_table$HMDB_name, "nitine;", 2)[, 1]
+  dims_helix_table <- dims_helix_table %>% select(HMDB_name_split, Helix_naam, high_zscore, low_zscore)
+  
+  # Filter DIMS results for metabolites for Helix
+  df_metabs_helix <- df_all_metabs_zscores %>% filter(HMDB_name_split %in% dims_helix_table$HMDB_name_split)
+  # Combine dims_helix_table and df_metabs_helix, adding Helix codes etc.
+  df_metabs_helix <- df_metabs_helix %>% left_join(dims_helix_table, by = join_by(HMDB_name_split))
+  
+  df_metabs_helix <- df_metabs_helix %>% select(HMDB_name, Patient, Z_score, Helix_naam, high_zscore, low_zscore)
+  
+  return(df_metabs_helix)
+}
diff --git a/DIMS/Utils/get_stdev.R b/DIMS/Utils/get_stdev.R
new file mode 100644
index 0000000..a385187
--- /dev/null
+++ b/DIMS/Utils/get_stdev.R
@@ -0,0 +1,22 @@
+## adapted from getSD.R
+get_stdev <- function(mass_vector, int_vector, resol = 140000) {
+  #' Calculate standard deviation to determine width of a peak
+  #'
+  #' @param mass_vector: Vector of 3 mass values (float)
+  #' @param int_vector: Vector of 3 intensities (float)
+  #' @param resol: Value for resolution (integer)
+  #'
+  #' @return stdev: Value for standard deviation
+  # find maximum intensity in vector
+  max_index <- which(int_vector == max(int_vector))
+  # find corresponding mass at maximum intensity
+  max_mass <- mass_vector[max_index]
+  # calculate resolution at given m/z value
+  resol_mz <- resol * (1 / sqrt(2) ^ (log2(max_mass / 200)))
+  # calculate full width at half maximum
+  fwhm <- max_mass / resol_mz
+  # calculate standard deviation
+  stdev <- (fwhm / 2) * 0.85
+  return(stdev)
+}
+
diff --git a/DIMS/Utils/identify_noisepeaks.R b/DIMS/Utils/identify_noisepeaks.R
new file mode 100644
index 0000000..cf79279
--- /dev/null
+++ b/DIMS/Utils/identify_noisepeaks.R
@@ -0,0 +1,103 @@
+## adapted from ident.hires.noise.HPC
+# refactor: remove variables slope, incpt, ppm_iso_fixed
+# combine with function get_element_info
+# modified identify function to also look for adducts and their isotopes
+identify_noisepeaks <- function(peakgroup_list, all_adducts, scanmode = "Negative", look4 = c("Cl", "Ac"),
+                                noise_mz = NULL, resol = 140000, slope = 0, incpt = 0, ppm_fixed, ppm_iso_fixed) {
+  #' Replace intensities that are zero with random value
+  #'
+  #' @param peakgroup_list: Peak group list (matrix)
+  #' @param all_adducts: List of adducts to take into account (list of strings)
+  #' @param scanmode: Scan mode, positive or negative (string)
+  #' @param look4: List of adducts to look for (list of strings)
+  #' @param noise_mz: All known noise peaks (matrix)
+  #' @param resol: Value for resolution (integer)
+  #' @param slope: Value for slope for mass correction (float)
+  #' @param incpt: Value for intercept for mass correction (float)
+  #' @param ppm_fixed: Value for distance between two values of mass (integer)
+  #' @param ppm_iso_fixed: Value for distance between two values of mass for isotope peaks (integer)
+  #'
+  #' @return final_outlist: peak group list with filled-in intensities (matrix)
+
+  options(stringsAsFactors = FALSE)
+  metlin <- assi <- iso <- rep("", nrow(peakgroup_list))
+  theormz <- nisos <- expint <- conf <- rep(0, nrow(peakgroup_list))
+
+  # add adducts to identification list
+  if (scanmode == "Positive") {
+    adduct_scanmode <-  "+"
+  } else {
+    adduct_scanmode <- "-"
+  }
+  # make a copy of noise_mz
+  noise_mz_orig <- noise_mz
+
+  # loop over type of adduct
+  for (adduct_index in 1:length(look4)) {
+    noise_mz_adduct <- noise_mz_orig
+    noise_mz_adduct[, "CompoundName"] <- as.character(noise_mz_orig[, "CompoundName"])
+
+    if (look4[adduct_index] == "H2O") {
+      add2label <- paste0("[M-", look4[adduct_index], "]", adduct_scanmode)
+    } else {
+      add2label <- paste0("[M+", look4[adduct_index], "]", adduct_scanmode)
+    }
+
+    noise_mz_adduct[, "CompoundName"] <- paste0(noise_mz_adduct[, "CompoundName"], add2label)
+    adduct_info <- get_element_info(look4[adduct_index], all_adducts)
+    if (scanmode == "Positive") {
+      adduct_mass <- adduct_info$mass[1] + adduct_info$isotope$mass[1] - hydrogen_mass
+    } else {
+      adduct_mass <- adduct_info$mass[1] + adduct_info$isotope$mass[1] + hydrogen_mass
+    }
+
+    # loop over compounds in database
+    for (compound_index in 1:nrow(noise_mz_adduct)) {
+      # construct information for compound + adduct:
+      if (scanmode == "Positive") {
+        noise_mz_adduct[compound_index, "Mpos"] <-  as.numeric(noise_mz_adduct[compound_index, "Mpos"]) + adduct_mass
+        noise_mz_adduct[compound_index, "MNeg"] <-  0
+      } else {
+        noise_mz_adduct[compound_index, "Mpos"] <-  0
+        noise_mz_adduct[compound_index, "MNeg"] <-  as.numeric(noise_mz_adduct[compound_index, "MNeg"]) + adduct_mass
+      }
+    }
+    noise_mz <- rbind(noise_mz, noise_mz_adduct)
+  }
+
+  if (scanmode == "Positive") {
+    theor_mcol <- as.numeric(noise_mz[, "Mpos"])
+  } else {
+    theor_mcol <- as.numeric(noise_mz[, "MNeg"])
+  }
+
+  # get mz information from peakgroup_list
+  mcol <- peakgroup_list[, "mzmed.pgrp"]
+  # if column with average intensities is missing, calculate it:
+  if (!("avg.int" %in% colnames(peakgroup_list))) {
+    mzmaxcol <- which(colnames(peakgroup_list) == "mzmax.pgrp")
+    endcol <- ncol(peakgroup_list)
+    peakgroup_list[, "avg.int"] <- apply(peakgroup_list[, (mzmaxcol + 1):(endcol)], 1, mean)
+  }
+
+  # do indentification using own database:
+  for (row_index in 1:nrow(noise_mz)) {
+    theor_mz <- theor_mcol[row_index]
+
+    # set tolerance for mz accuracy of main peak
+    mtol <- theor_mz * ppm_fixed / 1000000
+    # find main peak
+    selp <- which(mcol > (theor_mz - mtol) & mcol < (theor_mz + mtol))
+    # if there is more than one candidate peak for main, select best one based on mz_diff
+    if (length(selp) > 1) {
+      selp <- selp[abs(mcol[selp] - theor_mz) == min(abs(mcol[selp] - theor_mz))]
+    }
+    if (length(selp) == 1) {
+      assi[selp] <- paste(assi[selp], as.character(noise_mz[row_index, "CompoundName"]), sep = ";")
+      theormz[selp] <- theor_mz
+    }
+  }
+
+  final_outlist <- cbind(peakgroup_list, assi, theormz, conf, nisos, iso, expint, metlin)
+  return(final_outlist)
+}
diff --git a/DIMS/Utils/is_diagnostic_patient.R b/DIMS/Utils/is_diagnostic_patient.R
new file mode 100644
index 0000000..5820838
--- /dev/null
+++ b/DIMS/Utils/is_diagnostic_patient.R
@@ -0,0 +1,11 @@
+is_diagnostic_patient <- function(patient_column) {
+  #' Check for Diagnostics patients with correct patient number (e.g. starting with "P2024M")
+  #'
+  #' @param patient_column: a column from dataframe with IDs (character vector)
+  #'
+  #' @return: a logical vector with TRUE or FALSE for each element (vector)
+  
+  diagnostic_patients <- grepl("^P[0-9]{4}M", patient_column)
+  
+  return(diagnostic_patients)
+}
diff --git a/DIMS/Utils/merge_duplicate_rows.R b/DIMS/Utils/merge_duplicate_rows.R
new file mode 100644
index 0000000..da3bc52
--- /dev/null
+++ b/DIMS/Utils/merge_duplicate_rows.R
@@ -0,0 +1,59 @@
+## adapted from mergeDuplicatedRows.R
+merge_duplicate_rows <- function(peakgroup_list) {
+  #' Merge identification info for peak groups with the same mass
+  #'
+  #' @param peakgroup_list: Peak group list (matrix)
+  #'
+  #' @return peakgroup_list_dedup: de-duplicated peak group list (matrix)
+
+  collapse <- function(column_label, peakgroup_list, index_dup) {
+    #' Collapse identification info for peak groups with the same mass
+    #'
+    #' @param column_label: Name of column in peakgroup_list (string)
+    #' @param peakgroup_list: Peak group list (matrix)
+    #' @param index_dup: Index of duplicate peak group (integer)
+    #'
+    #' @return collapsed_items: Semicolon-separated list of info (string)
+    # get the item(s) that need to be collapsed
+    list_items <- as.vector(peakgroup_list[index_dup, column_label])
+    # remove NA
+    if (length(which(is.na(list_items))) > 0) list_items <- list_items[-which(is.na(list_items))]
+    collapsed_items <- paste(list_items, collapse = ";")
+    return(collapsed_items)
+  }
+
+  options(digits = 16)
+  collect <- NULL
+  remove <- NULL
+
+  # check for peak groups with identical mass
+  index_dup <- which(duplicated(peakgroup_list[, "mzmed.pgrp"]))
+
+  while (length(index_dup) > 0) {
+    # get the index for the peak group which is double
+    peaklist_index <- which(peakgroup_list[, "mzmed.pgrp"] == peakgroup_list[index_dup[1], "mzmed.pgrp"])
+    single_peakgroup <- peakgroup_list[peaklist_index[1], , drop = FALSE]
+
+    # use function collapse to concatenate info
+    single_peakgroup[, "assi_HMDB"] <- collapse("assi_HMDB", peakgroup_list, peaklist_index)
+    single_peakgroup[, "iso_HMDB"] <- collapse("iso_HMDB", peakgroup_list, peaklist_index)
+    single_peakgroup[, "HMDB_code"] <- collapse("HMDB_code", peakgroup_list, peaklist_index)
+    single_peakgroup[, "assi_noise"] <- collapse("assi_noise", peakgroup_list, peaklist_index)
+    if (single_peakgroup[, "assi_noise"] == ";") single_peakgroup[, "assi_noise"] <- NA
+    single_peakgroup[, "theormz_noise"] <- collapse("theormz_noise", peakgroup_list, peaklist_index)
+    if (single_peakgroup[,"theormz_noise"] == "0;0") single_peakgroup[, "theormz_noise"] <- NA
+
+    # keep track of deduplicated entries
+    collect <- rbind(collect, single_peakgroup)
+    remove <- c(remove, peaklist_index)
+
+    # remove current entry from index
+    index_dup <- index_dup[-which(peakgroup_list[index_dup, "mzmed.pgrp"] == peakgroup_list[index_dup[1], "mzmed.pgrp"])]
+  }
+
+  # remove duplicate entries
+  if (!is.null(remove)) peakgroup_list <- peakgroup_list[-remove, ]
+  # append deduplicated entries
+  peakgroup_list_dedup <- rbind(peakgroup_list, collect)
+  return(peakgroup_list_dedup)
+}
diff --git a/DIMS/Utils/optimize_gaussfit.R b/DIMS/Utils/optimize_gaussfit.R
new file mode 100644
index 0000000..2d7f95d
--- /dev/null
+++ b/DIMS/Utils/optimize_gaussfit.R
@@ -0,0 +1,23 @@
+## adapted from optimizeGauss.R
+optimize_gaussfit <- function(mass_vector, int_vector, sigma, mass_max) {
+  #' Optimize fit of Gaussian curve to small region of m/z
+  #'
+  #' @param mass_vector: Vector of m/z values for a region of interest (float)
+  #' @param int_vector: Vector of intensities for a region of interest (float)
+  #' @param sigma: Value for standard deviation (float)
+  #' @param mass_max: Value for mass at center of peak (float)
+  #'
+  #' @return opt_fit: list of fit values for region of interest (list)
+
+  # define optimization function for optim based on normal distribution
+  opt_f <- function(p, mass_vector, int_vector, sigma, mass_max) {
+    curve <- p * dnorm(mass_vector, mass_max, sigma)
+    return((max(curve) - max(int_vector))^2)
+  }
+
+  # get optimal value for fitted Gaussian curve
+  opt_fit <- optimize(opt_f, c(0, 100000), tol = 0.0001, mass_vector, int_vector, sigma, mass_max)
+
+  return(opt_fit$minimum)
+}
+
diff --git a/DIMS/Utils/output_helix.R b/DIMS/Utils/output_helix.R
new file mode 100644
index 0000000..09fa1a9
--- /dev/null
+++ b/DIMS/Utils/output_helix.R
@@ -0,0 +1,39 @@
+output_for_helix <- function(protocol_name, df_metabs_helix) {
+  #' Get the output dataframe for Helix
+  #'
+  #' @param protocol_name: protocol name (string)
+  #' @param df_metabs_helix: dataframe with metabolite Z-scores for patients (dataframe)
+  #'
+  #' @return: dataframe with patient metabolite Z-scores in correct format for Helix
+
+  # Remove positive controls
+  df_metabs_helix <- df_metabs_helix %>% filter(is_diagnostic_patient(Patient))
+
+  # Add 'Vial' column, each patient has unique ID
+  df_metabs_helix <- df_metabs_helix %>%
+    group_by(Patient) %>%
+    mutate(Vial = cur_group_id()) %>%
+    ungroup()
+
+  # Split patient number into labnummer and Onderzoeksnummer
+  df_metabs_helix <- add_lab_id_and_onderzoeksnummer(df_metabs_helix)
+
+  # Add column with protocol name
+  df_metabs_helix$Protocol <- protocol_name
+
+  # Change name Z_score and Helix_naam columns to Amount and Name
+  change_columns <- c(Amount = "Z_score", Name = "Helix_naam")
+  df_metabs_helix <- df_metabs_helix %>% rename(all_of(change_columns))
+
+  # Select only necessary columns and set them in correct order
+  df_metabs_helix <- df_metabs_helix %>%
+    select(c(Vial, labnummer, Onderzoeksnummer, Protocol, Name, Amount))
+
+  # Remove duplicate patient-metabolite combinations ("leucine + isoleucine + allo-isoleucin_Z-score" is added 3 times)
+  df_metabs_helix <- df_metabs_helix %>%
+    group_by(Onderzoeksnummer, Name) %>%
+    distinct() %>%
+    ungroup()
+
+  return(df_metabs_helix)
+}
diff --git a/DIMS/Utils/prepare_alarmvalues.R b/DIMS/Utils/prepare_alarmvalues.R
new file mode 100644
index 0000000..94ffbfd
--- /dev/null
+++ b/DIMS/Utils/prepare_alarmvalues.R
@@ -0,0 +1,59 @@
+prepare_alarmvalues <- function(pt_name, dims_helix_table) {
+  #' Create a dataframe with all metabolites that exceed the min and max Z-score cutoffs
+  #'
+  #' @param pt_name: patient code (string)
+  #' @param dims_helix_table: dataframe with metabolite Z-scores for each patient and Helix info (dataframe)
+  #'
+  #' @return: dataframe with metabolites that exceed the min and max Z-score cutoffs for the selected patient
+
+  # extract data for patient of interest (pt_name)
+  pt_metabs_helix <- dims_helix_table %>% filter(Patient == pt_name)
+  pt_metabs_helix$Z_score <- round(pt_metabs_helix$Z_score, 2)
+
+  # Make empty dataframes for metabolites above or below alarmvalues
+  pt_list_high <- data.frame(HMDB_name = character(), Z_score = numeric())
+  pt_list_low <- data.frame(HMDB_name = character(), Z_score = numeric())
+
+  # Loop over individual metabolites
+  for (metab in unique(pt_metabs_helix$HMDB_name)){
+    # Get data for individual metabolite
+    pt_metab <- pt_metabs_helix %>% filter(HMDB_name == metab)
+    # print(pt_metab)
+
+    # Check if zscore is positive of negative
+    if (pt_metab$Z_score > 0) {
+      # Get specific alarmvalue for metabolite
+      high_zscore_cutoff_metab <- pt_metabs_helix %>% filter(HMDB_name == metab) %>% pull(high_zscore)
+
+      # If zscore is above the alarmvalue, add to pt_list_high table
+      if (pt_metab$Z_score > high_zscore_cutoff_metab) {
+        pt_metab_high <- pt_metab %>% select(HMDB_name, Z_score)
+        pt_list_high <- rbind(pt_list_high, pt_metab_high)
+      }
+    } else {
+      # Get specific alarmvalue for metabolite
+      low_zscore_cutoff_metab <- pt_metabs_helix %>% filter(HMDB_name == metab) %>% pull(low_zscore)
+
+      # If zscore is below the alarmvalue, add to pt_list_low table
+      if (pt_metab$Z_score < low_zscore_cutoff_metab) {
+        pt_metab_low <- pt_metab %>% select(HMDB_name, Z_score)
+        pt_list_low <- rbind(pt_list_low, pt_metab_low)
+      }
+    }
+  }
+
+  # sort tables on zscore
+  pt_list_high <- pt_list_high %>% arrange(desc(Z_score))
+  pt_list_low <- pt_list_low %>% arrange(Z_score)
+  # add lines for increased, decreased
+  extra_line1 <- c("Increased", "")
+  extra_line2 <- c("Decreased", "")
+  # combine the two lists
+  top_metab_pt <- rbind(extra_line1, pt_list_high, extra_line2, pt_list_low)
+  # remove row names
+  rownames(top_metab_pt) <- NULL
+  # change column names for display
+  colnames(top_metab_pt) <- c("Metabolite", "Z-score")
+
+  return(top_metab_pt)
+}
diff --git a/DIMS/Utils/prepare_data.R b/DIMS/Utils/prepare_data.R
new file mode 100644
index 0000000..ea3efe5
--- /dev/null
+++ b/DIMS/Utils/prepare_data.R
@@ -0,0 +1,51 @@
+# unused variables will be removed: metab_list_alarm
+prepare_data <- function(metab_list_all, zscore_patients_local) {
+  #' Combine patient Z-scores with metabolite info
+  #'
+  #' @param metab_list_all: list of dataframes with metabolite information for different stofgroepen (list)
+  #' @param zscore_patients_local: dataframe with metabolite Z-scores for all patient
+  #'
+  #' @return: list of dataframes for each stofgroep with data for each metabolite and patient/control per row
+
+  # remove "_Zscore" from column (patient) names
+  colnames(zscore_patients_local) <- gsub("_Zscore", "", colnames(zscore_patients_local))
+  # put data into pages, max 20 violin plots per page in PDF
+  metab_interest_sorted <- list()
+  metab_category <- c()
+  for (metab_class_index in 1:length(metab_list_all)) {
+    metab_class <- names(metab_list_all)[metab_class_index]
+    metab_list <- metab_list_all[[metab_class_index]]
+    if (ncol(metab_list) > 2) {
+      # third column are the alarm values, so reduce the data frame to 2 columns and save list
+      metab_list_alarm <- metab_list
+      metab_list <- metab_list[, c(1, 2)]
+    }
+    # make sure that all HMDB_names have 45 characters
+    for (metab_index in 1:length(metab_list$HMDB_name)) {
+      if (is.character(metab_list$HMDB_name[metab_index])) {
+        hmdb_name_separated <- strsplit(metab_list$HMDB_name[metab_index], "")[[1]]
+      } else {
+        hmdb_name_separated <- "strspliterror"
+      }
+      if (length(hmdb_name_separated) <= 45) {
+        hmdb_name_separated <- c(hmdb_name_separated, rep(" ", 45 - length(hmdb_name_separated)))
+      } else {
+        hmdb_name_separated <- c(hmdb_name_separated[1:42], "...")
+      }
+      metab_list$HMDB_name[metab_index] <- paste0(hmdb_name_separated, collapse = "")
+    }
+    # find metabolites and ratios in data frame zscore_patients_local
+    metab_interest <- inner_join(metab_list, zscore_patients_local[-2], by = "HMDB_code")
+    # remove column "HMDB_code"
+    metab_interest <- metab_interest[, -which(colnames(metab_interest) == "HMDB_code")]
+    # put the data frame in long format
+    metab_interest_melt <- reshape2::melt(metab_interest, id.vars = "HMDB_name")
+    # sort on metabolite names (HMDB_name)
+    sort_order <- order(metab_interest_melt$HMDB_name)
+    metab_interest_sorted[[metab_class_index]] <- metab_interest_melt[sort_order, ]
+    metab_category <- c(metab_category, metab_class)
+  }
+  names(metab_interest_sorted) <- metab_category
+
+  return(metab_interest_sorted)
+}
diff --git a/DIMS/Utils/prepare_data_perpage.R b/DIMS/Utils/prepare_data_perpage.R
new file mode 100644
index 0000000..61d2e15
--- /dev/null
+++ b/DIMS/Utils/prepare_data_perpage.R
@@ -0,0 +1,58 @@
+# remove default variable values
+prepare_data_perpage <- function(metab_interest_sorted, metab_interest_contr, nr_plots_perpage, nr_pat = 20, nr_contr = 30) {
+  #' Combine patient and control data for each page of the violinplot pdf
+  #'
+  #' @param metab_interest_sorted: list of dataframes with data for each metabolite and patient (list)
+  #' @param metab_interest_contr:  list of dataframes with data for each metabolite and control (list)
+  #' @param nr_plots_perpage: number of plots per page in the violinplot pdf (integer)
+  #' @param nr_pat: number of patients (integer)
+  #' @param nr_contr: number of controls (integer)
+  #'
+  #' @return: list of dataframes with metabolite Z-scores for each patient and control,
+  #'          the length of list is the number of pages for the violinplot pdf (list)
+
+  total_nr_pages <- 0
+  metab_perpage <- list()
+  metab_category <- c()
+  for (metab_class_index in 1:length(metab_interest_sorted)) {
+    # split list into pages, each page containing max nr_plots_perpage (20) compounds
+    metab_interest_perclass <- metab_interest_sorted[[metab_class_index]]
+    metab_class <- names(metab_interest_sorted)[metab_class_index]
+    # add controls
+    metab_interest_contr_perclass <- metab_interest_contr[[metab_class_index]]
+    # number of pages for this class
+    nr_pages <- ceiling(length(unique(metab_interest_perclass$HMDB_name)) / nr_plots_perpage)
+    for (page_nr in 1:nr_pages) {
+      total_nr_pages <- total_nr_pages + 1
+      select_rows_start <- (nr_pat * nr_plots_perpage * (page_nr - 1)) + 1
+      select_rows_end <- nr_pat * nr_plots_perpage * page_nr
+      metab_onepage_pat <- metab_interest_perclass[select_rows_start:select_rows_end, ]
+      # same for controls
+      select_rows_start_contr <- (nr_contr * nr_plots_perpage * (page_nr - 1)) + 1
+      select_rows_end_contr <- nr_contr * nr_plots_perpage * page_nr
+      metab_onepage_pcontr <- metab_interest_contr_perclass[select_rows_start_contr:select_rows_end_contr, ]
+      # add controls
+      metab_onepage <- rbind(metab_onepage_pat, metab_onepage_pcontr)
+      # if a page has fewer than nr_plots_perpage plots, fill page with empty plots
+      na_rows <- which(is.na(metab_onepage$HMDB_name))
+      if (length(na_rows) > 0) {
+        # repeat the patient and control variables
+        metab_onepage$variable[na_rows] <- metab_onepage$variable[1:(nr_pat + nr_contr)]
+        # for HMDB name, substitute a number of spaces
+        for (row_nr in na_rows) {
+          metab_onepage$HMDB_name[row_nr] <- paste0(rep("_", ceiling(row_nr / (nr_pat + nr_contr))), collapse = "")
+        }
+        metab_onepage$HMDB_name <- gsub("_", " ", metab_onepage$HMDB_name)
+        # leave the values at NA
+      }
+      # put data for one page into object with data for all pages
+      metab_perpage[[total_nr_pages]] <- metab_onepage
+      # create list of page headers
+      metab_category <- c(metab_category, paste(metab_class, page_nr, sep = "_"))
+    }
+  }
+  # add page headers to list
+  names(metab_perpage) <- metab_category
+
+  return(metab_perpage)
+}
diff --git a/DIMS/Utils/prepare_toplist.R b/DIMS/Utils/prepare_toplist.R
new file mode 100644
index 0000000..cc616c4
--- /dev/null
+++ b/DIMS/Utils/prepare_toplist.R
@@ -0,0 +1,35 @@
+prepare_toplist <- function(pt_name, zscore_patients_copy) {
+  #' Create a dataframe with the top 20 highest and top 10 lowest metabolites per patient
+  #'
+  #' @param pt_name: patient code (string)
+  #' @param zscore_patients_copy: dataframe with metabolite Z-scores per patient (dataframe)
+  #'
+  #' @return: dataframe with 30 metabolites and Z-scores (dataframe)
+
+  # set parameters for table
+  top_highest <- 20
+  top_lowest <- 10
+
+  # extract data for patient of interest (pt_name)
+  pt_list <- zscore_patients_copy[, c(1, 2, which(colnames(zscore_patients_copy) == pt_name))]
+  # sort metabolites on Z-scores for this patient
+  pt_list_sort <- sort(pt_list[, 3], index.return = TRUE)
+  # determine top highest and lowest Z-scores for this patient
+  pt_list_sort <- sort(pt_list[, 3], index.return = TRUE)
+  pt_list_low <- pt_list[pt_list_sort$ix[1:top_lowest], ]
+  pt_list_high <- pt_list[pt_list_sort$ix[length(pt_list_sort$ix):(length(pt_list_sort$ix) - top_highest + 1)], ]
+  # round off Z-scores
+  pt_list_low[, 3] <- round(as.numeric(pt_list_low[, 3]), 2)
+  pt_list_high[, 3] <- round(as.numeric(pt_list_high[, 3]), 2)
+  # add lines for increased, decreased
+  extra_line1 <- c("Increased", "", "")
+  extra_line2 <- c("Decreased", "", "")
+  top_metab_pt <- rbind(extra_line1, pt_list_high, extra_line2, pt_list_low)
+  # remove row names
+  rownames(top_metab_pt) <- NULL
+
+  # change column names for display
+  colnames(top_metab_pt) <- c("HMDB_ID", "Metabolite", "Z-score")
+
+  return(top_metab_pt)
+}
diff --git a/DIMS/Utils/replace_zeros.R b/DIMS/Utils/replace_zeros.R
new file mode 100644
index 0000000..0ef06a3
--- /dev/null
+++ b/DIMS/Utils/replace_zeros.R
@@ -0,0 +1,64 @@
+## adapted from replaceZeros.R
+# this function does two things: replace zeros with random value and identify noise peaks
+# refactor: split into two functions
+# remove parameters outdir and thresh
+# make hard-coded path to file with noise peaks into variable
+# remove variables outdir, thresh
+replace_zeros <- function(peakgroup_list, repl_pattern, scanmode, resol, outdir, thresh, ppm) {
+  #' Replace intensities that are zero with random value
+  #'
+  #' @param peakgroup_list: Peak group list (matrix)
+  #' @param repl_pattern: Replication pattern (list of strings)
+  #' @param scanmode: Scan mode, positive or negative (string)
+  #' @param resol: Value for resolution (integer)
+  #' @param outdir: Path for output directory (string)
+  #' @param thresh: Value for threshold (integer)
+  #' @param ppm: Value for distance between two values of mass (integer)
+  #'
+  #' @return final_outlist: peak group list with filled-in intensities (matrix)
+
+  # replace zeros
+  if (!is.null(peakgroup_list)) {
+    for (sample_index in 1:length(names(repl_pattern))) {
+      sample_peaks <- peakgroup_list[, names(repl_pattern)[sample_index]]
+      zero_intensity <- which(sample_peaks <= 0)
+      if (!length(zero_intensity)) {
+        next
+      }
+      for (zero_index in 1:length(zero_intensity)) {
+        area <- fit_optim(peakgroup_list[zero_intensity[zero_index], "mzmed.pgrp"], thresh,
+                          resol, FALSE, scanmode, int_factor = 1 * 10^5, 1, 1)$area
+        peakgroup_list[zero_intensity[zero_index], names(repl_pattern)[sample_index]] <- rnorm(n = 1, mean = area,
+                                                                                               sd = 0.25 * area)
+      }
+    }
+
+    # Add column with average intensity
+    peakgroup_list <- cbind(peakgroup_list, "avg.int" = apply(peakgroup_list[, 7:(ncol(peakgroup_list) - 4)], 1, mean))
+
+    if (scanmode == "negative") {
+      label <- "MNeg"
+      label2 <- "Negative"
+      # look for adducts in negative mode
+      look4_adducts <- c("Cl", "Cl37", "For", "NaCl", "KCl", "H2PO4", "HSO4", "Na-H", "K-H", "H2O", "I")
+    } else {
+      label <- "Mpos"
+      label2 <- "Positive"
+      # look for adducts in positive mode
+      look4_adducts <- c("Na", "K", "NaCl", "NH4", "2Na-H", "CH3OH", "KCl", "NaK-H")
+    }
+
+    # Identify noise peaks
+    noise_mz <- read.table(file = "/hpc/dbg_mz/tools/db/TheoreticalMZ_NegPos_incNaCl.txt",
+                           sep = "\t", header = TRUE, quote = "")
+    noise_mz <- noise_mz[(noise_mz[, label] != 0), 1:4]
+    outlist_withnoise <- identify_noisepeaks(peakgroup_list, all_adducts, scanmode = label2,
+                                             noise_mz, look4 = look4_adducts, resol = resol,
+                                             slope = 0, incpt = 0, ppm_fixed = ppm, ppm_iso_fixed = ppm)
+    noise_info <- outlist_withnoise[, c("assi", "theormz")]
+    colnames(noise_info) <- c("assi_noise",  "theormz_noise")
+
+    final_outlist <- cbind(peakgroup_list, noise_info)
+    return(final_outlist)
+  }
+}
diff --git a/DIMS/Utils/search_mzrange.R b/DIMS/Utils/search_mzrange.R
new file mode 100644
index 0000000..dcdc42a
--- /dev/null
+++ b/DIMS/Utils/search_mzrange.R
@@ -0,0 +1,180 @@
+## adapted from searchMZRange.R
+# variables with fixed values will be removed from function parameters
+# int_factor, scale, outdir, plot, thresh, width, height
+# allpeaks_values should be generated here, not passed on from do_peakfinding
+search_mzrange <- function(ints_fullrange, allpeaks_values, int_factor, scale, resol,
+                           outdir, sample_name, scanmode, plot, width, height, thresh) {
+  #' Divide the full m/z range into regions of interest with min, max and mean m/z
+  #'
+  #' @param ints_fullrange: Named list of intensities (float)
+  #' @param allpeaks_values: Empty list to store results for all peaks
+  #' @param int_factor: Value used to calculate area under Gaussian curve (integer)
+  #' @param scale: Initial value used to estimate scaling parameter (integer)
+  #' @param resol: Value for resolution (integer)
+  #' @param outdir: Path for output directory (string)
+  #' @param sample_name: Sample name (string)
+  #' @param scanmode: Scan mode, positive or negative (string)
+  #' @param plot: Parameter indicating whether plots should be made (boolean)
+  #' @param width: Value for width of plot (integer)
+  #' @param height: Value for height of plot (integer)
+  #' @param thresh: Value for noise level threshold (integer)
+  #'
+  #' @return allpeaks_values: list of m/z regions of interest
+
+  # find indices where intensity is not equal to zero
+  nonzero_indices <- as.vector(which(ints_fullrange != 0))
+
+  # bad infusion. These should have been taken out in AverageTechReplicates
+  if (length(nonzero_indices) == 0) return(allpeaks_values)
+
+  # initialize
+  end_index <- NULL
+  start_index <- nonzero_indices[1]
+  # maximum length of region of interest
+  max_roi_length <- 15
+
+  # find regions of interest
+  for (mz_index in 1:length(nonzero_indices)) {
+    # check whether mz_index is smaller than length(nonzero_indices).
+    # only false if mz_index == length(nonzero_indixes). 
+    # second check is true at the end of a peak.
+    if (mz_index < length(nonzero_indices) && (nonzero_indices[mz_index + 1] - nonzero_indices[mz_index]) > 1) {
+      end_index <- nonzero_indices[mz_index]
+      # get m/z values and intensities for this region of interest
+      mass_vector <- as.numeric(names(ints_fullrange)[c(start_index:end_index)])
+      int_vector <- as.vector(ints_fullrange[c(start_index:end_index)])
+      # check whether the vector of intensities is not empty. 
+      if (length(int_vector) != 0) {
+        # check if intensity is above threshold or the maximum intensity is NaN
+        if (max(int_vector) < thresh || is.nan(max(int_vector))) {
+          # go to next region of interest
+          start_index <- nonzero_indices[mz_index + 1]
+          next
+        }
+        # check if there are more intensities than maximum for region of interest
+        if (length(int_vector) > max_roi_length) {
+          # trim lowest intensities to zero
+          int_vector[which(int_vector < min(int_vector) * 1.1)] <- 0
+          # split the range into multiple sub ranges
+          sub_range <- int_vector
+          names(sub_range) <- mass_vector
+          allpeaks_values <- search_mzrange(sub_range, allpeaks_values, int_factor,
+                                            scale, resol, outdir, sample_name, scanmode,
+                                            plot, width, height, thresh)
+        # A proper peak needs to have at least 3 intensities above threshold  
+        } else if (length(int_vector) > 3) {
+          # check if the sum of intensities is above zero. Why is this necessary?
+          if (sum(int_vector) == 0) next
+          # get initial fit values
+          roi_values <- fit_init(mass_vector, int_vector, int_factor, scale, resol,
+                                 outdir, sample_name, scanmode, plot, width, height,
+                                 mz_index, start_index, end_index)
+	  print(roi_values)
+          if (roi_values$qual[1] == 1) {
+            # get optimized fit values
+            roi_values <- fit_optim(mass_vector, int_vector, resol, plot,
+                                    scanmode, int_factor, width, height)
+            # add region of interest to list of all peaks
+            allpeaks_values$mean <- c(allpeaks_values$mean, roi_values$mean)
+            allpeaks_values$area <- c(allpeaks_values$area, roi_values$area)
+            allpeaks_values$nr <- c(allpeaks_values$nr, sample_name)
+            allpeaks_values$min <- c(allpeaks_values$min, roi_values$min)
+            allpeaks_values$max <- c(allpeaks_values$max, roi_values$max)
+            allpeaks_values$qual <- c(allpeaks_values$qual, 0)
+            allpeaks_values$spikes <- allpeaks_values$spikes + 1
+
+          } else {
+            for (j in 1:length(roi_values$mean)){
+              allpeaks_values$mean <- c(allpeaks_values$mean, roi_values$mean[j])
+              allpeaks_values$area <- c(allpeaks_values$area, roi_values$area[j])
+              allpeaks_values$nr <- c(allpeaks_values$nr, sample_name)
+              allpeaks_values$min <- c(allpeaks_values$min, roi_values$min[1])
+              allpeaks_values$max <- c(allpeaks_values$max, roi_values$max[1])
+              allpeaks_values$qual <- c(allpeaks_values$qual, roi_values$qual[1])
+            }
+          }
+
+        } else {
+
+          roi_values <- fit_optim(mass_vector, int_vector, resol,
+                                  plot, scanmode, int_factor, width, height)
+          allpeaks_values$mean <- c(allpeaks_values$mean, roi_values$mean)
+          allpeaks_values$area <- c(allpeaks_values$area, roi_values$area)
+          allpeaks_values$nr <- c(allpeaks_values$nr, sample_name)
+          allpeaks_values$min <- c(allpeaks_values$min, roi_values$min)
+          allpeaks_values$max <- c(allpeaks_values$max, roi_values$max)
+          allpeaks_values$qual <- c(allpeaks_values$qual, 0)
+          allpeaks_values$spikes <- allpeaks_values$spikes + 1
+        }
+      }
+      start_index <- nonzero_indices[mz_index + 1]
+    }
+  }
+
+  # last little range
+  end_index <- nonzero_indices[length(nonzero_indices)]
+  mass_vector <- as.numeric(names(ints_fullrange)[c(start_index:end_index)])
+  int_vector <- as.vector(ints_fullrange[c(start_index:end_index)])
+
+  if (length(int_vector) != 0) {
+    # check if intensity above threshold
+    if (max(int_vector) < thresh || is.nan(max(int_vector))) {
+      # do nothing
+    } else {
+      # check if there are more intensities than maximum for region of interest
+      if (length(int_vector) > max_roi_length) {
+        # trim lowest intensities to zero
+        int_vector[which(int_vector < min(int_vector) * 1.1)] <- 0
+        # split the range into multiple sub ranges
+        sub_range <- int_vector
+        names(sub_range) <- mass_vector
+
+        allpeaks_values <- search_mzrange(sub_range, allpeaks_values, int_factor, scale, resol,
+                                          outdir, sample_name, scanmode,
+                                          plot, width, height, thresh)
+
+      } else if (length(int_vector) > 3) {
+        # Check only zeros
+        if (sum(int_vector) == 0) next
+
+        roi_values <- fit_init(mass_vector, int_vector, int_factor, scale, resol, 
+			       outdir, sample_name, scanmode, plot, width, height,
+                               mz_index, start_index, end_index)
+        if (roi_values$qual[1] == 1) {
+          roi_values <- fit_optim(mass_vector, int_vector, resol,
+                                  plot, scanmode, int_factor, width, height)
+
+          allpeaks_values$mean <- c(allpeaks_values$mean, roi_values$mean)
+          allpeaks_values$area <- c(allpeaks_values$area, roi_values$area)
+          allpeaks_values$nr <- c(allpeaks_values$nr, sample_name)
+          allpeaks_values$min <- c(allpeaks_values$min, roi_values$min)
+          allpeaks_values$max <- c(allpeaks_values$max, roi_values$max)
+          allpeaks_values$qual <- c(allpeaks_values$qual, 0)
+          allpeaks_values$spikes <- allpeaks_values$spikes + 1
+
+        } else {
+          for (j in 1:length(roi_values$mean)){
+            allpeaks_values$mean <- c(allpeaks_values$mean, roi_values$mean[j])
+            allpeaks_values$area <- c(allpeaks_values$area, roi_values$area[j])
+            allpeaks_values$nr <- c(allpeaks_values$nr, sample_name)
+            allpeaks_values$min <- c(allpeaks_values$min, roi_values$min[1])
+            allpeaks_values$max <- c(allpeaks_values$max, roi_values$max[1])
+            allpeaks_values$qual <- c(allpeaks_values$qual, roi_values$qual[1])
+          }
+        }
+      } else {
+        roi_values <- fit_optim(mass_vector, int_vector, resol,
+                                plot, scanmode, int_factor, width, height)
+        allpeaks_values$mean <- c(allpeaks_values$mean, roi_values$mean)
+        allpeaks_values$area <- c(allpeaks_values$area, roi_values$area)
+        allpeaks_values$nr <- c(allpeaks_values$nr, sample_name)
+        allpeaks_values$min <- c(allpeaks_values$min, roi_values$min)
+        allpeaks_values$max <- c(allpeaks_values$max, roi_values$max)
+        allpeaks_values$qual <- c(allpeaks_values$qual, 0)
+        allpeaks_values$spikes <- allpeaks_values$spikes + 1
+      }
+    }
+  }
+  return(allpeaks_values)
+}
+
diff --git a/DIMS/Utils/sum_curves.R b/DIMS/Utils/sum_curves.R
new file mode 100644
index 0000000..542026b
--- /dev/null
+++ b/DIMS/Utils/sum_curves.R
@@ -0,0 +1,35 @@
+## adapted from sumCurves.R
+# variables with fixed values will be removed from function parameters
+# plot
+# parameter half_max not used
+sum_curves <- function(mean1, mean2, scale1, scale2, sigma1, sigma2, mass_vector2, mass_vector, resol, plot) {
+  #' Sum two curves
+  #'
+  #' @param mean1: Value for mean m/z of first peak (float)
+  #' @param mean2: Value for mean m/z of second peak (float)
+  #' @param scale1: Initial value used to estimate scaling parameter for first peak (integer)
+  #' @param scale2: Initial value used to estimate scaling parameter for second peak (integer)
+  #' @param sigma1: Value for standard deviation for first peak (float)
+  #' @param sigma2: Value for standard deviation for second peak (float)
+  #' @param mass_vector2: Vector of equally spaced m/z values (float)
+  #' @param mass_vector: Vector of m/z values for a region of interest (float)
+  #' @param resol: Value for resolution (integer)
+  #' @param plot: Parameter indicating whether plots should be made (boolean)
+  #'
+  #' @return list_params: list of parameters indicating quality of fit (list)
+
+  sum_fit <- (scale1 * dnorm(mass_vector2, mean1, sigma1)) + (scale2 * dnorm(mass_vector2, mean2, sigma2))
+
+  mean1_plus2 <- weighted.mean(c(mean1, mean2), c(max(scale1 * dnorm(mass_vector2, mean1, sigma1)),
+                                                  max(scale2 * dnorm(mass_vector2, mean2, sigma2))))
+
+  # get new values for parameters
+  fwhm <- get_fwhm(mean1_plus2, resol)
+  area <- max(sum_fit)
+  scale <- scale1 + scale2
+  sigma <- (fwhm / 2) * 0.85
+
+  list_params <- list("mean" = mean1_plus2, "area" = area, "scale" = scale, "sigma" = sigma)
+  return(list_params)
+}
+
diff --git a/DIMS/Utils/within_ppm.R b/DIMS/Utils/within_ppm.R
new file mode 100644
index 0000000..abdb0d4
--- /dev/null
+++ b/DIMS/Utils/within_ppm.R
@@ -0,0 +1,64 @@
+## adapted from isWithinXppm.R
+# variables with fixed values will be removed from function parameters
+# plot
+within_ppm <- function(mean, scale, sigma, area, mass_vector2, mass_vector, ppm = 4, resol, plot) {
+  #' Test whether two mass ranges are within ppm distance of each other
+  #'
+  #' @param mean: Value for mean m/z (float)
+  #' @param scale: Initial value used to estimate scaling parameter (integer)
+  #' @param sigma: Value for standard deviation (float)
+  #' @param area: Value for area under the curve (float)
+  #' @param mass_vector2: Vector of equally spaced m/z values (float)
+  #' @param mass_vector: Vector of m/z values for a region of interest (float)
+  #' @param ppm: Value for distance between two values of mass (integer)
+  #' @param resol: Value for resolution (integer)
+  #' @param plot: Parameter indicating whether plots should be made (boolean)
+  #'
+  #' @return list_params: list of parameters indicating quality of fit (list)
+
+  # sort
+  index <- order(mean)
+  mean <- mean[index]
+  scale <- scale[index]
+  sigma <- sigma[index]
+  area <- area[index]
+
+  summed <- NULL
+  remove <- NULL
+
+  if (length(mean) > 1) {
+    for (i in 2:length(mean)) {
+      if ((abs(mean[i - 1] - mean[i]) / mean[i - 1]) * 10^6 < ppm) {
+
+        # avoid double occurance in sum
+        if ((i - 1) %in% summed) next
+
+        result_values <- sum_curves(mean[i - 1], mean[i], scale[i - 1], scale[i], sigma[i - 1], sigma[i],
+                                    mass_vector2, mass_vector, resol, plot)
+        summed <- c(summed, i - 1, i)
+        if (is.nan(result_values$mean)) result_values$mean <- 0
+        mean[i - 1] <- result_values$mean
+        mean[i] <- result_values$mean
+        area[i - 1] <- result_values$area
+        area[i] <- result_values$area
+        scale[i - 1] <- result_values$scale
+        scale[i] <- result_values$scale
+        sigma[i - 1] <- result_values$sigma
+        sigma[i] <- result_values$sigma
+
+        remove <- c(remove, i)
+      }
+    }
+  }
+
+  if (length(remove) != 0) {
+    mean <- mean[-c(remove)]
+    area <- area[-c(remove)]
+    scale <- scale[-c(remove)]
+    sigma <- sigma[-c(remove)]
+  }
+
+  list_params <- list("mean" = mean, "area" = area, "scale" = scale, "sigma" = sigma, "qual" = NULL)
+  return(list_params)
+}
+